aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c2
-rw-r--r--fs/Makefile2
-rw-r--r--fs/attr.c14
-rw-r--r--fs/autofs4/dev-ioctl.c2
-rw-r--r--fs/binfmt_elf.c60
-rw-r--r--fs/block_dev.c12
-rw-r--r--fs/btrfs/extent-tree.c1
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/inode.c28
-rw-r--r--fs/buffer.c12
-rw-r--r--fs/ceph/acl.c4
-rw-r--r--fs/ceph/addr.c2
-rw-r--r--fs/ceph/dir.c5
-rw-r--r--fs/ceph/inode.c27
-rw-r--r--fs/ceph/mds_client.c5
-rw-r--r--fs/ceph/super.h1
-rw-r--r--fs/ceph/xattr.c3
-rw-r--r--fs/cifs/cifs_unicode.c6
-rw-r--r--fs/cifs/cifs_unicode.h5
-rw-r--r--fs/cifs/cifsglob.h12
-rw-r--r--fs/cifs/cifssmb.c7
-rw-r--r--fs/cifs/connect.c37
-rw-r--r--fs/cifs/file.c6
-rw-r--r--fs/cifs/ioctl.c2
-rw-r--r--fs/cifs/readdir.c1
-rw-r--r--fs/cifs/smb1ops.c19
-rw-r--r--fs/cifs/smb2misc.c44
-rw-r--r--fs/cifs/smb2ops.c8
-rw-r--r--fs/cifs/smb2pdu.c23
-rw-r--r--fs/cifs/smb2proto.h7
-rw-r--r--fs/cifs/smb2transport.c58
-rw-r--r--fs/cifs/transport.c2
-rw-r--r--fs/configfs/symlink.c3
-rw-r--r--fs/coredump.c20
-rw-r--r--fs/dcache.c32
-rw-r--r--fs/debugfs/inode.c10
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/exec.c37
-rw-r--r--fs/ext2/ext2.h3
-rw-r--r--fs/ext2/super.c25
-rw-r--r--fs/ext2/xattr.c143
-rw-r--r--fs/ext2/xattr.h21
-rw-r--r--fs/ext4/Kconfig1
-rw-r--r--fs/ext4/crypto.c96
-rw-r--r--fs/ext4/crypto_fname.c5
-rw-r--r--fs/ext4/crypto_key.c129
-rw-r--r--fs/ext4/crypto_policy.c70
-rw-r--r--fs/ext4/dir.c6
-rw-r--r--fs/ext4/ext4.h24
-rw-r--r--fs/ext4/ext4_crypto.h5
-rw-r--r--fs/ext4/extents.c32
-rw-r--r--fs/ext4/file.c53
-rw-r--r--fs/ext4/inline.c26
-rw-r--r--fs/ext4/inode.c107
-rw-r--r--fs/ext4/ioctl.c7
-rw-r--r--fs/ext4/mballoc.c7
-rw-r--r--fs/ext4/move_extent.c2
-rw-r--r--fs/ext4/namei.c22
-rw-r--r--fs/ext4/page-io.c14
-rw-r--r--fs/ext4/readpage.c8
-rw-r--r--fs/ext4/resize.c3
-rw-r--r--fs/ext4/super.c25
-rw-r--r--fs/ext4/sysfs.c2
-rw-r--r--fs/ext4/xattr.c168
-rw-r--r--fs/ext4/xattr.h5
-rw-r--r--fs/f2fs/acl.c2
-rw-r--r--fs/f2fs/crypto_fname.c2
-rw-r--r--fs/f2fs/crypto_key.c28
-rw-r--r--fs/f2fs/crypto_policy.c68
-rw-r--r--fs/f2fs/data.c34
-rw-r--r--fs/f2fs/dir.c32
-rw-r--r--fs/f2fs/f2fs.h17
-rw-r--r--fs/f2fs/f2fs_crypto.h1
-rw-r--r--fs/f2fs/file.c9
-rw-r--r--fs/f2fs/hash.c7
-rw-r--r--fs/f2fs/inline.c17
-rw-r--r--fs/f2fs/super.c121
-rw-r--r--fs/fat/inode.c13
-rw-r--r--fs/fcntl.c14
-rw-r--r--fs/fs_struct.c3
-rw-r--r--fs/fscache/cookie.c5
-rw-r--r--fs/fscache/netfs.c1
-rw-r--r--fs/fscache/object.c32
-rw-r--r--fs/fuse/dev.c4
-rw-r--r--fs/fuse/file.c3
-rw-r--r--fs/gfs2/dir.c4
-rw-r--r--fs/gfs2/glock.c16
-rw-r--r--fs/gfs2/incore.h3
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/inode.c6
-rw-r--r--fs/internal.h4
-rw-r--r--fs/jbd2/transaction.c4
-rw-r--r--fs/jfs/super.c4
-rw-r--r--fs/mbcache2.c359
-rw-r--r--fs/mount.h4
-rw-r--r--fs/mpage.c8
-rw-r--r--fs/namei.c187
-rw-r--r--fs/namespace.c192
-rw-r--r--fs/nfs/Kconfig1
-rw-r--r--fs/nfs/dir.c33
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c1
-rw-r--r--fs/nfs/inode.c4
-rw-r--r--fs/nfs/nfs4proc.c15
-rw-r--r--fs/nfs/nfs4state.c1
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfsd/blocklayout.c4
-rw-r--r--fs/nfsd/nfs3xdr.c12
-rw-r--r--fs/nfsd/nfs4layouts.c5
-rw-r--r--fs/nfsd/nfs4proc.c13
-rw-r--r--fs/nfsd/nfs4state.c19
-rw-r--r--fs/nfsd/nfs4xdr.c21
-rw-r--r--fs/nfsd/nfssvc.c36
-rw-r--r--fs/nfsd/nfsxdr.c10
-rw-r--r--fs/nfsd/state.h4
-rw-r--r--fs/nfsd/vfs.c59
-rw-r--r--fs/nilfs2/btnode.c2
-rw-r--r--fs/nilfs2/inode.c4
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/segment.c2
-rw-r--r--fs/notify/fanotify/fanotify_user.c2
-rw-r--r--fs/notify/fsnotify.c8
-rw-r--r--fs/notify/inotify/inotify_user.c2
-rw-r--r--fs/ocfs2/aops.c2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c8
-rw-r--r--fs/ocfs2/file.c2
-rw-r--r--fs/open.c43
-rw-r--r--fs/pnode.c278
-rw-r--r--fs/pnode.h4
-rw-r--r--fs/proc/base.c2
-rw-r--r--fs/proc/generic.c1
-rw-r--r--fs/proc/task_mmu.c13
-rw-r--r--fs/proc_namespace.c8
-rw-r--r--fs/pstore/ram.c5
-rw-r--r--fs/pstore/ram_core.c25
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/romfs/super.c23
-rw-r--r--fs/sdcardfs/dentry.c49
-rw-r--r--fs/sdcardfs/derived_perm.c378
-rw-r--r--fs/sdcardfs/file.c113
-rw-r--r--fs/sdcardfs/inode.c408
-rw-r--r--fs/sdcardfs/lookup.c220
-rw-r--r--fs/sdcardfs/main.c180
-rw-r--r--fs/sdcardfs/mmap.c76
-rw-r--r--fs/sdcardfs/multiuser.h30
-rw-r--r--fs/sdcardfs/packagelist.c859
-rw-r--r--fs/sdcardfs/sdcardfs.h244
-rw-r--r--fs/sdcardfs/super.c82
-rw-r--r--fs/seq_file.c7
-rw-r--r--fs/splice.c1
-rw-r--r--fs/squashfs/Kconfig28
-rw-r--r--fs/squashfs/Makefile3
-rw-r--r--fs/squashfs/block.c546
-rw-r--r--fs/squashfs/cache.c73
-rw-r--r--fs/squashfs/decompressor.c55
-rw-r--r--fs/squashfs/file.c140
-rw-r--r--fs/squashfs/file_cache.c38
-rw-r--r--fs/squashfs/file_direct.c245
-rw-r--r--fs/squashfs/lz4_wrapper.c32
-rw-r--r--fs/squashfs/lzo_wrapper.c40
-rw-r--r--fs/squashfs/page_actor.c175
-rw-r--r--fs/squashfs/page_actor.h84
-rw-r--r--fs/squashfs/squashfs.h11
-rw-r--r--fs/squashfs/squashfs_fs_sb.h2
-rw-r--r--fs/squashfs/super.c7
-rw-r--r--fs/squashfs/xz_wrapper.c15
-rw-r--r--fs/squashfs/zlib_wrapper.c14
-rw-r--r--fs/stat.c3
-rw-r--r--fs/super.c28
-rw-r--r--fs/sync.c1
-rw-r--r--fs/sysfs/file.c6
-rw-r--r--fs/timerfd.c17
-rw-r--r--fs/udf/inode.c6
-rw-r--r--fs/ufs/balloc.c26
-rw-r--r--fs/ufs/inode.c9
-rw-r--r--fs/ufs/super.c18
-rw-r--r--fs/ufs/util.h10
-rw-r--r--fs/utimes.c2
-rw-r--r--fs/xattr.c2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c7
-rw-r--r--fs/xfs/libxfs/xfs_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c8
-rw-r--r--fs/xfs/xfs_aops.c31
-rw-r--r--fs/xfs/xfs_attr.h1
-rw-r--r--fs/xfs/xfs_attr_list.c8
-rw-r--r--fs/xfs/xfs_bmap_util.c9
-rw-r--r--fs/xfs/xfs_buf.c25
-rw-r--r--fs/xfs/xfs_buf.h1
-rw-r--r--fs/xfs/xfs_dir2_readdir.c15
-rw-r--r--fs/xfs/xfs_file.c35
-rw-r--r--fs/xfs/xfs_icache.c58
-rw-r--r--fs/xfs/xfs_icache.h8
-rw-r--r--fs/xfs/xfs_inode.h4
-rw-r--r--fs/xfs/xfs_ioctl.c11
-rw-r--r--fs/xfs/xfs_qm.c7
-rw-r--r--fs/xfs/xfs_qm_syscalls.c3
-rw-r--r--fs/xfs/xfs_xattr.c16
197 files changed, 5660 insertions, 2292 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 929b618da43b..c30c6ceac2c4 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -283,6 +283,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
283 case ACL_TYPE_ACCESS: 283 case ACL_TYPE_ACCESS:
284 if (acl) { 284 if (acl) {
285 struct iattr iattr; 285 struct iattr iattr;
286 struct posix_acl *old_acl = acl;
286 287
287 retval = posix_acl_update_mode(inode, &iattr.ia_mode, &acl); 288 retval = posix_acl_update_mode(inode, &iattr.ia_mode, &acl);
288 if (retval) 289 if (retval)
@@ -293,6 +294,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
293 * by the mode bits. So don't 294 * by the mode bits. So don't
294 * update ACL. 295 * update ACL.
295 */ 296 */
297 posix_acl_release(old_acl);
296 value = NULL; 298 value = NULL;
297 size = 0; 299 size = 0;
298 } 300 }
diff --git a/fs/Makefile b/fs/Makefile
index 3b54070cd629..dee237540bc0 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -41,7 +41,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o
41obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o 41obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o
42obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o 42obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o
43 43
44obj-$(CONFIG_FS_MBCACHE) += mbcache.o 44obj-$(CONFIG_FS_MBCACHE) += mbcache.o mbcache2.o
45obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o 45obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
46obj-$(CONFIG_NFS_COMMON) += nfs_common/ 46obj-$(CONFIG_NFS_COMMON) += nfs_common/
47obj-$(CONFIG_COREDUMP) += coredump.o 47obj-$(CONFIG_COREDUMP) += coredump.o
diff --git a/fs/attr.c b/fs/attr.c
index d62f674a605f..c86b37c38fb7 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -187,7 +187,7 @@ EXPORT_SYMBOL(setattr_copy);
187 * the file open for write, as there can be no conflicting delegation in 187 * the file open for write, as there can be no conflicting delegation in
188 * that case. 188 * that case.
189 */ 189 */
190int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode) 190int notify_change2(struct vfsmount *mnt, struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
191{ 191{
192 struct inode *inode = dentry->d_inode; 192 struct inode *inode = dentry->d_inode;
193 umode_t mode = inode->i_mode; 193 umode_t mode = inode->i_mode;
@@ -211,7 +211,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
211 return -EPERM; 211 return -EPERM;
212 212
213 if (!inode_owner_or_capable(inode)) { 213 if (!inode_owner_or_capable(inode)) {
214 error = inode_permission(inode, MAY_WRITE); 214 error = inode_permission2(mnt, inode, MAY_WRITE);
215 if (error) 215 if (error)
216 return error; 216 return error;
217 } 217 }
@@ -277,7 +277,9 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
277 if (error) 277 if (error)
278 return error; 278 return error;
279 279
280 if (inode->i_op->setattr) 280 if (mnt && inode->i_op->setattr2)
281 error = inode->i_op->setattr2(mnt, dentry, attr);
282 else if (inode->i_op->setattr)
281 error = inode->i_op->setattr(dentry, attr); 283 error = inode->i_op->setattr(dentry, attr);
282 else 284 else
283 error = simple_setattr(dentry, attr); 285 error = simple_setattr(dentry, attr);
@@ -290,4 +292,10 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
290 292
291 return error; 293 return error;
292} 294}
295EXPORT_SYMBOL(notify_change2);
296
297int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
298{
299 return notify_change2(NULL, dentry, attr, delegated_inode);
300}
293EXPORT_SYMBOL(notify_change); 301EXPORT_SYMBOL(notify_change);
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index ac7d921ed984..257425511d10 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -331,7 +331,7 @@ static int autofs_dev_ioctl_fail(struct file *fp,
331 int status; 331 int status;
332 332
333 token = (autofs_wqt_t) param->fail.token; 333 token = (autofs_wqt_t) param->fail.token;
334 status = param->fail.status ? param->fail.status : -ENOENT; 334 status = param->fail.status < 0 ? param->fail.status : -ENOENT;
335 return autofs4_wait_release(sbi, token, status); 335 return autofs4_wait_release(sbi, token, status);
336} 336}
337 337
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0c52941dd62c..8a0243efd359 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -905,17 +905,60 @@ static int load_elf_binary(struct linux_binprm *bprm)
905 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE; 905 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
906 906
907 vaddr = elf_ppnt->p_vaddr; 907 vaddr = elf_ppnt->p_vaddr;
908 /*
909 * If we are loading ET_EXEC or we have already performed
910 * the ET_DYN load_addr calculations, proceed normally.
911 */
908 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) { 912 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
909 elf_flags |= MAP_FIXED; 913 elf_flags |= MAP_FIXED;
910 } else if (loc->elf_ex.e_type == ET_DYN) { 914 } else if (loc->elf_ex.e_type == ET_DYN) {
911 /* Try and get dynamic programs out of the way of the 915 /*
912 * default mmap base, as well as whatever program they 916 * This logic is run once for the first LOAD Program
913 * might try to exec. This is because the brk will 917 * Header for ET_DYN binaries to calculate the
914 * follow the loader, and is not movable. */ 918 * randomization (load_bias) for all the LOAD
915 load_bias = ELF_ET_DYN_BASE - vaddr; 919 * Program Headers, and to calculate the entire
916 if (current->flags & PF_RANDOMIZE) 920 * size of the ELF mapping (total_size). (Note that
917 load_bias += arch_mmap_rnd(); 921 * load_addr_set is set to true later once the
918 load_bias = ELF_PAGESTART(load_bias); 922 * initial mapping is performed.)
923 *
924 * There are effectively two types of ET_DYN
925 * binaries: programs (i.e. PIE: ET_DYN with INTERP)
926 * and loaders (ET_DYN without INTERP, since they
927 * _are_ the ELF interpreter). The loaders must
928 * be loaded away from programs since the program
929 * may otherwise collide with the loader (especially
930 * for ET_EXEC which does not have a randomized
931 * position). For example to handle invocations of
932 * "./ld.so someprog" to test out a new version of
933 * the loader, the subsequent program that the
934 * loader loads must avoid the loader itself, so
935 * they cannot share the same load range. Sufficient
936 * room for the brk must be allocated with the
937 * loader as well, since brk must be available with
938 * the loader.
939 *
940 * Therefore, programs are loaded offset from
941 * ELF_ET_DYN_BASE and loaders are loaded into the
942 * independently randomized mmap region (0 load_bias
943 * without MAP_FIXED).
944 */
945 if (elf_interpreter) {
946 load_bias = ELF_ET_DYN_BASE;
947 if (current->flags & PF_RANDOMIZE)
948 load_bias += arch_mmap_rnd();
949 elf_flags |= MAP_FIXED;
950 } else
951 load_bias = 0;
952
953 /*
954 * Since load_bias is used for all subsequent loading
955 * calculations, we must lower it by the first vaddr
956 * so that the remaining calculations based on the
957 * ELF vaddrs will be correctly offset. The result
958 * is then page aligned.
959 */
960 load_bias = ELF_PAGESTART(load_bias - vaddr);
961
919 total_size = total_mapping_size(elf_phdata, 962 total_size = total_mapping_size(elf_phdata,
920 loc->elf_ex.e_phnum); 963 loc->elf_ex.e_phnum);
921 if (!total_size) { 964 if (!total_size) {
@@ -2295,6 +2338,7 @@ static int elf_core_dump(struct coredump_params *cprm)
2295 goto end_coredump; 2338 goto end_coredump;
2296 } 2339 }
2297 } 2340 }
2341 dump_truncate(cprm);
2298 2342
2299 if (!elf_core_write_extra_data(cprm)) 2343 if (!elf_core_write_extra_data(cprm))
2300 goto end_coredump; 2344 goto end_coredump;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 198aea66fe71..26bbaaefdff4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -88,12 +88,11 @@ void invalidate_bdev(struct block_device *bdev)
88{ 88{
89 struct address_space *mapping = bdev->bd_inode->i_mapping; 89 struct address_space *mapping = bdev->bd_inode->i_mapping;
90 90
91 if (mapping->nrpages == 0) 91 if (mapping->nrpages) {
92 return; 92 invalidate_bh_lrus();
93 93 lru_add_drain_all(); /* make sure all lru add caches are flushed */
94 invalidate_bh_lrus(); 94 invalidate_mapping_pages(mapping, 0, -1);
95 lru_add_drain_all(); /* make sure all lru add caches are flushed */ 95 }
96 invalidate_mapping_pages(mapping, 0, -1);
97 /* 99% of the time, we don't need to flush the cleancache on the bdev. 96 /* 99% of the time, we don't need to flush the cleancache on the bdev.
98 * But, for the strange corners, lets be cautious 97 * But, for the strange corners, lets be cautious
99 */ 98 */
@@ -1098,7 +1097,6 @@ int revalidate_disk(struct gendisk *disk)
1098 1097
1099 if (disk->fops->revalidate_disk) 1098 if (disk->fops->revalidate_disk)
1100 ret = disk->fops->revalidate_disk(disk); 1099 ret = disk->fops->revalidate_disk(disk);
1101 blk_integrity_revalidate(disk);
1102 bdev = bdget_disk(disk, 0); 1100 bdev = bdget_disk(disk, 0);
1103 if (!bdev) 1101 if (!bdev)
1104 return ret; 1102 return ret;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2a2e370399ba..c36a03fa7678 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3854,6 +3854,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3854 info->space_info_kobj, "%s", 3854 info->space_info_kobj, "%s",
3855 alloc_name(found->flags)); 3855 alloc_name(found->flags));
3856 if (ret) { 3856 if (ret) {
3857 percpu_counter_destroy(&found->total_bytes_pinned);
3857 kfree(found); 3858 kfree(found);
3858 return ret; 3859 return ret;
3859 } 3860 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 353f4bae658c..d4a6eef31854 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2771,7 +2771,7 @@ static long btrfs_fallocate(struct file *file, int mode,
2771 if (!ret) 2771 if (!ret)
2772 ret = btrfs_prealloc_file_range(inode, mode, 2772 ret = btrfs_prealloc_file_range(inode, mode,
2773 range->start, 2773 range->start,
2774 range->len, 1 << inode->i_blkbits, 2774 range->len, i_blocksize(inode),
2775 offset + len, &alloc_hint); 2775 offset + len, &alloc_hint);
2776 list_del(&range->list); 2776 list_del(&range->list);
2777 kfree(range); 2777 kfree(range);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3cff6523f27d..bebd6517355d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4397,8 +4397,19 @@ search_again:
4397 if (found_type > min_type) { 4397 if (found_type > min_type) {
4398 del_item = 1; 4398 del_item = 1;
4399 } else { 4399 } else {
4400 if (item_end < new_size) 4400 if (item_end < new_size) {
4401 /*
4402 * With NO_HOLES mode, for the following mapping
4403 *
4404 * [0-4k][hole][8k-12k]
4405 *
4406 * if truncating isize down to 6k, it ends up
4407 * isize being 8k.
4408 */
4409 if (btrfs_fs_incompat(root->fs_info, NO_HOLES))
4410 last_size = new_size;
4401 break; 4411 break;
4412 }
4402 if (found_key.offset >= new_size) 4413 if (found_key.offset >= new_size)
4403 del_item = 1; 4414 del_item = 1;
4404 else 4415 else
@@ -7318,8 +7329,8 @@ bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
7318 int found = false; 7329 int found = false;
7319 void **pagep = NULL; 7330 void **pagep = NULL;
7320 struct page *page = NULL; 7331 struct page *page = NULL;
7321 int start_idx; 7332 unsigned long start_idx;
7322 int end_idx; 7333 unsigned long end_idx;
7323 7334
7324 start_idx = start >> PAGE_CACHE_SHIFT; 7335 start_idx = start >> PAGE_CACHE_SHIFT;
7325 7336
@@ -7510,11 +7521,18 @@ static void adjust_dio_outstanding_extents(struct inode *inode,
7510 * within our reservation, otherwise we need to adjust our inode 7521 * within our reservation, otherwise we need to adjust our inode
7511 * counter appropriately. 7522 * counter appropriately.
7512 */ 7523 */
7513 if (dio_data->outstanding_extents) { 7524 if (dio_data->outstanding_extents >= num_extents) {
7514 dio_data->outstanding_extents -= num_extents; 7525 dio_data->outstanding_extents -= num_extents;
7515 } else { 7526 } else {
7527 /*
7528 * If dio write length has been split due to no large enough
7529 * contiguous space, we need to compensate our inode counter
7530 * appropriately.
7531 */
7532 u64 num_needed = num_extents - dio_data->outstanding_extents;
7533
7516 spin_lock(&BTRFS_I(inode)->lock); 7534 spin_lock(&BTRFS_I(inode)->lock);
7517 BTRFS_I(inode)->outstanding_extents += num_extents; 7535 BTRFS_I(inode)->outstanding_extents += num_needed;
7518 spin_unlock(&BTRFS_I(inode)->lock); 7536 spin_unlock(&BTRFS_I(inode)->lock);
7519 } 7537 }
7520} 7538}
diff --git a/fs/buffer.c b/fs/buffer.c
index 4f4cd959da7c..6f7d519a093b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2298,7 +2298,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
2298 loff_t pos, loff_t *bytes) 2298 loff_t pos, loff_t *bytes)
2299{ 2299{
2300 struct inode *inode = mapping->host; 2300 struct inode *inode = mapping->host;
2301 unsigned blocksize = 1 << inode->i_blkbits; 2301 unsigned int blocksize = i_blocksize(inode);
2302 struct page *page; 2302 struct page *page;
2303 void *fsdata; 2303 void *fsdata;
2304 pgoff_t index, curidx; 2304 pgoff_t index, curidx;
@@ -2378,8 +2378,8 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
2378 get_block_t *get_block, loff_t *bytes) 2378 get_block_t *get_block, loff_t *bytes)
2379{ 2379{
2380 struct inode *inode = mapping->host; 2380 struct inode *inode = mapping->host;
2381 unsigned blocksize = 1 << inode->i_blkbits; 2381 unsigned int blocksize = i_blocksize(inode);
2382 unsigned zerofrom; 2382 unsigned int zerofrom;
2383 int err; 2383 int err;
2384 2384
2385 err = cont_expand_zero(file, mapping, pos, bytes); 2385 err = cont_expand_zero(file, mapping, pos, bytes);
@@ -2741,7 +2741,7 @@ int nobh_truncate_page(struct address_space *mapping,
2741 struct buffer_head map_bh; 2741 struct buffer_head map_bh;
2742 int err; 2742 int err;
2743 2743
2744 blocksize = 1 << inode->i_blkbits; 2744 blocksize = i_blocksize(inode);
2745 length = offset & (blocksize - 1); 2745 length = offset & (blocksize - 1);
2746 2746
2747 /* Block boundary? Nothing to do */ 2747 /* Block boundary? Nothing to do */
@@ -2819,7 +2819,7 @@ int block_truncate_page(struct address_space *mapping,
2819 struct buffer_head *bh; 2819 struct buffer_head *bh;
2820 int err; 2820 int err;
2821 2821
2822 blocksize = 1 << inode->i_blkbits; 2822 blocksize = i_blocksize(inode);
2823 length = offset & (blocksize - 1); 2823 length = offset & (blocksize - 1);
2824 2824
2825 /* Block boundary? Nothing to do */ 2825 /* Block boundary? Nothing to do */
@@ -2931,7 +2931,7 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2931 struct inode *inode = mapping->host; 2931 struct inode *inode = mapping->host;
2932 tmp.b_state = 0; 2932 tmp.b_state = 0;
2933 tmp.b_blocknr = 0; 2933 tmp.b_blocknr = 0;
2934 tmp.b_size = 1 << inode->i_blkbits; 2934 tmp.b_size = i_blocksize(inode);
2935 get_block(inode, block, &tmp, 0); 2935 get_block(inode, block, &tmp, 0);
2936 return tmp.b_blocknr; 2936 return tmp.b_blocknr;
2937} 2937}
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 4d8caeb94a11..bdb9c94335f1 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -128,7 +128,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
128 if (new_mode != old_mode) { 128 if (new_mode != old_mode) {
129 newattrs.ia_mode = new_mode; 129 newattrs.ia_mode = new_mode;
130 newattrs.ia_valid = ATTR_MODE; 130 newattrs.ia_valid = ATTR_MODE;
131 ret = ceph_setattr(dentry, &newattrs); 131 ret = __ceph_setattr(dentry, &newattrs);
132 if (ret) 132 if (ret)
133 goto out_dput; 133 goto out_dput;
134 } 134 }
@@ -138,7 +138,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
138 if (new_mode != old_mode) { 138 if (new_mode != old_mode) {
139 newattrs.ia_mode = old_mode; 139 newattrs.ia_mode = old_mode;
140 newattrs.ia_valid = ATTR_MODE; 140 newattrs.ia_valid = ATTR_MODE;
141 ceph_setattr(dentry, &newattrs); 141 __ceph_setattr(dentry, &newattrs);
142 } 142 }
143 goto out_dput; 143 goto out_dput;
144 } 144 }
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index b7d218a168fb..c6a1ec110c01 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -697,7 +697,7 @@ static int ceph_writepages_start(struct address_space *mapping,
697 struct pagevec pvec; 697 struct pagevec pvec;
698 int done = 0; 698 int done = 0;
699 int rc = 0; 699 int rc = 0;
700 unsigned wsize = 1 << inode->i_blkbits; 700 unsigned int wsize = i_blocksize(inode);
701 struct ceph_osd_request *req = NULL; 701 struct ceph_osd_request *req = NULL;
702 int do_sync = 0; 702 int do_sync = 0;
703 loff_t snap_size, i_size; 703 loff_t snap_size, i_size;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 9314b4ea2375..be7d187d53fd 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -247,6 +247,11 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx,
247 if (ret < 0) 247 if (ret < 0)
248 err = ret; 248 err = ret;
249 dput(last); 249 dput(last);
250 /* last_name no longer match cache index */
251 if (fi->readdir_cache_idx >= 0) {
252 fi->readdir_cache_idx = -1;
253 fi->dir_release_count = 0;
254 }
250 } 255 }
251 return err; 256 return err;
252} 257}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index d98536c8abfc..9f0d99094cc1 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1773,7 +1773,7 @@ static const struct inode_operations ceph_symlink_iops = {
1773/* 1773/*
1774 * setattr 1774 * setattr
1775 */ 1775 */
1776int ceph_setattr(struct dentry *dentry, struct iattr *attr) 1776int __ceph_setattr(struct dentry *dentry, struct iattr *attr)
1777{ 1777{
1778 struct inode *inode = d_inode(dentry); 1778 struct inode *inode = d_inode(dentry);
1779 struct ceph_inode_info *ci = ceph_inode(inode); 1779 struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1975,11 +1975,6 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1975 if (inode_dirty_flags) 1975 if (inode_dirty_flags)
1976 __mark_inode_dirty(inode, inode_dirty_flags); 1976 __mark_inode_dirty(inode, inode_dirty_flags);
1977 1977
1978 if (ia_valid & ATTR_MODE) {
1979 err = posix_acl_chmod(inode, attr->ia_mode);
1980 if (err)
1981 goto out_put;
1982 }
1983 1978
1984 if (mask) { 1979 if (mask) {
1985 req->r_inode = inode; 1980 req->r_inode = inode;
@@ -1993,13 +1988,23 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1993 ceph_cap_string(dirtied), mask); 1988 ceph_cap_string(dirtied), mask);
1994 1989
1995 ceph_mdsc_put_request(req); 1990 ceph_mdsc_put_request(req);
1996 if (mask & CEPH_SETATTR_SIZE)
1997 __ceph_do_pending_vmtruncate(inode);
1998 ceph_free_cap_flush(prealloc_cf); 1991 ceph_free_cap_flush(prealloc_cf);
1992
1993 if (err >= 0 && (mask & CEPH_SETATTR_SIZE))
1994 __ceph_do_pending_vmtruncate(inode);
1995
1999 return err; 1996 return err;
2000out_put: 1997}
2001 ceph_mdsc_put_request(req); 1998
2002 ceph_free_cap_flush(prealloc_cf); 1999int ceph_setattr(struct dentry *dentry, struct iattr *attr)
2000{
2001 int err;
2002
2003 err = __ceph_setattr(dentry, attr);
2004
2005 if (err >= 0 && (attr->ia_valid & ATTR_MODE))
2006 err = posix_acl_chmod(d_inode(dentry), attr->ia_mode);
2007
2003 return err; 2008 return err;
2004} 2009}
2005 2010
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 239bc9cba28c..f54f77037d22 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -644,6 +644,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
644{ 644{
645 dout("__unregister_request %p tid %lld\n", req, req->r_tid); 645 dout("__unregister_request %p tid %lld\n", req, req->r_tid);
646 646
647 /* Never leave an unregistered request on an unsafe list! */
648 list_del_init(&req->r_unsafe_item);
649
647 if (req->r_tid == mdsc->oldest_tid) { 650 if (req->r_tid == mdsc->oldest_tid) {
648 struct rb_node *p = rb_next(&req->r_node); 651 struct rb_node *p = rb_next(&req->r_node);
649 mdsc->oldest_tid = 0; 652 mdsc->oldest_tid = 0;
@@ -1051,7 +1054,6 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
1051 while (!list_empty(&session->s_unsafe)) { 1054 while (!list_empty(&session->s_unsafe)) {
1052 req = list_first_entry(&session->s_unsafe, 1055 req = list_first_entry(&session->s_unsafe,
1053 struct ceph_mds_request, r_unsafe_item); 1056 struct ceph_mds_request, r_unsafe_item);
1054 list_del_init(&req->r_unsafe_item);
1055 pr_warn_ratelimited(" dropping unsafe request %llu\n", 1057 pr_warn_ratelimited(" dropping unsafe request %llu\n",
1056 req->r_tid); 1058 req->r_tid);
1057 __unregister_request(mdsc, req); 1059 __unregister_request(mdsc, req);
@@ -2477,7 +2479,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
2477 * useful we could do with a revised return value. 2479 * useful we could do with a revised return value.
2478 */ 2480 */
2479 dout("got safe reply %llu, mds%d\n", tid, mds); 2481 dout("got safe reply %llu, mds%d\n", tid, mds);
2480 list_del_init(&req->r_unsafe_item);
2481 2482
2482 /* last unsafe request during umount? */ 2483 /* last unsafe request during umount? */
2483 if (mdsc->stopping && !__get_oldest_req(mdsc)) 2484 if (mdsc->stopping && !__get_oldest_req(mdsc))
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 75b7d125ce66..8c8cb8fe3d32 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -788,6 +788,7 @@ static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)
788 return __ceph_do_getattr(inode, NULL, mask, force); 788 return __ceph_do_getattr(inode, NULL, mask, force);
789} 789}
790extern int ceph_permission(struct inode *inode, int mask); 790extern int ceph_permission(struct inode *inode, int mask);
791extern int __ceph_setattr(struct dentry *dentry, struct iattr *attr);
791extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); 792extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
792extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, 793extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
793 struct kstat *stat); 794 struct kstat *stat);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 819163d8313b..b24275ef97f7 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -369,6 +369,7 @@ static int __set_xattr(struct ceph_inode_info *ci,
369 369
370 if (update_xattr) { 370 if (update_xattr) {
371 int err = 0; 371 int err = 0;
372
372 if (xattr && (flags & XATTR_CREATE)) 373 if (xattr && (flags & XATTR_CREATE))
373 err = -EEXIST; 374 err = -EEXIST;
374 else if (!xattr && (flags & XATTR_REPLACE)) 375 else if (!xattr && (flags & XATTR_REPLACE))
@@ -376,12 +377,14 @@ static int __set_xattr(struct ceph_inode_info *ci,
376 if (err) { 377 if (err) {
377 kfree(name); 378 kfree(name);
378 kfree(val); 379 kfree(val);
380 kfree(*newxattr);
379 return err; 381 return err;
380 } 382 }
381 if (update_xattr < 0) { 383 if (update_xattr < 0) {
382 if (xattr) 384 if (xattr)
383 __remove_xattr(ci, xattr); 385 __remove_xattr(ci, xattr);
384 kfree(name); 386 kfree(name);
387 kfree(*newxattr);
385 return 0; 388 return 0;
386 } 389 }
387 } 390 }
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 02b071bf3732..a0b3e7d1be48 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -83,6 +83,9 @@ convert_sfm_char(const __u16 src_char, char *target)
83 case SFM_COLON: 83 case SFM_COLON:
84 *target = ':'; 84 *target = ':';
85 break; 85 break;
86 case SFM_DOUBLEQUOTE:
87 *target = '"';
88 break;
86 case SFM_ASTERISK: 89 case SFM_ASTERISK:
87 *target = '*'; 90 *target = '*';
88 break; 91 break;
@@ -418,6 +421,9 @@ static __le16 convert_to_sfm_char(char src_char, bool end_of_string)
418 case ':': 421 case ':':
419 dest_char = cpu_to_le16(SFM_COLON); 422 dest_char = cpu_to_le16(SFM_COLON);
420 break; 423 break;
424 case '"':
425 dest_char = cpu_to_le16(SFM_DOUBLEQUOTE);
426 break;
421 case '*': 427 case '*':
422 dest_char = cpu_to_le16(SFM_ASTERISK); 428 dest_char = cpu_to_le16(SFM_ASTERISK);
423 break; 429 break;
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 479bc0a941f3..07ade707fa60 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -57,6 +57,7 @@
57 * not conflict (although almost does) with the mapping above. 57 * not conflict (although almost does) with the mapping above.
58 */ 58 */
59 59
60#define SFM_DOUBLEQUOTE ((__u16) 0xF020)
60#define SFM_ASTERISK ((__u16) 0xF021) 61#define SFM_ASTERISK ((__u16) 0xF021)
61#define SFM_QUESTION ((__u16) 0xF025) 62#define SFM_QUESTION ((__u16) 0xF025)
62#define SFM_COLON ((__u16) 0xF022) 63#define SFM_COLON ((__u16) 0xF022)
@@ -64,8 +65,8 @@
64#define SFM_LESSTHAN ((__u16) 0xF023) 65#define SFM_LESSTHAN ((__u16) 0xF023)
65#define SFM_PIPE ((__u16) 0xF027) 66#define SFM_PIPE ((__u16) 0xF027)
66#define SFM_SLASH ((__u16) 0xF026) 67#define SFM_SLASH ((__u16) 0xF026)
67#define SFM_PERIOD ((__u16) 0xF028) 68#define SFM_SPACE ((__u16) 0xF028)
68#define SFM_SPACE ((__u16) 0xF029) 69#define SFM_PERIOD ((__u16) 0xF029)
69 70
70/* 71/*
71 * Mapping mechanism to use when one of the seven reserved characters is 72 * Mapping mechanism to use when one of the seven reserved characters is
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index b76883606e4b..e2f6a79e9b01 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -227,6 +227,7 @@ struct smb_version_operations {
227 /* verify the message */ 227 /* verify the message */
228 int (*check_message)(char *, unsigned int); 228 int (*check_message)(char *, unsigned int);
229 bool (*is_oplock_break)(char *, struct TCP_Server_Info *); 229 bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
230 int (*handle_cancelled_mid)(char *, struct TCP_Server_Info *);
230 void (*downgrade_oplock)(struct TCP_Server_Info *, 231 void (*downgrade_oplock)(struct TCP_Server_Info *,
231 struct cifsInodeInfo *, bool); 232 struct cifsInodeInfo *, bool);
232 /* process transaction2 response */ 233 /* process transaction2 response */
@@ -906,7 +907,6 @@ struct cifs_tcon {
906 bool use_persistent:1; /* use persistent instead of durable handles */ 907 bool use_persistent:1; /* use persistent instead of durable handles */
907#ifdef CONFIG_CIFS_SMB2 908#ifdef CONFIG_CIFS_SMB2
908 bool print:1; /* set if connection to printer share */ 909 bool print:1; /* set if connection to printer share */
909 bool bad_network_name:1; /* set if ret status STATUS_BAD_NETWORK_NAME */
910 __le32 capabilities; 910 __le32 capabilities;
911 __u32 share_flags; 911 __u32 share_flags;
912 __u32 maximal_access; 912 __u32 maximal_access;
@@ -1290,12 +1290,19 @@ struct mid_q_entry {
1290 void *callback_data; /* general purpose pointer for callback */ 1290 void *callback_data; /* general purpose pointer for callback */
1291 void *resp_buf; /* pointer to received SMB header */ 1291 void *resp_buf; /* pointer to received SMB header */
1292 int mid_state; /* wish this were enum but can not pass to wait_event */ 1292 int mid_state; /* wish this were enum but can not pass to wait_event */
1293 unsigned int mid_flags;
1293 __le16 command; /* smb command code */ 1294 __le16 command; /* smb command code */
1294 bool large_buf:1; /* if valid response, is pointer to large buf */ 1295 bool large_buf:1; /* if valid response, is pointer to large buf */
1295 bool multiRsp:1; /* multiple trans2 responses for one request */ 1296 bool multiRsp:1; /* multiple trans2 responses for one request */
1296 bool multiEnd:1; /* both received */ 1297 bool multiEnd:1; /* both received */
1297}; 1298};
1298 1299
1300struct close_cancelled_open {
1301 struct cifs_fid fid;
1302 struct cifs_tcon *tcon;
1303 struct work_struct work;
1304};
1305
1299/* Make code in transport.c a little cleaner by moving 1306/* Make code in transport.c a little cleaner by moving
1300 update of optional stats into function below */ 1307 update of optional stats into function below */
1301#ifdef CONFIG_CIFS_STATS2 1308#ifdef CONFIG_CIFS_STATS2
@@ -1427,6 +1434,9 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
1427#define MID_RESPONSE_MALFORMED 0x10 1434#define MID_RESPONSE_MALFORMED 0x10
1428#define MID_SHUTDOWN 0x20 1435#define MID_SHUTDOWN 0x20
1429 1436
1437/* Flags */
1438#define MID_WAIT_CANCELLED 1 /* Cancelled while waiting for response */
1439
1430/* Types of response buffer returned from SendReceive2 */ 1440/* Types of response buffer returned from SendReceive2 */
1431#define CIFS_NO_BUFFER 0 /* Response buffer not returned */ 1441#define CIFS_NO_BUFFER 0 /* Response buffer not returned */
1432#define CIFS_SMALL_BUFFER 1 1442#define CIFS_SMALL_BUFFER 1
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index b1104ed8f54c..b60150e5b5ce 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -717,6 +717,9 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
717 if (rc) 717 if (rc)
718 return rc; 718 return rc;
719 719
720 if (server->capabilities & CAP_UNICODE)
721 smb->hdr.Flags2 |= SMBFLG2_UNICODE;
722
720 /* set up echo request */ 723 /* set up echo request */
721 smb->hdr.Tid = 0xffff; 724 smb->hdr.Tid = 0xffff;
722 smb->hdr.WordCount = 1; 725 smb->hdr.WordCount = 1;
@@ -1424,6 +1427,8 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1424 1427
1425 length = discard_remaining_data(server); 1428 length = discard_remaining_data(server);
1426 dequeue_mid(mid, rdata->result); 1429 dequeue_mid(mid, rdata->result);
1430 mid->resp_buf = server->smallbuf;
1431 server->smallbuf = NULL;
1427 return length; 1432 return length;
1428} 1433}
1429 1434
@@ -1538,6 +1543,8 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1538 return cifs_readv_discard(server, mid); 1543 return cifs_readv_discard(server, mid);
1539 1544
1540 dequeue_mid(mid, false); 1545 dequeue_mid(mid, false);
1546 mid->resp_buf = server->smallbuf;
1547 server->smallbuf = NULL;
1541 return length; 1548 return length;
1542} 1549}
1543 1550
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5d59f25521ce..53a827c6d8b1 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -412,6 +412,9 @@ cifs_reconnect(struct TCP_Server_Info *server)
412 } 412 }
413 } while (server->tcpStatus == CifsNeedReconnect); 413 } while (server->tcpStatus == CifsNeedReconnect);
414 414
415 if (server->tcpStatus == CifsNeedNegotiate)
416 mod_delayed_work(cifsiod_wq, &server->echo, 0);
417
415 return rc; 418 return rc;
416} 419}
417 420
@@ -421,18 +424,27 @@ cifs_echo_request(struct work_struct *work)
421 int rc; 424 int rc;
422 struct TCP_Server_Info *server = container_of(work, 425 struct TCP_Server_Info *server = container_of(work,
423 struct TCP_Server_Info, echo.work); 426 struct TCP_Server_Info, echo.work);
427 unsigned long echo_interval;
424 428
425 /* 429 /*
426 * We cannot send an echo if it is disabled or until the 430 * If we need to renegotiate, set echo interval to zero to
427 * NEGOTIATE_PROTOCOL request is done, which is indicated by 431 * immediately call echo service where we can renegotiate.
428 * server->ops->need_neg() == true. Also, no need to ping if 432 */
429 * we got a response recently. 433 if (server->tcpStatus == CifsNeedNegotiate)
434 echo_interval = 0;
435 else
436 echo_interval = SMB_ECHO_INTERVAL;
437
438 /*
439 * We cannot send an echo if it is disabled.
440 * Also, no need to ping if we got a response recently.
430 */ 441 */
431 442
432 if (server->tcpStatus == CifsNeedReconnect || 443 if (server->tcpStatus == CifsNeedReconnect ||
433 server->tcpStatus == CifsExiting || server->tcpStatus == CifsNew || 444 server->tcpStatus == CifsExiting ||
445 server->tcpStatus == CifsNew ||
434 (server->ops->can_echo && !server->ops->can_echo(server)) || 446 (server->ops->can_echo && !server->ops->can_echo(server)) ||
435 time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ)) 447 time_before(jiffies, server->lstrp + echo_interval - HZ))
436 goto requeue_echo; 448 goto requeue_echo;
437 449
438 rc = server->ops->echo ? server->ops->echo(server) : -ENOSYS; 450 rc = server->ops->echo ? server->ops->echo(server) : -ENOSYS;
@@ -924,10 +936,19 @@ cifs_demultiplex_thread(void *p)
924 936
925 server->lstrp = jiffies; 937 server->lstrp = jiffies;
926 if (mid_entry != NULL) { 938 if (mid_entry != NULL) {
939 if ((mid_entry->mid_flags & MID_WAIT_CANCELLED) &&
940 mid_entry->mid_state == MID_RESPONSE_RECEIVED &&
941 server->ops->handle_cancelled_mid)
942 server->ops->handle_cancelled_mid(
943 mid_entry->resp_buf,
944 server);
945
927 if (!mid_entry->multiRsp || mid_entry->multiEnd) 946 if (!mid_entry->multiRsp || mid_entry->multiEnd)
928 mid_entry->callback(mid_entry); 947 mid_entry->callback(mid_entry);
929 } else if (!server->ops->is_oplock_break || 948 } else if (server->ops->is_oplock_break &&
930 !server->ops->is_oplock_break(buf, server)) { 949 server->ops->is_oplock_break(buf, server)) {
950 cifs_dbg(FYI, "Received oplock break\n");
951 } else {
931 cifs_dbg(VFS, "No task to wake, unknown frame received! NumMids %d\n", 952 cifs_dbg(VFS, "No task to wake, unknown frame received! NumMids %d\n",
932 atomic_read(&midCount)); 953 atomic_read(&midCount));
933 cifs_dump_mem("Received Data is: ", buf, 954 cifs_dump_mem("Received Data is: ", buf,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 72f270d4bd17..a0c0a49b6620 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2545,7 +2545,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2545 wdata->credits = credits; 2545 wdata->credits = credits;
2546 2546
2547 if (!wdata->cfile->invalidHandle || 2547 if (!wdata->cfile->invalidHandle ||
2548 !cifs_reopen_file(wdata->cfile, false)) 2548 !(rc = cifs_reopen_file(wdata->cfile, false)))
2549 rc = server->ops->async_writev(wdata, 2549 rc = server->ops->async_writev(wdata,
2550 cifs_uncached_writedata_release); 2550 cifs_uncached_writedata_release);
2551 if (rc) { 2551 if (rc) {
@@ -2958,7 +2958,7 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2958 rdata->credits = credits; 2958 rdata->credits = credits;
2959 2959
2960 if (!rdata->cfile->invalidHandle || 2960 if (!rdata->cfile->invalidHandle ||
2961 !cifs_reopen_file(rdata->cfile, true)) 2961 !(rc = cifs_reopen_file(rdata->cfile, true)))
2962 rc = server->ops->async_readv(rdata); 2962 rc = server->ops->async_readv(rdata);
2963error: 2963error:
2964 if (rc) { 2964 if (rc) {
@@ -3544,7 +3544,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
3544 } 3544 }
3545 3545
3546 if (!rdata->cfile->invalidHandle || 3546 if (!rdata->cfile->invalidHandle ||
3547 !cifs_reopen_file(rdata->cfile, true)) 3547 !(rc = cifs_reopen_file(rdata->cfile, true)))
3548 rc = server->ops->async_readv(rdata); 3548 rc = server->ops->async_readv(rdata);
3549 if (rc) { 3549 if (rc) {
3550 add_credits_and_wake_if(server, rdata->credits, 0); 3550 add_credits_and_wake_if(server, rdata->credits, 0);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 35cf990f87d3..a8f5b31636dc 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -272,6 +272,8 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
272 rc = -EOPNOTSUPP; 272 rc = -EOPNOTSUPP;
273 break; 273 break;
274 case CIFS_IOC_GET_MNT_INFO: 274 case CIFS_IOC_GET_MNT_INFO:
275 if (pSMBFile == NULL)
276 break;
275 tcon = tlink_tcon(pSMBFile->tlink); 277 tcon = tlink_tcon(pSMBFile->tlink);
276 rc = smb_mnt_get_fsinfo(xid, tcon, (void __user *)arg); 278 rc = smb_mnt_get_fsinfo(xid, tcon, (void __user *)arg);
277 break; 279 break;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 833e5844a2db..97d1a15873c5 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -282,6 +282,7 @@ initiate_cifs_search(const unsigned int xid, struct file *file)
282 rc = -ENOMEM; 282 rc = -ENOMEM;
283 goto error_exit; 283 goto error_exit;
284 } 284 }
285 spin_lock_init(&cifsFile->file_info_lock);
285 file->private_data = cifsFile; 286 file->private_data = cifsFile;
286 cifsFile->tlink = cifs_get_tlink(tlink); 287 cifsFile->tlink = cifs_get_tlink(tlink);
287 tcon = tlink_tcon(tlink); 288 tcon = tlink_tcon(tlink);
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index fc537c29044e..efd72e1fae74 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -849,8 +849,13 @@ cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
849 struct cifs_fid *fid, __u16 search_flags, 849 struct cifs_fid *fid, __u16 search_flags,
850 struct cifs_search_info *srch_inf) 850 struct cifs_search_info *srch_inf)
851{ 851{
852 return CIFSFindFirst(xid, tcon, path, cifs_sb, 852 int rc;
853 &fid->netfid, search_flags, srch_inf, true); 853
854 rc = CIFSFindFirst(xid, tcon, path, cifs_sb,
855 &fid->netfid, search_flags, srch_inf, true);
856 if (rc)
857 cifs_dbg(FYI, "find first failed=%d\n", rc);
858 return rc;
854} 859}
855 860
856static int 861static int
@@ -1015,6 +1020,15 @@ cifs_dir_needs_close(struct cifsFileInfo *cfile)
1015 return !cfile->srch_inf.endOfSearch && !cfile->invalidHandle; 1020 return !cfile->srch_inf.endOfSearch && !cfile->invalidHandle;
1016} 1021}
1017 1022
1023static bool
1024cifs_can_echo(struct TCP_Server_Info *server)
1025{
1026 if (server->tcpStatus == CifsGood)
1027 return true;
1028
1029 return false;
1030}
1031
1018struct smb_version_operations smb1_operations = { 1032struct smb_version_operations smb1_operations = {
1019 .send_cancel = send_nt_cancel, 1033 .send_cancel = send_nt_cancel,
1020 .compare_fids = cifs_compare_fids, 1034 .compare_fids = cifs_compare_fids,
@@ -1049,6 +1063,7 @@ struct smb_version_operations smb1_operations = {
1049 .get_dfs_refer = CIFSGetDFSRefer, 1063 .get_dfs_refer = CIFSGetDFSRefer,
1050 .qfs_tcon = cifs_qfs_tcon, 1064 .qfs_tcon = cifs_qfs_tcon,
1051 .is_path_accessible = cifs_is_path_accessible, 1065 .is_path_accessible = cifs_is_path_accessible,
1066 .can_echo = cifs_can_echo,
1052 .query_path_info = cifs_query_path_info, 1067 .query_path_info = cifs_query_path_info,
1053 .query_file_info = cifs_query_file_info, 1068 .query_file_info = cifs_query_file_info,
1054 .get_srv_inum = cifs_get_srv_inum, 1069 .get_srv_inum = cifs_get_srv_inum,
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index e5bc85e49be7..76ccf20fbfb7 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -630,3 +630,47 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
630 cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n"); 630 cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n");
631 return false; 631 return false;
632} 632}
633
634void
635smb2_cancelled_close_fid(struct work_struct *work)
636{
637 struct close_cancelled_open *cancelled = container_of(work,
638 struct close_cancelled_open, work);
639
640 cifs_dbg(VFS, "Close unmatched open\n");
641
642 SMB2_close(0, cancelled->tcon, cancelled->fid.persistent_fid,
643 cancelled->fid.volatile_fid);
644 cifs_put_tcon(cancelled->tcon);
645 kfree(cancelled);
646}
647
648int
649smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server)
650{
651 struct smb2_hdr *hdr = (struct smb2_hdr *)buffer;
652 struct smb2_create_rsp *rsp = (struct smb2_create_rsp *)buffer;
653 struct cifs_tcon *tcon;
654 struct close_cancelled_open *cancelled;
655
656 if (hdr->Command != SMB2_CREATE || hdr->Status != STATUS_SUCCESS)
657 return 0;
658
659 cancelled = kzalloc(sizeof(*cancelled), GFP_KERNEL);
660 if (!cancelled)
661 return -ENOMEM;
662
663 tcon = smb2_find_smb_tcon(server, hdr->SessionId, hdr->TreeId);
664 if (!tcon) {
665 kfree(cancelled);
666 return -ENOENT;
667 }
668
669 cancelled->fid.persistent_fid = rsp->PersistentFileId;
670 cancelled->fid.volatile_fid = rsp->VolatileFileId;
671 cancelled->tcon = tcon;
672 INIT_WORK(&cancelled->work, smb2_cancelled_close_fid);
673 queue_work(cifsiod_wq, &cancelled->work);
674
675 return 0;
676}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index be34b4860675..1d125d3d0d89 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -909,7 +909,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
909 rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL); 909 rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
910 kfree(utf16_path); 910 kfree(utf16_path);
911 if (rc) { 911 if (rc) {
912 cifs_dbg(VFS, "open dir failed\n"); 912 cifs_dbg(FYI, "open dir failed rc=%d\n", rc);
913 return rc; 913 return rc;
914 } 914 }
915 915
@@ -919,7 +919,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
919 rc = SMB2_query_directory(xid, tcon, fid->persistent_fid, 919 rc = SMB2_query_directory(xid, tcon, fid->persistent_fid,
920 fid->volatile_fid, 0, srch_inf); 920 fid->volatile_fid, 0, srch_inf);
921 if (rc) { 921 if (rc) {
922 cifs_dbg(VFS, "query directory failed\n"); 922 cifs_dbg(FYI, "query directory failed rc=%d\n", rc);
923 SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); 923 SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
924 } 924 }
925 return rc; 925 return rc;
@@ -1511,6 +1511,7 @@ struct smb_version_operations smb20_operations = {
1511 .clear_stats = smb2_clear_stats, 1511 .clear_stats = smb2_clear_stats,
1512 .print_stats = smb2_print_stats, 1512 .print_stats = smb2_print_stats,
1513 .is_oplock_break = smb2_is_valid_oplock_break, 1513 .is_oplock_break = smb2_is_valid_oplock_break,
1514 .handle_cancelled_mid = smb2_handle_cancelled_mid,
1514 .downgrade_oplock = smb2_downgrade_oplock, 1515 .downgrade_oplock = smb2_downgrade_oplock,
1515 .need_neg = smb2_need_neg, 1516 .need_neg = smb2_need_neg,
1516 .negotiate = smb2_negotiate, 1517 .negotiate = smb2_negotiate,
@@ -1589,6 +1590,7 @@ struct smb_version_operations smb21_operations = {
1589 .clear_stats = smb2_clear_stats, 1590 .clear_stats = smb2_clear_stats,
1590 .print_stats = smb2_print_stats, 1591 .print_stats = smb2_print_stats,
1591 .is_oplock_break = smb2_is_valid_oplock_break, 1592 .is_oplock_break = smb2_is_valid_oplock_break,
1593 .handle_cancelled_mid = smb2_handle_cancelled_mid,
1592 .downgrade_oplock = smb2_downgrade_oplock, 1594 .downgrade_oplock = smb2_downgrade_oplock,
1593 .need_neg = smb2_need_neg, 1595 .need_neg = smb2_need_neg,
1594 .negotiate = smb2_negotiate, 1596 .negotiate = smb2_negotiate,
@@ -1670,6 +1672,7 @@ struct smb_version_operations smb30_operations = {
1670 .print_stats = smb2_print_stats, 1672 .print_stats = smb2_print_stats,
1671 .dump_share_caps = smb2_dump_share_caps, 1673 .dump_share_caps = smb2_dump_share_caps,
1672 .is_oplock_break = smb2_is_valid_oplock_break, 1674 .is_oplock_break = smb2_is_valid_oplock_break,
1675 .handle_cancelled_mid = smb2_handle_cancelled_mid,
1673 .downgrade_oplock = smb2_downgrade_oplock, 1676 .downgrade_oplock = smb2_downgrade_oplock,
1674 .need_neg = smb2_need_neg, 1677 .need_neg = smb2_need_neg,
1675 .negotiate = smb2_negotiate, 1678 .negotiate = smb2_negotiate,
@@ -1757,6 +1760,7 @@ struct smb_version_operations smb311_operations = {
1757 .print_stats = smb2_print_stats, 1760 .print_stats = smb2_print_stats,
1758 .dump_share_caps = smb2_dump_share_caps, 1761 .dump_share_caps = smb2_dump_share_caps,
1759 .is_oplock_break = smb2_is_valid_oplock_break, 1762 .is_oplock_break = smb2_is_valid_oplock_break,
1763 .handle_cancelled_mid = smb2_handle_cancelled_mid,
1760 .downgrade_oplock = smb2_downgrade_oplock, 1764 .downgrade_oplock = smb2_downgrade_oplock,
1761 .need_neg = smb2_need_neg, 1765 .need_neg = smb2_need_neg,
1762 .negotiate = smb2_negotiate, 1766 .negotiate = smb2_negotiate,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 2fa754c5fd62..f4afa3b1cc56 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -564,8 +564,12 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
564 } 564 }
565 565
566 if (rsplen != sizeof(struct validate_negotiate_info_rsp)) { 566 if (rsplen != sizeof(struct validate_negotiate_info_rsp)) {
567 cifs_dbg(VFS, "invalid size of protocol negotiate response\n"); 567 cifs_dbg(VFS, "invalid protocol negotiate response size: %d\n",
568 return -EIO; 568 rsplen);
569
570 /* relax check since Mac returns max bufsize allowed on ioctl */
571 if (rsplen > CIFSMaxBufSize)
572 return -EIO;
569 } 573 }
570 574
571 /* check validate negotiate info response matches what we got earlier */ 575 /* check validate negotiate info response matches what we got earlier */
@@ -932,9 +936,6 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
932 else 936 else
933 return -EIO; 937 return -EIO;
934 938
935 if (tcon && tcon->bad_network_name)
936 return -ENOENT;
937
938 if ((tcon && tcon->seal) && 939 if ((tcon && tcon->seal) &&
939 ((ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) == 0)) { 940 ((ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) == 0)) {
940 cifs_dbg(VFS, "encryption requested but no server support"); 941 cifs_dbg(VFS, "encryption requested but no server support");
@@ -952,6 +953,10 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
952 return -EINVAL; 953 return -EINVAL;
953 } 954 }
954 955
956 /* SMB2 TREE_CONNECT request must be called with TreeId == 0 */
957 if (tcon)
958 tcon->tid = 0;
959
955 rc = small_smb2_init(SMB2_TREE_CONNECT, tcon, (void **) &req); 960 rc = small_smb2_init(SMB2_TREE_CONNECT, tcon, (void **) &req);
956 if (rc) { 961 if (rc) {
957 kfree(unc_path); 962 kfree(unc_path);
@@ -1032,8 +1037,6 @@ tcon_exit:
1032tcon_error_exit: 1037tcon_error_exit:
1033 if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) { 1038 if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) {
1034 cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); 1039 cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
1035 if (tcon)
1036 tcon->bad_network_name = true;
1037 } 1040 }
1038 goto tcon_exit; 1041 goto tcon_exit;
1039} 1042}
@@ -1519,8 +1522,12 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
1519 * than one credit. Windows typically sets this smaller, but for some 1522 * than one credit. Windows typically sets this smaller, but for some
1520 * ioctls it may be useful to allow server to send more. No point 1523 * ioctls it may be useful to allow server to send more. No point
1521 * limiting what the server can send as long as fits in one credit 1524 * limiting what the server can send as long as fits in one credit
1525 * Unfortunately - we can not handle more than CIFS_MAX_MSG_SIZE
1526 * (by default, note that it can be overridden to make max larger)
1527 * in responses (except for read responses which can be bigger.
1528 * We may want to bump this limit up
1522 */ 1529 */
1523 req->MaxOutputResponse = cpu_to_le32(0xFF00); /* < 64K uses 1 credit */ 1530 req->MaxOutputResponse = cpu_to_le32(CIFSMaxBufSize);
1524 1531
1525 if (is_fsctl) 1532 if (is_fsctl)
1526 req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL); 1533 req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL);
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 0a406ae78129..adc5234486c3 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -47,6 +47,10 @@ extern struct mid_q_entry *smb2_setup_request(struct cifs_ses *ses,
47 struct smb_rqst *rqst); 47 struct smb_rqst *rqst);
48extern struct mid_q_entry *smb2_setup_async_request( 48extern struct mid_q_entry *smb2_setup_async_request(
49 struct TCP_Server_Info *server, struct smb_rqst *rqst); 49 struct TCP_Server_Info *server, struct smb_rqst *rqst);
50extern struct cifs_ses *smb2_find_smb_ses(struct TCP_Server_Info *server,
51 __u64 ses_id);
52extern struct cifs_tcon *smb2_find_smb_tcon(struct TCP_Server_Info *server,
53 __u64 ses_id, __u32 tid);
50extern int smb2_calc_signature(struct smb_rqst *rqst, 54extern int smb2_calc_signature(struct smb_rqst *rqst,
51 struct TCP_Server_Info *server); 55 struct TCP_Server_Info *server);
52extern int smb3_calc_signature(struct smb_rqst *rqst, 56extern int smb3_calc_signature(struct smb_rqst *rqst,
@@ -157,6 +161,9 @@ extern int SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
157extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, 161extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
158 const u64 persistent_fid, const u64 volatile_fid, 162 const u64 persistent_fid, const u64 volatile_fid,
159 const __u8 oplock_level); 163 const __u8 oplock_level);
164extern int smb2_handle_cancelled_mid(char *buffer,
165 struct TCP_Server_Info *server);
166void smb2_cancelled_close_fid(struct work_struct *work);
160extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, 167extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
161 u64 persistent_file_id, u64 volatile_file_id, 168 u64 persistent_file_id, u64 volatile_file_id,
162 struct kstatfs *FSData); 169 struct kstatfs *FSData);
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index d4c5b6f109a7..69e3b322bbfe 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -115,22 +115,68 @@ smb3_crypto_shash_allocate(struct TCP_Server_Info *server)
115} 115}
116 116
117static struct cifs_ses * 117static struct cifs_ses *
118smb2_find_smb_ses(struct smb2_hdr *smb2hdr, struct TCP_Server_Info *server) 118smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id)
119{ 119{
120 struct cifs_ses *ses; 120 struct cifs_ses *ses;
121 121
122 spin_lock(&cifs_tcp_ses_lock);
123 list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { 122 list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
124 if (ses->Suid != smb2hdr->SessionId) 123 if (ses->Suid != ses_id)
125 continue; 124 continue;
126 spin_unlock(&cifs_tcp_ses_lock);
127 return ses; 125 return ses;
128 } 126 }
127
128 return NULL;
129}
130
131struct cifs_ses *
132smb2_find_smb_ses(struct TCP_Server_Info *server, __u64 ses_id)
133{
134 struct cifs_ses *ses;
135
136 spin_lock(&cifs_tcp_ses_lock);
137 ses = smb2_find_smb_ses_unlocked(server, ses_id);
129 spin_unlock(&cifs_tcp_ses_lock); 138 spin_unlock(&cifs_tcp_ses_lock);
130 139
140 return ses;
141}
142
143static struct cifs_tcon *
144smb2_find_smb_sess_tcon_unlocked(struct cifs_ses *ses, __u32 tid)
145{
146 struct cifs_tcon *tcon;
147
148 list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
149 if (tcon->tid != tid)
150 continue;
151 ++tcon->tc_count;
152 return tcon;
153 }
154
131 return NULL; 155 return NULL;
132} 156}
133 157
158/*
159 * Obtain tcon corresponding to the tid in the given
160 * cifs_ses
161 */
162
163struct cifs_tcon *
164smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid)
165{
166 struct cifs_ses *ses;
167 struct cifs_tcon *tcon;
168
169 spin_lock(&cifs_tcp_ses_lock);
170 ses = smb2_find_smb_ses_unlocked(server, ses_id);
171 if (!ses) {
172 spin_unlock(&cifs_tcp_ses_lock);
173 return NULL;
174 }
175 tcon = smb2_find_smb_sess_tcon_unlocked(ses, tid);
176 spin_unlock(&cifs_tcp_ses_lock);
177
178 return tcon;
179}
134 180
135int 181int
136smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) 182smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
@@ -143,7 +189,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
143 struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; 189 struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base;
144 struct cifs_ses *ses; 190 struct cifs_ses *ses;
145 191
146 ses = smb2_find_smb_ses(smb2_pdu, server); 192 ses = smb2_find_smb_ses(server, smb2_pdu->SessionId);
147 if (!ses) { 193 if (!ses) {
148 cifs_dbg(VFS, "%s: Could not find session\n", __func__); 194 cifs_dbg(VFS, "%s: Could not find session\n", __func__);
149 return 0; 195 return 0;
@@ -314,7 +360,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
314 struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; 360 struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base;
315 struct cifs_ses *ses; 361 struct cifs_ses *ses;
316 362
317 ses = smb2_find_smb_ses(smb2_pdu, server); 363 ses = smb2_find_smb_ses(server, smb2_pdu->SessionId);
318 if (!ses) { 364 if (!ses) {
319 cifs_dbg(VFS, "%s: Could not find session\n", __func__); 365 cifs_dbg(VFS, "%s: Could not find session\n", __func__);
320 return 0; 366 return 0;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 87abe8ed074c..54af10204e83 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -786,9 +786,11 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
786 786
787 rc = wait_for_response(ses->server, midQ); 787 rc = wait_for_response(ses->server, midQ);
788 if (rc != 0) { 788 if (rc != 0) {
789 cifs_dbg(FYI, "Cancelling wait for mid %llu\n", midQ->mid);
789 send_cancel(ses->server, buf, midQ); 790 send_cancel(ses->server, buf, midQ);
790 spin_lock(&GlobalMid_Lock); 791 spin_lock(&GlobalMid_Lock);
791 if (midQ->mid_state == MID_REQUEST_SUBMITTED) { 792 if (midQ->mid_state == MID_REQUEST_SUBMITTED) {
793 midQ->mid_flags |= MID_WAIT_CANCELLED;
792 midQ->callback = DeleteMidQEntry; 794 midQ->callback = DeleteMidQEntry;
793 spin_unlock(&GlobalMid_Lock); 795 spin_unlock(&GlobalMid_Lock);
794 cifs_small_buf_release(buf); 796 cifs_small_buf_release(buf);
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index ec5c8325b503..0525ebc3aea2 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -83,14 +83,13 @@ static int create_link(struct config_item *parent_item,
83 ret = -ENOMEM; 83 ret = -ENOMEM;
84 sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL); 84 sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
85 if (sl) { 85 if (sl) {
86 sl->sl_target = config_item_get(item);
87 spin_lock(&configfs_dirent_lock); 86 spin_lock(&configfs_dirent_lock);
88 if (target_sd->s_type & CONFIGFS_USET_DROPPING) { 87 if (target_sd->s_type & CONFIGFS_USET_DROPPING) {
89 spin_unlock(&configfs_dirent_lock); 88 spin_unlock(&configfs_dirent_lock);
90 config_item_put(item);
91 kfree(sl); 89 kfree(sl);
92 return -ENOENT; 90 return -ENOENT;
93 } 91 }
92 sl->sl_target = config_item_get(item);
94 list_add(&sl->sl_list, &target_sd->s_links); 93 list_add(&sl->sl_list, &target_sd->s_links);
95 spin_unlock(&configfs_dirent_lock); 94 spin_unlock(&configfs_dirent_lock);
96 ret = configfs_create_link(sl, parent_item->ci_dentry, 95 ret = configfs_create_link(sl, parent_item->ci_dentry,
diff --git a/fs/coredump.c b/fs/coredump.c
index 5d15c4975ba1..2ce5ef429c48 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -720,7 +720,7 @@ void do_coredump(const siginfo_t *siginfo)
720 goto close_fail; 720 goto close_fail;
721 if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) 721 if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
722 goto close_fail; 722 goto close_fail;
723 if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) 723 if (do_truncate2(cprm.file->f_path.mnt, cprm.file->f_path.dentry, 0, 0, cprm.file))
724 goto close_fail; 724 goto close_fail;
725 } 725 }
726 726
@@ -810,3 +810,21 @@ int dump_align(struct coredump_params *cprm, int align)
810 return mod ? dump_skip(cprm, align - mod) : 1; 810 return mod ? dump_skip(cprm, align - mod) : 1;
811} 811}
812EXPORT_SYMBOL(dump_align); 812EXPORT_SYMBOL(dump_align);
813
814/*
815 * Ensures that file size is big enough to contain the current file
816 * postion. This prevents gdb from complaining about a truncated file
817 * if the last "write" to the file was dump_skip.
818 */
819void dump_truncate(struct coredump_params *cprm)
820{
821 struct file *file = cprm->file;
822 loff_t offset;
823
824 if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
825 offset = file->f_op->llseek(file, 0, SEEK_CUR);
826 if (i_size_read(file->f_mapping->host) < offset)
827 do_truncate(file->f_path.dentry, offset, 0, file);
828 }
829}
830EXPORT_SYMBOL(dump_truncate);
diff --git a/fs/dcache.c b/fs/dcache.c
index 7b8feb6d60c8..5bf7b4a188e9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -269,6 +269,33 @@ static inline int dname_external(const struct dentry *dentry)
269 return dentry->d_name.name != dentry->d_iname; 269 return dentry->d_name.name != dentry->d_iname;
270} 270}
271 271
272void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry)
273{
274 spin_lock(&dentry->d_lock);
275 if (unlikely(dname_external(dentry))) {
276 struct external_name *p = external_name(dentry);
277 atomic_inc(&p->u.count);
278 spin_unlock(&dentry->d_lock);
279 name->name = p->name;
280 } else {
281 memcpy(name->inline_name, dentry->d_iname, DNAME_INLINE_LEN);
282 spin_unlock(&dentry->d_lock);
283 name->name = name->inline_name;
284 }
285}
286EXPORT_SYMBOL(take_dentry_name_snapshot);
287
288void release_dentry_name_snapshot(struct name_snapshot *name)
289{
290 if (unlikely(name->name != name->inline_name)) {
291 struct external_name *p;
292 p = container_of(name->name, struct external_name, name[0]);
293 if (unlikely(atomic_dec_and_test(&p->u.count)))
294 kfree_rcu(p, u.head);
295 }
296}
297EXPORT_SYMBOL(release_dentry_name_snapshot);
298
272static inline void __d_set_inode_and_type(struct dentry *dentry, 299static inline void __d_set_inode_and_type(struct dentry *dentry,
273 struct inode *inode, 300 struct inode *inode,
274 unsigned type_flags) 301 unsigned type_flags)
@@ -1128,11 +1155,12 @@ void shrink_dcache_sb(struct super_block *sb)
1128 LIST_HEAD(dispose); 1155 LIST_HEAD(dispose);
1129 1156
1130 freed = list_lru_walk(&sb->s_dentry_lru, 1157 freed = list_lru_walk(&sb->s_dentry_lru,
1131 dentry_lru_isolate_shrink, &dispose, UINT_MAX); 1158 dentry_lru_isolate_shrink, &dispose, 1024);
1132 1159
1133 this_cpu_sub(nr_dentry_unused, freed); 1160 this_cpu_sub(nr_dentry_unused, freed);
1134 shrink_dentry_list(&dispose); 1161 shrink_dentry_list(&dispose);
1135 } while (freed > 0); 1162 cond_resched();
1163 } while (list_lru_count(&sb->s_dentry_lru) > 0);
1136} 1164}
1137EXPORT_SYMBOL(shrink_dcache_sb); 1165EXPORT_SYMBOL(shrink_dcache_sb);
1138 1166
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 0f5d05bf2131..e49ba072bd64 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -669,7 +669,7 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
669{ 669{
670 int error; 670 int error;
671 struct dentry *dentry = NULL, *trap; 671 struct dentry *dentry = NULL, *trap;
672 const char *old_name; 672 struct name_snapshot old_name;
673 673
674 trap = lock_rename(new_dir, old_dir); 674 trap = lock_rename(new_dir, old_dir);
675 /* Source or destination directories don't exist? */ 675 /* Source or destination directories don't exist? */
@@ -684,19 +684,19 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
684 if (IS_ERR(dentry) || dentry == trap || d_really_is_positive(dentry)) 684 if (IS_ERR(dentry) || dentry == trap || d_really_is_positive(dentry))
685 goto exit; 685 goto exit;
686 686
687 old_name = fsnotify_oldname_init(old_dentry->d_name.name); 687 take_dentry_name_snapshot(&old_name, old_dentry);
688 688
689 error = simple_rename(d_inode(old_dir), old_dentry, d_inode(new_dir), 689 error = simple_rename(d_inode(old_dir), old_dentry, d_inode(new_dir),
690 dentry); 690 dentry);
691 if (error) { 691 if (error) {
692 fsnotify_oldname_free(old_name); 692 release_dentry_name_snapshot(&old_name);
693 goto exit; 693 goto exit;
694 } 694 }
695 d_move(old_dentry, dentry); 695 d_move(old_dentry, dentry);
696 fsnotify_move(d_inode(old_dir), d_inode(new_dir), old_name, 696 fsnotify_move(d_inode(old_dir), d_inode(new_dir), old_name.name,
697 d_is_dir(old_dentry), 697 d_is_dir(old_dentry),
698 NULL, old_dentry); 698 NULL, old_dentry);
699 fsnotify_oldname_free(old_name); 699 release_dentry_name_snapshot(&old_name);
700 unlock_rename(new_dir, old_dir); 700 unlock_rename(new_dir, old_dir);
701 dput(dentry); 701 dput(dentry);
702 return old_dentry; 702 return old_dentry;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 01171d8a6ee9..c772fdf36cd9 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -575,7 +575,7 @@ static int dio_set_defer_completion(struct dio *dio)
575/* 575/*
576 * Call into the fs to map some more disk blocks. We record the current number 576 * Call into the fs to map some more disk blocks. We record the current number
577 * of available blocks at sdio->blocks_available. These are in units of the 577 * of available blocks at sdio->blocks_available. These are in units of the
578 * fs blocksize, (1 << inode->i_blkbits). 578 * fs blocksize, i_blocksize(inode).
579 * 579 *
580 * The fs is allowed to map lots of blocks at once. If it wants to do that, 580 * The fs is allowed to map lots of blocks at once. If it wants to do that,
581 * it uses the passed inode-relative block number as the file offset, as usual. 581 * it uses the passed inode-relative block number as the file offset, as usual.
diff --git a/fs/exec.c b/fs/exec.c
index 3a6de10d3891..dd3a59420506 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -206,7 +206,24 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
206 206
207 if (write) { 207 if (write) {
208 unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; 208 unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
209 struct rlimit *rlim; 209 unsigned long ptr_size, limit;
210
211 /*
212 * Since the stack will hold pointers to the strings, we
213 * must account for them as well.
214 *
215 * The size calculation is the entire vma while each arg page is
216 * built, so each time we get here it's calculating how far it
217 * is currently (rather than each call being just the newly
218 * added size from the arg page). As a result, we need to
219 * always add the entire size of the pointers, so that on the
220 * last call to get_arg_page() we'll actually have the entire
221 * correct size.
222 */
223 ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
224 if (ptr_size > ULONG_MAX - size)
225 goto fail;
226 size += ptr_size;
210 227
211 acct_arg_size(bprm, size / PAGE_SIZE); 228 acct_arg_size(bprm, size / PAGE_SIZE);
212 229
@@ -218,20 +235,24 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
218 return page; 235 return page;
219 236
220 /* 237 /*
221 * Limit to 1/4-th the stack size for the argv+env strings. 238 * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
239 * (whichever is smaller) for the argv+env strings.
222 * This ensures that: 240 * This ensures that:
223 * - the remaining binfmt code will not run out of stack space, 241 * - the remaining binfmt code will not run out of stack space,
224 * - the program will have a reasonable amount of stack left 242 * - the program will have a reasonable amount of stack left
225 * to work from. 243 * to work from.
226 */ 244 */
227 rlim = current->signal->rlim; 245 limit = _STK_LIM / 4 * 3;
228 if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) { 246 limit = min(limit, rlimit(RLIMIT_STACK) / 4);
229 put_page(page); 247 if (size > limit)
230 return NULL; 248 goto fail;
231 }
232 } 249 }
233 250
234 return page; 251 return page;
252
253fail:
254 put_page(page);
255 return NULL;
235} 256}
236 257
237static void put_arg_page(struct page *page) 258static void put_arg_page(struct page *page)
@@ -1132,7 +1153,7 @@ EXPORT_SYMBOL(flush_old_exec);
1132void would_dump(struct linux_binprm *bprm, struct file *file) 1153void would_dump(struct linux_binprm *bprm, struct file *file)
1133{ 1154{
1134 struct inode *inode = file_inode(file); 1155 struct inode *inode = file_inode(file);
1135 if (inode_permission(inode, MAY_READ) < 0) { 1156 if (inode_permission2(file->f_path.mnt, inode, MAY_READ) < 0) {
1136 struct user_namespace *old, *user_ns; 1157 struct user_namespace *old, *user_ns;
1137 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; 1158 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
1138 1159
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 4c69c94cafd8..f98ce7e60a0f 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -61,6 +61,8 @@ struct ext2_block_alloc_info {
61#define rsv_start rsv_window._rsv_start 61#define rsv_start rsv_window._rsv_start
62#define rsv_end rsv_window._rsv_end 62#define rsv_end rsv_window._rsv_end
63 63
64struct mb2_cache;
65
64/* 66/*
65 * second extended-fs super-block data in memory 67 * second extended-fs super-block data in memory
66 */ 68 */
@@ -111,6 +113,7 @@ struct ext2_sb_info {
111 * of the mount options. 113 * of the mount options.
112 */ 114 */
113 spinlock_t s_lock; 115 spinlock_t s_lock;
116 struct mb2_cache *s_mb_cache;
114}; 117};
115 118
116static inline spinlock_t * 119static inline spinlock_t *
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 748d35afc902..111a31761ffa 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -131,7 +131,10 @@ static void ext2_put_super (struct super_block * sb)
131 131
132 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); 132 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
133 133
134 ext2_xattr_put_super(sb); 134 if (sbi->s_mb_cache) {
135 ext2_xattr_destroy_cache(sbi->s_mb_cache);
136 sbi->s_mb_cache = NULL;
137 }
135 if (!(sb->s_flags & MS_RDONLY)) { 138 if (!(sb->s_flags & MS_RDONLY)) {
136 struct ext2_super_block *es = sbi->s_es; 139 struct ext2_super_block *es = sbi->s_es;
137 140
@@ -1104,6 +1107,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
1104 ext2_msg(sb, KERN_ERR, "error: insufficient memory"); 1107 ext2_msg(sb, KERN_ERR, "error: insufficient memory");
1105 goto failed_mount3; 1108 goto failed_mount3;
1106 } 1109 }
1110
1111#ifdef CONFIG_EXT2_FS_XATTR
1112 sbi->s_mb_cache = ext2_xattr_create_cache();
1113 if (!sbi->s_mb_cache) {
1114 ext2_msg(sb, KERN_ERR, "Failed to create an mb_cache");
1115 goto failed_mount3;
1116 }
1117#endif
1107 /* 1118 /*
1108 * set up enough so that it can read an inode 1119 * set up enough so that it can read an inode
1109 */ 1120 */
@@ -1149,6 +1160,8 @@ cantfind_ext2:
1149 sb->s_id); 1160 sb->s_id);
1150 goto failed_mount; 1161 goto failed_mount;
1151failed_mount3: 1162failed_mount3:
1163 if (sbi->s_mb_cache)
1164 ext2_xattr_destroy_cache(sbi->s_mb_cache);
1152 percpu_counter_destroy(&sbi->s_freeblocks_counter); 1165 percpu_counter_destroy(&sbi->s_freeblocks_counter);
1153 percpu_counter_destroy(&sbi->s_freeinodes_counter); 1166 percpu_counter_destroy(&sbi->s_freeinodes_counter);
1154 percpu_counter_destroy(&sbi->s_dirs_counter); 1167 percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -1555,20 +1568,17 @@ MODULE_ALIAS_FS("ext2");
1555 1568
1556static int __init init_ext2_fs(void) 1569static int __init init_ext2_fs(void)
1557{ 1570{
1558 int err = init_ext2_xattr(); 1571 int err;
1559 if (err) 1572
1560 return err;
1561 err = init_inodecache(); 1573 err = init_inodecache();
1562 if (err) 1574 if (err)
1563 goto out1; 1575 return err;
1564 err = register_filesystem(&ext2_fs_type); 1576 err = register_filesystem(&ext2_fs_type);
1565 if (err) 1577 if (err)
1566 goto out; 1578 goto out;
1567 return 0; 1579 return 0;
1568out: 1580out:
1569 destroy_inodecache(); 1581 destroy_inodecache();
1570out1:
1571 exit_ext2_xattr();
1572 return err; 1582 return err;
1573} 1583}
1574 1584
@@ -1576,7 +1586,6 @@ static void __exit exit_ext2_fs(void)
1576{ 1586{
1577 unregister_filesystem(&ext2_fs_type); 1587 unregister_filesystem(&ext2_fs_type);
1578 destroy_inodecache(); 1588 destroy_inodecache();
1579 exit_ext2_xattr();
1580} 1589}
1581 1590
1582MODULE_AUTHOR("Remy Card and others"); 1591MODULE_AUTHOR("Remy Card and others");
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index fa70848afa8f..24736c8b3d51 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -56,7 +56,7 @@
56#include <linux/buffer_head.h> 56#include <linux/buffer_head.h>
57#include <linux/init.h> 57#include <linux/init.h>
58#include <linux/slab.h> 58#include <linux/slab.h>
59#include <linux/mbcache.h> 59#include <linux/mbcache2.h>
60#include <linux/quotaops.h> 60#include <linux/quotaops.h>
61#include <linux/rwsem.h> 61#include <linux/rwsem.h>
62#include <linux/security.h> 62#include <linux/security.h>
@@ -92,14 +92,12 @@
92static int ext2_xattr_set2(struct inode *, struct buffer_head *, 92static int ext2_xattr_set2(struct inode *, struct buffer_head *,
93 struct ext2_xattr_header *); 93 struct ext2_xattr_header *);
94 94
95static int ext2_xattr_cache_insert(struct buffer_head *); 95static int ext2_xattr_cache_insert(struct mb2_cache *, struct buffer_head *);
96static struct buffer_head *ext2_xattr_cache_find(struct inode *, 96static struct buffer_head *ext2_xattr_cache_find(struct inode *,
97 struct ext2_xattr_header *); 97 struct ext2_xattr_header *);
98static void ext2_xattr_rehash(struct ext2_xattr_header *, 98static void ext2_xattr_rehash(struct ext2_xattr_header *,
99 struct ext2_xattr_entry *); 99 struct ext2_xattr_entry *);
100 100
101static struct mb_cache *ext2_xattr_cache;
102
103static const struct xattr_handler *ext2_xattr_handler_map[] = { 101static const struct xattr_handler *ext2_xattr_handler_map[] = {
104 [EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler, 102 [EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler,
105#ifdef CONFIG_EXT2_FS_POSIX_ACL 103#ifdef CONFIG_EXT2_FS_POSIX_ACL
@@ -154,6 +152,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
154 size_t name_len, size; 152 size_t name_len, size;
155 char *end; 153 char *end;
156 int error; 154 int error;
155 struct mb2_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
157 156
158 ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", 157 ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
159 name_index, name, buffer, (long)buffer_size); 158 name_index, name, buffer, (long)buffer_size);
@@ -198,7 +197,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
198 goto found; 197 goto found;
199 entry = next; 198 entry = next;
200 } 199 }
201 if (ext2_xattr_cache_insert(bh)) 200 if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
202 ea_idebug(inode, "cache insert failed"); 201 ea_idebug(inode, "cache insert failed");
203 error = -ENODATA; 202 error = -ENODATA;
204 goto cleanup; 203 goto cleanup;
@@ -211,7 +210,7 @@ found:
211 le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) 210 le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
212 goto bad_block; 211 goto bad_block;
213 212
214 if (ext2_xattr_cache_insert(bh)) 213 if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
215 ea_idebug(inode, "cache insert failed"); 214 ea_idebug(inode, "cache insert failed");
216 if (buffer) { 215 if (buffer) {
217 error = -ERANGE; 216 error = -ERANGE;
@@ -249,6 +248,7 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
249 char *end; 248 char *end;
250 size_t rest = buffer_size; 249 size_t rest = buffer_size;
251 int error; 250 int error;
251 struct mb2_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
252 252
253 ea_idebug(inode, "buffer=%p, buffer_size=%ld", 253 ea_idebug(inode, "buffer=%p, buffer_size=%ld",
254 buffer, (long)buffer_size); 254 buffer, (long)buffer_size);
@@ -283,7 +283,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
283 goto bad_block; 283 goto bad_block;
284 entry = next; 284 entry = next;
285 } 285 }
286 if (ext2_xattr_cache_insert(bh)) 286 if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
287 ea_idebug(inode, "cache insert failed"); 287 ea_idebug(inode, "cache insert failed");
288 288
289 /* list the attribute names */ 289 /* list the attribute names */
@@ -480,22 +480,23 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
480 /* Here we know that we can set the new attribute. */ 480 /* Here we know that we can set the new attribute. */
481 481
482 if (header) { 482 if (header) {
483 struct mb_cache_entry *ce;
484
485 /* assert(header == HDR(bh)); */ 483 /* assert(header == HDR(bh)); */
486 ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev,
487 bh->b_blocknr);
488 lock_buffer(bh); 484 lock_buffer(bh);
489 if (header->h_refcount == cpu_to_le32(1)) { 485 if (header->h_refcount == cpu_to_le32(1)) {
486 __u32 hash = le32_to_cpu(header->h_hash);
487
490 ea_bdebug(bh, "modifying in-place"); 488 ea_bdebug(bh, "modifying in-place");
491 if (ce) 489 /*
492 mb_cache_entry_free(ce); 490 * This must happen under buffer lock for
491 * ext2_xattr_set2() to reliably detect modified block
492 */
493 mb2_cache_entry_delete_block(EXT2_SB(sb)->s_mb_cache,
494 hash, bh->b_blocknr);
495
493 /* keep the buffer locked while modifying it. */ 496 /* keep the buffer locked while modifying it. */
494 } else { 497 } else {
495 int offset; 498 int offset;
496 499
497 if (ce)
498 mb_cache_entry_release(ce);
499 unlock_buffer(bh); 500 unlock_buffer(bh);
500 ea_bdebug(bh, "cloning"); 501 ea_bdebug(bh, "cloning");
501 header = kmalloc(bh->b_size, GFP_KERNEL); 502 header = kmalloc(bh->b_size, GFP_KERNEL);
@@ -623,6 +624,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
623 struct super_block *sb = inode->i_sb; 624 struct super_block *sb = inode->i_sb;
624 struct buffer_head *new_bh = NULL; 625 struct buffer_head *new_bh = NULL;
625 int error; 626 int error;
627 struct mb2_cache *ext2_mb_cache = EXT2_SB(sb)->s_mb_cache;
626 628
627 if (header) { 629 if (header) {
628 new_bh = ext2_xattr_cache_find(inode, header); 630 new_bh = ext2_xattr_cache_find(inode, header);
@@ -650,7 +652,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
650 don't need to change the reference count. */ 652 don't need to change the reference count. */
651 new_bh = old_bh; 653 new_bh = old_bh;
652 get_bh(new_bh); 654 get_bh(new_bh);
653 ext2_xattr_cache_insert(new_bh); 655 ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
654 } else { 656 } else {
655 /* We need to allocate a new block */ 657 /* We need to allocate a new block */
656 ext2_fsblk_t goal = ext2_group_first_block_no(sb, 658 ext2_fsblk_t goal = ext2_group_first_block_no(sb,
@@ -671,7 +673,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
671 memcpy(new_bh->b_data, header, new_bh->b_size); 673 memcpy(new_bh->b_data, header, new_bh->b_size);
672 set_buffer_uptodate(new_bh); 674 set_buffer_uptodate(new_bh);
673 unlock_buffer(new_bh); 675 unlock_buffer(new_bh);
674 ext2_xattr_cache_insert(new_bh); 676 ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
675 677
676 ext2_xattr_update_super_block(sb); 678 ext2_xattr_update_super_block(sb);
677 } 679 }
@@ -704,19 +706,21 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
704 706
705 error = 0; 707 error = 0;
706 if (old_bh && old_bh != new_bh) { 708 if (old_bh && old_bh != new_bh) {
707 struct mb_cache_entry *ce;
708
709 /* 709 /*
710 * If there was an old block and we are no longer using it, 710 * If there was an old block and we are no longer using it,
711 * release the old block. 711 * release the old block.
712 */ 712 */
713 ce = mb_cache_entry_get(ext2_xattr_cache, old_bh->b_bdev,
714 old_bh->b_blocknr);
715 lock_buffer(old_bh); 713 lock_buffer(old_bh);
716 if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { 714 if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
715 __u32 hash = le32_to_cpu(HDR(old_bh)->h_hash);
716
717 /*
718 * This must happen under buffer lock for
719 * ext2_xattr_set2() to reliably detect freed block
720 */
721 mb2_cache_entry_delete_block(ext2_mb_cache,
722 hash, old_bh->b_blocknr);
717 /* Free the old block. */ 723 /* Free the old block. */
718 if (ce)
719 mb_cache_entry_free(ce);
720 ea_bdebug(old_bh, "freeing"); 724 ea_bdebug(old_bh, "freeing");
721 ext2_free_blocks(inode, old_bh->b_blocknr, 1); 725 ext2_free_blocks(inode, old_bh->b_blocknr, 1);
722 mark_inode_dirty(inode); 726 mark_inode_dirty(inode);
@@ -727,8 +731,6 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
727 } else { 731 } else {
728 /* Decrement the refcount only. */ 732 /* Decrement the refcount only. */
729 le32_add_cpu(&HDR(old_bh)->h_refcount, -1); 733 le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
730 if (ce)
731 mb_cache_entry_release(ce);
732 dquot_free_block_nodirty(inode, 1); 734 dquot_free_block_nodirty(inode, 1);
733 mark_inode_dirty(inode); 735 mark_inode_dirty(inode);
734 mark_buffer_dirty(old_bh); 736 mark_buffer_dirty(old_bh);
@@ -754,7 +756,6 @@ void
754ext2_xattr_delete_inode(struct inode *inode) 756ext2_xattr_delete_inode(struct inode *inode)
755{ 757{
756 struct buffer_head *bh = NULL; 758 struct buffer_head *bh = NULL;
757 struct mb_cache_entry *ce;
758 759
759 down_write(&EXT2_I(inode)->xattr_sem); 760 down_write(&EXT2_I(inode)->xattr_sem);
760 if (!EXT2_I(inode)->i_file_acl) 761 if (!EXT2_I(inode)->i_file_acl)
@@ -774,19 +775,22 @@ ext2_xattr_delete_inode(struct inode *inode)
774 EXT2_I(inode)->i_file_acl); 775 EXT2_I(inode)->i_file_acl);
775 goto cleanup; 776 goto cleanup;
776 } 777 }
777 ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr);
778 lock_buffer(bh); 778 lock_buffer(bh);
779 if (HDR(bh)->h_refcount == cpu_to_le32(1)) { 779 if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
780 if (ce) 780 __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
781 mb_cache_entry_free(ce); 781
782 /*
783 * This must happen under buffer lock for ext2_xattr_set2() to
784 * reliably detect freed block
785 */
786 mb2_cache_entry_delete_block(EXT2_SB(inode->i_sb)->s_mb_cache,
787 hash, bh->b_blocknr);
782 ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); 788 ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
783 get_bh(bh); 789 get_bh(bh);
784 bforget(bh); 790 bforget(bh);
785 unlock_buffer(bh); 791 unlock_buffer(bh);
786 } else { 792 } else {
787 le32_add_cpu(&HDR(bh)->h_refcount, -1); 793 le32_add_cpu(&HDR(bh)->h_refcount, -1);
788 if (ce)
789 mb_cache_entry_release(ce);
790 ea_bdebug(bh, "refcount now=%d", 794 ea_bdebug(bh, "refcount now=%d",
791 le32_to_cpu(HDR(bh)->h_refcount)); 795 le32_to_cpu(HDR(bh)->h_refcount));
792 unlock_buffer(bh); 796 unlock_buffer(bh);
@@ -803,18 +807,6 @@ cleanup:
803} 807}
804 808
805/* 809/*
806 * ext2_xattr_put_super()
807 *
808 * This is called when a file system is unmounted.
809 */
810void
811ext2_xattr_put_super(struct super_block *sb)
812{
813 mb_cache_shrink(sb->s_bdev);
814}
815
816
817/*
818 * ext2_xattr_cache_insert() 810 * ext2_xattr_cache_insert()
819 * 811 *
820 * Create a new entry in the extended attribute cache, and insert 812 * Create a new entry in the extended attribute cache, and insert
@@ -823,28 +815,20 @@ ext2_xattr_put_super(struct super_block *sb)
823 * Returns 0, or a negative error number on failure. 815 * Returns 0, or a negative error number on failure.
824 */ 816 */
825static int 817static int
826ext2_xattr_cache_insert(struct buffer_head *bh) 818ext2_xattr_cache_insert(struct mb2_cache *cache, struct buffer_head *bh)
827{ 819{
828 __u32 hash = le32_to_cpu(HDR(bh)->h_hash); 820 __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
829 struct mb_cache_entry *ce;
830 int error; 821 int error;
831 822
832 ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS); 823 error = mb2_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr);
833 if (!ce)
834 return -ENOMEM;
835 error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
836 if (error) { 824 if (error) {
837 mb_cache_entry_free(ce);
838 if (error == -EBUSY) { 825 if (error == -EBUSY) {
839 ea_bdebug(bh, "already in cache (%d cache entries)", 826 ea_bdebug(bh, "already in cache (%d cache entries)",
840 atomic_read(&ext2_xattr_cache->c_entry_count)); 827 atomic_read(&ext2_xattr_cache->c_entry_count));
841 error = 0; 828 error = 0;
842 } 829 }
843 } else { 830 } else
844 ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, 831 ea_bdebug(bh, "inserting [%x]", (int)hash);
845 atomic_read(&ext2_xattr_cache->c_entry_count));
846 mb_cache_entry_release(ce);
847 }
848 return error; 832 return error;
849} 833}
850 834
@@ -900,23 +884,17 @@ static struct buffer_head *
900ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) 884ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
901{ 885{
902 __u32 hash = le32_to_cpu(header->h_hash); 886 __u32 hash = le32_to_cpu(header->h_hash);
903 struct mb_cache_entry *ce; 887 struct mb2_cache_entry *ce;
888 struct mb2_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
904 889
905 if (!header->h_hash) 890 if (!header->h_hash)
906 return NULL; /* never share */ 891 return NULL; /* never share */
907 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); 892 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
908again: 893again:
909 ce = mb_cache_entry_find_first(ext2_xattr_cache, inode->i_sb->s_bdev, 894 ce = mb2_cache_entry_find_first(ext2_mb_cache, hash);
910 hash);
911 while (ce) { 895 while (ce) {
912 struct buffer_head *bh; 896 struct buffer_head *bh;
913 897
914 if (IS_ERR(ce)) {
915 if (PTR_ERR(ce) == -EAGAIN)
916 goto again;
917 break;
918 }
919
920 bh = sb_bread(inode->i_sb, ce->e_block); 898 bh = sb_bread(inode->i_sb, ce->e_block);
921 if (!bh) { 899 if (!bh) {
922 ext2_error(inode->i_sb, "ext2_xattr_cache_find", 900 ext2_error(inode->i_sb, "ext2_xattr_cache_find",
@@ -924,7 +902,21 @@ again:
924 inode->i_ino, (unsigned long) ce->e_block); 902 inode->i_ino, (unsigned long) ce->e_block);
925 } else { 903 } else {
926 lock_buffer(bh); 904 lock_buffer(bh);
927 if (le32_to_cpu(HDR(bh)->h_refcount) > 905 /*
906 * We have to be careful about races with freeing or
907 * rehashing of xattr block. Once we hold buffer lock
908 * xattr block's state is stable so we can check
909 * whether the block got freed / rehashed or not.
910 * Since we unhash mbcache entry under buffer lock when
911 * freeing / rehashing xattr block, checking whether
912 * entry is still hashed is reliable.
913 */
914 if (hlist_bl_unhashed(&ce->e_hash_list)) {
915 mb2_cache_entry_put(ext2_mb_cache, ce);
916 unlock_buffer(bh);
917 brelse(bh);
918 goto again;
919 } else if (le32_to_cpu(HDR(bh)->h_refcount) >
928 EXT2_XATTR_REFCOUNT_MAX) { 920 EXT2_XATTR_REFCOUNT_MAX) {
929 ea_idebug(inode, "block %ld refcount %d>%d", 921 ea_idebug(inode, "block %ld refcount %d>%d",
930 (unsigned long) ce->e_block, 922 (unsigned long) ce->e_block,
@@ -933,13 +925,14 @@ again:
933 } else if (!ext2_xattr_cmp(header, HDR(bh))) { 925 } else if (!ext2_xattr_cmp(header, HDR(bh))) {
934 ea_bdebug(bh, "b_count=%d", 926 ea_bdebug(bh, "b_count=%d",
935 atomic_read(&(bh->b_count))); 927 atomic_read(&(bh->b_count)));
936 mb_cache_entry_release(ce); 928 mb2_cache_entry_touch(ext2_mb_cache, ce);
929 mb2_cache_entry_put(ext2_mb_cache, ce);
937 return bh; 930 return bh;
938 } 931 }
939 unlock_buffer(bh); 932 unlock_buffer(bh);
940 brelse(bh); 933 brelse(bh);
941 } 934 }
942 ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash); 935 ce = mb2_cache_entry_find_next(ext2_mb_cache, ce);
943 } 936 }
944 return NULL; 937 return NULL;
945} 938}
@@ -1012,17 +1005,15 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header,
1012 1005
1013#undef BLOCK_HASH_SHIFT 1006#undef BLOCK_HASH_SHIFT
1014 1007
1015int __init 1008#define HASH_BUCKET_BITS 10
1016init_ext2_xattr(void) 1009
1010struct mb2_cache *ext2_xattr_create_cache(void)
1017{ 1011{
1018 ext2_xattr_cache = mb_cache_create("ext2_xattr", 6); 1012 return mb2_cache_create(HASH_BUCKET_BITS);
1019 if (!ext2_xattr_cache)
1020 return -ENOMEM;
1021 return 0;
1022} 1013}
1023 1014
1024void 1015void ext2_xattr_destroy_cache(struct mb2_cache *cache)
1025exit_ext2_xattr(void)
1026{ 1016{
1027 mb_cache_destroy(ext2_xattr_cache); 1017 if (cache)
1018 mb2_cache_destroy(cache);
1028} 1019}
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
index 60edf298644e..6ea38aa9563a 100644
--- a/fs/ext2/xattr.h
+++ b/fs/ext2/xattr.h
@@ -53,6 +53,8 @@ struct ext2_xattr_entry {
53#define EXT2_XATTR_SIZE(size) \ 53#define EXT2_XATTR_SIZE(size) \
54 (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND) 54 (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND)
55 55
56struct mb2_cache;
57
56# ifdef CONFIG_EXT2_FS_XATTR 58# ifdef CONFIG_EXT2_FS_XATTR
57 59
58extern const struct xattr_handler ext2_xattr_user_handler; 60extern const struct xattr_handler ext2_xattr_user_handler;
@@ -65,10 +67,9 @@ extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t);
65extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int); 67extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
66 68
67extern void ext2_xattr_delete_inode(struct inode *); 69extern void ext2_xattr_delete_inode(struct inode *);
68extern void ext2_xattr_put_super(struct super_block *);
69 70
70extern int init_ext2_xattr(void); 71extern struct mb2_cache *ext2_xattr_create_cache(void);
71extern void exit_ext2_xattr(void); 72extern void ext2_xattr_destroy_cache(struct mb2_cache *cache);
72 73
73extern const struct xattr_handler *ext2_xattr_handlers[]; 74extern const struct xattr_handler *ext2_xattr_handlers[];
74 75
@@ -93,19 +94,7 @@ ext2_xattr_delete_inode(struct inode *inode)
93{ 94{
94} 95}
95 96
96static inline void 97static inline void ext2_xattr_destroy_cache(struct mb2_cache *cache)
97ext2_xattr_put_super(struct super_block *sb)
98{
99}
100
101static inline int
102init_ext2_xattr(void)
103{
104 return 0;
105}
106
107static inline void
108exit_ext2_xattr(void)
109{ 98{
110} 99}
111 100
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index b46e9fc64196..3c8293215603 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -106,6 +106,7 @@ config EXT4_ENCRYPTION
106 select CRYPTO_ECB 106 select CRYPTO_ECB
107 select CRYPTO_XTS 107 select CRYPTO_XTS
108 select CRYPTO_CTS 108 select CRYPTO_CTS
109 select CRYPTO_HEH
109 select CRYPTO_CTR 110 select CRYPTO_CTR
110 select CRYPTO_SHA256 111 select CRYPTO_SHA256
111 select KEYS 112 select KEYS
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index 1a0835073663..f240cef8b326 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -34,6 +34,7 @@
34#include <linux/random.h> 34#include <linux/random.h>
35#include <linux/scatterlist.h> 35#include <linux/scatterlist.h>
36#include <linux/spinlock_types.h> 36#include <linux/spinlock_types.h>
37#include <linux/namei.h>
37 38
38#include "ext4_extents.h" 39#include "ext4_extents.h"
39#include "xattr.h" 40#include "xattr.h"
@@ -93,7 +94,8 @@ void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx)
93 * Return: An allocated and initialized encryption context on success; error 94 * Return: An allocated and initialized encryption context on success; error
94 * value or NULL otherwise. 95 * value or NULL otherwise.
95 */ 96 */
96struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode) 97struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode,
98 gfp_t gfp_flags)
97{ 99{
98 struct ext4_crypto_ctx *ctx = NULL; 100 struct ext4_crypto_ctx *ctx = NULL;
99 int res = 0; 101 int res = 0;
@@ -120,7 +122,7 @@ struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode)
120 list_del(&ctx->free_list); 122 list_del(&ctx->free_list);
121 spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags); 123 spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags);
122 if (!ctx) { 124 if (!ctx) {
123 ctx = kmem_cache_zalloc(ext4_crypto_ctx_cachep, GFP_NOFS); 125 ctx = kmem_cache_zalloc(ext4_crypto_ctx_cachep, gfp_flags);
124 if (!ctx) { 126 if (!ctx) {
125 res = -ENOMEM; 127 res = -ENOMEM;
126 goto out; 128 goto out;
@@ -257,7 +259,8 @@ static int ext4_page_crypto(struct inode *inode,
257 ext4_direction_t rw, 259 ext4_direction_t rw,
258 pgoff_t index, 260 pgoff_t index,
259 struct page *src_page, 261 struct page *src_page,
260 struct page *dest_page) 262 struct page *dest_page,
263 gfp_t gfp_flags)
261 264
262{ 265{
263 u8 xts_tweak[EXT4_XTS_TWEAK_SIZE]; 266 u8 xts_tweak[EXT4_XTS_TWEAK_SIZE];
@@ -268,7 +271,7 @@ static int ext4_page_crypto(struct inode *inode,
268 struct crypto_ablkcipher *tfm = ci->ci_ctfm; 271 struct crypto_ablkcipher *tfm = ci->ci_ctfm;
269 int res = 0; 272 int res = 0;
270 273
271 req = ablkcipher_request_alloc(tfm, GFP_NOFS); 274 req = ablkcipher_request_alloc(tfm, gfp_flags);
272 if (!req) { 275 if (!req) {
273 printk_ratelimited(KERN_ERR 276 printk_ratelimited(KERN_ERR
274 "%s: crypto_request_alloc() failed\n", 277 "%s: crypto_request_alloc() failed\n",
@@ -309,9 +312,10 @@ static int ext4_page_crypto(struct inode *inode,
309 return 0; 312 return 0;
310} 313}
311 314
312static struct page *alloc_bounce_page(struct ext4_crypto_ctx *ctx) 315static struct page *alloc_bounce_page(struct ext4_crypto_ctx *ctx,
316 gfp_t gfp_flags)
313{ 317{
314 ctx->w.bounce_page = mempool_alloc(ext4_bounce_page_pool, GFP_NOWAIT); 318 ctx->w.bounce_page = mempool_alloc(ext4_bounce_page_pool, gfp_flags);
315 if (ctx->w.bounce_page == NULL) 319 if (ctx->w.bounce_page == NULL)
316 return ERR_PTR(-ENOMEM); 320 return ERR_PTR(-ENOMEM);
317 ctx->flags |= EXT4_WRITE_PATH_FL; 321 ctx->flags |= EXT4_WRITE_PATH_FL;
@@ -334,7 +338,8 @@ static struct page *alloc_bounce_page(struct ext4_crypto_ctx *ctx)
334 * error value or NULL. 338 * error value or NULL.
335 */ 339 */
336struct page *ext4_encrypt(struct inode *inode, 340struct page *ext4_encrypt(struct inode *inode,
337 struct page *plaintext_page) 341 struct page *plaintext_page,
342 gfp_t gfp_flags)
338{ 343{
339 struct ext4_crypto_ctx *ctx; 344 struct ext4_crypto_ctx *ctx;
340 struct page *ciphertext_page = NULL; 345 struct page *ciphertext_page = NULL;
@@ -342,17 +347,17 @@ struct page *ext4_encrypt(struct inode *inode,
342 347
343 BUG_ON(!PageLocked(plaintext_page)); 348 BUG_ON(!PageLocked(plaintext_page));
344 349
345 ctx = ext4_get_crypto_ctx(inode); 350 ctx = ext4_get_crypto_ctx(inode, gfp_flags);
346 if (IS_ERR(ctx)) 351 if (IS_ERR(ctx))
347 return (struct page *) ctx; 352 return (struct page *) ctx;
348 353
349 /* The encryption operation will require a bounce page. */ 354 /* The encryption operation will require a bounce page. */
350 ciphertext_page = alloc_bounce_page(ctx); 355 ciphertext_page = alloc_bounce_page(ctx, gfp_flags);
351 if (IS_ERR(ciphertext_page)) 356 if (IS_ERR(ciphertext_page))
352 goto errout; 357 goto errout;
353 ctx->w.control_page = plaintext_page; 358 ctx->w.control_page = plaintext_page;
354 err = ext4_page_crypto(inode, EXT4_ENCRYPT, plaintext_page->index, 359 err = ext4_page_crypto(inode, EXT4_ENCRYPT, plaintext_page->index,
355 plaintext_page, ciphertext_page); 360 plaintext_page, ciphertext_page, gfp_flags);
356 if (err) { 361 if (err) {
357 ciphertext_page = ERR_PTR(err); 362 ciphertext_page = ERR_PTR(err);
358 errout: 363 errout:
@@ -380,8 +385,8 @@ int ext4_decrypt(struct page *page)
380{ 385{
381 BUG_ON(!PageLocked(page)); 386 BUG_ON(!PageLocked(page));
382 387
383 return ext4_page_crypto(page->mapping->host, 388 return ext4_page_crypto(page->mapping->host, EXT4_DECRYPT,
384 EXT4_DECRYPT, page->index, page, page); 389 page->index, page, page, GFP_NOFS);
385} 390}
386 391
387int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) 392int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
@@ -402,11 +407,11 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
402 407
403 BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE); 408 BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE);
404 409
405 ctx = ext4_get_crypto_ctx(inode); 410 ctx = ext4_get_crypto_ctx(inode, GFP_NOFS);
406 if (IS_ERR(ctx)) 411 if (IS_ERR(ctx))
407 return PTR_ERR(ctx); 412 return PTR_ERR(ctx);
408 413
409 ciphertext_page = alloc_bounce_page(ctx); 414 ciphertext_page = alloc_bounce_page(ctx, GFP_NOWAIT);
410 if (IS_ERR(ciphertext_page)) { 415 if (IS_ERR(ciphertext_page)) {
411 err = PTR_ERR(ciphertext_page); 416 err = PTR_ERR(ciphertext_page);
412 goto errout; 417 goto errout;
@@ -414,11 +419,12 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
414 419
415 while (len--) { 420 while (len--) {
416 err = ext4_page_crypto(inode, EXT4_ENCRYPT, lblk, 421 err = ext4_page_crypto(inode, EXT4_ENCRYPT, lblk,
417 ZERO_PAGE(0), ciphertext_page); 422 ZERO_PAGE(0), ciphertext_page,
423 GFP_NOFS);
418 if (err) 424 if (err)
419 goto errout; 425 goto errout;
420 426
421 bio = bio_alloc(GFP_KERNEL, 1); 427 bio = bio_alloc(GFP_NOWAIT, 1);
422 if (!bio) { 428 if (!bio) {
423 err = -ENOMEM; 429 err = -ENOMEM;
424 goto errout; 430 goto errout;
@@ -469,3 +475,61 @@ uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size)
469 return size; 475 return size;
470 return 0; 476 return 0;
471} 477}
478
479/*
480 * Validate dentries for encrypted directories to make sure we aren't
481 * potentially caching stale data after a key has been added or
482 * removed.
483 */
484static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags)
485{
486 struct dentry *dir;
487 struct ext4_crypt_info *ci;
488 int dir_has_key, cached_with_key;
489
490 if (flags & LOOKUP_RCU)
491 return -ECHILD;
492
493 dir = dget_parent(dentry);
494 if (!ext4_encrypted_inode(d_inode(dir))) {
495 dput(dir);
496 return 0;
497 }
498 ci = EXT4_I(d_inode(dir))->i_crypt_info;
499
500 /* this should eventually be an flag in d_flags */
501 cached_with_key = dentry->d_fsdata != NULL;
502 dir_has_key = (ci != NULL);
503 dput(dir);
504
505 /*
506 * If the dentry was cached without the key, and it is a
507 * negative dentry, it might be a valid name. We can't check
508 * if the key has since been made available due to locking
509 * reasons, so we fail the validation so ext4_lookup() can do
510 * this check.
511 *
512 * We also fail the validation if the dentry was created with
513 * the key present, but we no longer have the key, or vice versa.
514 */
515 if ((!cached_with_key && d_is_negative(dentry)) ||
516 (!cached_with_key && dir_has_key) ||
517 (cached_with_key && !dir_has_key)) {
518#if 0 /* Revalidation debug */
519 char buf[80];
520 char *cp = simple_dname(dentry, buf, sizeof(buf));
521
522 if (IS_ERR(cp))
523 cp = (char *) "???";
524 pr_err("revalidate: %s %p %d %d %d\n", cp, dentry->d_fsdata,
525 cached_with_key, d_is_negative(dentry),
526 dir_has_key);
527#endif
528 return 0;
529 }
530 return 1;
531}
532
533const struct dentry_operations ext4_encrypted_d_ops = {
534 .d_revalidate = ext4_d_revalidate,
535};
diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c
index 2fbef8a14760..026716bdbbfc 100644
--- a/fs/ext4/crypto_fname.c
+++ b/fs/ext4/crypto_fname.c
@@ -44,7 +44,8 @@ static void ext4_dir_crypt_complete(struct crypto_async_request *req, int res)
44 44
45bool ext4_valid_filenames_enc_mode(uint32_t mode) 45bool ext4_valid_filenames_enc_mode(uint32_t mode)
46{ 46{
47 return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS); 47 return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS ||
48 mode == EXT4_ENCRYPTION_MODE_AES_256_HEH);
48} 49}
49 50
50static unsigned max_name_len(struct inode *inode) 51static unsigned max_name_len(struct inode *inode)
@@ -343,7 +344,7 @@ int _ext4_fname_disk_to_usr(struct inode *inode,
343 memcpy(buf+4, &hinfo->minor_hash, 4); 344 memcpy(buf+4, &hinfo->minor_hash, 4);
344 } else 345 } else
345 memset(buf, 0, 8); 346 memset(buf, 0, 8);
346 memcpy(buf + 8, iname->name + iname->len - 16, 16); 347 memcpy(buf + 8, iname->name + ((iname->len - 17) & ~15), 16);
347 oname->name[0] = '_'; 348 oname->name[0] = '_';
348 ret = digest_encode(buf, 24, oname->name+1); 349 ret = digest_encode(buf, 24, oname->name+1);
349 oname->len = ret + 1; 350 oname->len = ret + 1;
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
index 9a16d1e75a49..22096e31a720 100644
--- a/fs/ext4/crypto_key.c
+++ b/fs/ext4/crypto_key.c
@@ -29,16 +29,16 @@ static void derive_crypt_complete(struct crypto_async_request *req, int rc)
29} 29}
30 30
31/** 31/**
32 * ext4_derive_key_aes() - Derive a key using AES-128-ECB 32 * ext4_derive_key_v1() - Derive a key using AES-128-ECB
33 * @deriving_key: Encryption key used for derivation. 33 * @deriving_key: Encryption key used for derivation.
34 * @source_key: Source key to which to apply derivation. 34 * @source_key: Source key to which to apply derivation.
35 * @derived_key: Derived key. 35 * @derived_key: Derived key.
36 * 36 *
37 * Return: Zero on success; non-zero otherwise. 37 * Return: 0 on success, -errno on failure
38 */ 38 */
39static int ext4_derive_key_aes(char deriving_key[EXT4_AES_128_ECB_KEY_SIZE], 39static int ext4_derive_key_v1(const char deriving_key[EXT4_AES_128_ECB_KEY_SIZE],
40 char source_key[EXT4_AES_256_XTS_KEY_SIZE], 40 const char source_key[EXT4_AES_256_XTS_KEY_SIZE],
41 char derived_key[EXT4_AES_256_XTS_KEY_SIZE]) 41 char derived_key[EXT4_AES_256_XTS_KEY_SIZE])
42{ 42{
43 int res = 0; 43 int res = 0;
44 struct ablkcipher_request *req = NULL; 44 struct ablkcipher_request *req = NULL;
@@ -83,13 +83,96 @@ out:
83 return res; 83 return res;
84} 84}
85 85
86/**
87 * ext4_derive_key_v2() - Derive a key non-reversibly
88 * @nonce: the nonce associated with the file
89 * @master_key: the master key referenced by the file
90 * @derived_key: (output) the resulting derived key
91 *
92 * This function computes the following:
93 * derived_key[0:127] = AES-256-ENCRYPT(master_key[0:255], nonce)
94 * derived_key[128:255] = AES-256-ENCRYPT(master_key[0:255], nonce ^ 0x01)
95 * derived_key[256:383] = AES-256-ENCRYPT(master_key[256:511], nonce)
96 * derived_key[384:511] = AES-256-ENCRYPT(master_key[256:511], nonce ^ 0x01)
97 *
98 * 'nonce ^ 0x01' denotes flipping the low order bit of the last byte.
99 *
100 * Unlike the v1 algorithm, the v2 algorithm is "non-reversible", meaning that
101 * compromising a derived key does not also compromise the master key.
102 *
103 * Return: 0 on success, -errno on failure
104 */
105static int ext4_derive_key_v2(const char nonce[EXT4_KEY_DERIVATION_NONCE_SIZE],
106 const char master_key[EXT4_MAX_KEY_SIZE],
107 char derived_key[EXT4_MAX_KEY_SIZE])
108{
109 const int noncelen = EXT4_KEY_DERIVATION_NONCE_SIZE;
110 struct crypto_cipher *tfm;
111 int err;
112 int i;
113
114 /*
115 * Since we only use each transform for a small number of encryptions,
116 * requesting just "aes" turns out to be significantly faster than
117 * "ecb(aes)", by about a factor of two.
118 */
119 tfm = crypto_alloc_cipher("aes", 0, 0);
120 if (IS_ERR(tfm))
121 return PTR_ERR(tfm);
122
123 BUILD_BUG_ON(4 * EXT4_KEY_DERIVATION_NONCE_SIZE != EXT4_MAX_KEY_SIZE);
124 BUILD_BUG_ON(2 * EXT4_AES_256_ECB_KEY_SIZE != EXT4_MAX_KEY_SIZE);
125 for (i = 0; i < 2; i++) {
126 memcpy(derived_key, nonce, noncelen);
127 memcpy(derived_key + noncelen, nonce, noncelen);
128 derived_key[2 * noncelen - 1] ^= 0x01;
129 err = crypto_cipher_setkey(tfm, master_key,
130 EXT4_AES_256_ECB_KEY_SIZE);
131 if (err)
132 break;
133 crypto_cipher_encrypt_one(tfm, derived_key, derived_key);
134 crypto_cipher_encrypt_one(tfm, derived_key + noncelen,
135 derived_key + noncelen);
136 master_key += EXT4_AES_256_ECB_KEY_SIZE;
137 derived_key += 2 * noncelen;
138 }
139 crypto_free_cipher(tfm);
140 return err;
141}
142
143/**
144 * ext4_derive_key() - Derive a per-file key from a nonce and master key
145 * @ctx: the encryption context associated with the file
146 * @master_key: the master key referenced by the file
147 * @derived_key: (output) the resulting derived key
148 *
149 * Return: 0 on success, -errno on failure
150 */
151static int ext4_derive_key(const struct ext4_encryption_context *ctx,
152 const char master_key[EXT4_MAX_KEY_SIZE],
153 char derived_key[EXT4_MAX_KEY_SIZE])
154{
155 BUILD_BUG_ON(EXT4_AES_128_ECB_KEY_SIZE != EXT4_KEY_DERIVATION_NONCE_SIZE);
156 BUILD_BUG_ON(EXT4_AES_256_XTS_KEY_SIZE != EXT4_MAX_KEY_SIZE);
157
158 /*
159 * Although the key derivation algorithm is logically independent of the
160 * choice of encryption modes, in this kernel it is bundled with HEH
161 * encryption of filenames, which is another crypto improvement that
162 * requires an on-disk format change and requires userspace to specify
163 * different encryption policies.
164 */
165 if (ctx->filenames_encryption_mode == EXT4_ENCRYPTION_MODE_AES_256_HEH)
166 return ext4_derive_key_v2(ctx->nonce, master_key, derived_key);
167 else
168 return ext4_derive_key_v1(ctx->nonce, master_key, derived_key);
169}
170
86void ext4_free_crypt_info(struct ext4_crypt_info *ci) 171void ext4_free_crypt_info(struct ext4_crypt_info *ci)
87{ 172{
88 if (!ci) 173 if (!ci)
89 return; 174 return;
90 175
91 if (ci->ci_keyring_key)
92 key_put(ci->ci_keyring_key);
93 crypto_free_ablkcipher(ci->ci_ctfm); 176 crypto_free_ablkcipher(ci->ci_ctfm);
94 kmem_cache_free(ext4_crypt_info_cachep, ci); 177 kmem_cache_free(ext4_crypt_info_cachep, ci);
95} 178}
@@ -111,7 +194,7 @@ void ext4_free_encryption_info(struct inode *inode,
111 ext4_free_crypt_info(ci); 194 ext4_free_crypt_info(ci);
112} 195}
113 196
114int _ext4_get_encryption_info(struct inode *inode) 197int ext4_get_encryption_info(struct inode *inode)
115{ 198{
116 struct ext4_inode_info *ei = EXT4_I(inode); 199 struct ext4_inode_info *ei = EXT4_I(inode);
117 struct ext4_crypt_info *crypt_info; 200 struct ext4_crypt_info *crypt_info;
@@ -128,22 +211,15 @@ int _ext4_get_encryption_info(struct inode *inode)
128 char mode; 211 char mode;
129 int res; 212 int res;
130 213
214 if (ei->i_crypt_info)
215 return 0;
216
131 if (!ext4_read_workqueue) { 217 if (!ext4_read_workqueue) {
132 res = ext4_init_crypto(); 218 res = ext4_init_crypto();
133 if (res) 219 if (res)
134 return res; 220 return res;
135 } 221 }
136 222
137retry:
138 crypt_info = ACCESS_ONCE(ei->i_crypt_info);
139 if (crypt_info) {
140 if (!crypt_info->ci_keyring_key ||
141 key_validate(crypt_info->ci_keyring_key) == 0)
142 return 0;
143 ext4_free_encryption_info(inode, crypt_info);
144 goto retry;
145 }
146
147 res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, 223 res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
148 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, 224 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
149 &ctx, sizeof(ctx)); 225 &ctx, sizeof(ctx));
@@ -166,7 +242,6 @@ retry:
166 crypt_info->ci_data_mode = ctx.contents_encryption_mode; 242 crypt_info->ci_data_mode = ctx.contents_encryption_mode;
167 crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; 243 crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
168 crypt_info->ci_ctfm = NULL; 244 crypt_info->ci_ctfm = NULL;
169 crypt_info->ci_keyring_key = NULL;
170 memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, 245 memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
171 sizeof(crypt_info->ci_master_key)); 246 sizeof(crypt_info->ci_master_key));
172 if (S_ISREG(inode->i_mode)) 247 if (S_ISREG(inode->i_mode))
@@ -182,6 +257,9 @@ retry:
182 case EXT4_ENCRYPTION_MODE_AES_256_CTS: 257 case EXT4_ENCRYPTION_MODE_AES_256_CTS:
183 cipher_str = "cts(cbc(aes))"; 258 cipher_str = "cts(cbc(aes))";
184 break; 259 break;
260 case EXT4_ENCRYPTION_MODE_AES_256_HEH:
261 cipher_str = "heh(aes)";
262 break;
185 default: 263 default:
186 printk_once(KERN_WARNING 264 printk_once(KERN_WARNING
187 "ext4: unsupported key mode %d (ino %u)\n", 265 "ext4: unsupported key mode %d (ino %u)\n",
@@ -206,7 +284,6 @@ retry:
206 keyring_key = NULL; 284 keyring_key = NULL;
207 goto out; 285 goto out;
208 } 286 }
209 crypt_info->ci_keyring_key = keyring_key;
210 if (keyring_key->type != &key_type_logon) { 287 if (keyring_key->type != &key_type_logon) {
211 printk_once(KERN_WARNING 288 printk_once(KERN_WARNING
212 "ext4: key type must be logon\n"); 289 "ext4: key type must be logon\n");
@@ -231,8 +308,7 @@ retry:
231 up_read(&keyring_key->sem); 308 up_read(&keyring_key->sem);
232 goto out; 309 goto out;
233 } 310 }
234 res = ext4_derive_key_aes(ctx.nonce, master_key->raw, 311 res = ext4_derive_key(&ctx, master_key->raw, raw_key);
235 raw_key);
236 up_read(&keyring_key->sem); 312 up_read(&keyring_key->sem);
237 if (res) 313 if (res)
238 goto out; 314 goto out;
@@ -253,16 +329,13 @@ got_key:
253 ext4_encryption_key_size(mode)); 329 ext4_encryption_key_size(mode));
254 if (res) 330 if (res)
255 goto out; 331 goto out;
256 memzero_explicit(raw_key, sizeof(raw_key));
257 if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) != NULL) {
258 ext4_free_crypt_info(crypt_info);
259 goto retry;
260 }
261 return 0;
262 332
333 if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) == NULL)
334 crypt_info = NULL;
263out: 335out:
264 if (res == -ENOKEY) 336 if (res == -ENOKEY)
265 res = 0; 337 res = 0;
338 key_put(keyring_key);
266 ext4_free_crypt_info(crypt_info); 339 ext4_free_crypt_info(crypt_info);
267 memzero_explicit(raw_key, sizeof(raw_key)); 340 memzero_explicit(raw_key, sizeof(raw_key));
268 return res; 341 return res;
diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c
index 8a9feb341f31..e4f4fc4e56ab 100644
--- a/fs/ext4/crypto_policy.c
+++ b/fs/ext4/crypto_policy.c
@@ -148,20 +148,38 @@ int ext4_get_policy(struct inode *inode, struct ext4_encryption_policy *policy)
148int ext4_is_child_context_consistent_with_parent(struct inode *parent, 148int ext4_is_child_context_consistent_with_parent(struct inode *parent,
149 struct inode *child) 149 struct inode *child)
150{ 150{
151 struct ext4_crypt_info *parent_ci, *child_ci; 151 const struct ext4_crypt_info *parent_ci, *child_ci;
152 struct ext4_encryption_context parent_ctx, child_ctx;
152 int res; 153 int res;
153 154
154 if ((parent == NULL) || (child == NULL)) { 155 /* No restrictions on file types which are never encrypted */
155 pr_err("parent %p child %p\n", parent, child); 156 if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) &&
156 WARN_ON(1); /* Should never happen */ 157 !S_ISLNK(child->i_mode))
157 return 0; 158 return 1;
158 } 159
159 /* no restrictions if the parent directory is not encrypted */ 160 /* No restrictions if the parent directory is unencrypted */
160 if (!ext4_encrypted_inode(parent)) 161 if (!ext4_encrypted_inode(parent))
161 return 1; 162 return 1;
162 /* if the child directory is not encrypted, this is always a problem */ 163
164 /* Encrypted directories must not contain unencrypted files */
163 if (!ext4_encrypted_inode(child)) 165 if (!ext4_encrypted_inode(child))
164 return 0; 166 return 0;
167
168 /*
169 * Both parent and child are encrypted, so verify they use the same
170 * encryption policy. Compare the fscrypt_info structs if the keys are
171 * available, otherwise retrieve and compare the fscrypt_contexts.
172 *
173 * Note that the fscrypt_context retrieval will be required frequently
174 * when accessing an encrypted directory tree without the key.
175 * Performance-wise this is not a big deal because we already don't
176 * really optimize for file access without the key (to the extent that
177 * such access is even possible), given that any attempted access
178 * already causes a fscrypt_context retrieval and keyring search.
179 *
180 * In any case, if an unexpected error occurs, fall back to "forbidden".
181 */
182
165 res = ext4_get_encryption_info(parent); 183 res = ext4_get_encryption_info(parent);
166 if (res) 184 if (res)
167 return 0; 185 return 0;
@@ -170,17 +188,35 @@ int ext4_is_child_context_consistent_with_parent(struct inode *parent,
170 return 0; 188 return 0;
171 parent_ci = EXT4_I(parent)->i_crypt_info; 189 parent_ci = EXT4_I(parent)->i_crypt_info;
172 child_ci = EXT4_I(child)->i_crypt_info; 190 child_ci = EXT4_I(child)->i_crypt_info;
173 if (!parent_ci && !child_ci) 191 if (parent_ci && child_ci) {
174 return 1; 192 return memcmp(parent_ci->ci_master_key, child_ci->ci_master_key,
175 if (!parent_ci || !child_ci) 193 EXT4_KEY_DESCRIPTOR_SIZE) == 0 &&
194 (parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
195 (parent_ci->ci_filename_mode ==
196 child_ci->ci_filename_mode) &&
197 (parent_ci->ci_flags == child_ci->ci_flags);
198 }
199
200 res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION,
201 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
202 &parent_ctx, sizeof(parent_ctx));
203 if (res != sizeof(parent_ctx))
204 return 0;
205
206 res = ext4_xattr_get(child, EXT4_XATTR_INDEX_ENCRYPTION,
207 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
208 &child_ctx, sizeof(child_ctx));
209 if (res != sizeof(child_ctx))
176 return 0; 210 return 0;
177 211
178 return (memcmp(parent_ci->ci_master_key, 212 return memcmp(parent_ctx.master_key_descriptor,
179 child_ci->ci_master_key, 213 child_ctx.master_key_descriptor,
180 EXT4_KEY_DESCRIPTOR_SIZE) == 0 && 214 EXT4_KEY_DESCRIPTOR_SIZE) == 0 &&
181 (parent_ci->ci_data_mode == child_ci->ci_data_mode) && 215 (parent_ctx.contents_encryption_mode ==
182 (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) && 216 child_ctx.contents_encryption_mode) &&
183 (parent_ci->ci_flags == child_ci->ci_flags)); 217 (parent_ctx.filenames_encryption_mode ==
218 child_ctx.filenames_encryption_mode) &&
219 (parent_ctx.flags == child_ctx.flags);
184} 220}
185 221
186/** 222/**
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 1d1bca74f844..6d17f31a31d7 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -111,6 +111,12 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
111 int dir_has_error = 0; 111 int dir_has_error = 0;
112 struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}; 112 struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
113 113
114 if (ext4_encrypted_inode(inode)) {
115 err = ext4_get_encryption_info(inode);
116 if (err && err != -ENOKEY)
117 return err;
118 }
119
114 if (is_dx_dir(inode)) { 120 if (is_dx_dir(inode)) {
115 err = ext4_dx_readdir(file, ctx); 121 err = ext4_dx_readdir(file, ctx);
116 if (err != ERR_BAD_DX_DIR) { 122 if (err != ERR_BAD_DX_DIR) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9f31991a5e05..6edacb849e48 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -589,6 +589,7 @@ enum {
589#define EXT4_ENCRYPTION_MODE_AES_256_GCM 2 589#define EXT4_ENCRYPTION_MODE_AES_256_GCM 2
590#define EXT4_ENCRYPTION_MODE_AES_256_CBC 3 590#define EXT4_ENCRYPTION_MODE_AES_256_CBC 3
591#define EXT4_ENCRYPTION_MODE_AES_256_CTS 4 591#define EXT4_ENCRYPTION_MODE_AES_256_CTS 4
592#define EXT4_ENCRYPTION_MODE_AES_256_HEH 126
592 593
593#include "ext4_crypto.h" 594#include "ext4_crypto.h"
594 595
@@ -1441,7 +1442,7 @@ struct ext4_sb_info {
1441 struct list_head s_es_list; /* List of inodes with reclaimable extents */ 1442 struct list_head s_es_list; /* List of inodes with reclaimable extents */
1442 long s_es_nr_inode; 1443 long s_es_nr_inode;
1443 struct ext4_es_stats s_es_stats; 1444 struct ext4_es_stats s_es_stats;
1444 struct mb_cache *s_mb_cache; 1445 struct mb2_cache *s_mb_cache;
1445 spinlock_t s_es_lock ____cacheline_aligned_in_smp; 1446 spinlock_t s_es_lock ____cacheline_aligned_in_smp;
1446 1447
1447 /* Ratelimit ext4 messages. */ 1448 /* Ratelimit ext4 messages. */
@@ -2261,13 +2262,16 @@ extern struct kmem_cache *ext4_crypt_info_cachep;
2261bool ext4_valid_contents_enc_mode(uint32_t mode); 2262bool ext4_valid_contents_enc_mode(uint32_t mode);
2262uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size); 2263uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size);
2263extern struct workqueue_struct *ext4_read_workqueue; 2264extern struct workqueue_struct *ext4_read_workqueue;
2264struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode); 2265struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode,
2266 gfp_t gfp_flags);
2265void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx); 2267void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx);
2266void ext4_restore_control_page(struct page *data_page); 2268void ext4_restore_control_page(struct page *data_page);
2267struct page *ext4_encrypt(struct inode *inode, 2269struct page *ext4_encrypt(struct inode *inode,
2268 struct page *plaintext_page); 2270 struct page *plaintext_page,
2271 gfp_t gfp_flags);
2269int ext4_decrypt(struct page *page); 2272int ext4_decrypt(struct page *page);
2270int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex); 2273int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex);
2274extern const struct dentry_operations ext4_encrypted_d_ops;
2271 2275
2272#ifdef CONFIG_EXT4_FS_ENCRYPTION 2276#ifdef CONFIG_EXT4_FS_ENCRYPTION
2273int ext4_init_crypto(void); 2277int ext4_init_crypto(void);
@@ -2330,23 +2334,11 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
2330/* crypto_key.c */ 2334/* crypto_key.c */
2331void ext4_free_crypt_info(struct ext4_crypt_info *ci); 2335void ext4_free_crypt_info(struct ext4_crypt_info *ci);
2332void ext4_free_encryption_info(struct inode *inode, struct ext4_crypt_info *ci); 2336void ext4_free_encryption_info(struct inode *inode, struct ext4_crypt_info *ci);
2333int _ext4_get_encryption_info(struct inode *inode);
2334 2337
2335#ifdef CONFIG_EXT4_FS_ENCRYPTION 2338#ifdef CONFIG_EXT4_FS_ENCRYPTION
2336int ext4_has_encryption_key(struct inode *inode); 2339int ext4_has_encryption_key(struct inode *inode);
2337 2340
2338static inline int ext4_get_encryption_info(struct inode *inode) 2341int ext4_get_encryption_info(struct inode *inode);
2339{
2340 struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
2341
2342 if (!ci ||
2343 (ci->ci_keyring_key &&
2344 (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
2345 (1 << KEY_FLAG_REVOKED) |
2346 (1 << KEY_FLAG_DEAD)))))
2347 return _ext4_get_encryption_info(inode);
2348 return 0;
2349}
2350 2342
2351static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode) 2343static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode)
2352{ 2344{
diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h
index ac7d4e813796..e52637d969db 100644
--- a/fs/ext4/ext4_crypto.h
+++ b/fs/ext4/ext4_crypto.h
@@ -58,8 +58,10 @@ struct ext4_encryption_context {
58#define EXT4_XTS_TWEAK_SIZE 16 58#define EXT4_XTS_TWEAK_SIZE 16
59#define EXT4_AES_128_ECB_KEY_SIZE 16 59#define EXT4_AES_128_ECB_KEY_SIZE 16
60#define EXT4_AES_256_GCM_KEY_SIZE 32 60#define EXT4_AES_256_GCM_KEY_SIZE 32
61#define EXT4_AES_256_ECB_KEY_SIZE 32
61#define EXT4_AES_256_CBC_KEY_SIZE 32 62#define EXT4_AES_256_CBC_KEY_SIZE 32
62#define EXT4_AES_256_CTS_KEY_SIZE 32 63#define EXT4_AES_256_CTS_KEY_SIZE 32
64#define EXT4_AES_256_HEH_KEY_SIZE 32
63#define EXT4_AES_256_XTS_KEY_SIZE 64 65#define EXT4_AES_256_XTS_KEY_SIZE 64
64#define EXT4_MAX_KEY_SIZE 64 66#define EXT4_MAX_KEY_SIZE 64
65 67
@@ -78,7 +80,6 @@ struct ext4_crypt_info {
78 char ci_filename_mode; 80 char ci_filename_mode;
79 char ci_flags; 81 char ci_flags;
80 struct crypto_ablkcipher *ci_ctfm; 82 struct crypto_ablkcipher *ci_ctfm;
81 struct key *ci_keyring_key;
82 char ci_master_key[EXT4_KEY_DESCRIPTOR_SIZE]; 83 char ci_master_key[EXT4_KEY_DESCRIPTOR_SIZE];
83}; 84};
84 85
@@ -121,6 +122,8 @@ static inline int ext4_encryption_key_size(int mode)
121 return EXT4_AES_256_CBC_KEY_SIZE; 122 return EXT4_AES_256_CBC_KEY_SIZE;
122 case EXT4_ENCRYPTION_MODE_AES_256_CTS: 123 case EXT4_ENCRYPTION_MODE_AES_256_CTS:
123 return EXT4_AES_256_CTS_KEY_SIZE; 124 return EXT4_AES_256_CTS_KEY_SIZE;
125 case EXT4_ENCRYPTION_MODE_AES_256_HEH:
126 return EXT4_AES_256_HEH_KEY_SIZE;
124 default: 127 default:
125 BUG(); 128 BUG();
126 } 129 }
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9da42ace762a..61d5bfc7318c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4902,6 +4902,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4902 4902
4903 /* Zero out partial block at the edges of the range */ 4903 /* Zero out partial block at the edges of the range */
4904 ret = ext4_zero_partial_blocks(handle, inode, offset, len); 4904 ret = ext4_zero_partial_blocks(handle, inode, offset, len);
4905 if (ret >= 0)
4906 ext4_update_inode_fsync_trans(handle, inode, 1);
4905 4907
4906 if (file->f_flags & O_SYNC) 4908 if (file->f_flags & O_SYNC)
4907 ext4_handle_sync(handle); 4909 ext4_handle_sync(handle);
@@ -5362,7 +5364,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5362 ext4_lblk_t stop, *iterator, ex_start, ex_end; 5364 ext4_lblk_t stop, *iterator, ex_start, ex_end;
5363 5365
5364 /* Let path point to the last extent */ 5366 /* Let path point to the last extent */
5365 path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); 5367 path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
5368 EXT4_EX_NOCACHE);
5366 if (IS_ERR(path)) 5369 if (IS_ERR(path))
5367 return PTR_ERR(path); 5370 return PTR_ERR(path);
5368 5371
@@ -5371,15 +5374,15 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5371 if (!extent) 5374 if (!extent)
5372 goto out; 5375 goto out;
5373 5376
5374 stop = le32_to_cpu(extent->ee_block) + 5377 stop = le32_to_cpu(extent->ee_block);
5375 ext4_ext_get_actual_len(extent);
5376 5378
5377 /* 5379 /*
5378 * In case of left shift, Don't start shifting extents until we make 5380 * In case of left shift, Don't start shifting extents until we make
5379 * sure the hole is big enough to accommodate the shift. 5381 * sure the hole is big enough to accommodate the shift.
5380 */ 5382 */
5381 if (SHIFT == SHIFT_LEFT) { 5383 if (SHIFT == SHIFT_LEFT) {
5382 path = ext4_find_extent(inode, start - 1, &path, 0); 5384 path = ext4_find_extent(inode, start - 1, &path,
5385 EXT4_EX_NOCACHE);
5383 if (IS_ERR(path)) 5386 if (IS_ERR(path))
5384 return PTR_ERR(path); 5387 return PTR_ERR(path);
5385 depth = path->p_depth; 5388 depth = path->p_depth;
@@ -5411,9 +5414,14 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5411 else 5414 else
5412 iterator = &stop; 5415 iterator = &stop;
5413 5416
5414 /* Its safe to start updating extents */ 5417 /*
5415 while (start < stop) { 5418 * Its safe to start updating extents. Start and stop are unsigned, so
5416 path = ext4_find_extent(inode, *iterator, &path, 0); 5419 * in case of right shift if extent with 0 block is reached, iterator
5420 * becomes NULL to indicate the end of the loop.
5421 */
5422 while (iterator && start <= stop) {
5423 path = ext4_find_extent(inode, *iterator, &path,
5424 EXT4_EX_NOCACHE);
5417 if (IS_ERR(path)) 5425 if (IS_ERR(path))
5418 return PTR_ERR(path); 5426 return PTR_ERR(path);
5419 depth = path->p_depth; 5427 depth = path->p_depth;
@@ -5440,8 +5448,11 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5440 ext4_ext_get_actual_len(extent); 5448 ext4_ext_get_actual_len(extent);
5441 } else { 5449 } else {
5442 extent = EXT_FIRST_EXTENT(path[depth].p_hdr); 5450 extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
5443 *iterator = le32_to_cpu(extent->ee_block) > 0 ? 5451 if (le32_to_cpu(extent->ee_block) > 0)
5444 le32_to_cpu(extent->ee_block) - 1 : 0; 5452 *iterator = le32_to_cpu(extent->ee_block) - 1;
5453 else
5454 /* Beginning is reached, end of the loop */
5455 iterator = NULL;
5445 /* Update path extent in case we need to stop */ 5456 /* Update path extent in case we need to stop */
5446 while (le32_to_cpu(extent->ee_block) < start) 5457 while (le32_to_cpu(extent->ee_block) < start)
5447 extent++; 5458 extent++;
@@ -5588,6 +5599,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5588 ext4_handle_sync(handle); 5599 ext4_handle_sync(handle);
5589 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 5600 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
5590 ext4_mark_inode_dirty(handle, inode); 5601 ext4_mark_inode_dirty(handle, inode);
5602 ext4_update_inode_fsync_trans(handle, inode, 1);
5591 5603
5592out_stop: 5604out_stop:
5593 ext4_journal_stop(handle); 5605 ext4_journal_stop(handle);
@@ -5761,6 +5773,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
5761 up_write(&EXT4_I(inode)->i_data_sem); 5773 up_write(&EXT4_I(inode)->i_data_sem);
5762 if (IS_SYNC(inode)) 5774 if (IS_SYNC(inode))
5763 ext4_handle_sync(handle); 5775 ext4_handle_sync(handle);
5776 if (ret >= 0)
5777 ext4_update_inode_fsync_trans(handle, inode, 1);
5764 5778
5765out_stop: 5779out_stop:
5766 ext4_journal_stop(handle); 5780 ext4_journal_stop(handle);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 0d24ebcd7c9e..45ef9975caec 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -463,47 +463,27 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
463 num = min_t(pgoff_t, end - index, PAGEVEC_SIZE); 463 num = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
464 nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, 464 nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
465 (pgoff_t)num); 465 (pgoff_t)num);
466 if (nr_pages == 0) { 466 if (nr_pages == 0)
467 if (whence == SEEK_DATA)
468 break;
469
470 BUG_ON(whence != SEEK_HOLE);
471 /*
472 * If this is the first time to go into the loop and
473 * offset is not beyond the end offset, it will be a
474 * hole at this offset
475 */
476 if (lastoff == startoff || lastoff < endoff)
477 found = 1;
478 break; 467 break;
479 }
480
481 /*
482 * If this is the first time to go into the loop and
483 * offset is smaller than the first page offset, it will be a
484 * hole at this offset.
485 */
486 if (lastoff == startoff && whence == SEEK_HOLE &&
487 lastoff < page_offset(pvec.pages[0])) {
488 found = 1;
489 break;
490 }
491 468
492 for (i = 0; i < nr_pages; i++) { 469 for (i = 0; i < nr_pages; i++) {
493 struct page *page = pvec.pages[i]; 470 struct page *page = pvec.pages[i];
494 struct buffer_head *bh, *head; 471 struct buffer_head *bh, *head;
495 472
496 /* 473 /*
497 * If the current offset is not beyond the end of given 474 * If current offset is smaller than the page offset,
498 * range, it will be a hole. 475 * there is a hole at this offset.
499 */ 476 */
500 if (lastoff < endoff && whence == SEEK_HOLE && 477 if (whence == SEEK_HOLE && lastoff < endoff &&
501 page->index > end) { 478 lastoff < page_offset(pvec.pages[i])) {
502 found = 1; 479 found = 1;
503 *offset = lastoff; 480 *offset = lastoff;
504 goto out; 481 goto out;
505 } 482 }
506 483
484 if (page->index > end)
485 goto out;
486
507 lock_page(page); 487 lock_page(page);
508 488
509 if (unlikely(page->mapping != inode->i_mapping)) { 489 if (unlikely(page->mapping != inode->i_mapping)) {
@@ -520,6 +500,8 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
520 lastoff = page_offset(page); 500 lastoff = page_offset(page);
521 bh = head = page_buffers(page); 501 bh = head = page_buffers(page);
522 do { 502 do {
503 if (lastoff + bh->b_size <= startoff)
504 goto next;
523 if (buffer_uptodate(bh) || 505 if (buffer_uptodate(bh) ||
524 buffer_unwritten(bh)) { 506 buffer_unwritten(bh)) {
525 if (whence == SEEK_DATA) 507 if (whence == SEEK_DATA)
@@ -534,6 +516,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
534 unlock_page(page); 516 unlock_page(page);
535 goto out; 517 goto out;
536 } 518 }
519next:
537 lastoff += bh->b_size; 520 lastoff += bh->b_size;
538 bh = bh->b_this_page; 521 bh = bh->b_this_page;
539 } while (bh != head); 522 } while (bh != head);
@@ -543,20 +526,18 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
543 unlock_page(page); 526 unlock_page(page);
544 } 527 }
545 528
546 /* 529 /* The no. of pages is less than our desired, we are done. */
547 * The no. of pages is less than our desired, that would be a 530 if (nr_pages < num)
548 * hole in there.
549 */
550 if (nr_pages < num && whence == SEEK_HOLE) {
551 found = 1;
552 *offset = lastoff;
553 break; 531 break;
554 }
555 532
556 index = pvec.pages[i - 1]->index + 1; 533 index = pvec.pages[i - 1]->index + 1;
557 pagevec_release(&pvec); 534 pagevec_release(&pvec);
558 } while (index <= end); 535 } while (index <= end);
559 536
537 if (whence == SEEK_HOLE && lastoff < endoff) {
538 found = 1;
539 *offset = lastoff;
540 }
560out: 541out:
561 pagevec_release(&pvec); 542 pagevec_release(&pvec);
562 return found; 543 return found;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 43dcaab85201..bc7c082b7913 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -503,8 +503,16 @@ int ext4_readpage_inline(struct inode *inode, struct page *page)
503 return -EAGAIN; 503 return -EAGAIN;
504 } 504 }
505 505
506 trace_android_fs_dataread_start(inode, page_offset(page), PAGE_SIZE, 506 if (trace_android_fs_dataread_start_enabled()) {
507 current->pid, current->comm); 507 char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
508
509 path = android_fstrace_get_pathname(pathbuf,
510 MAX_TRACE_PATHBUF_LEN,
511 inode);
512 trace_android_fs_dataread_start(inode, page_offset(page),
513 PAGE_SIZE, current->pid,
514 path, current->comm);
515 }
508 516
509 /* 517 /*
510 * Current inline data can only exist in the 1st page, 518 * Current inline data can only exist in the 1st page,
@@ -939,8 +947,15 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
939 struct page *page) 947 struct page *page)
940{ 948{
941 int i_size_changed = 0; 949 int i_size_changed = 0;
950 int ret;
942 951
943 copied = ext4_write_inline_data_end(inode, pos, len, copied, page); 952 ret = ext4_write_inline_data_end(inode, pos, len, copied, page);
953 if (ret < 0) {
954 unlock_page(page);
955 put_page(page);
956 return ret;
957 }
958 copied = ret;
944 959
945 /* 960 /*
946 * No need to use i_size_read() here, the i_size 961 * No need to use i_size_read() here, the i_size
@@ -1157,10 +1172,9 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
1157 set_buffer_uptodate(dir_block); 1172 set_buffer_uptodate(dir_block);
1158 err = ext4_handle_dirty_dirent_node(handle, inode, dir_block); 1173 err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
1159 if (err) 1174 if (err)
1160 goto out; 1175 return err;
1161 set_buffer_verified(dir_block); 1176 set_buffer_verified(dir_block);
1162out: 1177 return ext4_mark_inode_dirty(handle, inode);
1163 return err;
1164} 1178}
1165 1179
1166static int ext4_convert_inline_data_nolock(handle_t *handle, 1180static int ext4_convert_inline_data_nolock(handle_t *handle,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c33fcb4f8533..e3d425eeab4a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -72,10 +72,9 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
72 csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, 72 csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum,
73 csum_size); 73 csum_size);
74 offset += csum_size; 74 offset += csum_size;
75 csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
76 EXT4_INODE_SIZE(inode->i_sb) -
77 offset);
78 } 75 }
76 csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
77 EXT4_INODE_SIZE(inode->i_sb) - offset);
79 } 78 }
80 79
81 return csum; 80 return csum;
@@ -1017,8 +1016,16 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
1017 pgoff_t index; 1016 pgoff_t index;
1018 unsigned from, to; 1017 unsigned from, to;
1019 1018
1020 trace_android_fs_datawrite_start(inode, pos, len, 1019 if (trace_android_fs_datawrite_start_enabled()) {
1021 current->pid, current->comm); 1020 char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
1021
1022 path = android_fstrace_get_pathname(pathbuf,
1023 MAX_TRACE_PATHBUF_LEN,
1024 inode);
1025 trace_android_fs_datawrite_start(inode, pos, len,
1026 current->pid, path,
1027 current->comm);
1028 }
1022 trace_ext4_write_begin(inode, pos, len, flags); 1029 trace_ext4_write_begin(inode, pos, len, flags);
1023 /* 1030 /*
1024 * Reserve one block more for addition to orphan list in case 1031 * Reserve one block more for addition to orphan list in case
@@ -1169,8 +1176,11 @@ static int ext4_write_end(struct file *file,
1169 if (ext4_has_inline_data(inode)) { 1176 if (ext4_has_inline_data(inode)) {
1170 ret = ext4_write_inline_data_end(inode, pos, len, 1177 ret = ext4_write_inline_data_end(inode, pos, len,
1171 copied, page); 1178 copied, page);
1172 if (ret < 0) 1179 if (ret < 0) {
1180 unlock_page(page);
1181 put_page(page);
1173 goto errout; 1182 goto errout;
1183 }
1174 copied = ret; 1184 copied = ret;
1175 } else 1185 } else
1176 copied = block_write_end(file, mapping, pos, 1186 copied = block_write_end(file, mapping, pos,
@@ -1224,7 +1234,9 @@ errout:
1224 * set the buffer to be dirty, since in data=journalled mode we need 1234 * set the buffer to be dirty, since in data=journalled mode we need
1225 * to call ext4_handle_dirty_metadata() instead. 1235 * to call ext4_handle_dirty_metadata() instead.
1226 */ 1236 */
1227static void zero_new_buffers(struct page *page, unsigned from, unsigned to) 1237static void ext4_journalled_zero_new_buffers(handle_t *handle,
1238 struct page *page,
1239 unsigned from, unsigned to)
1228{ 1240{
1229 unsigned int block_start = 0, block_end; 1241 unsigned int block_start = 0, block_end;
1230 struct buffer_head *head, *bh; 1242 struct buffer_head *head, *bh;
@@ -1241,7 +1253,7 @@ static void zero_new_buffers(struct page *page, unsigned from, unsigned to)
1241 size = min(to, block_end) - start; 1253 size = min(to, block_end) - start;
1242 1254
1243 zero_user(page, start, size); 1255 zero_user(page, start, size);
1244 set_buffer_uptodate(bh); 1256 write_end_fn(handle, bh);
1245 } 1257 }
1246 clear_buffer_new(bh); 1258 clear_buffer_new(bh);
1247 } 1259 }
@@ -1271,18 +1283,25 @@ static int ext4_journalled_write_end(struct file *file,
1271 1283
1272 BUG_ON(!ext4_handle_valid(handle)); 1284 BUG_ON(!ext4_handle_valid(handle));
1273 1285
1274 if (ext4_has_inline_data(inode)) 1286 if (ext4_has_inline_data(inode)) {
1275 copied = ext4_write_inline_data_end(inode, pos, len, 1287 ret = ext4_write_inline_data_end(inode, pos, len,
1276 copied, page); 1288 copied, page);
1277 else { 1289 if (ret < 0) {
1278 if (copied < len) { 1290 unlock_page(page);
1279 if (!PageUptodate(page)) 1291 put_page(page);
1280 copied = 0; 1292 goto errout;
1281 zero_new_buffers(page, from+copied, to);
1282 } 1293 }
1283 1294 copied = ret;
1295 } else if (unlikely(copied < len) && !PageUptodate(page)) {
1296 copied = 0;
1297 ext4_journalled_zero_new_buffers(handle, page, from, to);
1298 } else {
1299 if (unlikely(copied < len))
1300 ext4_journalled_zero_new_buffers(handle, page,
1301 from + copied, to);
1284 ret = ext4_walk_page_buffers(handle, page_buffers(page), from, 1302 ret = ext4_walk_page_buffers(handle, page_buffers(page), from,
1285 to, &partial, write_end_fn); 1303 from + copied, &partial,
1304 write_end_fn);
1286 if (!partial) 1305 if (!partial)
1287 SetPageUptodate(page); 1306 SetPageUptodate(page);
1288 } 1307 }
@@ -1308,6 +1327,7 @@ static int ext4_journalled_write_end(struct file *file,
1308 */ 1327 */
1309 ext4_orphan_add(handle, inode); 1328 ext4_orphan_add(handle, inode);
1310 1329
1330errout:
1311 ret2 = ext4_journal_stop(handle); 1331 ret2 = ext4_journal_stop(handle);
1312 if (!ret) 1332 if (!ret)
1313 ret = ret2; 1333 ret = ret2;
@@ -2037,7 +2057,7 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
2037{ 2057{
2038 struct inode *inode = mpd->inode; 2058 struct inode *inode = mpd->inode;
2039 int err; 2059 int err;
2040 ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) 2060 ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1)
2041 >> inode->i_blkbits; 2061 >> inode->i_blkbits;
2042 2062
2043 do { 2063 do {
@@ -2732,8 +2752,16 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2732 len, flags, pagep, fsdata); 2752 len, flags, pagep, fsdata);
2733 } 2753 }
2734 *fsdata = (void *)0; 2754 *fsdata = (void *)0;
2735 trace_android_fs_datawrite_start(inode, pos, len, 2755 if (trace_android_fs_datawrite_start_enabled()) {
2736 current->pid, current->comm); 2756 char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
2757
2758 path = android_fstrace_get_pathname(pathbuf,
2759 MAX_TRACE_PATHBUF_LEN,
2760 inode);
2761 trace_android_fs_datawrite_start(inode, pos, len,
2762 current->pid,
2763 path, current->comm);
2764 }
2737 trace_ext4_da_write_begin(inode, pos, len, flags); 2765 trace_ext4_da_write_begin(inode, pos, len, flags);
2738 2766
2739 if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { 2767 if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
@@ -3342,16 +3370,27 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
3342 return 0; 3370 return 0;
3343 3371
3344 if (trace_android_fs_dataread_start_enabled() && 3372 if (trace_android_fs_dataread_start_enabled() &&
3345 (iov_iter_rw(iter) == READ)) 3373 (iov_iter_rw(iter) == READ)) {
3374 char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
3375
3376 path = android_fstrace_get_pathname(pathbuf,
3377 MAX_TRACE_PATHBUF_LEN,
3378 inode);
3346 trace_android_fs_dataread_start(inode, offset, count, 3379 trace_android_fs_dataread_start(inode, offset, count,
3347 current->pid, 3380 current->pid, path,
3348 current->comm); 3381 current->comm);
3382 }
3349 if (trace_android_fs_datawrite_start_enabled() && 3383 if (trace_android_fs_datawrite_start_enabled() &&
3350 (iov_iter_rw(iter) == WRITE)) 3384 (iov_iter_rw(iter) == WRITE)) {
3385 char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
3386
3387 path = android_fstrace_get_pathname(pathbuf,
3388 MAX_TRACE_PATHBUF_LEN,
3389 inode);
3351 trace_android_fs_datawrite_start(inode, offset, count, 3390 trace_android_fs_datawrite_start(inode, offset, count,
3352 current->pid, 3391 current->pid, path,
3353 current->comm); 3392 current->comm);
3354 3393 }
3355 trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); 3394 trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
3356 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3395 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3357 ret = ext4_ext_direct_IO(iocb, iter, offset); 3396 ret = ext4_ext_direct_IO(iocb, iter, offset);
@@ -3587,6 +3626,10 @@ static int ext4_block_truncate_page(handle_t *handle,
3587 unsigned blocksize; 3626 unsigned blocksize;
3588 struct inode *inode = mapping->host; 3627 struct inode *inode = mapping->host;
3589 3628
3629 /* If we are processing an encrypted inode during orphan list handling */
3630 if (ext4_encrypted_inode(inode) && !ext4_has_encryption_key(inode))
3631 return 0;
3632
3590 blocksize = inode->i_sb->s_blocksize; 3633 blocksize = inode->i_sb->s_blocksize;
3591 length = blocksize - (offset & (blocksize - 1)); 3634 length = blocksize - (offset & (blocksize - 1));
3592 3635
@@ -3804,6 +3847,8 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3804 3847
3805 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 3848 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
3806 ext4_mark_inode_dirty(handle, inode); 3849 ext4_mark_inode_dirty(handle, inode);
3850 if (ret >= 0)
3851 ext4_update_inode_fsync_trans(handle, inode, 1);
3807out_stop: 3852out_stop:
3808 ext4_journal_stop(handle); 3853 ext4_journal_stop(handle);
3809out_dio: 3854out_dio:
@@ -5173,8 +5218,9 @@ static int ext4_expand_extra_isize(struct inode *inode,
5173 /* No extended attributes present */ 5218 /* No extended attributes present */
5174 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || 5219 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
5175 header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { 5220 header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
5176 memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, 5221 memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
5177 new_extra_isize); 5222 EXT4_I(inode)->i_extra_isize, 0,
5223 new_extra_isize - EXT4_I(inode)->i_extra_isize);
5178 EXT4_I(inode)->i_extra_isize = new_extra_isize; 5224 EXT4_I(inode)->i_extra_isize = new_extra_isize;
5179 return 0; 5225 return 0;
5180 } 5226 }
@@ -5404,6 +5450,11 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5404 file_update_time(vma->vm_file); 5450 file_update_time(vma->vm_file);
5405 5451
5406 down_read(&EXT4_I(inode)->i_mmap_sem); 5452 down_read(&EXT4_I(inode)->i_mmap_sem);
5453
5454 ret = ext4_convert_inline_data(inode);
5455 if (ret)
5456 goto out_ret;
5457
5407 /* Delalloc case is easy... */ 5458 /* Delalloc case is easy... */
5408 if (test_opt(inode->i_sb, DELALLOC) && 5459 if (test_opt(inode->i_sb, DELALLOC) &&
5409 !ext4_should_journal_data(inode) && 5460 !ext4_should_journal_data(inode) &&
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 7e974878d9a9..c21826be1cb3 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -626,6 +626,9 @@ resizefs_out:
626 struct ext4_encryption_policy policy; 626 struct ext4_encryption_policy policy;
627 int err = 0; 627 int err = 0;
628 628
629 if (!ext4_has_feature_encrypt(sb))
630 return -EOPNOTSUPP;
631
629 if (copy_from_user(&policy, 632 if (copy_from_user(&policy,
630 (struct ext4_encryption_policy __user *)arg, 633 (struct ext4_encryption_policy __user *)arg,
631 sizeof(policy))) { 634 sizeof(policy))) {
@@ -637,8 +640,12 @@ resizefs_out:
637 if (err) 640 if (err)
638 goto encryption_policy_out; 641 goto encryption_policy_out;
639 642
643 mutex_lock(&inode->i_mutex);
644
640 err = ext4_process_policy(&policy, inode); 645 err = ext4_process_policy(&policy, inode);
641 646
647 mutex_unlock(&inode->i_mutex);
648
642 mnt_drop_write_file(filp); 649 mnt_drop_write_file(filp);
643encryption_policy_out: 650encryption_policy_out:
644 return err; 651 return err;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index be1227c196d8..c2810503eb50 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3121,6 +3121,13 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3121 if (ar->pright && start + size - 1 >= ar->lright) 3121 if (ar->pright && start + size - 1 >= ar->lright)
3122 size -= start + size - ar->lright; 3122 size -= start + size - ar->lright;
3123 3123
3124 /*
3125 * Trim allocation request for filesystems with artificially small
3126 * groups.
3127 */
3128 if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
3129 size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
3130
3124 end = start + size; 3131 end = start + size;
3125 3132
3126 /* check we don't cross already preallocated blocks */ 3133 /* check we don't cross already preallocated blocks */
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 7861d801b048..05048fcfd602 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -187,7 +187,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
187 if (PageUptodate(page)) 187 if (PageUptodate(page))
188 return 0; 188 return 0;
189 189
190 blocksize = 1 << inode->i_blkbits; 190 blocksize = i_blocksize(inode);
191 if (!page_has_buffers(page)) 191 if (!page_has_buffers(page))
192 create_empty_buffers(page, blocksize, 0); 192 create_empty_buffers(page, blocksize, 0);
193 193
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 573b4cbb0cb9..1d007e853f5c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1243,9 +1243,9 @@ static inline int ext4_match(struct ext4_filename *fname,
1243 if (unlikely(!name)) { 1243 if (unlikely(!name)) {
1244 if (fname->usr_fname->name[0] == '_') { 1244 if (fname->usr_fname->name[0] == '_') {
1245 int ret; 1245 int ret;
1246 if (de->name_len < 16) 1246 if (de->name_len <= 32)
1247 return 0; 1247 return 0;
1248 ret = memcmp(de->name + de->name_len - 16, 1248 ret = memcmp(de->name + ((de->name_len - 17) & ~15),
1249 fname->crypto_buf.name + 8, 16); 1249 fname->crypto_buf.name + 8, 16);
1250 return (ret == 0) ? 1 : 0; 1250 return (ret == 0) ? 1 : 0;
1251 } 1251 }
@@ -1557,6 +1557,24 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
1557 struct ext4_dir_entry_2 *de; 1557 struct ext4_dir_entry_2 *de;
1558 struct buffer_head *bh; 1558 struct buffer_head *bh;
1559 1559
1560 if (ext4_encrypted_inode(dir)) {
1561 int res = ext4_get_encryption_info(dir);
1562
1563 /*
1564 * This should be a properly defined flag for
1565 * dentry->d_flags when we uplift this to the VFS.
1566 * d_fsdata is set to (void *) 1 if if the dentry is
1567 * created while the directory was encrypted and we
1568 * don't have access to the key.
1569 */
1570 dentry->d_fsdata = NULL;
1571 if (ext4_encryption_info(dir))
1572 dentry->d_fsdata = (void *) 1;
1573 d_set_d_op(dentry, &ext4_encrypted_d_ops);
1574 if (res && res != -ENOKEY)
1575 return ERR_PTR(res);
1576 }
1577
1560 if (dentry->d_name.len > EXT4_NAME_LEN) 1578 if (dentry->d_name.len > EXT4_NAME_LEN)
1561 return ERR_PTR(-ENAMETOOLONG); 1579 return ERR_PTR(-ENAMETOOLONG);
1562 1580
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 17fbe3882b8e..6ca56f5f72b5 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -23,6 +23,7 @@
23#include <linux/kernel.h> 23#include <linux/kernel.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/mm.h> 25#include <linux/mm.h>
26#include <linux/backing-dev.h>
26 27
27#include "ext4_jbd2.h" 28#include "ext4_jbd2.h"
28#include "xattr.h" 29#include "xattr.h"
@@ -485,9 +486,20 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
485 486
486 if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) && 487 if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) &&
487 nr_to_submit) { 488 nr_to_submit) {
488 data_page = ext4_encrypt(inode, page); 489 gfp_t gfp_flags = GFP_NOFS;
490
491 retry_encrypt:
492 data_page = ext4_encrypt(inode, page, gfp_flags);
489 if (IS_ERR(data_page)) { 493 if (IS_ERR(data_page)) {
490 ret = PTR_ERR(data_page); 494 ret = PTR_ERR(data_page);
495 if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {
496 if (io->io_bio) {
497 ext4_io_submit(io);
498 congestion_wait(BLK_RW_ASYNC, HZ/50);
499 }
500 gfp_flags |= __GFP_NOFAIL;
501 goto retry_encrypt;
502 }
491 data_page = NULL; 503 data_page = NULL;
492 goto out; 504 goto out;
493 } 505 }
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 1ce24a6759a0..783e33d839cf 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -152,11 +152,17 @@ ext4_submit_bio_read(struct bio *bio)
152 struct page *first_page = bio->bi_io_vec[0].bv_page; 152 struct page *first_page = bio->bi_io_vec[0].bv_page;
153 153
154 if (first_page != NULL) { 154 if (first_page != NULL) {
155 char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
156
157 path = android_fstrace_get_pathname(pathbuf,
158 MAX_TRACE_PATHBUF_LEN,
159 first_page->mapping->host);
155 trace_android_fs_dataread_start( 160 trace_android_fs_dataread_start(
156 first_page->mapping->host, 161 first_page->mapping->host,
157 page_offset(first_page), 162 page_offset(first_page),
158 bio->bi_iter.bi_size, 163 bio->bi_iter.bi_size,
159 current->pid, 164 current->pid,
165 path,
160 current->comm); 166 current->comm);
161 } 167 }
162 } 168 }
@@ -312,7 +318,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
312 318
313 if (ext4_encrypted_inode(inode) && 319 if (ext4_encrypted_inode(inode) &&
314 S_ISREG(inode->i_mode)) { 320 S_ISREG(inode->i_mode)) {
315 ctx = ext4_get_crypto_ctx(inode); 321 ctx = ext4_get_crypto_ctx(inode, GFP_NOFS);
316 if (IS_ERR(ctx)) 322 if (IS_ERR(ctx))
317 goto set_error_page; 323 goto set_error_page;
318 } 324 }
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 34038e3598d5..74516efd874c 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1926,7 +1926,8 @@ retry:
1926 n_desc_blocks = o_desc_blocks + 1926 n_desc_blocks = o_desc_blocks +
1927 le16_to_cpu(es->s_reserved_gdt_blocks); 1927 le16_to_cpu(es->s_reserved_gdt_blocks);
1928 n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb); 1928 n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb);
1929 n_blocks_count = n_group * EXT4_BLOCKS_PER_GROUP(sb); 1929 n_blocks_count = (ext4_fsblk_t)n_group *
1930 EXT4_BLOCKS_PER_GROUP(sb);
1930 n_group--; /* set to last group number */ 1931 n_group--; /* set to last group number */
1931 } 1932 }
1932 1933
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 68640e6f95c5..bd8831bfbafe 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -793,6 +793,7 @@ static void ext4_put_super(struct super_block *sb)
793{ 793{
794 struct ext4_sb_info *sbi = EXT4_SB(sb); 794 struct ext4_sb_info *sbi = EXT4_SB(sb);
795 struct ext4_super_block *es = sbi->s_es; 795 struct ext4_super_block *es = sbi->s_es;
796 int aborted = 0;
796 int i, err; 797 int i, err;
797 798
798 ext4_unregister_li_request(sb); 799 ext4_unregister_li_request(sb);
@@ -802,9 +803,10 @@ static void ext4_put_super(struct super_block *sb)
802 destroy_workqueue(sbi->rsv_conversion_wq); 803 destroy_workqueue(sbi->rsv_conversion_wq);
803 804
804 if (sbi->s_journal) { 805 if (sbi->s_journal) {
806 aborted = is_journal_aborted(sbi->s_journal);
805 err = jbd2_journal_destroy(sbi->s_journal); 807 err = jbd2_journal_destroy(sbi->s_journal);
806 sbi->s_journal = NULL; 808 sbi->s_journal = NULL;
807 if (err < 0) 809 if ((err < 0) && !aborted)
808 ext4_abort(sb, "Couldn't clean up the journal"); 810 ext4_abort(sb, "Couldn't clean up the journal");
809 } 811 }
810 812
@@ -814,9 +816,8 @@ static void ext4_put_super(struct super_block *sb)
814 ext4_release_system_zone(sb); 816 ext4_release_system_zone(sb);
815 ext4_mb_release(sb); 817 ext4_mb_release(sb);
816 ext4_ext_release(sb); 818 ext4_ext_release(sb);
817 ext4_xattr_put_super(sb);
818 819
819 if (!(sb->s_flags & MS_RDONLY)) { 820 if (!(sb->s_flags & MS_RDONLY) && !aborted) {
820 ext4_clear_feature_journal_needs_recovery(sb); 821 ext4_clear_feature_journal_needs_recovery(sb);
821 es->s_state = cpu_to_le16(sbi->s_mount_state); 822 es->s_state = cpu_to_le16(sbi->s_mount_state);
822 } 823 }
@@ -3663,6 +3664,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3663 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); 3664 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
3664 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 3665 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
3665 EXT4_DESC_PER_BLOCK(sb); 3666 EXT4_DESC_PER_BLOCK(sb);
3667 if (ext4_has_feature_meta_bg(sb)) {
3668 if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
3669 ext4_msg(sb, KERN_WARNING,
3670 "first meta block group too large: %u "
3671 "(group descriptor block count %u)",
3672 le32_to_cpu(es->s_first_meta_bg), db_count);
3673 goto failed_mount;
3674 }
3675 }
3666 sbi->s_group_desc = ext4_kvmalloc(db_count * 3676 sbi->s_group_desc = ext4_kvmalloc(db_count *
3667 sizeof(struct buffer_head *), 3677 sizeof(struct buffer_head *),
3668 GFP_KERNEL); 3678 GFP_KERNEL);
@@ -3737,7 +3747,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3737 * root first: it may be modified in the journal! 3747 * root first: it may be modified in the journal!
3738 */ 3748 */
3739 if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) { 3749 if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
3740 if (ext4_load_journal(sb, es, journal_devnum)) 3750 err = ext4_load_journal(sb, es, journal_devnum);
3751 if (err)
3741 goto failed_mount3a; 3752 goto failed_mount3a;
3742 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 3753 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
3743 ext4_has_feature_journal_needs_recovery(sb)) { 3754 ext4_has_feature_journal_needs_recovery(sb)) {
@@ -3821,7 +3832,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3821 3832
3822no_journal: 3833no_journal:
3823 if (ext4_mballoc_ready) { 3834 if (ext4_mballoc_ready) {
3824 sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id); 3835 sbi->s_mb_cache = ext4_xattr_create_cache();
3825 if (!sbi->s_mb_cache) { 3836 if (!sbi->s_mb_cache) {
3826 ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache"); 3837 ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
3827 goto failed_mount_wq; 3838 goto failed_mount_wq;
@@ -4053,6 +4064,10 @@ failed_mount4:
4053 if (EXT4_SB(sb)->rsv_conversion_wq) 4064 if (EXT4_SB(sb)->rsv_conversion_wq)
4054 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq); 4065 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
4055failed_mount_wq: 4066failed_mount_wq:
4067 if (sbi->s_mb_cache) {
4068 ext4_xattr_destroy_cache(sbi->s_mb_cache);
4069 sbi->s_mb_cache = NULL;
4070 }
4056 if (sbi->s_journal) { 4071 if (sbi->s_journal) {
4057 jbd2_journal_destroy(sbi->s_journal); 4072 jbd2_journal_destroy(sbi->s_journal);
4058 sbi->s_journal = NULL; 4073 sbi->s_journal = NULL;
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 5d09ea585840..c2ee23acf359 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -100,7 +100,7 @@ static ssize_t reserved_clusters_store(struct ext4_attr *a,
100 int ret; 100 int ret;
101 101
102 ret = kstrtoull(skip_spaces(buf), 0, &val); 102 ret = kstrtoull(skip_spaces(buf), 0, &val);
103 if (!ret || val >= clusters) 103 if (ret || val >= clusters)
104 return -EINVAL; 104 return -EINVAL;
105 105
106 atomic64_set(&sbi->s_resv_clusters, val); 106 atomic64_set(&sbi->s_resv_clusters, val);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 263002f0389d..b310ed81c10e 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -53,7 +53,7 @@
53#include <linux/init.h> 53#include <linux/init.h>
54#include <linux/fs.h> 54#include <linux/fs.h>
55#include <linux/slab.h> 55#include <linux/slab.h>
56#include <linux/mbcache.h> 56#include <linux/mbcache2.h>
57#include <linux/quotaops.h> 57#include <linux/quotaops.h>
58#include "ext4_jbd2.h" 58#include "ext4_jbd2.h"
59#include "ext4.h" 59#include "ext4.h"
@@ -80,10 +80,10 @@
80# define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__) 80# define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
81#endif 81#endif
82 82
83static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *); 83static void ext4_xattr_cache_insert(struct mb2_cache *, struct buffer_head *);
84static struct buffer_head *ext4_xattr_cache_find(struct inode *, 84static struct buffer_head *ext4_xattr_cache_find(struct inode *,
85 struct ext4_xattr_header *, 85 struct ext4_xattr_header *,
86 struct mb_cache_entry **); 86 struct mb2_cache_entry **);
87static void ext4_xattr_rehash(struct ext4_xattr_header *, 87static void ext4_xattr_rehash(struct ext4_xattr_header *,
88 struct ext4_xattr_entry *); 88 struct ext4_xattr_entry *);
89static int ext4_xattr_list(struct dentry *dentry, char *buffer, 89static int ext4_xattr_list(struct dentry *dentry, char *buffer,
@@ -233,6 +233,27 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
233 return error; 233 return error;
234} 234}
235 235
236static int
237__xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header,
238 void *end, const char *function, unsigned int line)
239{
240 struct ext4_xattr_entry *entry = IFIRST(header);
241 int error = -EFSCORRUPTED;
242
243 if (((void *) header >= end) ||
244 (header->h_magic != le32_to_cpu(EXT4_XATTR_MAGIC)))
245 goto errout;
246 error = ext4_xattr_check_names(entry, end, entry);
247errout:
248 if (error)
249 __ext4_error_inode(inode, function, line, 0,
250 "corrupted in-inode xattr");
251 return error;
252}
253
254#define xattr_check_inode(inode, header, end) \
255 __xattr_check_inode((inode), (header), (end), __func__, __LINE__)
256
236static inline int 257static inline int
237ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size) 258ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size)
238{ 259{
@@ -279,7 +300,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
279 struct ext4_xattr_entry *entry; 300 struct ext4_xattr_entry *entry;
280 size_t size; 301 size_t size;
281 int error; 302 int error;
282 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 303 struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
283 304
284 ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", 305 ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
285 name_index, name, buffer, (long)buffer_size); 306 name_index, name, buffer, (long)buffer_size);
@@ -344,7 +365,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
344 header = IHDR(inode, raw_inode); 365 header = IHDR(inode, raw_inode);
345 entry = IFIRST(header); 366 entry = IFIRST(header);
346 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 367 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
347 error = ext4_xattr_check_names(entry, end, entry); 368 error = xattr_check_inode(inode, header, end);
348 if (error) 369 if (error)
349 goto cleanup; 370 goto cleanup;
350 error = ext4_xattr_find_entry(&entry, name_index, name, 371 error = ext4_xattr_find_entry(&entry, name_index, name,
@@ -426,7 +447,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
426 struct inode *inode = d_inode(dentry); 447 struct inode *inode = d_inode(dentry);
427 struct buffer_head *bh = NULL; 448 struct buffer_head *bh = NULL;
428 int error; 449 int error;
429 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 450 struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
430 451
431 ea_idebug(inode, "buffer=%p, buffer_size=%ld", 452 ea_idebug(inode, "buffer=%p, buffer_size=%ld",
432 buffer, (long)buffer_size); 453 buffer, (long)buffer_size);
@@ -475,7 +496,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
475 raw_inode = ext4_raw_inode(&iloc); 496 raw_inode = ext4_raw_inode(&iloc);
476 header = IHDR(inode, raw_inode); 497 header = IHDR(inode, raw_inode);
477 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 498 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
478 error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header)); 499 error = xattr_check_inode(inode, header, end);
479 if (error) 500 if (error)
480 goto cleanup; 501 goto cleanup;
481 error = ext4_xattr_list_entries(dentry, IFIRST(header), 502 error = ext4_xattr_list_entries(dentry, IFIRST(header),
@@ -543,11 +564,8 @@ static void
543ext4_xattr_release_block(handle_t *handle, struct inode *inode, 564ext4_xattr_release_block(handle_t *handle, struct inode *inode,
544 struct buffer_head *bh) 565 struct buffer_head *bh)
545{ 566{
546 struct mb_cache_entry *ce = NULL;
547 int error = 0; 567 int error = 0;
548 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
549 568
550 ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr);
551 BUFFER_TRACE(bh, "get_write_access"); 569 BUFFER_TRACE(bh, "get_write_access");
552 error = ext4_journal_get_write_access(handle, bh); 570 error = ext4_journal_get_write_access(handle, bh);
553 if (error) 571 if (error)
@@ -555,9 +573,15 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
555 573
556 lock_buffer(bh); 574 lock_buffer(bh);
557 if (BHDR(bh)->h_refcount == cpu_to_le32(1)) { 575 if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
576 __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
577
558 ea_bdebug(bh, "refcount now=0; freeing"); 578 ea_bdebug(bh, "refcount now=0; freeing");
559 if (ce) 579 /*
560 mb_cache_entry_free(ce); 580 * This must happen under buffer lock for
581 * ext4_xattr_block_set() to reliably detect freed block
582 */
583 mb2_cache_entry_delete_block(EXT4_GET_MB_CACHE(inode), hash,
584 bh->b_blocknr);
561 get_bh(bh); 585 get_bh(bh);
562 unlock_buffer(bh); 586 unlock_buffer(bh);
563 ext4_free_blocks(handle, inode, bh, 0, 1, 587 ext4_free_blocks(handle, inode, bh, 0, 1,
@@ -565,8 +589,6 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
565 EXT4_FREE_BLOCKS_FORGET); 589 EXT4_FREE_BLOCKS_FORGET);
566 } else { 590 } else {
567 le32_add_cpu(&BHDR(bh)->h_refcount, -1); 591 le32_add_cpu(&BHDR(bh)->h_refcount, -1);
568 if (ce)
569 mb_cache_entry_release(ce);
570 /* 592 /*
571 * Beware of this ugliness: Releasing of xattr block references 593 * Beware of this ugliness: Releasing of xattr block references
572 * from different inodes can race and so we have to protect 594 * from different inodes can race and so we have to protect
@@ -779,17 +801,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
779 struct super_block *sb = inode->i_sb; 801 struct super_block *sb = inode->i_sb;
780 struct buffer_head *new_bh = NULL; 802 struct buffer_head *new_bh = NULL;
781 struct ext4_xattr_search *s = &bs->s; 803 struct ext4_xattr_search *s = &bs->s;
782 struct mb_cache_entry *ce = NULL; 804 struct mb2_cache_entry *ce = NULL;
783 int error = 0; 805 int error = 0;
784 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 806 struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
785 807
786#define header(x) ((struct ext4_xattr_header *)(x)) 808#define header(x) ((struct ext4_xattr_header *)(x))
787 809
788 if (i->value && i->value_len > sb->s_blocksize) 810 if (i->value && i->value_len > sb->s_blocksize)
789 return -ENOSPC; 811 return -ENOSPC;
790 if (s->base) { 812 if (s->base) {
791 ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev,
792 bs->bh->b_blocknr);
793 BUFFER_TRACE(bs->bh, "get_write_access"); 813 BUFFER_TRACE(bs->bh, "get_write_access");
794 error = ext4_journal_get_write_access(handle, bs->bh); 814 error = ext4_journal_get_write_access(handle, bs->bh);
795 if (error) 815 if (error)
@@ -797,10 +817,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
797 lock_buffer(bs->bh); 817 lock_buffer(bs->bh);
798 818
799 if (header(s->base)->h_refcount == cpu_to_le32(1)) { 819 if (header(s->base)->h_refcount == cpu_to_le32(1)) {
800 if (ce) { 820 __u32 hash = le32_to_cpu(BHDR(bs->bh)->h_hash);
801 mb_cache_entry_free(ce); 821
802 ce = NULL; 822 /*
803 } 823 * This must happen under buffer lock for
824 * ext4_xattr_block_set() to reliably detect modified
825 * block
826 */
827 mb2_cache_entry_delete_block(ext4_mb_cache, hash,
828 bs->bh->b_blocknr);
804 ea_bdebug(bs->bh, "modifying in-place"); 829 ea_bdebug(bs->bh, "modifying in-place");
805 error = ext4_xattr_set_entry(i, s); 830 error = ext4_xattr_set_entry(i, s);
806 if (!error) { 831 if (!error) {
@@ -824,10 +849,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
824 int offset = (char *)s->here - bs->bh->b_data; 849 int offset = (char *)s->here - bs->bh->b_data;
825 850
826 unlock_buffer(bs->bh); 851 unlock_buffer(bs->bh);
827 if (ce) {
828 mb_cache_entry_release(ce);
829 ce = NULL;
830 }
831 ea_bdebug(bs->bh, "cloning"); 852 ea_bdebug(bs->bh, "cloning");
832 s->base = kmalloc(bs->bh->b_size, GFP_NOFS); 853 s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
833 error = -ENOMEM; 854 error = -ENOMEM;
@@ -882,6 +903,31 @@ inserted:
882 if (error) 903 if (error)
883 goto cleanup_dquot; 904 goto cleanup_dquot;
884 lock_buffer(new_bh); 905 lock_buffer(new_bh);
906 /*
907 * We have to be careful about races with
908 * freeing or rehashing of xattr block. Once we
909 * hold buffer lock xattr block's state is
910 * stable so we can check whether the block got
911 * freed / rehashed or not. Since we unhash
912 * mbcache entry under buffer lock when freeing
913 * / rehashing xattr block, checking whether
914 * entry is still hashed is reliable.
915 */
916 if (hlist_bl_unhashed(&ce->e_hash_list)) {
917 /*
918 * Undo everything and check mbcache
919 * again.
920 */
921 unlock_buffer(new_bh);
922 dquot_free_block(inode,
923 EXT4_C2B(EXT4_SB(sb),
924 1));
925 brelse(new_bh);
926 mb2_cache_entry_put(ext4_mb_cache, ce);
927 ce = NULL;
928 new_bh = NULL;
929 goto inserted;
930 }
885 le32_add_cpu(&BHDR(new_bh)->h_refcount, 1); 931 le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
886 ea_bdebug(new_bh, "reusing; refcount now=%d", 932 ea_bdebug(new_bh, "reusing; refcount now=%d",
887 le32_to_cpu(BHDR(new_bh)->h_refcount)); 933 le32_to_cpu(BHDR(new_bh)->h_refcount));
@@ -892,7 +938,8 @@ inserted:
892 if (error) 938 if (error)
893 goto cleanup_dquot; 939 goto cleanup_dquot;
894 } 940 }
895 mb_cache_entry_release(ce); 941 mb2_cache_entry_touch(ext4_mb_cache, ce);
942 mb2_cache_entry_put(ext4_mb_cache, ce);
896 ce = NULL; 943 ce = NULL;
897 } else if (bs->bh && s->base == bs->bh->b_data) { 944 } else if (bs->bh && s->base == bs->bh->b_data) {
898 /* We were modifying this block in-place. */ 945 /* We were modifying this block in-place. */
@@ -957,7 +1004,7 @@ getblk_failed:
957 1004
958cleanup: 1005cleanup:
959 if (ce) 1006 if (ce)
960 mb_cache_entry_release(ce); 1007 mb2_cache_entry_put(ext4_mb_cache, ce);
961 brelse(new_bh); 1008 brelse(new_bh);
962 if (!(bs->bh && s->base == bs->bh->b_data)) 1009 if (!(bs->bh && s->base == bs->bh->b_data))
963 kfree(s->base); 1010 kfree(s->base);
@@ -991,8 +1038,7 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
991 is->s.here = is->s.first; 1038 is->s.here = is->s.first;
992 is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 1039 is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
993 if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { 1040 if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
994 error = ext4_xattr_check_names(IFIRST(header), is->s.end, 1041 error = xattr_check_inode(inode, header, is->s.end);
995 IFIRST(header));
996 if (error) 1042 if (error)
997 return error; 1043 return error;
998 /* Find the named attribute. */ 1044 /* Find the named attribute. */
@@ -1293,6 +1339,10 @@ retry:
1293 last = entry; 1339 last = entry;
1294 total_ino = sizeof(struct ext4_xattr_ibody_header); 1340 total_ino = sizeof(struct ext4_xattr_ibody_header);
1295 1341
1342 error = xattr_check_inode(inode, header, end);
1343 if (error)
1344 goto cleanup;
1345
1296 free = ext4_xattr_free_space(last, &min_offs, base, &total_ino); 1346 free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
1297 if (free >= isize_diff) { 1347 if (free >= isize_diff) {
1298 entry = IFIRST(header); 1348 entry = IFIRST(header);
@@ -1519,17 +1569,6 @@ cleanup:
1519} 1569}
1520 1570
1521/* 1571/*
1522 * ext4_xattr_put_super()
1523 *
1524 * This is called when a file system is unmounted.
1525 */
1526void
1527ext4_xattr_put_super(struct super_block *sb)
1528{
1529 mb_cache_shrink(sb->s_bdev);
1530}
1531
1532/*
1533 * ext4_xattr_cache_insert() 1572 * ext4_xattr_cache_insert()
1534 * 1573 *
1535 * Create a new entry in the extended attribute cache, and insert 1574 * Create a new entry in the extended attribute cache, and insert
@@ -1538,28 +1577,18 @@ ext4_xattr_put_super(struct super_block *sb)
1538 * Returns 0, or a negative error number on failure. 1577 * Returns 0, or a negative error number on failure.
1539 */ 1578 */
1540static void 1579static void
1541ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh) 1580ext4_xattr_cache_insert(struct mb2_cache *ext4_mb_cache, struct buffer_head *bh)
1542{ 1581{
1543 __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); 1582 __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
1544 struct mb_cache_entry *ce;
1545 int error; 1583 int error;
1546 1584
1547 ce = mb_cache_entry_alloc(ext4_mb_cache, GFP_NOFS); 1585 error = mb2_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash,
1548 if (!ce) { 1586 bh->b_blocknr);
1549 ea_bdebug(bh, "out of memory");
1550 return;
1551 }
1552 error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
1553 if (error) { 1587 if (error) {
1554 mb_cache_entry_free(ce); 1588 if (error == -EBUSY)
1555 if (error == -EBUSY) {
1556 ea_bdebug(bh, "already in cache"); 1589 ea_bdebug(bh, "already in cache");
1557 error = 0; 1590 } else
1558 }
1559 } else {
1560 ea_bdebug(bh, "inserting [%x]", (int)hash); 1591 ea_bdebug(bh, "inserting [%x]", (int)hash);
1561 mb_cache_entry_release(ce);
1562 }
1563} 1592}
1564 1593
1565/* 1594/*
@@ -1612,26 +1641,19 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
1612 */ 1641 */
1613static struct buffer_head * 1642static struct buffer_head *
1614ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, 1643ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
1615 struct mb_cache_entry **pce) 1644 struct mb2_cache_entry **pce)
1616{ 1645{
1617 __u32 hash = le32_to_cpu(header->h_hash); 1646 __u32 hash = le32_to_cpu(header->h_hash);
1618 struct mb_cache_entry *ce; 1647 struct mb2_cache_entry *ce;
1619 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 1648 struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
1620 1649
1621 if (!header->h_hash) 1650 if (!header->h_hash)
1622 return NULL; /* never share */ 1651 return NULL; /* never share */
1623 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); 1652 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
1624again: 1653 ce = mb2_cache_entry_find_first(ext4_mb_cache, hash);
1625 ce = mb_cache_entry_find_first(ext4_mb_cache, inode->i_sb->s_bdev,
1626 hash);
1627 while (ce) { 1654 while (ce) {
1628 struct buffer_head *bh; 1655 struct buffer_head *bh;
1629 1656
1630 if (IS_ERR(ce)) {
1631 if (PTR_ERR(ce) == -EAGAIN)
1632 goto again;
1633 break;
1634 }
1635 bh = sb_bread(inode->i_sb, ce->e_block); 1657 bh = sb_bread(inode->i_sb, ce->e_block);
1636 if (!bh) { 1658 if (!bh) {
1637 EXT4_ERROR_INODE(inode, "block %lu read error", 1659 EXT4_ERROR_INODE(inode, "block %lu read error",
@@ -1647,7 +1669,7 @@ again:
1647 return bh; 1669 return bh;
1648 } 1670 }
1649 brelse(bh); 1671 brelse(bh);
1650 ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash); 1672 ce = mb2_cache_entry_find_next(ext4_mb_cache, ce);
1651 } 1673 }
1652 return NULL; 1674 return NULL;
1653} 1675}
@@ -1722,15 +1744,15 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
1722 1744
1723#define HASH_BUCKET_BITS 10 1745#define HASH_BUCKET_BITS 10
1724 1746
1725struct mb_cache * 1747struct mb2_cache *
1726ext4_xattr_create_cache(char *name) 1748ext4_xattr_create_cache(void)
1727{ 1749{
1728 return mb_cache_create(name, HASH_BUCKET_BITS); 1750 return mb2_cache_create(HASH_BUCKET_BITS);
1729} 1751}
1730 1752
1731void ext4_xattr_destroy_cache(struct mb_cache *cache) 1753void ext4_xattr_destroy_cache(struct mb2_cache *cache)
1732{ 1754{
1733 if (cache) 1755 if (cache)
1734 mb_cache_destroy(cache); 1756 mb2_cache_destroy(cache);
1735} 1757}
1736 1758
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index ddc0957760ba..10b0f7323ed6 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -108,7 +108,6 @@ extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_
108extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); 108extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
109 109
110extern void ext4_xattr_delete_inode(handle_t *, struct inode *); 110extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
111extern void ext4_xattr_put_super(struct super_block *);
112 111
113extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, 112extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
114 struct ext4_inode *raw_inode, handle_t *handle); 113 struct ext4_inode *raw_inode, handle_t *handle);
@@ -124,8 +123,8 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
124 struct ext4_xattr_info *i, 123 struct ext4_xattr_info *i,
125 struct ext4_xattr_ibody_find *is); 124 struct ext4_xattr_ibody_find *is);
126 125
127extern struct mb_cache *ext4_xattr_create_cache(char *name); 126extern struct mb2_cache *ext4_xattr_create_cache(void);
128extern void ext4_xattr_destroy_cache(struct mb_cache *); 127extern void ext4_xattr_destroy_cache(struct mb2_cache *);
129 128
130#ifdef CONFIG_EXT4_FS_SECURITY 129#ifdef CONFIG_EXT4_FS_SECURITY
131extern int ext4_init_security(handle_t *handle, struct inode *inode, 130extern int ext4_init_security(handle_t *handle, struct inode *inode,
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index e9a8d676c6bc..83dcf7bfd7b8 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -213,7 +213,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
213 switch (type) { 213 switch (type) {
214 case ACL_TYPE_ACCESS: 214 case ACL_TYPE_ACCESS:
215 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; 215 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
216 if (acl) { 216 if (acl && !ipage) {
217 error = posix_acl_update_mode(inode, &inode->i_mode, &acl); 217 error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
218 if (error) 218 if (error)
219 return error; 219 return error;
diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c
index ab377d496a39..38349ed5ea51 100644
--- a/fs/f2fs/crypto_fname.c
+++ b/fs/f2fs/crypto_fname.c
@@ -333,7 +333,7 @@ int f2fs_fname_disk_to_usr(struct inode *inode,
333 memset(buf + 4, 0, 4); 333 memset(buf + 4, 0, 4);
334 } else 334 } else
335 memset(buf, 0, 8); 335 memset(buf, 0, 8);
336 memcpy(buf + 8, iname->name + iname->len - 16, 16); 336 memcpy(buf + 8, iname->name + ((iname->len - 17) & ~15), 16);
337 oname->name[0] = '_'; 337 oname->name[0] = '_';
338 ret = digest_encode(buf, 24, oname->name + 1); 338 ret = digest_encode(buf, 24, oname->name + 1);
339 oname->len = ret + 1; 339 oname->len = ret + 1;
diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c
index 5de2d866a25c..18595d7a0efc 100644
--- a/fs/f2fs/crypto_key.c
+++ b/fs/f2fs/crypto_key.c
@@ -92,7 +92,6 @@ static void f2fs_free_crypt_info(struct f2fs_crypt_info *ci)
92 if (!ci) 92 if (!ci)
93 return; 93 return;
94 94
95 key_put(ci->ci_keyring_key);
96 crypto_free_ablkcipher(ci->ci_ctfm); 95 crypto_free_ablkcipher(ci->ci_ctfm);
97 kmem_cache_free(f2fs_crypt_info_cachep, ci); 96 kmem_cache_free(f2fs_crypt_info_cachep, ci);
98} 97}
@@ -113,7 +112,7 @@ void f2fs_free_encryption_info(struct inode *inode, struct f2fs_crypt_info *ci)
113 f2fs_free_crypt_info(ci); 112 f2fs_free_crypt_info(ci);
114} 113}
115 114
116int _f2fs_get_encryption_info(struct inode *inode) 115int f2fs_get_encryption_info(struct inode *inode)
117{ 116{
118 struct f2fs_inode_info *fi = F2FS_I(inode); 117 struct f2fs_inode_info *fi = F2FS_I(inode);
119 struct f2fs_crypt_info *crypt_info; 118 struct f2fs_crypt_info *crypt_info;
@@ -129,18 +128,12 @@ int _f2fs_get_encryption_info(struct inode *inode)
129 char mode; 128 char mode;
130 int res; 129 int res;
131 130
131 if (fi->i_crypt_info)
132 return 0;
133
132 res = f2fs_crypto_initialize(); 134 res = f2fs_crypto_initialize();
133 if (res) 135 if (res)
134 return res; 136 return res;
135retry:
136 crypt_info = ACCESS_ONCE(fi->i_crypt_info);
137 if (crypt_info) {
138 if (!crypt_info->ci_keyring_key ||
139 key_validate(crypt_info->ci_keyring_key) == 0)
140 return 0;
141 f2fs_free_encryption_info(inode, crypt_info);
142 goto retry;
143 }
144 137
145 res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, 138 res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
146 F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, 139 F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
@@ -159,7 +152,6 @@ retry:
159 crypt_info->ci_data_mode = ctx.contents_encryption_mode; 152 crypt_info->ci_data_mode = ctx.contents_encryption_mode;
160 crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; 153 crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
161 crypt_info->ci_ctfm = NULL; 154 crypt_info->ci_ctfm = NULL;
162 crypt_info->ci_keyring_key = NULL;
163 memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, 155 memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
164 sizeof(crypt_info->ci_master_key)); 156 sizeof(crypt_info->ci_master_key));
165 if (S_ISREG(inode->i_mode)) 157 if (S_ISREG(inode->i_mode))
@@ -197,7 +189,6 @@ retry:
197 keyring_key = NULL; 189 keyring_key = NULL;
198 goto out; 190 goto out;
199 } 191 }
200 crypt_info->ci_keyring_key = keyring_key;
201 BUG_ON(keyring_key->type != &key_type_logon); 192 BUG_ON(keyring_key->type != &key_type_logon);
202 ukp = user_key_payload(keyring_key); 193 ukp = user_key_payload(keyring_key);
203 if (ukp->datalen != sizeof(struct f2fs_encryption_key)) { 194 if (ukp->datalen != sizeof(struct f2fs_encryption_key)) {
@@ -230,17 +221,12 @@ retry:
230 if (res) 221 if (res)
231 goto out; 222 goto out;
232 223
233 memzero_explicit(raw_key, sizeof(raw_key)); 224 if (cmpxchg(&fi->i_crypt_info, NULL, crypt_info) == NULL)
234 if (cmpxchg(&fi->i_crypt_info, NULL, crypt_info) != NULL) { 225 crypt_info = NULL;
235 f2fs_free_crypt_info(crypt_info);
236 goto retry;
237 }
238 return 0;
239
240out: 226out:
241 if (res == -ENOKEY && !S_ISREG(inode->i_mode)) 227 if (res == -ENOKEY && !S_ISREG(inode->i_mode))
242 res = 0; 228 res = 0;
243 229 key_put(keyring_key);
244 f2fs_free_crypt_info(crypt_info); 230 f2fs_free_crypt_info(crypt_info);
245 memzero_explicit(raw_key, sizeof(raw_key)); 231 memzero_explicit(raw_key, sizeof(raw_key));
246 return res; 232 return res;
diff --git a/fs/f2fs/crypto_policy.c b/fs/f2fs/crypto_policy.c
index e504f548b64e..884f3f0fe29d 100644
--- a/fs/f2fs/crypto_policy.c
+++ b/fs/f2fs/crypto_policy.c
@@ -141,20 +141,38 @@ int f2fs_get_policy(struct inode *inode, struct f2fs_encryption_policy *policy)
141int f2fs_is_child_context_consistent_with_parent(struct inode *parent, 141int f2fs_is_child_context_consistent_with_parent(struct inode *parent,
142 struct inode *child) 142 struct inode *child)
143{ 143{
144 struct f2fs_crypt_info *parent_ci, *child_ci; 144 const struct f2fs_crypt_info *parent_ci, *child_ci;
145 struct f2fs_encryption_context parent_ctx, child_ctx;
145 int res; 146 int res;
146 147
147 if ((parent == NULL) || (child == NULL)) { 148 /* No restrictions on file types which are never encrypted */
148 pr_err("parent %p child %p\n", parent, child); 149 if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) &&
149 BUG_ON(1); 150 !S_ISLNK(child->i_mode))
150 } 151 return 1;
151 152
152 /* no restrictions if the parent directory is not encrypted */ 153 /* No restrictions if the parent directory is unencrypted */
153 if (!f2fs_encrypted_inode(parent)) 154 if (!f2fs_encrypted_inode(parent))
154 return 1; 155 return 1;
155 /* if the child directory is not encrypted, this is always a problem */ 156
157 /* Encrypted directories must not contain unencrypted files */
156 if (!f2fs_encrypted_inode(child)) 158 if (!f2fs_encrypted_inode(child))
157 return 0; 159 return 0;
160
161 /*
162 * Both parent and child are encrypted, so verify they use the same
163 * encryption policy. Compare the fscrypt_info structs if the keys are
164 * available, otherwise retrieve and compare the fscrypt_contexts.
165 *
166 * Note that the fscrypt_context retrieval will be required frequently
167 * when accessing an encrypted directory tree without the key.
168 * Performance-wise this is not a big deal because we already don't
169 * really optimize for file access without the key (to the extent that
170 * such access is even possible), given that any attempted access
171 * already causes a fscrypt_context retrieval and keyring search.
172 *
173 * In any case, if an unexpected error occurs, fall back to "forbidden".
174 */
175
158 res = f2fs_get_encryption_info(parent); 176 res = f2fs_get_encryption_info(parent);
159 if (res) 177 if (res)
160 return 0; 178 return 0;
@@ -163,17 +181,35 @@ int f2fs_is_child_context_consistent_with_parent(struct inode *parent,
163 return 0; 181 return 0;
164 parent_ci = F2FS_I(parent)->i_crypt_info; 182 parent_ci = F2FS_I(parent)->i_crypt_info;
165 child_ci = F2FS_I(child)->i_crypt_info; 183 child_ci = F2FS_I(child)->i_crypt_info;
166 if (!parent_ci && !child_ci) 184 if (parent_ci && child_ci) {
167 return 1; 185 return memcmp(parent_ci->ci_master_key, child_ci->ci_master_key,
168 if (!parent_ci || !child_ci) 186 F2FS_KEY_DESCRIPTOR_SIZE) == 0 &&
187 (parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
188 (parent_ci->ci_filename_mode ==
189 child_ci->ci_filename_mode) &&
190 (parent_ci->ci_flags == child_ci->ci_flags);
191 }
192
193 res = f2fs_getxattr(parent, F2FS_XATTR_INDEX_ENCRYPTION,
194 F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
195 &parent_ctx, sizeof(parent_ctx), NULL);
196 if (res != sizeof(parent_ctx))
197 return 0;
198
199 res = f2fs_getxattr(child, F2FS_XATTR_INDEX_ENCRYPTION,
200 F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
201 &child_ctx, sizeof(child_ctx), NULL);
202 if (res != sizeof(child_ctx))
169 return 0; 203 return 0;
170 204
171 return (memcmp(parent_ci->ci_master_key, 205 return memcmp(parent_ctx.master_key_descriptor,
172 child_ci->ci_master_key, 206 child_ctx.master_key_descriptor,
173 F2FS_KEY_DESCRIPTOR_SIZE) == 0 && 207 F2FS_KEY_DESCRIPTOR_SIZE) == 0 &&
174 (parent_ci->ci_data_mode == child_ci->ci_data_mode) && 208 (parent_ctx.contents_encryption_mode ==
175 (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) && 209 child_ctx.contents_encryption_mode) &&
176 (parent_ci->ci_flags == child_ci->ci_flags)); 210 (parent_ctx.filenames_encryption_mode ==
211 child_ctx.filenames_encryption_mode) &&
212 (parent_ctx.flags == child_ctx.flags);
177} 213}
178 214
179/** 215/**
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index e692958d6e78..8936044dee4c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1402,8 +1402,16 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
1402 struct dnode_of_data dn; 1402 struct dnode_of_data dn;
1403 int err = 0; 1403 int err = 0;
1404 1404
1405 trace_android_fs_datawrite_start(inode, pos, len, 1405 if (trace_android_fs_datawrite_start_enabled()) {
1406 current->pid, current->comm); 1406 char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
1407
1408 path = android_fstrace_get_pathname(pathbuf,
1409 MAX_TRACE_PATHBUF_LEN,
1410 inode);
1411 trace_android_fs_datawrite_start(inode, pos, len,
1412 current->pid, path,
1413 current->comm);
1414 }
1407 trace_f2fs_write_begin(inode, pos, len, flags); 1415 trace_f2fs_write_begin(inode, pos, len, flags);
1408 1416
1409 f2fs_balance_fs(sbi); 1417 f2fs_balance_fs(sbi);
@@ -1587,15 +1595,27 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
1587 trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); 1595 trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
1588 1596
1589 if (trace_android_fs_dataread_start_enabled() && 1597 if (trace_android_fs_dataread_start_enabled() &&
1590 (iov_iter_rw(iter) == READ)) 1598 (iov_iter_rw(iter) == READ)) {
1599 char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
1600
1601 path = android_fstrace_get_pathname(pathbuf,
1602 MAX_TRACE_PATHBUF_LEN,
1603 inode);
1591 trace_android_fs_dataread_start(inode, offset, 1604 trace_android_fs_dataread_start(inode, offset,
1592 count, current->pid, 1605 count, current->pid, path,
1593 current->comm); 1606 current->comm);
1607 }
1594 if (trace_android_fs_datawrite_start_enabled() && 1608 if (trace_android_fs_datawrite_start_enabled() &&
1595 (iov_iter_rw(iter) == WRITE)) 1609 (iov_iter_rw(iter) == WRITE)) {
1596 trace_android_fs_datawrite_start(inode, offset, count, 1610 char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
1597 current->pid, current->comm);
1598 1611
1612 path = android_fstrace_get_pathname(pathbuf,
1613 MAX_TRACE_PATHBUF_LEN,
1614 inode);
1615 trace_android_fs_datawrite_start(inode, offset, count,
1616 current->pid, path,
1617 current->comm);
1618 }
1599 if (iov_iter_rw(iter) == WRITE) { 1619 if (iov_iter_rw(iter) == WRITE) {
1600 __allocate_data_blocks(inode, offset, count); 1620 __allocate_data_blocks(inode, offset, count);
1601 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { 1621 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 7c1678ba8f92..60972a559685 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -124,19 +124,29 @@ struct f2fs_dir_entry *find_target_dentry(struct f2fs_filename *fname,
124 124
125 de = &d->dentry[bit_pos]; 125 de = &d->dentry[bit_pos];
126 126
127 /* encrypted case */ 127 if (de->hash_code != namehash)
128 goto not_match;
129
128 de_name.name = d->filename[bit_pos]; 130 de_name.name = d->filename[bit_pos];
129 de_name.len = le16_to_cpu(de->name_len); 131 de_name.len = le16_to_cpu(de->name_len);
130 132
131 /* show encrypted name */ 133#ifdef CONFIG_F2FS_FS_ENCRYPTION
132 if (fname->hash) { 134 if (unlikely(!name->name)) {
133 if (de->hash_code == fname->hash) 135 if (fname->usr_fname->name[0] == '_') {
134 goto found; 136 if (de_name.len > 32 &&
135 } else if (de_name.len == name->len && 137 !memcmp(de_name.name + ((de_name.len - 17) & ~15),
136 de->hash_code == namehash && 138 fname->crypto_buf.name + 8, 16))
137 !memcmp(de_name.name, name->name, name->len)) 139 goto found;
140 goto not_match;
141 }
142 name->name = fname->crypto_buf.name;
143 name->len = fname->crypto_buf.len;
144 }
145#endif
146 if (de_name.len == name->len &&
147 !memcmp(de_name.name, name->name, name->len))
138 goto found; 148 goto found;
139 149not_match:
140 if (max_slots && max_len > *max_slots) 150 if (max_slots && max_len > *max_slots)
141 *max_slots = max_len; 151 *max_slots = max_len;
142 max_len = 0; 152 max_len = 0;
@@ -170,7 +180,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
170 int max_slots; 180 int max_slots;
171 f2fs_hash_t namehash; 181 f2fs_hash_t namehash;
172 182
173 namehash = f2fs_dentry_hash(&name); 183 namehash = f2fs_dentry_hash(&name, fname);
174 184
175 f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH); 185 f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH);
176 186
@@ -547,7 +557,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
547 557
548 level = 0; 558 level = 0;
549 slots = GET_DENTRY_SLOTS(new_name.len); 559 slots = GET_DENTRY_SLOTS(new_name.len);
550 dentry_hash = f2fs_dentry_hash(&new_name); 560 dentry_hash = f2fs_dentry_hash(&new_name, NULL);
551 561
552 current_depth = F2FS_I(dir)->i_current_depth; 562 current_depth = F2FS_I(dir)->i_current_depth;
553 if (F2FS_I(dir)->chash == dentry_hash) { 563 if (F2FS_I(dir)->chash == dentry_hash) {
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9db5500d63d9..2871576fbca4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1722,7 +1722,8 @@ void f2fs_msg(struct super_block *, const char *, const char *, ...);
1722/* 1722/*
1723 * hash.c 1723 * hash.c
1724 */ 1724 */
1725f2fs_hash_t f2fs_dentry_hash(const struct qstr *); 1725f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info,
1726 struct f2fs_filename *fname);
1726 1727
1727/* 1728/*
1728 * node.c 1729 * node.c
@@ -2149,7 +2150,6 @@ void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *, struct bio *);
2149 2150
2150/* crypto_key.c */ 2151/* crypto_key.c */
2151void f2fs_free_encryption_info(struct inode *, struct f2fs_crypt_info *); 2152void f2fs_free_encryption_info(struct inode *, struct f2fs_crypt_info *);
2152int _f2fs_get_encryption_info(struct inode *inode);
2153 2153
2154/* crypto_fname.c */ 2154/* crypto_fname.c */
2155bool f2fs_valid_filenames_enc_mode(uint32_t); 2155bool f2fs_valid_filenames_enc_mode(uint32_t);
@@ -2170,18 +2170,7 @@ void f2fs_exit_crypto(void);
2170 2170
2171int f2fs_has_encryption_key(struct inode *); 2171int f2fs_has_encryption_key(struct inode *);
2172 2172
2173static inline int f2fs_get_encryption_info(struct inode *inode) 2173int f2fs_get_encryption_info(struct inode *inode);
2174{
2175 struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
2176
2177 if (!ci ||
2178 (ci->ci_keyring_key &&
2179 (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
2180 (1 << KEY_FLAG_REVOKED) |
2181 (1 << KEY_FLAG_DEAD)))))
2182 return _f2fs_get_encryption_info(inode);
2183 return 0;
2184}
2185 2174
2186void f2fs_fname_crypto_free_buffer(struct f2fs_str *); 2175void f2fs_fname_crypto_free_buffer(struct f2fs_str *);
2187int f2fs_fname_setup_filename(struct inode *, const struct qstr *, 2176int f2fs_fname_setup_filename(struct inode *, const struct qstr *,
diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h
index c2c1c2b63b25..f113f1a1e8c1 100644
--- a/fs/f2fs/f2fs_crypto.h
+++ b/fs/f2fs/f2fs_crypto.h
@@ -79,7 +79,6 @@ struct f2fs_crypt_info {
79 char ci_filename_mode; 79 char ci_filename_mode;
80 char ci_flags; 80 char ci_flags;
81 struct crypto_ablkcipher *ci_ctfm; 81 struct crypto_ablkcipher *ci_ctfm;
82 struct key *ci_keyring_key;
83 char ci_master_key[F2FS_KEY_DESCRIPTOR_SIZE]; 82 char ci_master_key[F2FS_KEY_DESCRIPTOR_SIZE];
84}; 83};
85 84
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index a197215ad52b..4b449d263333 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1535,12 +1535,19 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
1535#ifdef CONFIG_F2FS_FS_ENCRYPTION 1535#ifdef CONFIG_F2FS_FS_ENCRYPTION
1536 struct f2fs_encryption_policy policy; 1536 struct f2fs_encryption_policy policy;
1537 struct inode *inode = file_inode(filp); 1537 struct inode *inode = file_inode(filp);
1538 int err;
1538 1539
1539 if (copy_from_user(&policy, (struct f2fs_encryption_policy __user *)arg, 1540 if (copy_from_user(&policy, (struct f2fs_encryption_policy __user *)arg,
1540 sizeof(policy))) 1541 sizeof(policy)))
1541 return -EFAULT; 1542 return -EFAULT;
1542 1543
1543 return f2fs_process_policy(&policy, inode); 1544 mutex_lock(&inode->i_mutex);
1545
1546 err = f2fs_process_policy(&policy, inode);
1547
1548 mutex_unlock(&inode->i_mutex);
1549
1550 return err;
1544#else 1551#else
1545 return -EOPNOTSUPP; 1552 return -EOPNOTSUPP;
1546#endif 1553#endif
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index 71b7206c431e..b238d2fec3e5 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -70,7 +70,8 @@ static void str2hashbuf(const unsigned char *msg, size_t len,
70 *buf++ = pad; 70 *buf++ = pad;
71} 71}
72 72
73f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info) 73f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info,
74 struct f2fs_filename *fname)
74{ 75{
75 __u32 hash; 76 __u32 hash;
76 f2fs_hash_t f2fs_hash; 77 f2fs_hash_t f2fs_hash;
@@ -79,6 +80,10 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
79 const unsigned char *name = name_info->name; 80 const unsigned char *name = name_info->name;
80 size_t len = name_info->len; 81 size_t len = name_info->len;
81 82
83 /* encrypted bigname case */
84 if (fname && !fname->disk_name.name)
85 return cpu_to_le32(fname->hash);
86
82 if (is_dot_dotdot(name_info)) 87 if (is_dot_dotdot(name_info))
83 return 0; 88 return 0;
84 89
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index d2c5d69ba0b1..f35f3eb3541f 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -85,9 +85,16 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
85{ 85{
86 struct page *ipage; 86 struct page *ipage;
87 87
88 trace_android_fs_dataread_start(inode, page_offset(page), 88 if (trace_android_fs_dataread_start_enabled()) {
89 PAGE_SIZE, current->pid, 89 char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
90 current->comm); 90
91 path = android_fstrace_get_pathname(pathbuf,
92 MAX_TRACE_PATHBUF_LEN,
93 inode);
94 trace_android_fs_dataread_start(inode, page_offset(page),
95 PAGE_SIZE, current->pid,
96 path, current->comm);
97 }
91 98
92 ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); 99 ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
93 if (IS_ERR(ipage)) { 100 if (IS_ERR(ipage)) {
@@ -314,7 +321,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
314 if (IS_ERR(ipage)) 321 if (IS_ERR(ipage))
315 return NULL; 322 return NULL;
316 323
317 namehash = f2fs_dentry_hash(&name); 324 namehash = f2fs_dentry_hash(&name, fname);
318 325
319 inline_dentry = inline_data_addr(ipage); 326 inline_dentry = inline_data_addr(ipage);
320 327
@@ -479,7 +486,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
479 486
480 f2fs_wait_on_page_writeback(ipage, NODE); 487 f2fs_wait_on_page_writeback(ipage, NODE);
481 488
482 name_hash = f2fs_dentry_hash(name); 489 name_hash = f2fs_dentry_hash(name, NULL);
483 make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); 490 make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2);
484 f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos); 491 f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos);
485 492
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 3a65e0132352..4f666368aa85 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -918,6 +918,79 @@ static loff_t max_file_size(unsigned bits)
918 return result; 918 return result;
919} 919}
920 920
921static inline bool sanity_check_area_boundary(struct super_block *sb,
922 struct f2fs_super_block *raw_super)
923{
924 u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
925 u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr);
926 u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr);
927 u32 nat_blkaddr = le32_to_cpu(raw_super->nat_blkaddr);
928 u32 ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
929 u32 main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
930 u32 segment_count_ckpt = le32_to_cpu(raw_super->segment_count_ckpt);
931 u32 segment_count_sit = le32_to_cpu(raw_super->segment_count_sit);
932 u32 segment_count_nat = le32_to_cpu(raw_super->segment_count_nat);
933 u32 segment_count_ssa = le32_to_cpu(raw_super->segment_count_ssa);
934 u32 segment_count_main = le32_to_cpu(raw_super->segment_count_main);
935 u32 segment_count = le32_to_cpu(raw_super->segment_count);
936 u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
937
938 if (segment0_blkaddr != cp_blkaddr) {
939 f2fs_msg(sb, KERN_INFO,
940 "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
941 segment0_blkaddr, cp_blkaddr);
942 return true;
943 }
944
945 if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) !=
946 sit_blkaddr) {
947 f2fs_msg(sb, KERN_INFO,
948 "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
949 cp_blkaddr, sit_blkaddr,
950 segment_count_ckpt << log_blocks_per_seg);
951 return true;
952 }
953
954 if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) !=
955 nat_blkaddr) {
956 f2fs_msg(sb, KERN_INFO,
957 "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
958 sit_blkaddr, nat_blkaddr,
959 segment_count_sit << log_blocks_per_seg);
960 return true;
961 }
962
963 if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) !=
964 ssa_blkaddr) {
965 f2fs_msg(sb, KERN_INFO,
966 "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
967 nat_blkaddr, ssa_blkaddr,
968 segment_count_nat << log_blocks_per_seg);
969 return true;
970 }
971
972 if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) !=
973 main_blkaddr) {
974 f2fs_msg(sb, KERN_INFO,
975 "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
976 ssa_blkaddr, main_blkaddr,
977 segment_count_ssa << log_blocks_per_seg);
978 return true;
979 }
980
981 if (main_blkaddr + (segment_count_main << log_blocks_per_seg) !=
982 segment0_blkaddr + (segment_count << log_blocks_per_seg)) {
983 f2fs_msg(sb, KERN_INFO,
984 "Wrong MAIN_AREA boundary, start(%u) end(%u) blocks(%u)",
985 main_blkaddr,
986 segment0_blkaddr + (segment_count << log_blocks_per_seg),
987 segment_count_main << log_blocks_per_seg);
988 return true;
989 }
990
991 return false;
992}
993
921static int sanity_check_raw_super(struct super_block *sb, 994static int sanity_check_raw_super(struct super_block *sb,
922 struct f2fs_super_block *raw_super) 995 struct f2fs_super_block *raw_super)
923{ 996{
@@ -947,6 +1020,14 @@ static int sanity_check_raw_super(struct super_block *sb,
947 return 1; 1020 return 1;
948 } 1021 }
949 1022
1023 /* check log blocks per segment */
1024 if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
1025 f2fs_msg(sb, KERN_INFO,
1026 "Invalid log blocks per segment (%u)\n",
1027 le32_to_cpu(raw_super->log_blocks_per_seg));
1028 return 1;
1029 }
1030
950 /* Currently, support 512/1024/2048/4096 bytes sector size */ 1031 /* Currently, support 512/1024/2048/4096 bytes sector size */
951 if (le32_to_cpu(raw_super->log_sectorsize) > 1032 if (le32_to_cpu(raw_super->log_sectorsize) >
952 F2FS_MAX_LOG_SECTOR_SIZE || 1033 F2FS_MAX_LOG_SECTOR_SIZE ||
@@ -965,6 +1046,30 @@ static int sanity_check_raw_super(struct super_block *sb,
965 le32_to_cpu(raw_super->log_sectorsize)); 1046 le32_to_cpu(raw_super->log_sectorsize));
966 return 1; 1047 return 1;
967 } 1048 }
1049
1050 /* check reserved ino info */
1051 if (le32_to_cpu(raw_super->node_ino) != 1 ||
1052 le32_to_cpu(raw_super->meta_ino) != 2 ||
1053 le32_to_cpu(raw_super->root_ino) != 3) {
1054 f2fs_msg(sb, KERN_INFO,
1055 "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
1056 le32_to_cpu(raw_super->node_ino),
1057 le32_to_cpu(raw_super->meta_ino),
1058 le32_to_cpu(raw_super->root_ino));
1059 return 1;
1060 }
1061
1062 if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) {
1063 f2fs_msg(sb, KERN_INFO,
1064 "Invalid segment count (%u)",
1065 le32_to_cpu(raw_super->segment_count));
1066 return 1;
1067 }
1068
1069 /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
1070 if (sanity_check_area_boundary(sb, raw_super))
1071 return 1;
1072
968 return 0; 1073 return 0;
969} 1074}
970 1075
@@ -973,6 +1078,8 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
973 unsigned int total, fsmeta; 1078 unsigned int total, fsmeta;
974 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 1079 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
975 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 1080 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1081 unsigned int main_segs, blocks_per_seg;
1082 int i;
976 1083
977 total = le32_to_cpu(raw_super->segment_count); 1084 total = le32_to_cpu(raw_super->segment_count);
978 fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); 1085 fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
@@ -984,6 +1091,20 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
984 if (unlikely(fsmeta >= total)) 1091 if (unlikely(fsmeta >= total))
985 return 1; 1092 return 1;
986 1093
1094 main_segs = le32_to_cpu(raw_super->segment_count_main);
1095 blocks_per_seg = sbi->blocks_per_seg;
1096
1097 for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
1098 if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs ||
1099 le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg)
1100 return 1;
1101 }
1102 for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
1103 if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs ||
1104 le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg)
1105 return 1;
1106 }
1107
987 if (unlikely(f2fs_cp_error(sbi))) { 1108 if (unlikely(f2fs_cp_error(sbi))) {
988 f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); 1109 f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
989 return 1; 1110 return 1;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 509411dd3698..cf644d52c0cf 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1269,6 +1269,16 @@ out:
1269 return 0; 1269 return 0;
1270} 1270}
1271 1271
1272static void fat_dummy_inode_init(struct inode *inode)
1273{
1274 /* Initialize this dummy inode to work as no-op. */
1275 MSDOS_I(inode)->mmu_private = 0;
1276 MSDOS_I(inode)->i_start = 0;
1277 MSDOS_I(inode)->i_logstart = 0;
1278 MSDOS_I(inode)->i_attrs = 0;
1279 MSDOS_I(inode)->i_pos = 0;
1280}
1281
1272static int fat_read_root(struct inode *inode) 1282static int fat_read_root(struct inode *inode)
1273{ 1283{
1274 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); 1284 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
@@ -1713,12 +1723,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1713 fat_inode = new_inode(sb); 1723 fat_inode = new_inode(sb);
1714 if (!fat_inode) 1724 if (!fat_inode)
1715 goto out_fail; 1725 goto out_fail;
1716 MSDOS_I(fat_inode)->i_pos = 0; 1726 fat_dummy_inode_init(fat_inode);
1717 sbi->fat_inode = fat_inode; 1727 sbi->fat_inode = fat_inode;
1718 1728
1719 fsinfo_inode = new_inode(sb); 1729 fsinfo_inode = new_inode(sb);
1720 if (!fsinfo_inode) 1730 if (!fsinfo_inode)
1721 goto out_fail; 1731 goto out_fail;
1732 fat_dummy_inode_init(fsinfo_inode);
1722 fsinfo_inode->i_ino = MSDOS_FSINFO_INO; 1733 fsinfo_inode->i_ino = MSDOS_FSINFO_INO;
1723 sbi->fsinfo_inode = fsinfo_inode; 1734 sbi->fsinfo_inode = fsinfo_inode;
1724 insert_inode_hash(fsinfo_inode); 1735 insert_inode_hash(fsinfo_inode);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index ee85cd4e136a..62376451bbce 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -740,16 +740,10 @@ static int __init fcntl_init(void)
740 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY 740 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
741 * is defined as O_NONBLOCK on some platforms and not on others. 741 * is defined as O_NONBLOCK on some platforms and not on others.
742 */ 742 */
743 BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( 743 BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
744 O_RDONLY | O_WRONLY | O_RDWR | 744 HWEIGHT32(
745 O_CREAT | O_EXCL | O_NOCTTY | 745 (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
746 O_TRUNC | O_APPEND | /* O_NONBLOCK | */ 746 __FMODE_EXEC | __FMODE_NONOTIFY));
747 __O_SYNC | O_DSYNC | FASYNC |
748 O_DIRECT | O_LARGEFILE | O_DIRECTORY |
749 O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
750 __FMODE_EXEC | O_PATH | __O_TMPFILE |
751 __FMODE_NONOTIFY
752 ));
753 747
754 fasync_cache = kmem_cache_create("fasync_cache", 748 fasync_cache = kmem_cache_create("fasync_cache",
755 sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); 749 sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 7dca743b2ce1..940c683561dd 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -44,6 +44,7 @@ void set_fs_pwd(struct fs_struct *fs, const struct path *path)
44 if (old_pwd.dentry) 44 if (old_pwd.dentry)
45 path_put(&old_pwd); 45 path_put(&old_pwd);
46} 46}
47EXPORT_SYMBOL(set_fs_pwd);
47 48
48static inline int replace_path(struct path *p, const struct path *old, const struct path *new) 49static inline int replace_path(struct path *p, const struct path *old, const struct path *new)
49{ 50{
@@ -89,6 +90,7 @@ void free_fs_struct(struct fs_struct *fs)
89 path_put(&fs->pwd); 90 path_put(&fs->pwd);
90 kmem_cache_free(fs_cachep, fs); 91 kmem_cache_free(fs_cachep, fs);
91} 92}
93EXPORT_SYMBOL(free_fs_struct);
92 94
93void exit_fs(struct task_struct *tsk) 95void exit_fs(struct task_struct *tsk)
94{ 96{
@@ -127,6 +129,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
127 } 129 }
128 return fs; 130 return fs;
129} 131}
132EXPORT_SYMBOL_GPL(copy_fs_struct);
130 133
131int unshare_fs_struct(void) 134int unshare_fs_struct(void)
132{ 135{
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 4304072161aa..40d61077bead 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -542,6 +542,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
542 hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) { 542 hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) {
543 if (invalidate) 543 if (invalidate)
544 set_bit(FSCACHE_OBJECT_RETIRED, &object->flags); 544 set_bit(FSCACHE_OBJECT_RETIRED, &object->flags);
545 clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
545 fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL); 546 fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL);
546 } 547 }
547 } else { 548 } else {
@@ -560,6 +561,10 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
560 wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t, 561 wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t,
561 TASK_UNINTERRUPTIBLE); 562 TASK_UNINTERRUPTIBLE);
562 563
564 /* Make sure any pending writes are cancelled. */
565 if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX)
566 fscache_invalidate_writes(cookie);
567
563 /* Reset the cookie state if it wasn't relinquished */ 568 /* Reset the cookie state if it wasn't relinquished */
564 if (!test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags)) { 569 if (!test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags)) {
565 atomic_inc(&cookie->n_active); 570 atomic_inc(&cookie->n_active);
diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c
index 9b28649df3a1..a8aa00be4444 100644
--- a/fs/fscache/netfs.c
+++ b/fs/fscache/netfs.c
@@ -48,6 +48,7 @@ int __fscache_register_netfs(struct fscache_netfs *netfs)
48 cookie->flags = 1 << FSCACHE_COOKIE_ENABLED; 48 cookie->flags = 1 << FSCACHE_COOKIE_ENABLED;
49 49
50 spin_lock_init(&cookie->lock); 50 spin_lock_init(&cookie->lock);
51 spin_lock_init(&cookie->stores_lock);
51 INIT_HLIST_HEAD(&cookie->backing_objects); 52 INIT_HLIST_HEAD(&cookie->backing_objects);
52 53
53 /* check the netfs type is not already present */ 54 /* check the netfs type is not already present */
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 9e792e30f4db..7a182c87f378 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -30,6 +30,7 @@ static const struct fscache_state *fscache_look_up_object(struct fscache_object
30static const struct fscache_state *fscache_object_available(struct fscache_object *, int); 30static const struct fscache_state *fscache_object_available(struct fscache_object *, int);
31static const struct fscache_state *fscache_parent_ready(struct fscache_object *, int); 31static const struct fscache_state *fscache_parent_ready(struct fscache_object *, int);
32static const struct fscache_state *fscache_update_object(struct fscache_object *, int); 32static const struct fscache_state *fscache_update_object(struct fscache_object *, int);
33static const struct fscache_state *fscache_object_dead(struct fscache_object *, int);
33 34
34#define __STATE_NAME(n) fscache_osm_##n 35#define __STATE_NAME(n) fscache_osm_##n
35#define STATE(n) (&__STATE_NAME(n)) 36#define STATE(n) (&__STATE_NAME(n))
@@ -91,7 +92,7 @@ static WORK_STATE(LOOKUP_FAILURE, "LCFL", fscache_lookup_failure);
91static WORK_STATE(KILL_OBJECT, "KILL", fscache_kill_object); 92static WORK_STATE(KILL_OBJECT, "KILL", fscache_kill_object);
92static WORK_STATE(KILL_DEPENDENTS, "KDEP", fscache_kill_dependents); 93static WORK_STATE(KILL_DEPENDENTS, "KDEP", fscache_kill_dependents);
93static WORK_STATE(DROP_OBJECT, "DROP", fscache_drop_object); 94static WORK_STATE(DROP_OBJECT, "DROP", fscache_drop_object);
94static WORK_STATE(OBJECT_DEAD, "DEAD", (void*)2UL); 95static WORK_STATE(OBJECT_DEAD, "DEAD", fscache_object_dead);
95 96
96static WAIT_STATE(WAIT_FOR_INIT, "?INI", 97static WAIT_STATE(WAIT_FOR_INIT, "?INI",
97 TRANSIT_TO(INIT_OBJECT, 1 << FSCACHE_OBJECT_EV_NEW_CHILD)); 98 TRANSIT_TO(INIT_OBJECT, 1 << FSCACHE_OBJECT_EV_NEW_CHILD));
@@ -229,6 +230,10 @@ execute_work_state:
229 event = -1; 230 event = -1;
230 if (new_state == NO_TRANSIT) { 231 if (new_state == NO_TRANSIT) {
231 _debug("{OBJ%x} %s notrans", object->debug_id, state->name); 232 _debug("{OBJ%x} %s notrans", object->debug_id, state->name);
233 if (unlikely(state == STATE(OBJECT_DEAD))) {
234 _leave(" [dead]");
235 return;
236 }
232 fscache_enqueue_object(object); 237 fscache_enqueue_object(object);
233 event_mask = object->oob_event_mask; 238 event_mask = object->oob_event_mask;
234 goto unmask_events; 239 goto unmask_events;
@@ -239,7 +244,7 @@ execute_work_state:
239 object->state = state = new_state; 244 object->state = state = new_state;
240 245
241 if (state->work) { 246 if (state->work) {
242 if (unlikely(state->work == ((void *)2UL))) { 247 if (unlikely(state == STATE(OBJECT_DEAD))) {
243 _leave(" [dead]"); 248 _leave(" [dead]");
244 return; 249 return;
245 } 250 }
@@ -645,6 +650,12 @@ static const struct fscache_state *fscache_kill_object(struct fscache_object *ob
645 fscache_mark_object_dead(object); 650 fscache_mark_object_dead(object);
646 object->oob_event_mask = 0; 651 object->oob_event_mask = 0;
647 652
653 if (test_bit(FSCACHE_OBJECT_RETIRED, &object->flags)) {
654 /* Reject any new read/write ops and abort any that are pending. */
655 clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
656 fscache_cancel_all_ops(object);
657 }
658
648 if (list_empty(&object->dependents) && 659 if (list_empty(&object->dependents) &&
649 object->n_ops == 0 && 660 object->n_ops == 0 &&
650 object->n_children == 0) 661 object->n_children == 0)
@@ -1077,3 +1088,20 @@ void fscache_object_mark_killed(struct fscache_object *object,
1077 } 1088 }
1078} 1089}
1079EXPORT_SYMBOL(fscache_object_mark_killed); 1090EXPORT_SYMBOL(fscache_object_mark_killed);
1091
1092/*
1093 * The object is dead. We can get here if an object gets queued by an event
1094 * that would lead to its death (such as EV_KILL) when the dispatcher is
1095 * already running (and so can be requeued) but hasn't yet cleared the event
1096 * mask.
1097 */
1098static const struct fscache_state *fscache_object_dead(struct fscache_object *object,
1099 int event)
1100{
1101 if (!test_and_set_bit(FSCACHE_OBJECT_RUN_AFTER_DEAD,
1102 &object->flags))
1103 return NO_TRANSIT;
1104
1105 WARN(true, "FS-Cache object redispatched after death");
1106 return NO_TRANSIT;
1107}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 7f18063f1655..fbfec06b054d 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -420,6 +420,10 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
420static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) 420static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
421{ 421{
422 spin_lock(&fiq->waitq.lock); 422 spin_lock(&fiq->waitq.lock);
423 if (test_bit(FR_FINISHED, &req->flags)) {
424 spin_unlock(&fiq->waitq.lock);
425 return;
426 }
423 if (list_empty(&req->intr_entry)) { 427 if (list_empty(&req->intr_entry)) {
424 list_add_tail(&req->intr_entry, &fiq->interrupts); 428 list_add_tail(&req->intr_entry, &fiq->interrupts);
425 wake_up_locked(&fiq->waitq); 429 wake_up_locked(&fiq->waitq);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 8821c380a71a..1a063cbfe503 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -46,7 +46,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
46{ 46{
47 struct fuse_file *ff; 47 struct fuse_file *ff;
48 48
49 ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); 49 ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL);
50 if (unlikely(!ff)) 50 if (unlikely(!ff))
51 return NULL; 51 return NULL;
52 52
@@ -100,6 +100,7 @@ static void fuse_file_put(struct fuse_file *ff, bool sync)
100 iput(req->misc.release.inode); 100 iput(req->misc.release.inode);
101 fuse_put_request(ff->fc, req); 101 fuse_put_request(ff->fc, req);
102 } else if (sync) { 102 } else if (sync) {
103 __set_bit(FR_FORCE, &req->flags);
103 __clear_bit(FR_BACKGROUND, &req->flags); 104 __clear_bit(FR_BACKGROUND, &req->flags);
104 fuse_request_send(ff->fc, req); 105 fuse_request_send(ff->fc, req);
105 iput(req->misc.release.inode); 106 iput(req->misc.release.inode);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index ad8a5b757cc7..a443c6e54412 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -760,7 +760,7 @@ static int get_first_leaf(struct gfs2_inode *dip, u32 index,
760 int error; 760 int error;
761 761
762 error = get_leaf_nr(dip, index, &leaf_no); 762 error = get_leaf_nr(dip, index, &leaf_no);
763 if (!error) 763 if (!IS_ERR_VALUE(error))
764 error = get_leaf(dip, leaf_no, bh_out); 764 error = get_leaf(dip, leaf_no, bh_out);
765 765
766 return error; 766 return error;
@@ -976,7 +976,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
976 976
977 index = name->hash >> (32 - dip->i_depth); 977 index = name->hash >> (32 - dip->i_depth);
978 error = get_leaf_nr(dip, index, &leaf_no); 978 error = get_leaf_nr(dip, index, &leaf_no);
979 if (error) 979 if (IS_ERR_VALUE(error))
980 return error; 980 return error;
981 981
982 /* Get the old leaf block */ 982 /* Get the old leaf block */
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 32e74710b1aa..070901e76653 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -80,9 +80,9 @@ static struct rhashtable_params ht_parms = {
80 80
81static struct rhashtable gl_hash_table; 81static struct rhashtable gl_hash_table;
82 82
83void gfs2_glock_free(struct gfs2_glock *gl) 83static void gfs2_glock_dealloc(struct rcu_head *rcu)
84{ 84{
85 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; 85 struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
86 86
87 if (gl->gl_ops->go_flags & GLOF_ASPACE) { 87 if (gl->gl_ops->go_flags & GLOF_ASPACE) {
88 kmem_cache_free(gfs2_glock_aspace_cachep, gl); 88 kmem_cache_free(gfs2_glock_aspace_cachep, gl);
@@ -90,6 +90,13 @@ void gfs2_glock_free(struct gfs2_glock *gl)
90 kfree(gl->gl_lksb.sb_lvbptr); 90 kfree(gl->gl_lksb.sb_lvbptr);
91 kmem_cache_free(gfs2_glock_cachep, gl); 91 kmem_cache_free(gfs2_glock_cachep, gl);
92 } 92 }
93}
94
95void gfs2_glock_free(struct gfs2_glock *gl)
96{
97 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
98
99 call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
93 if (atomic_dec_and_test(&sdp->sd_glock_disposal)) 100 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
94 wake_up(&sdp->sd_glock_wait); 101 wake_up(&sdp->sd_glock_wait);
95} 102}
@@ -651,9 +658,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
651 struct kmem_cache *cachep; 658 struct kmem_cache *cachep;
652 int ret, tries = 0; 659 int ret, tries = 0;
653 660
661 rcu_read_lock();
654 gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms); 662 gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
655 if (gl && !lockref_get_not_dead(&gl->gl_lockref)) 663 if (gl && !lockref_get_not_dead(&gl->gl_lockref))
656 gl = NULL; 664 gl = NULL;
665 rcu_read_unlock();
657 666
658 *glp = gl; 667 *glp = gl;
659 if (gl) 668 if (gl)
@@ -721,15 +730,18 @@ again:
721 730
722 if (ret == -EEXIST) { 731 if (ret == -EEXIST) {
723 ret = 0; 732 ret = 0;
733 rcu_read_lock();
724 tmp = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms); 734 tmp = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
725 if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) { 735 if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) {
726 if (++tries < 100) { 736 if (++tries < 100) {
737 rcu_read_unlock();
727 cond_resched(); 738 cond_resched();
728 goto again; 739 goto again;
729 } 740 }
730 tmp = NULL; 741 tmp = NULL;
731 ret = -ENOMEM; 742 ret = -ENOMEM;
732 } 743 }
744 rcu_read_unlock();
733 } else { 745 } else {
734 WARN_ON_ONCE(ret); 746 WARN_ON_ONCE(ret);
735 } 747 }
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index de7b4f97ac75..4a9077ec9313 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -207,7 +207,7 @@ struct lm_lockname {
207 struct gfs2_sbd *ln_sbd; 207 struct gfs2_sbd *ln_sbd;
208 u64 ln_number; 208 u64 ln_number;
209 unsigned int ln_type; 209 unsigned int ln_type;
210}; 210} __packed __aligned(sizeof(int));
211 211
212#define lm_name_equal(name1, name2) \ 212#define lm_name_equal(name1, name2) \
213 (((name1)->ln_number == (name2)->ln_number) && \ 213 (((name1)->ln_number == (name2)->ln_number) && \
@@ -367,6 +367,7 @@ struct gfs2_glock {
367 loff_t end; 367 loff_t end;
368 } gl_vm; 368 } gl_vm;
369 }; 369 };
370 struct rcu_head gl_rcu;
370 struct rhash_head gl_node; 371 struct rhash_head gl_node;
371}; 372};
372 373
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 595ebdb41846..a17da8b57fc6 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -191,7 +191,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
191 addr = ALIGN(addr, huge_page_size(h)); 191 addr = ALIGN(addr, huge_page_size(h));
192 vma = find_vma(mm, addr); 192 vma = find_vma(mm, addr);
193 if (TASK_SIZE - len >= addr && 193 if (TASK_SIZE - len >= addr &&
194 (!vma || addr + len <= vma->vm_start)) 194 (!vma || addr + len <= vm_start_gap(vma)))
195 return addr; 195 return addr;
196 } 196 }
197 197
diff --git a/fs/inode.c b/fs/inode.c
index 2c16b758831d..6a7234f0afea 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1721,7 +1721,7 @@ int dentry_needs_remove_privs(struct dentry *dentry)
1721} 1721}
1722EXPORT_SYMBOL(dentry_needs_remove_privs); 1722EXPORT_SYMBOL(dentry_needs_remove_privs);
1723 1723
1724static int __remove_privs(struct dentry *dentry, int kill) 1724static int __remove_privs(struct vfsmount *mnt, struct dentry *dentry, int kill)
1725{ 1725{
1726 struct iattr newattrs; 1726 struct iattr newattrs;
1727 1727
@@ -1730,7 +1730,7 @@ static int __remove_privs(struct dentry *dentry, int kill)
1730 * Note we call this on write, so notify_change will not 1730 * Note we call this on write, so notify_change will not
1731 * encounter any conflicting delegations: 1731 * encounter any conflicting delegations:
1732 */ 1732 */
1733 return notify_change(dentry, &newattrs, NULL); 1733 return notify_change2(mnt, dentry, &newattrs, NULL);
1734} 1734}
1735 1735
1736/* 1736/*
@@ -1752,7 +1752,7 @@ int file_remove_privs(struct file *file)
1752 if (kill < 0) 1752 if (kill < 0)
1753 return kill; 1753 return kill;
1754 if (kill) 1754 if (kill)
1755 error = __remove_privs(dentry, kill); 1755 error = __remove_privs(file->f_path.mnt, dentry, kill);
1756 if (!error) 1756 if (!error)
1757 inode_has_no_xattr(inode); 1757 inode_has_no_xattr(inode);
1758 1758
diff --git a/fs/internal.h b/fs/internal.h
index 71859c4d0b41..6387b35a1c0d 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -84,9 +84,11 @@ extern struct file *get_empty_filp(void);
84 * super.c 84 * super.c
85 */ 85 */
86extern int do_remount_sb(struct super_block *, int, void *, int); 86extern int do_remount_sb(struct super_block *, int, void *, int);
87extern int do_remount_sb2(struct vfsmount *, struct super_block *, int,
88 void *, int);
87extern bool trylock_super(struct super_block *sb); 89extern bool trylock_super(struct super_block *sb);
88extern struct dentry *mount_fs(struct file_system_type *, 90extern struct dentry *mount_fs(struct file_system_type *,
89 int, const char *, void *); 91 int, const char *, struct vfsmount *, void *);
90extern struct super_block *user_get_super(dev_t); 92extern struct super_block *user_get_super(dev_t);
91 93
92/* 94/*
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index fa1b8e0dcacf..a2e724053919 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1876,7 +1876,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
1876 1876
1877 __blist_del_buffer(list, jh); 1877 __blist_del_buffer(list, jh);
1878 jh->b_jlist = BJ_None; 1878 jh->b_jlist = BJ_None;
1879 if (test_clear_buffer_jbddirty(bh)) 1879 if (transaction && is_journal_aborted(transaction->t_journal))
1880 clear_buffer_jbddirty(bh);
1881 else if (test_clear_buffer_jbddirty(bh))
1880 mark_buffer_dirty(bh); /* Expose it to the VM */ 1882 mark_buffer_dirty(bh); /* Expose it to the VM */
1881} 1883}
1882 1884
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 8f9176caf098..c8d58c5ac8ae 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -758,7 +758,7 @@ static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data,
758 sb->s_blocksize - offset : toread; 758 sb->s_blocksize - offset : toread;
759 759
760 tmp_bh.b_state = 0; 760 tmp_bh.b_state = 0;
761 tmp_bh.b_size = 1 << inode->i_blkbits; 761 tmp_bh.b_size = i_blocksize(inode);
762 err = jfs_get_block(inode, blk, &tmp_bh, 0); 762 err = jfs_get_block(inode, blk, &tmp_bh, 0);
763 if (err) 763 if (err)
764 return err; 764 return err;
@@ -798,7 +798,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type,
798 sb->s_blocksize - offset : towrite; 798 sb->s_blocksize - offset : towrite;
799 799
800 tmp_bh.b_state = 0; 800 tmp_bh.b_state = 0;
801 tmp_bh.b_size = 1 << inode->i_blkbits; 801 tmp_bh.b_size = i_blocksize(inode);
802 err = jfs_get_block(inode, blk, &tmp_bh, 1); 802 err = jfs_get_block(inode, blk, &tmp_bh, 1);
803 if (err) 803 if (err)
804 goto out; 804 goto out;
diff --git a/fs/mbcache2.c b/fs/mbcache2.c
new file mode 100644
index 000000000000..5c3e1a8c38f6
--- /dev/null
+++ b/fs/mbcache2.c
@@ -0,0 +1,359 @@
1#include <linux/spinlock.h>
2#include <linux/slab.h>
3#include <linux/list.h>
4#include <linux/list_bl.h>
5#include <linux/module.h>
6#include <linux/sched.h>
7#include <linux/mbcache2.h>
8
9/*
10 * Mbcache is a simple key-value store. Keys need not be unique, however
11 * key-value pairs are expected to be unique (we use this fact in
12 * mb2_cache_entry_delete_block()).
13 *
14 * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
15 * They use hash of a block contents as a key and block number as a value.
16 * That's why keys need not be unique (different xattr blocks may end up having
17 * the same hash). However block number always uniquely identifies a cache
18 * entry.
19 *
20 * We provide functions for creation and removal of entries, search by key,
21 * and a special "delete entry with given key-value pair" operation. Fixed
22 * size hash table is used for fast key lookups.
23 */
24
/*
 * A cache instance: a fixed-size hash table for key lookups plus an LRU
 * list feeding the registered shrinker.
 */
struct mb2_cache {
	/* Hash table of entries */
	struct hlist_bl_head *c_hash;
	/* log2 of hash table size */
	int c_bucket_bits;
	/* Protects c_lru_list, c_entry_count */
	spinlock_t c_lru_list_lock;
	/* Entries in LRU order; head is reclaimed first by the shrinker */
	struct list_head c_lru_list;
	/* Number of entries in cache */
	unsigned long c_entry_count;
	/* Per-cache shrinker; reclaims entries under memory pressure */
	struct shrinker c_shrink;
};

/* Slab cache for struct mb2_cache_entry objects, shared by all caches */
static struct kmem_cache *mb2_entry_cache;
39
40/*
41 * mb2_cache_entry_create - create entry in cache
42 * @cache - cache where the entry should be created
43 * @mask - gfp mask with which the entry should be allocated
44 * @key - key of the entry
45 * @block - block that contains data
46 *
47 * Creates entry in @cache with key @key and records that data is stored in
48 * block @block. The function returns -EBUSY if entry with the same key
49 * and for the same block already exists in cache. Otherwise 0 is returned.
50 */
51int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key,
52 sector_t block)
53{
54 struct mb2_cache_entry *entry, *dup;
55 struct hlist_bl_node *dup_node;
56 struct hlist_bl_head *head;
57
58 entry = kmem_cache_alloc(mb2_entry_cache, mask);
59 if (!entry)
60 return -ENOMEM;
61
62 INIT_LIST_HEAD(&entry->e_lru_list);
63 /* One ref for hash, one ref returned */
64 atomic_set(&entry->e_refcnt, 1);
65 entry->e_key = key;
66 entry->e_block = block;
67 head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
68 entry->e_hash_list_head = head;
69 hlist_bl_lock(head);
70 hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
71 if (dup->e_key == key && dup->e_block == block) {
72 hlist_bl_unlock(head);
73 kmem_cache_free(mb2_entry_cache, entry);
74 return -EBUSY;
75 }
76 }
77 hlist_bl_add_head(&entry->e_hash_list, head);
78 hlist_bl_unlock(head);
79
80 spin_lock(&cache->c_lru_list_lock);
81 list_add_tail(&entry->e_lru_list, &cache->c_lru_list);
82 /* Grab ref for LRU list */
83 atomic_inc(&entry->e_refcnt);
84 cache->c_entry_count++;
85 spin_unlock(&cache->c_lru_list_lock);
86
87 return 0;
88}
89EXPORT_SYMBOL(mb2_cache_entry_create);
90
/*
 * __mb2_cache_entry_free - release entry memory
 * @entry: entry to free
 *
 * Called (via mb2_cache_entry_put) once the last reference to @entry has
 * been dropped; returns the entry to the slab cache.
 */
void __mb2_cache_entry_free(struct mb2_cache_entry *entry)
{
	kmem_cache_free(mb2_entry_cache, entry);
}
EXPORT_SYMBOL(__mb2_cache_entry_free);
96
97static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache,
98 struct mb2_cache_entry *entry,
99 u32 key)
100{
101 struct mb2_cache_entry *old_entry = entry;
102 struct hlist_bl_node *node;
103 struct hlist_bl_head *head;
104
105 if (entry)
106 head = entry->e_hash_list_head;
107 else
108 head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
109 hlist_bl_lock(head);
110 if (entry && !hlist_bl_unhashed(&entry->e_hash_list))
111 node = entry->e_hash_list.next;
112 else
113 node = hlist_bl_first(head);
114 while (node) {
115 entry = hlist_bl_entry(node, struct mb2_cache_entry,
116 e_hash_list);
117 if (entry->e_key == key) {
118 atomic_inc(&entry->e_refcnt);
119 goto out;
120 }
121 node = node->next;
122 }
123 entry = NULL;
124out:
125 hlist_bl_unlock(head);
126 if (old_entry)
127 mb2_cache_entry_put(cache, old_entry);
128
129 return entry;
130}
131
/*
 * mb2_cache_entry_find_first - find the first entry in cache with given key
 * @cache: cache where we should search
 * @key: key to look for
 *
 * Search in @cache for entry with key @key. Grabs reference to the first
 * entry found and returns the entry. The caller must drop the reference
 * with mb2_cache_entry_put() or pass it to mb2_cache_entry_find_next().
 */
struct mb2_cache_entry *mb2_cache_entry_find_first(struct mb2_cache *cache,
						   u32 key)
{
	return __entry_find(cache, NULL, key);
}
EXPORT_SYMBOL(mb2_cache_entry_find_first);
146
/*
 * mb2_cache_entry_find_next - find next entry in cache with the same key
 * @cache: cache where we should search
 * @entry: entry to start search from
 *
 * Finds next entry in the hash chain which has the same key as @entry.
 * If @entry is unhashed (which can happen when deletion of entry races
 * with the search), finds the first entry in the hash chain. The function
 * drops reference to @entry and returns with a reference to the found entry.
 */
struct mb2_cache_entry *mb2_cache_entry_find_next(struct mb2_cache *cache,
						  struct mb2_cache_entry *entry)
{
	return __entry_find(cache, entry, entry->e_key);
}
EXPORT_SYMBOL(mb2_cache_entry_find_next);
163
164/* mb2_cache_entry_delete_block - remove information about block from cache
165 * @cache - cache we work with
166 * @key - key of the entry to remove
167 * @block - block containing data for @key
168 *
169 * Remove entry from cache @cache with key @key with data stored in @block.
170 */
171void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key,
172 sector_t block)
173{
174 struct hlist_bl_node *node;
175 struct hlist_bl_head *head;
176 struct mb2_cache_entry *entry;
177
178 head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
179 hlist_bl_lock(head);
180 hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
181 if (entry->e_key == key && entry->e_block == block) {
182 /* We keep hash list reference to keep entry alive */
183 hlist_bl_del_init(&entry->e_hash_list);
184 hlist_bl_unlock(head);
185 spin_lock(&cache->c_lru_list_lock);
186 if (!list_empty(&entry->e_lru_list)) {
187 list_del_init(&entry->e_lru_list);
188 cache->c_entry_count--;
189 atomic_dec(&entry->e_refcnt);
190 }
191 spin_unlock(&cache->c_lru_list_lock);
192 mb2_cache_entry_put(cache, entry);
193 return;
194 }
195 }
196 hlist_bl_unlock(head);
197}
198EXPORT_SYMBOL(mb2_cache_entry_delete_block);
199
200/* mb2_cache_entry_touch - cache entry got used
201 * @cache - cache the entry belongs to
202 * @entry - entry that got used
203 *
204 * Move entry in lru list to reflect the fact that it was used.
205 */
206void mb2_cache_entry_touch(struct mb2_cache *cache,
207 struct mb2_cache_entry *entry)
208{
209 spin_lock(&cache->c_lru_list_lock);
210 if (!list_empty(&entry->e_lru_list))
211 list_move_tail(&cache->c_lru_list, &entry->e_lru_list);
212 spin_unlock(&cache->c_lru_list_lock);
213}
214EXPORT_SYMBOL(mb2_cache_entry_touch);
215
216static unsigned long mb2_cache_count(struct shrinker *shrink,
217 struct shrink_control *sc)
218{
219 struct mb2_cache *cache = container_of(shrink, struct mb2_cache,
220 c_shrink);
221
222 return cache->c_entry_count;
223}
224
/*
 * mb2_cache_scan - shrinker callback: reclaim up to sc->nr_to_scan entries
 * @shrink: shrinker embedded in the cache
 * @sc: shrink control carrying the scan target
 *
 * Walks the LRU list from the head, unhashing entries and dropping their
 * references. Returns the number of entries actually freed.
 */
static unsigned long mb2_cache_scan(struct shrinker *shrink,
				    struct shrink_control *sc)
{
	int nr_to_scan = sc->nr_to_scan;
	struct mb2_cache *cache = container_of(shrink, struct mb2_cache,
					      c_shrink);
	struct mb2_cache_entry *entry;
	struct hlist_bl_head *head;
	unsigned int shrunk = 0;

	spin_lock(&cache->c_lru_list_lock);
	while (nr_to_scan-- && !list_empty(&cache->c_lru_list)) {
		entry = list_first_entry(&cache->c_lru_list,
					 struct mb2_cache_entry, e_lru_list);
		list_del_init(&entry->e_lru_list);
		cache->c_entry_count--;
		/*
		 * We keep LRU list reference so that entry doesn't go away
		 * from under us.
		 */
		spin_unlock(&cache->c_lru_list_lock);
		head = entry->e_hash_list_head;
		hlist_bl_lock(head);
		/* Unhash unless a racing delete_block already did it */
		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
			hlist_bl_del_init(&entry->e_hash_list);
			atomic_dec(&entry->e_refcnt);
		}
		hlist_bl_unlock(head);
		/* Drop the LRU reference; count the entry if it was freed */
		if (mb2_cache_entry_put(cache, entry))
			shrunk++;
		cond_resched();
		spin_lock(&cache->c_lru_list_lock);
	}
	spin_unlock(&cache->c_lru_list_lock);

	return shrunk;
}
263
264/*
265 * mb2_cache_create - create cache
266 * @bucket_bits: log2 of the hash table size
267 *
268 * Create cache for keys with 2^bucket_bits hash entries.
269 */
270struct mb2_cache *mb2_cache_create(int bucket_bits)
271{
272 struct mb2_cache *cache;
273 int bucket_count = 1 << bucket_bits;
274 int i;
275
276 if (!try_module_get(THIS_MODULE))
277 return NULL;
278
279 cache = kzalloc(sizeof(struct mb2_cache), GFP_KERNEL);
280 if (!cache)
281 goto err_out;
282 cache->c_bucket_bits = bucket_bits;
283 INIT_LIST_HEAD(&cache->c_lru_list);
284 spin_lock_init(&cache->c_lru_list_lock);
285 cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head),
286 GFP_KERNEL);
287 if (!cache->c_hash) {
288 kfree(cache);
289 goto err_out;
290 }
291 for (i = 0; i < bucket_count; i++)
292 INIT_HLIST_BL_HEAD(&cache->c_hash[i]);
293
294 cache->c_shrink.count_objects = mb2_cache_count;
295 cache->c_shrink.scan_objects = mb2_cache_scan;
296 cache->c_shrink.seeks = DEFAULT_SEEKS;
297 register_shrinker(&cache->c_shrink);
298
299 return cache;
300
301err_out:
302 module_put(THIS_MODULE);
303 return NULL;
304}
305EXPORT_SYMBOL(mb2_cache_create);
306
/*
 * mb2_cache_destroy - destroy cache
 * @cache: the cache to destroy
 *
 * Free all entries in cache and cache itself. Caller must make sure nobody
 * (except shrinker) can reach @cache when calling this.
 */
void mb2_cache_destroy(struct mb2_cache *cache)
{
	struct mb2_cache_entry *entry, *next;

	/* Stop the shrinker first so it cannot race with the teardown below */
	unregister_shrinker(&cache->c_shrink);

	/*
	 * We don't bother with any locking. Cache must not be used at this
	 * point.
	 */
	list_for_each_entry_safe(entry, next, &cache->c_lru_list, e_lru_list) {
		/* Drop the hash-list reference; every entry should be hashed */
		if (!hlist_bl_unhashed(&entry->e_hash_list)) {
			hlist_bl_del_init(&entry->e_hash_list);
			atomic_dec(&entry->e_refcnt);
		} else
			WARN_ON(1);
		list_del(&entry->e_lru_list);
		/* Only the LRU reference should remain at this point */
		WARN_ON(atomic_read(&entry->e_refcnt) != 1);
		mb2_cache_entry_put(cache, entry);
	}
	kfree(cache->c_hash);
	kfree(cache);
	module_put(THIS_MODULE);
}
EXPORT_SYMBOL(mb2_cache_destroy);
339
340static int __init mb2cache_init(void)
341{
342 mb2_entry_cache = kmem_cache_create("mbcache",
343 sizeof(struct mb2_cache_entry), 0,
344 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
345 BUG_ON(!mb2_entry_cache);
346 return 0;
347}
348
/*
 * mb2cache_exit - module exit: destroy the entry slab cache
 *
 * All caches must already have been destroyed (each cache pins the module
 * via try_module_get(), so unload is only possible once they are gone).
 */
static void __exit mb2cache_exit(void)
{
	kmem_cache_destroy(mb2_entry_cache);
}

module_init(mb2cache_init)
module_exit(mb2cache_exit)

MODULE_AUTHOR("Jan Kara <jack@suse.cz>");
MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
MODULE_LICENSE("GPL");
diff --git a/fs/mount.h b/fs/mount.h
index 14db05d424f7..37c64bbe840c 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -13,6 +13,8 @@ struct mnt_namespace {
13 u64 seq; /* Sequence number to prevent loops */ 13 u64 seq; /* Sequence number to prevent loops */
14 wait_queue_head_t poll; 14 wait_queue_head_t poll;
15 u64 event; 15 u64 event;
16 unsigned int mounts; /* # of mounts in the namespace */
17 unsigned int pending_mounts;
16}; 18};
17 19
18struct mnt_pcp { 20struct mnt_pcp {
@@ -55,6 +57,7 @@ struct mount {
55 struct mnt_namespace *mnt_ns; /* containing namespace */ 57 struct mnt_namespace *mnt_ns; /* containing namespace */
56 struct mountpoint *mnt_mp; /* where is it mounted */ 58 struct mountpoint *mnt_mp; /* where is it mounted */
57 struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */ 59 struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */
60 struct list_head mnt_umounting; /* list entry for umount propagation */
58#ifdef CONFIG_FSNOTIFY 61#ifdef CONFIG_FSNOTIFY
59 struct hlist_head mnt_fsnotify_marks; 62 struct hlist_head mnt_fsnotify_marks;
60 __u32 mnt_fsnotify_mask; 63 __u32 mnt_fsnotify_mask;
@@ -86,7 +89,6 @@ static inline int is_mounted(struct vfsmount *mnt)
86} 89}
87 90
88extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *); 91extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
89extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
90 92
91extern int __legitimize_mnt(struct vfsmount *, unsigned); 93extern int __legitimize_mnt(struct vfsmount *, unsigned);
92extern bool legitimize_mnt(struct vfsmount *, unsigned); 94extern bool legitimize_mnt(struct vfsmount *, unsigned);
diff --git a/fs/mpage.c b/fs/mpage.c
index 5c65d8942692..f37bb01a333b 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -79,11 +79,17 @@ static struct bio *mpage_bio_submit(int rw, struct bio *bio)
79 struct page *first_page = bio->bi_io_vec[0].bv_page; 79 struct page *first_page = bio->bi_io_vec[0].bv_page;
80 80
81 if (first_page != NULL) { 81 if (first_page != NULL) {
82 char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
83
84 path = android_fstrace_get_pathname(pathbuf,
85 MAX_TRACE_PATHBUF_LEN,
86 first_page->mapping->host);
82 trace_android_fs_dataread_start( 87 trace_android_fs_dataread_start(
83 first_page->mapping->host, 88 first_page->mapping->host,
84 page_offset(first_page), 89 page_offset(first_page),
85 bio->bi_iter.bi_size, 90 bio->bi_iter.bi_size,
86 current->pid, 91 current->pid,
92 path,
87 current->comm); 93 current->comm);
88 } 94 }
89 } 95 }
@@ -141,7 +147,7 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
141 SetPageUptodate(page); 147 SetPageUptodate(page);
142 return; 148 return;
143 } 149 }
144 create_empty_buffers(page, 1 << inode->i_blkbits, 0); 150 create_empty_buffers(page, i_blocksize(inode), 0);
145 } 151 }
146 head = page_buffers(page); 152 head = page_buffers(page);
147 page_bh = head; 153 page_bh = head;
diff --git a/fs/namei.c b/fs/namei.c
index 0b0acba72a71..f8eeea956503 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -373,9 +373,11 @@ EXPORT_SYMBOL(generic_permission);
373 * flag in inode->i_opflags, that says "this has not special 373 * flag in inode->i_opflags, that says "this has not special
374 * permission function, use the fast case". 374 * permission function, use the fast case".
375 */ 375 */
376static inline int do_inode_permission(struct inode *inode, int mask) 376static inline int do_inode_permission(struct vfsmount *mnt, struct inode *inode, int mask)
377{ 377{
378 if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) { 378 if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
379 if (likely(mnt && inode->i_op->permission2))
380 return inode->i_op->permission2(mnt, inode, mask);
379 if (likely(inode->i_op->permission)) 381 if (likely(inode->i_op->permission))
380 return inode->i_op->permission(inode, mask); 382 return inode->i_op->permission(inode, mask);
381 383
@@ -399,7 +401,7 @@ static inline int do_inode_permission(struct inode *inode, int mask)
399 * This does not check for a read-only file system. You probably want 401 * This does not check for a read-only file system. You probably want
400 * inode_permission(). 402 * inode_permission().
401 */ 403 */
402int __inode_permission(struct inode *inode, int mask) 404int __inode_permission2(struct vfsmount *mnt, struct inode *inode, int mask)
403{ 405{
404 int retval; 406 int retval;
405 407
@@ -411,7 +413,7 @@ int __inode_permission(struct inode *inode, int mask)
411 return -EACCES; 413 return -EACCES;
412 } 414 }
413 415
414 retval = do_inode_permission(inode, mask); 416 retval = do_inode_permission(mnt, inode, mask);
415 if (retval) 417 if (retval)
416 return retval; 418 return retval;
417 419
@@ -419,7 +421,14 @@ int __inode_permission(struct inode *inode, int mask)
419 if (retval) 421 if (retval)
420 return retval; 422 return retval;
421 423
422 return security_inode_permission(inode, mask); 424 retval = security_inode_permission(inode, mask);
425 return retval;
426}
427EXPORT_SYMBOL(__inode_permission2);
428
429int __inode_permission(struct inode *inode, int mask)
430{
431 return __inode_permission2(NULL, inode, mask);
423} 432}
424EXPORT_SYMBOL(__inode_permission); 433EXPORT_SYMBOL(__inode_permission);
425 434
@@ -455,14 +464,20 @@ static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
455 * 464 *
456 * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask. 465 * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
457 */ 466 */
458int inode_permission(struct inode *inode, int mask) 467int inode_permission2(struct vfsmount *mnt, struct inode *inode, int mask)
459{ 468{
460 int retval; 469 int retval;
461 470
462 retval = sb_permission(inode->i_sb, inode, mask); 471 retval = sb_permission(inode->i_sb, inode, mask);
463 if (retval) 472 if (retval)
464 return retval; 473 return retval;
465 return __inode_permission(inode, mask); 474 return __inode_permission2(mnt, inode, mask);
475}
476EXPORT_SYMBOL(inode_permission2);
477
478int inode_permission(struct inode *inode, int mask)
479{
480 return inode_permission2(NULL, inode, mask);
466} 481}
467EXPORT_SYMBOL(inode_permission); 482EXPORT_SYMBOL(inode_permission);
468 483
@@ -1645,13 +1660,13 @@ static int lookup_slow(struct nameidata *nd, struct path *path)
1645static inline int may_lookup(struct nameidata *nd) 1660static inline int may_lookup(struct nameidata *nd)
1646{ 1661{
1647 if (nd->flags & LOOKUP_RCU) { 1662 if (nd->flags & LOOKUP_RCU) {
1648 int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK); 1663 int err = inode_permission2(nd->path.mnt, nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
1649 if (err != -ECHILD) 1664 if (err != -ECHILD)
1650 return err; 1665 return err;
1651 if (unlazy_walk(nd, NULL, 0)) 1666 if (unlazy_walk(nd, NULL, 0))
1652 return -ECHILD; 1667 return -ECHILD;
1653 } 1668 }
1654 return inode_permission(nd->inode, MAY_EXEC); 1669 return inode_permission2(nd->path.mnt, nd->inode, MAY_EXEC);
1655} 1670}
1656 1671
1657static inline int handle_dots(struct nameidata *nd, int type) 1672static inline int handle_dots(struct nameidata *nd, int type)
@@ -2005,11 +2020,12 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
2005 nd->depth = 0; 2020 nd->depth = 0;
2006 if (flags & LOOKUP_ROOT) { 2021 if (flags & LOOKUP_ROOT) {
2007 struct dentry *root = nd->root.dentry; 2022 struct dentry *root = nd->root.dentry;
2023 struct vfsmount *mnt = nd->root.mnt;
2008 struct inode *inode = root->d_inode; 2024 struct inode *inode = root->d_inode;
2009 if (*s) { 2025 if (*s) {
2010 if (!d_can_lookup(root)) 2026 if (!d_can_lookup(root))
2011 return ERR_PTR(-ENOTDIR); 2027 return ERR_PTR(-ENOTDIR);
2012 retval = inode_permission(inode, MAY_EXEC); 2028 retval = inode_permission2(mnt, inode, MAY_EXEC);
2013 if (retval) 2029 if (retval)
2014 return ERR_PTR(retval); 2030 return ERR_PTR(retval);
2015 } 2031 }
@@ -2280,13 +2296,14 @@ EXPORT_SYMBOL(vfs_path_lookup);
2280/** 2296/**
2281 * lookup_one_len - filesystem helper to lookup single pathname component 2297 * lookup_one_len - filesystem helper to lookup single pathname component
2282 * @name: pathname component to lookup 2298 * @name: pathname component to lookup
2299 * @mnt: mount we are looking up on
2283 * @base: base directory to lookup from 2300 * @base: base directory to lookup from
2284 * @len: maximum length @len should be interpreted to 2301 * @len: maximum length @len should be interpreted to
2285 * 2302 *
2286 * Note that this routine is purely a helper for filesystem usage and should 2303 * Note that this routine is purely a helper for filesystem usage and should
2287 * not be called by generic code. 2304 * not be called by generic code.
2288 */ 2305 */
2289struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 2306struct dentry *lookup_one_len2(const char *name, struct vfsmount *mnt, struct dentry *base, int len)
2290{ 2307{
2291 struct qstr this; 2308 struct qstr this;
2292 unsigned int c; 2309 unsigned int c;
@@ -2320,12 +2337,18 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
2320 return ERR_PTR(err); 2337 return ERR_PTR(err);
2321 } 2338 }
2322 2339
2323 err = inode_permission(base->d_inode, MAY_EXEC); 2340 err = inode_permission2(mnt, base->d_inode, MAY_EXEC);
2324 if (err) 2341 if (err)
2325 return ERR_PTR(err); 2342 return ERR_PTR(err);
2326 2343
2327 return __lookup_hash(&this, base, 0); 2344 return __lookup_hash(&this, base, 0);
2328} 2345}
2346EXPORT_SYMBOL(lookup_one_len2);
2347
2348struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
2349{
2350 return lookup_one_len2(name, NULL, base, len);
2351}
2329EXPORT_SYMBOL(lookup_one_len); 2352EXPORT_SYMBOL(lookup_one_len);
2330 2353
2331int user_path_at_empty(int dfd, const char __user *name, unsigned flags, 2354int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
@@ -2552,7 +2575,7 @@ EXPORT_SYMBOL(__check_sticky);
2552 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by 2575 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
2553 * nfs_async_unlink(). 2576 * nfs_async_unlink().
2554 */ 2577 */
2555static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) 2578static int may_delete(struct vfsmount *mnt, struct inode *dir, struct dentry *victim, bool isdir)
2556{ 2579{
2557 struct inode *inode = d_backing_inode(victim); 2580 struct inode *inode = d_backing_inode(victim);
2558 int error; 2581 int error;
@@ -2564,7 +2587,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
2564 BUG_ON(victim->d_parent->d_inode != dir); 2587 BUG_ON(victim->d_parent->d_inode != dir);
2565 audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); 2588 audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
2566 2589
2567 error = inode_permission(dir, MAY_WRITE | MAY_EXEC); 2590 error = inode_permission2(mnt, dir, MAY_WRITE | MAY_EXEC);
2568 if (error) 2591 if (error)
2569 return error; 2592 return error;
2570 if (IS_APPEND(dir)) 2593 if (IS_APPEND(dir))
@@ -2595,14 +2618,14 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
2595 * 3. We should have write and exec permissions on dir 2618 * 3. We should have write and exec permissions on dir
2596 * 4. We can't do it if dir is immutable (done in permission()) 2619 * 4. We can't do it if dir is immutable (done in permission())
2597 */ 2620 */
2598static inline int may_create(struct inode *dir, struct dentry *child) 2621static inline int may_create(struct vfsmount *mnt, struct inode *dir, struct dentry *child)
2599{ 2622{
2600 audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE); 2623 audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
2601 if (child->d_inode) 2624 if (child->d_inode)
2602 return -EEXIST; 2625 return -EEXIST;
2603 if (IS_DEADDIR(dir)) 2626 if (IS_DEADDIR(dir))
2604 return -ENOENT; 2627 return -ENOENT;
2605 return inode_permission(dir, MAY_WRITE | MAY_EXEC); 2628 return inode_permission2(mnt, dir, MAY_WRITE | MAY_EXEC);
2606} 2629}
2607 2630
2608/* 2631/*
@@ -2649,10 +2672,10 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
2649} 2672}
2650EXPORT_SYMBOL(unlock_rename); 2673EXPORT_SYMBOL(unlock_rename);
2651 2674
2652int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 2675int vfs_create2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry,
2653 bool want_excl) 2676 umode_t mode, bool want_excl)
2654{ 2677{
2655 int error = may_create(dir, dentry); 2678 int error = may_create(mnt, dir, dentry);
2656 if (error) 2679 if (error)
2657 return error; 2680 return error;
2658 2681
@@ -2668,11 +2691,19 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
2668 fsnotify_create(dir, dentry); 2691 fsnotify_create(dir, dentry);
2669 return error; 2692 return error;
2670} 2693}
2694EXPORT_SYMBOL(vfs_create2);
2695
2696int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
2697 bool want_excl)
2698{
2699 return vfs_create2(NULL, dir, dentry, mode, want_excl);
2700}
2671EXPORT_SYMBOL(vfs_create); 2701EXPORT_SYMBOL(vfs_create);
2672 2702
2673static int may_open(struct path *path, int acc_mode, int flag) 2703static int may_open(struct path *path, int acc_mode, int flag)
2674{ 2704{
2675 struct dentry *dentry = path->dentry; 2705 struct dentry *dentry = path->dentry;
2706 struct vfsmount *mnt = path->mnt;
2676 struct inode *inode = dentry->d_inode; 2707 struct inode *inode = dentry->d_inode;
2677 int error; 2708 int error;
2678 2709
@@ -2701,7 +2732,7 @@ static int may_open(struct path *path, int acc_mode, int flag)
2701 break; 2732 break;
2702 } 2733 }
2703 2734
2704 error = inode_permission(inode, acc_mode); 2735 error = inode_permission2(mnt, inode, acc_mode);
2705 if (error) 2736 if (error)
2706 return error; 2737 return error;
2707 2738
@@ -2736,7 +2767,7 @@ static int handle_truncate(struct file *filp)
2736 if (!error) 2767 if (!error)
2737 error = security_path_truncate(path); 2768 error = security_path_truncate(path);
2738 if (!error) { 2769 if (!error) {
2739 error = do_truncate(path->dentry, 0, 2770 error = do_truncate2(path->mnt, path->dentry, 0,
2740 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, 2771 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
2741 filp); 2772 filp);
2742 } 2773 }
@@ -2757,7 +2788,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
2757 if (error) 2788 if (error)
2758 return error; 2789 return error;
2759 2790
2760 error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC); 2791 error = inode_permission2(dir->mnt, dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
2761 if (error) 2792 if (error)
2762 return error; 2793 return error;
2763 2794
@@ -2943,6 +2974,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
2943 bool got_write, int *opened) 2974 bool got_write, int *opened)
2944{ 2975{
2945 struct dentry *dir = nd->path.dentry; 2976 struct dentry *dir = nd->path.dentry;
2977 struct vfsmount *mnt = nd->path.mnt;
2946 struct inode *dir_inode = dir->d_inode; 2978 struct inode *dir_inode = dir->d_inode;
2947 struct dentry *dentry; 2979 struct dentry *dentry;
2948 int error; 2980 int error;
@@ -2990,7 +3022,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
2990 error = security_path_mknod(&nd->path, dentry, mode, 0); 3022 error = security_path_mknod(&nd->path, dentry, mode, 0);
2991 if (error) 3023 if (error)
2992 goto out_dput; 3024 goto out_dput;
2993 error = vfs_create(dir->d_inode, dentry, mode, 3025 error = vfs_create2(mnt, dir->d_inode, dentry, mode,
2994 nd->flags & LOOKUP_EXCL); 3026 nd->flags & LOOKUP_EXCL);
2995 if (error) 3027 if (error)
2996 goto out_dput; 3028 goto out_dput;
@@ -3252,7 +3284,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
3252 goto out; 3284 goto out;
3253 dir = path.dentry->d_inode; 3285 dir = path.dentry->d_inode;
3254 /* we want directory to be writable */ 3286 /* we want directory to be writable */
3255 error = inode_permission(dir, MAY_WRITE | MAY_EXEC); 3287 error = inode_permission2(path.mnt, dir, MAY_WRITE | MAY_EXEC);
3256 if (error) 3288 if (error)
3257 goto out2; 3289 goto out2;
3258 if (!dir->i_op->tmpfile) { 3290 if (!dir->i_op->tmpfile) {
@@ -3486,9 +3518,9 @@ inline struct dentry *user_path_create(int dfd, const char __user *pathname,
3486} 3518}
3487EXPORT_SYMBOL(user_path_create); 3519EXPORT_SYMBOL(user_path_create);
3488 3520
3489int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) 3521int vfs_mknod2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
3490{ 3522{
3491 int error = may_create(dir, dentry); 3523 int error = may_create(mnt, dir, dentry);
3492 3524
3493 if (error) 3525 if (error)
3494 return error; 3526 return error;
@@ -3512,6 +3544,12 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
3512 fsnotify_create(dir, dentry); 3544 fsnotify_create(dir, dentry);
3513 return error; 3545 return error;
3514} 3546}
3547EXPORT_SYMBOL(vfs_mknod2);
3548
3549int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
3550{
3551 return vfs_mknod2(NULL, dir, dentry, mode, dev);
3552}
3515EXPORT_SYMBOL(vfs_mknod); 3553EXPORT_SYMBOL(vfs_mknod);
3516 3554
3517static int may_mknod(umode_t mode) 3555static int may_mknod(umode_t mode)
@@ -3554,10 +3592,10 @@ retry:
3554 goto out; 3592 goto out;
3555 switch (mode & S_IFMT) { 3593 switch (mode & S_IFMT) {
3556 case 0: case S_IFREG: 3594 case 0: case S_IFREG:
3557 error = vfs_create(path.dentry->d_inode,dentry,mode,true); 3595 error = vfs_create2(path.mnt, path.dentry->d_inode,dentry,mode,true);
3558 break; 3596 break;
3559 case S_IFCHR: case S_IFBLK: 3597 case S_IFCHR: case S_IFBLK:
3560 error = vfs_mknod(path.dentry->d_inode,dentry,mode, 3598 error = vfs_mknod2(path.mnt, path.dentry->d_inode,dentry,mode,
3561 new_decode_dev(dev)); 3599 new_decode_dev(dev));
3562 break; 3600 break;
3563 case S_IFIFO: case S_IFSOCK: 3601 case S_IFIFO: case S_IFSOCK:
@@ -3578,9 +3616,9 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
3578 return sys_mknodat(AT_FDCWD, filename, mode, dev); 3616 return sys_mknodat(AT_FDCWD, filename, mode, dev);
3579} 3617}
3580 3618
3581int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 3619int vfs_mkdir2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, umode_t mode)
3582{ 3620{
3583 int error = may_create(dir, dentry); 3621 int error = may_create(mnt, dir, dentry);
3584 unsigned max_links = dir->i_sb->s_max_links; 3622 unsigned max_links = dir->i_sb->s_max_links;
3585 3623
3586 if (error) 3624 if (error)
@@ -3602,6 +3640,12 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
3602 fsnotify_mkdir(dir, dentry); 3640 fsnotify_mkdir(dir, dentry);
3603 return error; 3641 return error;
3604} 3642}
3643EXPORT_SYMBOL(vfs_mkdir2);
3644
3645int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
3646{
3647 return vfs_mkdir2(NULL, dir, dentry, mode);
3648}
3605EXPORT_SYMBOL(vfs_mkdir); 3649EXPORT_SYMBOL(vfs_mkdir);
3606 3650
3607SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) 3651SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
@@ -3620,7 +3664,7 @@ retry:
3620 mode &= ~current_umask(); 3664 mode &= ~current_umask();
3621 error = security_path_mkdir(&path, dentry, mode); 3665 error = security_path_mkdir(&path, dentry, mode);
3622 if (!error) 3666 if (!error)
3623 error = vfs_mkdir(path.dentry->d_inode, dentry, mode); 3667 error = vfs_mkdir2(path.mnt, path.dentry->d_inode, dentry, mode);
3624 done_path_create(&path, dentry); 3668 done_path_create(&path, dentry);
3625 if (retry_estale(error, lookup_flags)) { 3669 if (retry_estale(error, lookup_flags)) {
3626 lookup_flags |= LOOKUP_REVAL; 3670 lookup_flags |= LOOKUP_REVAL;
@@ -3659,9 +3703,9 @@ void dentry_unhash(struct dentry *dentry)
3659} 3703}
3660EXPORT_SYMBOL(dentry_unhash); 3704EXPORT_SYMBOL(dentry_unhash);
3661 3705
3662int vfs_rmdir(struct inode *dir, struct dentry *dentry) 3706int vfs_rmdir2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry)
3663{ 3707{
3664 int error = may_delete(dir, dentry, 1); 3708 int error = may_delete(mnt, dir, dentry, 1);
3665 3709
3666 if (error) 3710 if (error)
3667 return error; 3711 return error;
@@ -3696,6 +3740,12 @@ out:
3696 d_delete(dentry); 3740 d_delete(dentry);
3697 return error; 3741 return error;
3698} 3742}
3743EXPORT_SYMBOL(vfs_rmdir2);
3744
3745int vfs_rmdir(struct inode *dir, struct dentry *dentry)
3746{
3747 return vfs_rmdir2(NULL, dir, dentry);
3748}
3699EXPORT_SYMBOL(vfs_rmdir); 3749EXPORT_SYMBOL(vfs_rmdir);
3700 3750
3701static long do_rmdir(int dfd, const char __user *pathname) 3751static long do_rmdir(int dfd, const char __user *pathname)
@@ -3741,7 +3791,7 @@ retry:
3741 error = security_path_rmdir(&path, dentry); 3791 error = security_path_rmdir(&path, dentry);
3742 if (error) 3792 if (error)
3743 goto exit3; 3793 goto exit3;
3744 error = vfs_rmdir(path.dentry->d_inode, dentry); 3794 error = vfs_rmdir2(path.mnt, path.dentry->d_inode, dentry);
3745exit3: 3795exit3:
3746 dput(dentry); 3796 dput(dentry);
3747exit2: 3797exit2:
@@ -3780,10 +3830,10 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
3780 * be appropriate for callers that expect the underlying filesystem not 3830 * be appropriate for callers that expect the underlying filesystem not
3781 * to be NFS exported. 3831 * to be NFS exported.
3782 */ 3832 */
3783int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode) 3833int vfs_unlink2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
3784{ 3834{
3785 struct inode *target = dentry->d_inode; 3835 struct inode *target = dentry->d_inode;
3786 int error = may_delete(dir, dentry, 0); 3836 int error = may_delete(mnt, dir, dentry, 0);
3787 3837
3788 if (error) 3838 if (error)
3789 return error; 3839 return error;
@@ -3818,6 +3868,12 @@ out:
3818 3868
3819 return error; 3869 return error;
3820} 3870}
3871EXPORT_SYMBOL(vfs_unlink2);
3872
3873int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
3874{
3875 return vfs_unlink2(NULL, dir, dentry, delegated_inode);
3876}
3821EXPORT_SYMBOL(vfs_unlink); 3877EXPORT_SYMBOL(vfs_unlink);
3822 3878
3823/* 3879/*
@@ -3865,7 +3921,7 @@ retry_deleg:
3865 error = security_path_unlink(&path, dentry); 3921 error = security_path_unlink(&path, dentry);
3866 if (error) 3922 if (error)
3867 goto exit2; 3923 goto exit2;
3868 error = vfs_unlink(path.dentry->d_inode, dentry, &delegated_inode); 3924 error = vfs_unlink2(path.mnt, path.dentry->d_inode, dentry, &delegated_inode);
3869exit2: 3925exit2:
3870 dput(dentry); 3926 dput(dentry);
3871 } 3927 }
@@ -3915,9 +3971,9 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname)
3915 return do_unlinkat(AT_FDCWD, pathname); 3971 return do_unlinkat(AT_FDCWD, pathname);
3916} 3972}
3917 3973
3918int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) 3974int vfs_symlink2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, const char *oldname)
3919{ 3975{
3920 int error = may_create(dir, dentry); 3976 int error = may_create(mnt, dir, dentry);
3921 3977
3922 if (error) 3978 if (error)
3923 return error; 3979 return error;
@@ -3934,6 +3990,12 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
3934 fsnotify_create(dir, dentry); 3990 fsnotify_create(dir, dentry);
3935 return error; 3991 return error;
3936} 3992}
3993EXPORT_SYMBOL(vfs_symlink2);
3994
3995int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
3996{
3997 return vfs_symlink2(NULL, dir, dentry, oldname);
3998}
3937EXPORT_SYMBOL(vfs_symlink); 3999EXPORT_SYMBOL(vfs_symlink);
3938 4000
3939SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, 4001SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
@@ -3956,7 +4018,7 @@ retry:
3956 4018
3957 error = security_path_symlink(&path, dentry, from->name); 4019 error = security_path_symlink(&path, dentry, from->name);
3958 if (!error) 4020 if (!error)
3959 error = vfs_symlink(path.dentry->d_inode, dentry, from->name); 4021 error = vfs_symlink2(path.mnt, path.dentry->d_inode, dentry, from->name);
3960 done_path_create(&path, dentry); 4022 done_path_create(&path, dentry);
3961 if (retry_estale(error, lookup_flags)) { 4023 if (retry_estale(error, lookup_flags)) {
3962 lookup_flags |= LOOKUP_REVAL; 4024 lookup_flags |= LOOKUP_REVAL;
@@ -3991,7 +4053,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
3991 * be appropriate for callers that expect the underlying filesystem not 4053 * be appropriate for callers that expect the underlying filesystem not
3992 * to be NFS exported. 4054 * to be NFS exported.
3993 */ 4055 */
3994int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode) 4056int vfs_link2(struct vfsmount *mnt, struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
3995{ 4057{
3996 struct inode *inode = old_dentry->d_inode; 4058 struct inode *inode = old_dentry->d_inode;
3997 unsigned max_links = dir->i_sb->s_max_links; 4059 unsigned max_links = dir->i_sb->s_max_links;
@@ -4000,7 +4062,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
4000 if (!inode) 4062 if (!inode)
4001 return -ENOENT; 4063 return -ENOENT;
4002 4064
4003 error = may_create(dir, new_dentry); 4065 error = may_create(mnt, dir, new_dentry);
4004 if (error) 4066 if (error)
4005 return error; 4067 return error;
4006 4068
@@ -4043,6 +4105,12 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
4043 fsnotify_link(dir, inode, new_dentry); 4105 fsnotify_link(dir, inode, new_dentry);
4044 return error; 4106 return error;
4045} 4107}
4108EXPORT_SYMBOL(vfs_link2);
4109
4110int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
4111{
4112 return vfs_link2(NULL, old_dentry, dir, new_dentry, delegated_inode);
4113}
4046EXPORT_SYMBOL(vfs_link); 4114EXPORT_SYMBOL(vfs_link);
4047 4115
4048/* 4116/*
@@ -4098,7 +4166,7 @@ retry:
4098 error = security_path_link(old_path.dentry, &new_path, new_dentry); 4166 error = security_path_link(old_path.dentry, &new_path, new_dentry);
4099 if (error) 4167 if (error)
4100 goto out_dput; 4168 goto out_dput;
4101 error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode); 4169 error = vfs_link2(old_path.mnt, old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
4102out_dput: 4170out_dput:
4103 done_path_create(&new_path, new_dentry); 4171 done_path_create(&new_path, new_dentry);
4104 if (delegated_inode) { 4172 if (delegated_inode) {
@@ -4173,17 +4241,18 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
4173 * ->i_mutex on parents, which works but leads to some truly excessive 4241 * ->i_mutex on parents, which works but leads to some truly excessive
4174 * locking]. 4242 * locking].
4175 */ 4243 */
4176int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 4244int vfs_rename2(struct vfsmount *mnt,
4245 struct inode *old_dir, struct dentry *old_dentry,
4177 struct inode *new_dir, struct dentry *new_dentry, 4246 struct inode *new_dir, struct dentry *new_dentry,
4178 struct inode **delegated_inode, unsigned int flags) 4247 struct inode **delegated_inode, unsigned int flags)
4179{ 4248{
4180 int error; 4249 int error;
4181 bool is_dir = d_is_dir(old_dentry); 4250 bool is_dir = d_is_dir(old_dentry);
4182 const unsigned char *old_name;
4183 struct inode *source = old_dentry->d_inode; 4251 struct inode *source = old_dentry->d_inode;
4184 struct inode *target = new_dentry->d_inode; 4252 struct inode *target = new_dentry->d_inode;
4185 bool new_is_dir = false; 4253 bool new_is_dir = false;
4186 unsigned max_links = new_dir->i_sb->s_max_links; 4254 unsigned max_links = new_dir->i_sb->s_max_links;
4255 struct name_snapshot old_name;
4187 4256
4188 /* 4257 /*
4189 * Check source == target. 4258 * Check source == target.
@@ -4192,19 +4261,19 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4192 if (vfs_select_inode(old_dentry, 0) == vfs_select_inode(new_dentry, 0)) 4261 if (vfs_select_inode(old_dentry, 0) == vfs_select_inode(new_dentry, 0))
4193 return 0; 4262 return 0;
4194 4263
4195 error = may_delete(old_dir, old_dentry, is_dir); 4264 error = may_delete(mnt, old_dir, old_dentry, is_dir);
4196 if (error) 4265 if (error)
4197 return error; 4266 return error;
4198 4267
4199 if (!target) { 4268 if (!target) {
4200 error = may_create(new_dir, new_dentry); 4269 error = may_create(mnt, new_dir, new_dentry);
4201 } else { 4270 } else {
4202 new_is_dir = d_is_dir(new_dentry); 4271 new_is_dir = d_is_dir(new_dentry);
4203 4272
4204 if (!(flags & RENAME_EXCHANGE)) 4273 if (!(flags & RENAME_EXCHANGE))
4205 error = may_delete(new_dir, new_dentry, is_dir); 4274 error = may_delete(mnt, new_dir, new_dentry, is_dir);
4206 else 4275 else
4207 error = may_delete(new_dir, new_dentry, new_is_dir); 4276 error = may_delete(mnt, new_dir, new_dentry, new_is_dir);
4208 } 4277 }
4209 if (error) 4278 if (error)
4210 return error; 4279 return error;
@@ -4221,12 +4290,12 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4221 */ 4290 */
4222 if (new_dir != old_dir) { 4291 if (new_dir != old_dir) {
4223 if (is_dir) { 4292 if (is_dir) {
4224 error = inode_permission(source, MAY_WRITE); 4293 error = inode_permission2(mnt, source, MAY_WRITE);
4225 if (error) 4294 if (error)
4226 return error; 4295 return error;
4227 } 4296 }
4228 if ((flags & RENAME_EXCHANGE) && new_is_dir) { 4297 if ((flags & RENAME_EXCHANGE) && new_is_dir) {
4229 error = inode_permission(target, MAY_WRITE); 4298 error = inode_permission2(mnt, target, MAY_WRITE);
4230 if (error) 4299 if (error)
4231 return error; 4300 return error;
4232 } 4301 }
@@ -4237,7 +4306,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4237 if (error) 4306 if (error)
4238 return error; 4307 return error;
4239 4308
4240 old_name = fsnotify_oldname_init(old_dentry->d_name.name); 4309 take_dentry_name_snapshot(&old_name, old_dentry);
4241 dget(new_dentry); 4310 dget(new_dentry);
4242 if (!is_dir || (flags & RENAME_EXCHANGE)) 4311 if (!is_dir || (flags & RENAME_EXCHANGE))
4243 lock_two_nondirectories(source, target); 4312 lock_two_nondirectories(source, target);
@@ -4298,17 +4367,25 @@ out:
4298 mutex_unlock(&target->i_mutex); 4367 mutex_unlock(&target->i_mutex);
4299 dput(new_dentry); 4368 dput(new_dentry);
4300 if (!error) { 4369 if (!error) {
4301 fsnotify_move(old_dir, new_dir, old_name, is_dir, 4370 fsnotify_move(old_dir, new_dir, old_name.name, is_dir,
4302 !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry); 4371 !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry);
4303 if (flags & RENAME_EXCHANGE) { 4372 if (flags & RENAME_EXCHANGE) {
4304 fsnotify_move(new_dir, old_dir, old_dentry->d_name.name, 4373 fsnotify_move(new_dir, old_dir, old_dentry->d_name.name,
4305 new_is_dir, NULL, new_dentry); 4374 new_is_dir, NULL, new_dentry);
4306 } 4375 }
4307 } 4376 }
4308 fsnotify_oldname_free(old_name); 4377 release_dentry_name_snapshot(&old_name);
4309 4378
4310 return error; 4379 return error;
4311} 4380}
4381EXPORT_SYMBOL(vfs_rename2);
4382
4383int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4384 struct inode *new_dir, struct dentry *new_dentry,
4385 struct inode **delegated_inode, unsigned int flags)
4386{
4387 return vfs_rename2(NULL, old_dir, old_dentry, new_dir, new_dentry, delegated_inode, flags);
4388}
4312EXPORT_SYMBOL(vfs_rename); 4389EXPORT_SYMBOL(vfs_rename);
4313 4390
4314SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, 4391SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
@@ -4422,7 +4499,7 @@ retry_deleg:
4422 &new_path, new_dentry, flags); 4499 &new_path, new_dentry, flags);
4423 if (error) 4500 if (error)
4424 goto exit5; 4501 goto exit5;
4425 error = vfs_rename(old_path.dentry->d_inode, old_dentry, 4502 error = vfs_rename2(old_path.mnt, old_path.dentry->d_inode, old_dentry,
4426 new_path.dentry->d_inode, new_dentry, 4503 new_path.dentry->d_inode, new_dentry,
4427 &delegated_inode, flags); 4504 &delegated_inode, flags);
4428exit5: 4505exit5:
@@ -4467,7 +4544,7 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna
4467 4544
4468int vfs_whiteout(struct inode *dir, struct dentry *dentry) 4545int vfs_whiteout(struct inode *dir, struct dentry *dentry)
4469{ 4546{
4470 int error = may_create(dir, dentry); 4547 int error = may_create(NULL, dir, dentry);
4471 if (error) 4548 if (error)
4472 return error; 4549 return error;
4473 4550
diff --git a/fs/namespace.c b/fs/namespace.c
index da98a1bbd8b5..15b91b36ecab 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -27,6 +27,9 @@
27#include "pnode.h" 27#include "pnode.h"
28#include "internal.h" 28#include "internal.h"
29 29
30/* Maximum number of mounts in a mount namespace */
31unsigned int sysctl_mount_max __read_mostly = 100000;
32
30static unsigned int m_hash_mask __read_mostly; 33static unsigned int m_hash_mask __read_mostly;
31static unsigned int m_hash_shift __read_mostly; 34static unsigned int m_hash_shift __read_mostly;
32static unsigned int mp_hash_mask __read_mostly; 35static unsigned int mp_hash_mask __read_mostly;
@@ -234,6 +237,7 @@ static struct mount *alloc_vfsmnt(const char *name)
234 INIT_LIST_HEAD(&mnt->mnt_slave_list); 237 INIT_LIST_HEAD(&mnt->mnt_slave_list);
235 INIT_LIST_HEAD(&mnt->mnt_slave); 238 INIT_LIST_HEAD(&mnt->mnt_slave);
236 INIT_HLIST_NODE(&mnt->mnt_mp_list); 239 INIT_HLIST_NODE(&mnt->mnt_mp_list);
240 INIT_LIST_HEAD(&mnt->mnt_umounting);
237#ifdef CONFIG_FSNOTIFY 241#ifdef CONFIG_FSNOTIFY
238 INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); 242 INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
239#endif 243#endif
@@ -577,6 +581,7 @@ int sb_prepare_remount_readonly(struct super_block *sb)
577 581
578static void free_vfsmnt(struct mount *mnt) 582static void free_vfsmnt(struct mount *mnt)
579{ 583{
584 kfree(mnt->mnt.data);
580 kfree_const(mnt->mnt_devname); 585 kfree_const(mnt->mnt_devname);
581#ifdef CONFIG_SMP 586#ifdef CONFIG_SMP
582 free_percpu(mnt->mnt_pcp); 587 free_percpu(mnt->mnt_pcp);
@@ -638,28 +643,6 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
638} 643}
639 644
640/* 645/*
641 * find the last mount at @dentry on vfsmount @mnt.
642 * mount_lock must be held.
643 */
644struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
645{
646 struct mount *p, *res = NULL;
647 p = __lookup_mnt(mnt, dentry);
648 if (!p)
649 goto out;
650 if (!(p->mnt.mnt_flags & MNT_UMOUNT))
651 res = p;
652 hlist_for_each_entry_continue(p, mnt_hash) {
653 if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
654 break;
655 if (!(p->mnt.mnt_flags & MNT_UMOUNT))
656 res = p;
657 }
658out:
659 return res;
660}
661
662/*
663 * lookup_mnt - Return the first child mount mounted at path 646 * lookup_mnt - Return the first child mount mounted at path
664 * 647 *
665 * "First" means first mounted chronologically. If you create the 648 * "First" means first mounted chronologically. If you create the
@@ -879,6 +862,13 @@ void mnt_set_mountpoint(struct mount *mnt,
879 hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list); 862 hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
880} 863}
881 864
865static void __attach_mnt(struct mount *mnt, struct mount *parent)
866{
867 hlist_add_head_rcu(&mnt->mnt_hash,
868 m_hash(&parent->mnt, mnt->mnt_mountpoint));
869 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
870}
871
882/* 872/*
883 * vfsmount lock must be held for write 873 * vfsmount lock must be held for write
884 */ 874 */
@@ -887,28 +877,45 @@ static void attach_mnt(struct mount *mnt,
887 struct mountpoint *mp) 877 struct mountpoint *mp)
888{ 878{
889 mnt_set_mountpoint(parent, mp, mnt); 879 mnt_set_mountpoint(parent, mp, mnt);
890 hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry)); 880 __attach_mnt(mnt, parent);
891 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
892} 881}
893 882
894static void attach_shadowed(struct mount *mnt, 883void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
895 struct mount *parent,
896 struct mount *shadows)
897{ 884{
898 if (shadows) { 885 struct mountpoint *old_mp = mnt->mnt_mp;
899 hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash); 886 struct dentry *old_mountpoint = mnt->mnt_mountpoint;
900 list_add(&mnt->mnt_child, &shadows->mnt_child); 887 struct mount *old_parent = mnt->mnt_parent;
901 } else { 888
902 hlist_add_head_rcu(&mnt->mnt_hash, 889 list_del_init(&mnt->mnt_child);
903 m_hash(&parent->mnt, mnt->mnt_mountpoint)); 890 hlist_del_init(&mnt->mnt_mp_list);
904 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); 891 hlist_del_init_rcu(&mnt->mnt_hash);
905 } 892
893 attach_mnt(mnt, parent, mp);
894
895 put_mountpoint(old_mp);
896
897 /*
898 * Safely avoid even the suggestion this code might sleep or
899 * lock the mount hash by taking advantage of the knowledge that
900 * mnt_change_mountpoint will not release the final reference
901 * to a mountpoint.
902 *
903 * During mounting, the mount passed in as the parent mount will
904 * continue to use the old mountpoint and during unmounting, the
905 * old mountpoint will continue to exist until namespace_unlock,
906 * which happens well after mnt_change_mountpoint.
907 */
908 spin_lock(&old_mountpoint->d_lock);
909 old_mountpoint->d_lockref.count--;
910 spin_unlock(&old_mountpoint->d_lock);
911
912 mnt_add_count(old_parent, -1);
906} 913}
907 914
908/* 915/*
909 * vfsmount lock must be held for write 916 * vfsmount lock must be held for write
910 */ 917 */
911static void commit_tree(struct mount *mnt, struct mount *shadows) 918static void commit_tree(struct mount *mnt)
912{ 919{
913 struct mount *parent = mnt->mnt_parent; 920 struct mount *parent = mnt->mnt_parent;
914 struct mount *m; 921 struct mount *m;
@@ -923,7 +930,10 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
923 930
924 list_splice(&head, n->list.prev); 931 list_splice(&head, n->list.prev);
925 932
926 attach_shadowed(mnt, parent, shadows); 933 n->mounts += n->pending_mounts;
934 n->pending_mounts = 0;
935
936 __attach_mnt(mnt, parent);
927 touch_mnt_namespace(n); 937 touch_mnt_namespace(n);
928} 938}
929 939
@@ -966,11 +976,21 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
966 if (!mnt) 976 if (!mnt)
967 return ERR_PTR(-ENOMEM); 977 return ERR_PTR(-ENOMEM);
968 978
979 mnt->mnt.data = NULL;
980 if (type->alloc_mnt_data) {
981 mnt->mnt.data = type->alloc_mnt_data();
982 if (!mnt->mnt.data) {
983 mnt_free_id(mnt);
984 free_vfsmnt(mnt);
985 return ERR_PTR(-ENOMEM);
986 }
987 }
969 if (flags & MS_KERNMOUNT) 988 if (flags & MS_KERNMOUNT)
970 mnt->mnt.mnt_flags = MNT_INTERNAL; 989 mnt->mnt.mnt_flags = MNT_INTERNAL;
971 990
972 root = mount_fs(type, flags, name, data); 991 root = mount_fs(type, flags, name, &mnt->mnt, data);
973 if (IS_ERR(root)) { 992 if (IS_ERR(root)) {
993 kfree(mnt->mnt.data);
974 mnt_free_id(mnt); 994 mnt_free_id(mnt);
975 free_vfsmnt(mnt); 995 free_vfsmnt(mnt);
976 return ERR_CAST(root); 996 return ERR_CAST(root);
@@ -998,6 +1018,14 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
998 if (!mnt) 1018 if (!mnt)
999 return ERR_PTR(-ENOMEM); 1019 return ERR_PTR(-ENOMEM);
1000 1020
1021 if (sb->s_op->clone_mnt_data) {
1022 mnt->mnt.data = sb->s_op->clone_mnt_data(old->mnt.data);
1023 if (!mnt->mnt.data) {
1024 err = -ENOMEM;
1025 goto out_free;
1026 }
1027 }
1028
1001 if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE)) 1029 if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
1002 mnt->mnt_group_id = 0; /* not a peer of original */ 1030 mnt->mnt_group_id = 0; /* not a peer of original */
1003 else 1031 else
@@ -1066,6 +1094,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
1066 return mnt; 1094 return mnt;
1067 1095
1068 out_free: 1096 out_free:
1097 kfree(mnt->mnt.data);
1069 mnt_free_id(mnt); 1098 mnt_free_id(mnt);
1070 free_vfsmnt(mnt); 1099 free_vfsmnt(mnt);
1071 return ERR_PTR(err); 1100 return ERR_PTR(err);
@@ -1443,11 +1472,16 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
1443 propagate_umount(&tmp_list); 1472 propagate_umount(&tmp_list);
1444 1473
1445 while (!list_empty(&tmp_list)) { 1474 while (!list_empty(&tmp_list)) {
1475 struct mnt_namespace *ns;
1446 bool disconnect; 1476 bool disconnect;
1447 p = list_first_entry(&tmp_list, struct mount, mnt_list); 1477 p = list_first_entry(&tmp_list, struct mount, mnt_list);
1448 list_del_init(&p->mnt_expire); 1478 list_del_init(&p->mnt_expire);
1449 list_del_init(&p->mnt_list); 1479 list_del_init(&p->mnt_list);
1450 __touch_mnt_namespace(p->mnt_ns); 1480 ns = p->mnt_ns;
1481 if (ns) {
1482 ns->mounts--;
1483 __touch_mnt_namespace(ns);
1484 }
1451 p->mnt_ns = NULL; 1485 p->mnt_ns = NULL;
1452 if (how & UMOUNT_SYNC) 1486 if (how & UMOUNT_SYNC)
1453 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; 1487 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
@@ -1718,7 +1752,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1718 continue; 1752 continue;
1719 1753
1720 for (s = r; s; s = next_mnt(s, r)) { 1754 for (s = r; s; s = next_mnt(s, r)) {
1721 struct mount *t = NULL;
1722 if (!(flag & CL_COPY_UNBINDABLE) && 1755 if (!(flag & CL_COPY_UNBINDABLE) &&
1723 IS_MNT_UNBINDABLE(s)) { 1756 IS_MNT_UNBINDABLE(s)) {
1724 s = skip_mnt_tree(s); 1757 s = skip_mnt_tree(s);
@@ -1740,14 +1773,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1740 goto out; 1773 goto out;
1741 lock_mount_hash(); 1774 lock_mount_hash();
1742 list_add_tail(&q->mnt_list, &res->mnt_list); 1775 list_add_tail(&q->mnt_list, &res->mnt_list);
1743 mnt_set_mountpoint(parent, p->mnt_mp, q); 1776 attach_mnt(q, parent, p->mnt_mp);
1744 if (!list_empty(&parent->mnt_mounts)) {
1745 t = list_last_entry(&parent->mnt_mounts,
1746 struct mount, mnt_child);
1747 if (t->mnt_mp != p->mnt_mp)
1748 t = NULL;
1749 }
1750 attach_shadowed(q, parent, t);
1751 unlock_mount_hash(); 1777 unlock_mount_hash();
1752 } 1778 }
1753 } 1779 }
@@ -1856,6 +1882,28 @@ static int invent_group_ids(struct mount *mnt, bool recurse)
1856 return 0; 1882 return 0;
1857} 1883}
1858 1884
1885int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
1886{
1887 unsigned int max = READ_ONCE(sysctl_mount_max);
1888 unsigned int mounts = 0, old, pending, sum;
1889 struct mount *p;
1890
1891 for (p = mnt; p; p = next_mnt(p, mnt))
1892 mounts++;
1893
1894 old = ns->mounts;
1895 pending = ns->pending_mounts;
1896 sum = old + pending;
1897 if ((old > sum) ||
1898 (pending > sum) ||
1899 (max < sum) ||
1900 (mounts > (max - sum)))
1901 return -ENOSPC;
1902
1903 ns->pending_mounts = pending + mounts;
1904 return 0;
1905}
1906
1859/* 1907/*
1860 * @source_mnt : mount tree to be attached 1908 * @source_mnt : mount tree to be attached
1861 * @nd : place the mount tree @source_mnt is attached 1909 * @nd : place the mount tree @source_mnt is attached
@@ -1925,10 +1973,26 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1925 struct path *parent_path) 1973 struct path *parent_path)
1926{ 1974{
1927 HLIST_HEAD(tree_list); 1975 HLIST_HEAD(tree_list);
1976 struct mnt_namespace *ns = dest_mnt->mnt_ns;
1977 struct mountpoint *smp;
1928 struct mount *child, *p; 1978 struct mount *child, *p;
1929 struct hlist_node *n; 1979 struct hlist_node *n;
1930 int err; 1980 int err;
1931 1981
1982 /* Preallocate a mountpoint in case the new mounts need
1983 * to be tucked under other mounts.
1984 */
1985 smp = get_mountpoint(source_mnt->mnt.mnt_root);
1986 if (IS_ERR(smp))
1987 return PTR_ERR(smp);
1988
1989 /* Is there space to add these mounts to the mount namespace? */
1990 if (!parent_path) {
1991 err = count_mounts(ns, source_mnt);
1992 if (err)
1993 goto out;
1994 }
1995
1932 if (IS_MNT_SHARED(dest_mnt)) { 1996 if (IS_MNT_SHARED(dest_mnt)) {
1933 err = invent_group_ids(source_mnt, true); 1997 err = invent_group_ids(source_mnt, true);
1934 if (err) 1998 if (err)
@@ -1948,16 +2012,19 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1948 touch_mnt_namespace(source_mnt->mnt_ns); 2012 touch_mnt_namespace(source_mnt->mnt_ns);
1949 } else { 2013 } else {
1950 mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); 2014 mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
1951 commit_tree(source_mnt, NULL); 2015 commit_tree(source_mnt);
1952 } 2016 }
1953 2017
1954 hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) { 2018 hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
1955 struct mount *q; 2019 struct mount *q;
1956 hlist_del_init(&child->mnt_hash); 2020 hlist_del_init(&child->mnt_hash);
1957 q = __lookup_mnt_last(&child->mnt_parent->mnt, 2021 q = __lookup_mnt(&child->mnt_parent->mnt,
1958 child->mnt_mountpoint); 2022 child->mnt_mountpoint);
1959 commit_tree(child, q); 2023 if (q)
2024 mnt_change_mountpoint(child, smp, q);
2025 commit_tree(child);
1960 } 2026 }
2027 put_mountpoint(smp);
1961 unlock_mount_hash(); 2028 unlock_mount_hash();
1962 2029
1963 return 0; 2030 return 0;
@@ -1965,11 +2032,18 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1965 out_cleanup_ids: 2032 out_cleanup_ids:
1966 while (!hlist_empty(&tree_list)) { 2033 while (!hlist_empty(&tree_list)) {
1967 child = hlist_entry(tree_list.first, struct mount, mnt_hash); 2034 child = hlist_entry(tree_list.first, struct mount, mnt_hash);
2035 child->mnt_parent->mnt_ns->pending_mounts = 0;
1968 umount_tree(child, UMOUNT_SYNC); 2036 umount_tree(child, UMOUNT_SYNC);
1969 } 2037 }
1970 unlock_mount_hash(); 2038 unlock_mount_hash();
1971 cleanup_group_ids(source_mnt, NULL); 2039 cleanup_group_ids(source_mnt, NULL);
1972 out: 2040 out:
2041 ns->pending_mounts = 0;
2042
2043 read_seqlock_excl(&mount_lock);
2044 put_mountpoint(smp);
2045 read_sequnlock_excl(&mount_lock);
2046
1973 return err; 2047 return err;
1974} 2048}
1975 2049
@@ -2234,8 +2308,14 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
2234 err = change_mount_flags(path->mnt, flags); 2308 err = change_mount_flags(path->mnt, flags);
2235 else if (!capable(CAP_SYS_ADMIN)) 2309 else if (!capable(CAP_SYS_ADMIN))
2236 err = -EPERM; 2310 err = -EPERM;
2237 else 2311 else {
2238 err = do_remount_sb(sb, flags, data, 0); 2312 err = do_remount_sb2(path->mnt, sb, flags, data, 0);
2313 namespace_lock();
2314 lock_mount_hash();
2315 propagate_remount(mnt);
2316 unlock_mount_hash();
2317 namespace_unlock();
2318 }
2239 if (!err) { 2319 if (!err) {
2240 lock_mount_hash(); 2320 lock_mount_hash();
2241 mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; 2321 mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
@@ -2795,6 +2875,8 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
2795 init_waitqueue_head(&new_ns->poll); 2875 init_waitqueue_head(&new_ns->poll);
2796 new_ns->event = 0; 2876 new_ns->event = 0;
2797 new_ns->user_ns = get_user_ns(user_ns); 2877 new_ns->user_ns = get_user_ns(user_ns);
2878 new_ns->mounts = 0;
2879 new_ns->pending_mounts = 0;
2798 return new_ns; 2880 return new_ns;
2799} 2881}
2800 2882
@@ -2844,6 +2926,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2844 q = new; 2926 q = new;
2845 while (p) { 2927 while (p) {
2846 q->mnt_ns = new_ns; 2928 q->mnt_ns = new_ns;
2929 new_ns->mounts++;
2847 if (new_fs) { 2930 if (new_fs) {
2848 if (&p->mnt == new_fs->root.mnt) { 2931 if (&p->mnt == new_fs->root.mnt) {
2849 new_fs->root.mnt = mntget(&q->mnt); 2932 new_fs->root.mnt = mntget(&q->mnt);
@@ -2882,6 +2965,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
2882 struct mount *mnt = real_mount(m); 2965 struct mount *mnt = real_mount(m);
2883 mnt->mnt_ns = new_ns; 2966 mnt->mnt_ns = new_ns;
2884 new_ns->root = mnt; 2967 new_ns->root = mnt;
2968 new_ns->mounts++;
2885 list_add(&mnt->mnt_list, &new_ns->list); 2969 list_add(&mnt->mnt_list, &new_ns->list);
2886 } else { 2970 } else {
2887 mntput(m); 2971 mntput(m);
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index f31fd0dd92c6..b1daeafbea92 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -121,6 +121,7 @@ config PNFS_FILE_LAYOUT
121config PNFS_BLOCK 121config PNFS_BLOCK
122 tristate 122 tristate
123 depends on NFS_V4_1 && BLK_DEV_DM 123 depends on NFS_V4_1 && BLK_DEV_DM
124 depends on 64BIT || LBDAF
124 default NFS_V4 125 default NFS_V4
125 126
126config PNFS_OBJLAYOUT 127config PNFS_OBJLAYOUT
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 52ee0b73ab4a..348e0a05bd18 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1135,11 +1135,13 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1135 /* Force a full look up iff the parent directory has changed */ 1135 /* Force a full look up iff the parent directory has changed */
1136 if (!nfs_is_exclusive_create(dir, flags) && 1136 if (!nfs_is_exclusive_create(dir, flags) &&
1137 nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) { 1137 nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
1138 1138 error = nfs_lookup_verify_inode(inode, flags);
1139 if (nfs_lookup_verify_inode(inode, flags)) { 1139 if (error) {
1140 if (flags & LOOKUP_RCU) 1140 if (flags & LOOKUP_RCU)
1141 return -ECHILD; 1141 return -ECHILD;
1142 goto out_zap_parent; 1142 if (error == -ESTALE)
1143 goto out_zap_parent;
1144 goto out_error;
1143 } 1145 }
1144 goto out_valid; 1146 goto out_valid;
1145 } 1147 }
@@ -1163,8 +1165,10 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1163 trace_nfs_lookup_revalidate_enter(dir, dentry, flags); 1165 trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
1164 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); 1166 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
1165 trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error); 1167 trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
1166 if (error) 1168 if (error == -ESTALE || error == -ENOENT)
1167 goto out_bad; 1169 goto out_bad;
1170 if (error)
1171 goto out_error;
1168 if (nfs_compare_fh(NFS_FH(inode), fhandle)) 1172 if (nfs_compare_fh(NFS_FH(inode), fhandle))
1169 goto out_bad; 1173 goto out_bad;
1170 if ((error = nfs_refresh_inode(inode, fattr)) != 0) 1174 if ((error = nfs_refresh_inode(inode, fattr)) != 0)
@@ -2421,6 +2425,20 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
2421} 2425}
2422EXPORT_SYMBOL_GPL(nfs_may_open); 2426EXPORT_SYMBOL_GPL(nfs_may_open);
2423 2427
2428static int nfs_execute_ok(struct inode *inode, int mask)
2429{
2430 struct nfs_server *server = NFS_SERVER(inode);
2431 int ret;
2432
2433 if (mask & MAY_NOT_BLOCK)
2434 ret = nfs_revalidate_inode_rcu(server, inode);
2435 else
2436 ret = nfs_revalidate_inode(server, inode);
2437 if (ret == 0 && !execute_ok(inode))
2438 ret = -EACCES;
2439 return ret;
2440}
2441
2424int nfs_permission(struct inode *inode, int mask) 2442int nfs_permission(struct inode *inode, int mask)
2425{ 2443{
2426 struct rpc_cred *cred; 2444 struct rpc_cred *cred;
@@ -2438,6 +2456,9 @@ int nfs_permission(struct inode *inode, int mask)
2438 case S_IFLNK: 2456 case S_IFLNK:
2439 goto out; 2457 goto out;
2440 case S_IFREG: 2458 case S_IFREG:
2459 if ((mask & MAY_OPEN) &&
2460 nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN))
2461 return 0;
2441 break; 2462 break;
2442 case S_IFDIR: 2463 case S_IFDIR:
2443 /* 2464 /*
@@ -2470,8 +2491,8 @@ force_lookup:
2470 res = PTR_ERR(cred); 2491 res = PTR_ERR(cred);
2471 } 2492 }
2472out: 2493out:
2473 if (!res && (mask & MAY_EXEC) && !execute_ok(inode)) 2494 if (!res && (mask & MAY_EXEC))
2474 res = -EACCES; 2495 res = nfs_execute_ok(inode, mask);
2475 2496
2476 dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n", 2497 dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n",
2477 inode->i_sb->s_id, inode->i_ino, mask, res); 2498 inode->i_sb->s_id, inode->i_ino, mask, res);
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e125e55de86d..2603d7589946 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -30,6 +30,7 @@ void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds)
30{ 30{
31 nfs4_print_deviceid(&mirror_ds->id_node.deviceid); 31 nfs4_print_deviceid(&mirror_ds->id_node.deviceid);
32 nfs4_pnfs_ds_put(mirror_ds->ds); 32 nfs4_pnfs_ds_put(mirror_ds->ds);
33 kfree(mirror_ds->ds_versions);
33 kfree_rcu(mirror_ds, id_node.rcu); 34 kfree_rcu(mirror_ds, id_node.rcu);
34} 35}
35 36
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f714b98cfd74..668ac19af58f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1241,9 +1241,9 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
1241 return 0; 1241 return 0;
1242 /* Has the inode gone and changed behind our back? */ 1242 /* Has the inode gone and changed behind our back? */
1243 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) 1243 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
1244 return -EIO; 1244 return -ESTALE;
1245 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) 1245 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
1246 return -EIO; 1246 return -ESTALE;
1247 1247
1248 if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && 1248 if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
1249 inode->i_version != fattr->change_attr) 1249 inode->i_version != fattr->change_attr)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 3c69299c01ab..8e425f2c5ddd 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2188,8 +2188,6 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
2188 if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0) 2188 if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0)
2189 return 0; 2189 return 0;
2190 2190
2191 /* even though OPEN succeeded, access is denied. Close the file */
2192 nfs4_close_state(state, fmode);
2193 return -EACCES; 2191 return -EACCES;
2194} 2192}
2195 2193
@@ -2422,7 +2420,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata,
2422 sattr->ia_valid |= ATTR_MTIME; 2420 sattr->ia_valid |= ATTR_MTIME;
2423 2421
2424 /* Except MODE, it seems harmless of setting twice. */ 2422 /* Except MODE, it seems harmless of setting twice. */
2425 if ((attrset[1] & FATTR4_WORD1_MODE)) 2423 if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE &&
2424 attrset[1] & FATTR4_WORD1_MODE)
2426 sattr->ia_valid &= ~ATTR_MODE; 2425 sattr->ia_valid &= ~ATTR_MODE;
2427 2426
2428 if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL) 2427 if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL)
@@ -2451,6 +2450,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
2451 ret = PTR_ERR(state); 2450 ret = PTR_ERR(state);
2452 if (IS_ERR(state)) 2451 if (IS_ERR(state))
2453 goto out; 2452 goto out;
2453 ctx->state = state;
2454 if (server->caps & NFS_CAP_POSIX_LOCK) 2454 if (server->caps & NFS_CAP_POSIX_LOCK)
2455 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); 2455 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
2456 2456
@@ -2473,7 +2473,6 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
2473 if (ret != 0) 2473 if (ret != 0)
2474 goto out; 2474 goto out;
2475 2475
2476 ctx->state = state;
2477 if (d_inode(dentry) == state->inode) { 2476 if (d_inode(dentry) == state->inode) {
2478 nfs_inode_attach_open_context(ctx); 2477 nfs_inode_attach_open_context(ctx);
2479 if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) 2478 if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
@@ -4710,7 +4709,7 @@ out:
4710 */ 4709 */
4711static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) 4710static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
4712{ 4711{
4713 struct page *pages[NFS4ACL_MAXPAGES] = {NULL, }; 4712 struct page *pages[NFS4ACL_MAXPAGES + 1] = {NULL, };
4714 struct nfs_getaclargs args = { 4713 struct nfs_getaclargs args = {
4715 .fh = NFS_FH(inode), 4714 .fh = NFS_FH(inode),
4716 .acl_pages = pages, 4715 .acl_pages = pages,
@@ -4724,13 +4723,9 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
4724 .rpc_argp = &args, 4723 .rpc_argp = &args,
4725 .rpc_resp = &res, 4724 .rpc_resp = &res,
4726 }; 4725 };
4727 unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); 4726 unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1;
4728 int ret = -ENOMEM, i; 4727 int ret = -ENOMEM, i;
4729 4728
4730 /* As long as we're doing a round trip to the server anyway,
4731 * let's be prepared for a page of acl data. */
4732 if (npages == 0)
4733 npages = 1;
4734 if (npages > ARRAY_SIZE(pages)) 4729 if (npages > ARRAY_SIZE(pages))
4735 return -ERANGE; 4730 return -ERANGE;
4736 4731
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 82dc3035ea45..e8d1d6c5000c 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1072,6 +1072,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
1072 case -NFS4ERR_BADXDR: 1072 case -NFS4ERR_BADXDR:
1073 case -NFS4ERR_RESOURCE: 1073 case -NFS4ERR_RESOURCE:
1074 case -NFS4ERR_NOFILEHANDLE: 1074 case -NFS4ERR_NOFILEHANDLE:
1075 case -NFS4ERR_MOVED:
1075 /* Non-seqid mutating errors */ 1076 /* Non-seqid mutating errors */
1076 return; 1077 return;
1077 }; 1078 };
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e4441216804..1cb50bb898b0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2487,7 +2487,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
2487 encode_compound_hdr(xdr, req, &hdr); 2487 encode_compound_hdr(xdr, req, &hdr);
2488 encode_sequence(xdr, &args->seq_args, &hdr); 2488 encode_sequence(xdr, &args->seq_args, &hdr);
2489 encode_putfh(xdr, args->fh, &hdr); 2489 encode_putfh(xdr, args->fh, &hdr);
2490 replen = hdr.replen + op_decode_hdr_maxsz + 1; 2490 replen = hdr.replen + op_decode_hdr_maxsz;
2491 encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); 2491 encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
2492 2492
2493 xdr_inline_pages(&req->rq_rcv_buf, replen << 2, 2493 xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index c29d9421bd5e..0976f8dad4ce 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -50,7 +50,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
50{ 50{
51 struct nfsd4_layout_seg *seg = &args->lg_seg; 51 struct nfsd4_layout_seg *seg = &args->lg_seg;
52 struct super_block *sb = inode->i_sb; 52 struct super_block *sb = inode->i_sb;
53 u32 block_size = (1 << inode->i_blkbits); 53 u32 block_size = i_blocksize(inode);
54 struct pnfs_block_extent *bex; 54 struct pnfs_block_extent *bex;
55 struct iomap iomap; 55 struct iomap iomap;
56 u32 device_generation = 0; 56 u32 device_generation = 0;
@@ -151,7 +151,7 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
151 int error; 151 int error;
152 152
153 nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout, 153 nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
154 lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits); 154 lcp->lc_up_len, &iomaps, i_blocksize(inode));
155 if (nr_iomaps < 0) 155 if (nr_iomaps < 0)
156 return nfserrno(nr_iomaps); 156 return nfserrno(nr_iomaps);
157 157
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 00575d776d91..7162ab7bc093 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -358,6 +358,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
358{ 358{
359 unsigned int len, v, hdr, dlen; 359 unsigned int len, v, hdr, dlen;
360 u32 max_blocksize = svc_max_payload(rqstp); 360 u32 max_blocksize = svc_max_payload(rqstp);
361 struct kvec *head = rqstp->rq_arg.head;
361 362
362 p = decode_fh(p, &args->fh); 363 p = decode_fh(p, &args->fh);
363 if (!p) 364 if (!p)
@@ -367,6 +368,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
367 args->count = ntohl(*p++); 368 args->count = ntohl(*p++);
368 args->stable = ntohl(*p++); 369 args->stable = ntohl(*p++);
369 len = args->len = ntohl(*p++); 370 len = args->len = ntohl(*p++);
371 if ((void *)p > head->iov_base + head->iov_len)
372 return 0;
370 /* 373 /*
371 * The count must equal the amount of data passed. 374 * The count must equal the amount of data passed.
372 */ 375 */
@@ -377,9 +380,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
377 * Check to make sure that we got the right number of 380 * Check to make sure that we got the right number of
378 * bytes. 381 * bytes.
379 */ 382 */
380 hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; 383 hdr = (void*)p - head->iov_base;
381 dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len 384 dlen = head->iov_len + rqstp->rq_arg.page_len - hdr;
382 - hdr;
383 /* 385 /*
384 * Round the length of the data which was specified up to 386 * Round the length of the data which was specified up to
385 * the next multiple of XDR units and then compare that 387 * the next multiple of XDR units and then compare that
@@ -396,7 +398,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
396 len = args->len = max_blocksize; 398 len = args->len = max_blocksize;
397 } 399 }
398 rqstp->rq_vec[0].iov_base = (void*)p; 400 rqstp->rq_vec[0].iov_base = (void*)p;
399 rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; 401 rqstp->rq_vec[0].iov_len = head->iov_len - hdr;
400 v = 0; 402 v = 0;
401 while (len > rqstp->rq_vec[v].iov_len) { 403 while (len > rqstp->rq_vec[v].iov_len) {
402 len -= rqstp->rq_vec[v].iov_len; 404 len -= rqstp->rq_vec[v].iov_len;
@@ -471,6 +473,8 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
471 /* first copy and check from the first page */ 473 /* first copy and check from the first page */
472 old = (char*)p; 474 old = (char*)p;
473 vec = &rqstp->rq_arg.head[0]; 475 vec = &rqstp->rq_arg.head[0];
476 if ((void *)old > vec->iov_base + vec->iov_len)
477 return 0;
474 avail = vec->iov_len - (old - (char*)vec->iov_base); 478 avail = vec->iov_len - (old - (char*)vec->iov_base);
475 while (len && avail && *old) { 479 while (len && avail && *old) {
476 *new++ = *old++; 480 *new++ = *old++;
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index c9d6c715c0fb..9eed219f57a5 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -189,10 +189,11 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
189 struct nfs4_layout_stateid *ls; 189 struct nfs4_layout_stateid *ls;
190 struct nfs4_stid *stp; 190 struct nfs4_stid *stp;
191 191
192 stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache); 192 stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache,
193 nfsd4_free_layout_stateid);
193 if (!stp) 194 if (!stp)
194 return NULL; 195 return NULL;
195 stp->sc_free = nfsd4_free_layout_stateid; 196
196 get_nfs4_file(fp); 197 get_nfs4_file(fp);
197 stp->sc_file = fp; 198 stp->sc_file = fp;
198 199
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 7d5351cd67fb..209dbfc50cd4 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1690,6 +1690,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1690 opdesc->op_get_currentstateid(cstate, &op->u); 1690 opdesc->op_get_currentstateid(cstate, &op->u);
1691 op->status = opdesc->op_func(rqstp, cstate, &op->u); 1691 op->status = opdesc->op_func(rqstp, cstate, &op->u);
1692 1692
1693 /* Only from SEQUENCE */
1694 if (cstate->status == nfserr_replay_cache) {
1695 dprintk("%s NFS4.1 replay from cache\n", __func__);
1696 status = op->status;
1697 goto out;
1698 }
1693 if (!op->status) { 1699 if (!op->status) {
1694 if (opdesc->op_set_currentstateid) 1700 if (opdesc->op_set_currentstateid)
1695 opdesc->op_set_currentstateid(cstate, &op->u); 1701 opdesc->op_set_currentstateid(cstate, &op->u);
@@ -1700,14 +1706,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1700 if (need_wrongsec_check(rqstp)) 1706 if (need_wrongsec_check(rqstp))
1701 op->status = check_nfsd_access(current_fh->fh_export, rqstp); 1707 op->status = check_nfsd_access(current_fh->fh_export, rqstp);
1702 } 1708 }
1703
1704encode_op: 1709encode_op:
1705 /* Only from SEQUENCE */
1706 if (cstate->status == nfserr_replay_cache) {
1707 dprintk("%s NFS4.1 replay from cache\n", __func__);
1708 status = op->status;
1709 goto out;
1710 }
1711 if (op->status == nfserr_replay_me) { 1710 if (op->status == nfserr_replay_me) {
1712 op->replay = &cstate->replay_owner->so_replay; 1711 op->replay = &cstate->replay_owner->so_replay;
1713 nfsd4_encode_replay(&resp->xdr, op); 1712 nfsd4_encode_replay(&resp->xdr, op);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 55638110cb06..c7f1ce41442a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -553,8 +553,8 @@ out:
553 return co; 553 return co;
554} 554}
555 555
556struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, 556struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
557 struct kmem_cache *slab) 557 void (*sc_free)(struct nfs4_stid *))
558{ 558{
559 struct nfs4_stid *stid; 559 struct nfs4_stid *stid;
560 int new_id; 560 int new_id;
@@ -570,6 +570,8 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
570 idr_preload_end(); 570 idr_preload_end();
571 if (new_id < 0) 571 if (new_id < 0)
572 goto out_free; 572 goto out_free;
573
574 stid->sc_free = sc_free;
573 stid->sc_client = cl; 575 stid->sc_client = cl;
574 stid->sc_stateid.si_opaque.so_id = new_id; 576 stid->sc_stateid.si_opaque.so_id = new_id;
575 stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; 577 stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
@@ -595,15 +597,12 @@ out_free:
595static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) 597static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
596{ 598{
597 struct nfs4_stid *stid; 599 struct nfs4_stid *stid;
598 struct nfs4_ol_stateid *stp;
599 600
600 stid = nfs4_alloc_stid(clp, stateid_slab); 601 stid = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_ol_stateid);
601 if (!stid) 602 if (!stid)
602 return NULL; 603 return NULL;
603 604
604 stp = openlockstateid(stid); 605 return openlockstateid(stid);
605 stp->st_stid.sc_free = nfs4_free_ol_stateid;
606 return stp;
607} 606}
608 607
609static void nfs4_free_deleg(struct nfs4_stid *stid) 608static void nfs4_free_deleg(struct nfs4_stid *stid)
@@ -701,11 +700,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh,
701 goto out_dec; 700 goto out_dec;
702 if (delegation_blocked(&current_fh->fh_handle)) 701 if (delegation_blocked(&current_fh->fh_handle))
703 goto out_dec; 702 goto out_dec;
704 dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); 703 dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg));
705 if (dp == NULL) 704 if (dp == NULL)
706 goto out_dec; 705 goto out_dec;
707 706
708 dp->dl_stid.sc_free = nfs4_free_deleg;
709 /* 707 /*
710 * delegation seqid's are never incremented. The 4.1 special 708 * delegation seqid's are never incremented. The 4.1 special
711 * meaning of seqid 0 isn't meaningful, really, but let's avoid 709 * meaning of seqid 0 isn't meaningful, really, but let's avoid
@@ -5396,7 +5394,6 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
5396 stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner); 5394 stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner);
5397 get_nfs4_file(fp); 5395 get_nfs4_file(fp);
5398 stp->st_stid.sc_file = fp; 5396 stp->st_stid.sc_file = fp;
5399 stp->st_stid.sc_free = nfs4_free_lock_stateid;
5400 stp->st_access_bmap = 0; 5397 stp->st_access_bmap = 0;
5401 stp->st_deny_bmap = open_stp->st_deny_bmap; 5398 stp->st_deny_bmap = open_stp->st_deny_bmap;
5402 stp->st_openstp = open_stp; 5399 stp->st_openstp = open_stp;
@@ -5439,7 +5436,7 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi,
5439 lst = find_lock_stateid(lo, fi); 5436 lst = find_lock_stateid(lo, fi);
5440 if (lst == NULL) { 5437 if (lst == NULL) {
5441 spin_unlock(&clp->cl_lock); 5438 spin_unlock(&clp->cl_lock);
5442 ns = nfs4_alloc_stid(clp, stateid_slab); 5439 ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid);
5443 if (ns == NULL) 5440 if (ns == NULL)
5444 return NULL; 5441 return NULL;
5445 5442
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 12935209deca..3f68a25f2169 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2753,9 +2753,16 @@ out_acl:
2753 } 2753 }
2754#endif /* CONFIG_NFSD_PNFS */ 2754#endif /* CONFIG_NFSD_PNFS */
2755 if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { 2755 if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
2756 status = nfsd4_encode_bitmap(xdr, NFSD_SUPPATTR_EXCLCREAT_WORD0, 2756 u32 supp[3];
2757 NFSD_SUPPATTR_EXCLCREAT_WORD1, 2757
2758 NFSD_SUPPATTR_EXCLCREAT_WORD2); 2758 supp[0] = nfsd_suppattrs0(minorversion);
2759 supp[1] = nfsd_suppattrs1(minorversion);
2760 supp[2] = nfsd_suppattrs2(minorversion);
2761 supp[0] &= NFSD_SUPPATTR_EXCLCREAT_WORD0;
2762 supp[1] &= NFSD_SUPPATTR_EXCLCREAT_WORD1;
2763 supp[2] &= NFSD_SUPPATTR_EXCLCREAT_WORD2;
2764
2765 status = nfsd4_encode_bitmap(xdr, supp[0], supp[1], supp[2]);
2759 if (status) 2766 if (status)
2760 goto out; 2767 goto out;
2761 } 2768 }
@@ -4041,8 +4048,7 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
4041 struct nfsd4_getdeviceinfo *gdev) 4048 struct nfsd4_getdeviceinfo *gdev)
4042{ 4049{
4043 struct xdr_stream *xdr = &resp->xdr; 4050 struct xdr_stream *xdr = &resp->xdr;
4044 const struct nfsd4_layout_ops *ops = 4051 const struct nfsd4_layout_ops *ops;
4045 nfsd4_layout_ops[gdev->gd_layout_type];
4046 u32 starting_len = xdr->buf->len, needed_len; 4052 u32 starting_len = xdr->buf->len, needed_len;
4047 __be32 *p; 4053 __be32 *p;
4048 4054
@@ -4059,6 +4065,7 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
4059 4065
4060 /* If maxcount is 0 then just update notifications */ 4066 /* If maxcount is 0 then just update notifications */
4061 if (gdev->gd_maxcount != 0) { 4067 if (gdev->gd_maxcount != 0) {
4068 ops = nfsd4_layout_ops[gdev->gd_layout_type];
4062 nfserr = ops->encode_getdeviceinfo(xdr, gdev); 4069 nfserr = ops->encode_getdeviceinfo(xdr, gdev);
4063 if (nfserr) { 4070 if (nfserr) {
4064 /* 4071 /*
@@ -4111,8 +4118,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
4111 struct nfsd4_layoutget *lgp) 4118 struct nfsd4_layoutget *lgp)
4112{ 4119{
4113 struct xdr_stream *xdr = &resp->xdr; 4120 struct xdr_stream *xdr = &resp->xdr;
4114 const struct nfsd4_layout_ops *ops = 4121 const struct nfsd4_layout_ops *ops;
4115 nfsd4_layout_ops[lgp->lg_layout_type];
4116 __be32 *p; 4122 __be32 *p;
4117 4123
4118 dprintk("%s: err %d\n", __func__, nfserr); 4124 dprintk("%s: err %d\n", __func__, nfserr);
@@ -4135,6 +4141,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
4135 *p++ = cpu_to_be32(lgp->lg_seg.iomode); 4141 *p++ = cpu_to_be32(lgp->lg_seg.iomode);
4136 *p++ = cpu_to_be32(lgp->lg_layout_type); 4142 *p++ = cpu_to_be32(lgp->lg_layout_type);
4137 4143
4144 ops = nfsd4_layout_ops[lgp->lg_layout_type];
4138 nfserr = ops->encode_layoutget(xdr, lgp); 4145 nfserr = ops->encode_layoutget(xdr, lgp);
4139out: 4146out:
4140 kfree(lgp->lg_content); 4147 kfree(lgp->lg_content);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index ad4e2377dd63..5be1fa6b676d 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -656,6 +656,37 @@ static __be32 map_new_errors(u32 vers, __be32 nfserr)
656 return nfserr; 656 return nfserr;
657} 657}
658 658
659/*
660 * A write procedure can have a large argument, and a read procedure can
661 * have a large reply, but no NFSv2 or NFSv3 procedure has argument and
662 * reply that can both be larger than a page. The xdr code has taken
663 * advantage of this assumption to be a sloppy about bounds checking in
664 * some cases. Pending a rewrite of the NFSv2/v3 xdr code to fix that
665 * problem, we enforce these assumptions here:
666 */
667static bool nfs_request_too_big(struct svc_rqst *rqstp,
668 struct svc_procedure *proc)
669{
670 /*
671 * The ACL code has more careful bounds-checking and is not
672 * susceptible to this problem:
673 */
674 if (rqstp->rq_prog != NFS_PROGRAM)
675 return false;
676 /*
677 * Ditto NFSv4 (which can in theory have argument and reply both
678 * more than a page):
679 */
680 if (rqstp->rq_vers >= 4)
681 return false;
682 /* The reply will be small, we're OK: */
683 if (proc->pc_xdrressize > 0 &&
684 proc->pc_xdrressize < XDR_QUADLEN(PAGE_SIZE))
685 return false;
686
687 return rqstp->rq_arg.len > PAGE_SIZE;
688}
689
659int 690int
660nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) 691nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
661{ 692{
@@ -668,6 +699,11 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
668 rqstp->rq_vers, rqstp->rq_proc); 699 rqstp->rq_vers, rqstp->rq_proc);
669 proc = rqstp->rq_procinfo; 700 proc = rqstp->rq_procinfo;
670 701
702 if (nfs_request_too_big(rqstp, proc)) {
703 dprintk("nfsd: NFSv%d argument too large\n", rqstp->rq_vers);
704 *statp = rpc_garbage_args;
705 return 1;
706 }
671 /* 707 /*
672 * Give the xdr decoder a chance to change this if it wants 708 * Give the xdr decoder a chance to change this if it wants
673 * (necessary in the NFSv4.0 compound case) 709 * (necessary in the NFSv4.0 compound case)
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 79d964aa8079..bf913201a6ad 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -280,6 +280,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
280 struct nfsd_writeargs *args) 280 struct nfsd_writeargs *args)
281{ 281{
282 unsigned int len, hdr, dlen; 282 unsigned int len, hdr, dlen;
283 struct kvec *head = rqstp->rq_arg.head;
283 int v; 284 int v;
284 285
285 p = decode_fh(p, &args->fh); 286 p = decode_fh(p, &args->fh);
@@ -300,9 +301,10 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
300 * Check to make sure that we got the right number of 301 * Check to make sure that we got the right number of
301 * bytes. 302 * bytes.
302 */ 303 */
303 hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; 304 hdr = (void*)p - head->iov_base;
304 dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len 305 if (hdr > head->iov_len)
305 - hdr; 306 return 0;
307 dlen = head->iov_len + rqstp->rq_arg.page_len - hdr;
306 308
307 /* 309 /*
308 * Round the length of the data which was specified up to 310 * Round the length of the data which was specified up to
@@ -316,7 +318,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
316 return 0; 318 return 0;
317 319
318 rqstp->rq_vec[0].iov_base = (void*)p; 320 rqstp->rq_vec[0].iov_base = (void*)p;
319 rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; 321 rqstp->rq_vec[0].iov_len = head->iov_len - hdr;
320 v = 0; 322 v = 0;
321 while (len > rqstp->rq_vec[v].iov_len) { 323 while (len > rqstp->rq_vec[v].iov_len) {
322 len -= rqstp->rq_vec[v].iov_len; 324 len -= rqstp->rq_vec[v].iov_len;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 77860b75da9d..5134eedcb16c 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -583,8 +583,8 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
583__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, 583__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
584 stateid_t *stateid, unsigned char typemask, 584 stateid_t *stateid, unsigned char typemask,
585 struct nfs4_stid **s, struct nfsd_net *nn); 585 struct nfs4_stid **s, struct nfsd_net *nn);
586struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, 586struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
587 struct kmem_cache *slab); 587 void (*sc_free)(struct nfs4_stid *));
588void nfs4_unhash_stid(struct nfs4_stid *s); 588void nfs4_unhash_stid(struct nfs4_stid *s);
589void nfs4_put_stid(struct nfs4_stid *s); 589void nfs4_put_stid(struct nfs4_stid *s);
590void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); 590void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 994d66fbb446..91e0c5429b4d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -369,7 +369,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
369 __be32 err; 369 __be32 err;
370 int host_err; 370 int host_err;
371 bool get_write_count; 371 bool get_write_count;
372 int size_change = 0; 372 bool size_change = (iap->ia_valid & ATTR_SIZE);
373 373
374 if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) 374 if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
375 accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; 375 accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
@@ -382,11 +382,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
382 /* Get inode */ 382 /* Get inode */
383 err = fh_verify(rqstp, fhp, ftype, accmode); 383 err = fh_verify(rqstp, fhp, ftype, accmode);
384 if (err) 384 if (err)
385 goto out; 385 return err;
386 if (get_write_count) { 386 if (get_write_count) {
387 host_err = fh_want_write(fhp); 387 host_err = fh_want_write(fhp);
388 if (host_err) 388 if (host_err)
389 return nfserrno(host_err); 389 goto out;
390 } 390 }
391 391
392 dentry = fhp->fh_dentry; 392 dentry = fhp->fh_dentry;
@@ -397,20 +397,28 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
397 iap->ia_valid &= ~ATTR_MODE; 397 iap->ia_valid &= ~ATTR_MODE;
398 398
399 if (!iap->ia_valid) 399 if (!iap->ia_valid)
400 goto out; 400 return 0;
401 401
402 nfsd_sanitize_attrs(inode, iap); 402 nfsd_sanitize_attrs(inode, iap);
403 403
404 if (check_guard && guardtime != inode->i_ctime.tv_sec)
405 return nfserr_notsync;
406
404 /* 407 /*
405 * The size case is special, it changes the file in addition to the 408 * The size case is special, it changes the file in addition to the
406 * attributes. 409 * attributes, and file systems don't expect it to be mixed with
410 * "random" attribute changes. We thus split out the size change
411 * into a separate call to ->setattr, and do the rest as a separate
412 * setattr call.
407 */ 413 */
408 if (iap->ia_valid & ATTR_SIZE) { 414 if (size_change) {
409 err = nfsd_get_write_access(rqstp, fhp, iap); 415 err = nfsd_get_write_access(rqstp, fhp, iap);
410 if (err) 416 if (err)
411 goto out; 417 return err;
412 size_change = 1; 418 }
413 419
420 fh_lock(fhp);
421 if (size_change) {
414 /* 422 /*
415 * RFC5661, Section 18.30.4: 423 * RFC5661, Section 18.30.4:
416 * Changing the size of a file with SETATTR indirectly 424 * Changing the size of a file with SETATTR indirectly
@@ -418,29 +426,36 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
418 * 426 *
419 * (and similar for the older RFCs) 427 * (and similar for the older RFCs)
420 */ 428 */
421 if (iap->ia_size != i_size_read(inode)) 429 struct iattr size_attr = {
422 iap->ia_valid |= ATTR_MTIME; 430 .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
423 } 431 .ia_size = iap->ia_size,
432 };
424 433
425 iap->ia_valid |= ATTR_CTIME; 434 host_err = notify_change(dentry, &size_attr, NULL);
435 if (host_err)
436 goto out_unlock;
437 iap->ia_valid &= ~ATTR_SIZE;
426 438
427 if (check_guard && guardtime != inode->i_ctime.tv_sec) { 439 /*
428 err = nfserr_notsync; 440 * Avoid the additional setattr call below if the only other
429 goto out_put_write_access; 441 * attribute that the client sends is the mtime, as we update
442 * it as part of the size change above.
443 */
444 if ((iap->ia_valid & ~ATTR_MTIME) == 0)
445 goto out_unlock;
430 } 446 }
431 447
432 fh_lock(fhp); 448 iap->ia_valid |= ATTR_CTIME;
433 host_err = notify_change(dentry, iap, NULL); 449 host_err = notify_change(dentry, iap, NULL);
434 fh_unlock(fhp);
435 err = nfserrno(host_err);
436 450
437out_put_write_access: 451out_unlock:
452 fh_unlock(fhp);
438 if (size_change) 453 if (size_change)
439 put_write_access(inode); 454 put_write_access(inode);
440 if (!err)
441 err = nfserrno(commit_metadata(fhp));
442out: 455out:
443 return err; 456 if (!host_err)
457 host_err = commit_metadata(fhp);
458 return nfserrno(host_err);
444} 459}
445 460
446#if defined(CONFIG_NFSD_V4) 461#if defined(CONFIG_NFSD_V4)
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index a35ae35e6932..cd39b57288c2 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -55,7 +55,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
55 brelse(bh); 55 brelse(bh);
56 BUG(); 56 BUG();
57 } 57 }
58 memset(bh->b_data, 0, 1 << inode->i_blkbits); 58 memset(bh->b_data, 0, i_blocksize(inode));
59 bh->b_bdev = inode->i_sb->s_bdev; 59 bh->b_bdev = inode->i_sb->s_bdev;
60 bh->b_blocknr = blocknr; 60 bh->b_blocknr = blocknr;
61 set_buffer_mapped(bh); 61 set_buffer_mapped(bh);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index ac2f64943ff4..00877ef0b120 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -55,7 +55,7 @@ void nilfs_inode_add_blocks(struct inode *inode, int n)
55{ 55{
56 struct nilfs_root *root = NILFS_I(inode)->i_root; 56 struct nilfs_root *root = NILFS_I(inode)->i_root;
57 57
58 inode_add_bytes(inode, (1 << inode->i_blkbits) * n); 58 inode_add_bytes(inode, i_blocksize(inode) * n);
59 if (root) 59 if (root)
60 atomic64_add(n, &root->blocks_count); 60 atomic64_add(n, &root->blocks_count);
61} 61}
@@ -64,7 +64,7 @@ void nilfs_inode_sub_blocks(struct inode *inode, int n)
64{ 64{
65 struct nilfs_root *root = NILFS_I(inode)->i_root; 65 struct nilfs_root *root = NILFS_I(inode)->i_root;
66 66
67 inode_sub_bytes(inode, (1 << inode->i_blkbits) * n); 67 inode_sub_bytes(inode, i_blocksize(inode) * n);
68 if (root) 68 if (root)
69 atomic64_sub(n, &root->blocks_count); 69 atomic64_sub(n, &root->blocks_count);
70} 70}
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 1125f40233ff..612a2457243d 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -60,7 +60,7 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
60 set_buffer_mapped(bh); 60 set_buffer_mapped(bh);
61 61
62 kaddr = kmap_atomic(bh->b_page); 62 kaddr = kmap_atomic(bh->b_page);
63 memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits); 63 memset(kaddr + bh_offset(bh), 0, i_blocksize(inode));
64 if (init_block) 64 if (init_block)
65 init_block(inode, bh, kaddr); 65 init_block(inode, bh, kaddr);
66 flush_dcache_page(bh->b_page); 66 flush_dcache_page(bh->b_page);
@@ -503,7 +503,7 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size,
503 struct nilfs_mdt_info *mi = NILFS_MDT(inode); 503 struct nilfs_mdt_info *mi = NILFS_MDT(inode);
504 504
505 mi->mi_entry_size = entry_size; 505 mi->mi_entry_size = entry_size;
506 mi->mi_entries_per_block = (1 << inode->i_blkbits) / entry_size; 506 mi->mi_entries_per_block = i_blocksize(inode) / entry_size;
507 mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); 507 mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size);
508} 508}
509 509
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 3b65adaae7e4..2f27c935bd57 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -719,7 +719,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
719 719
720 lock_page(page); 720 lock_page(page);
721 if (!page_has_buffers(page)) 721 if (!page_has_buffers(page))
722 create_empty_buffers(page, 1 << inode->i_blkbits, 0); 722 create_empty_buffers(page, i_blocksize(inode), 0);
723 unlock_page(page); 723 unlock_page(page);
724 724
725 bh = head = page_buffers(page); 725 bh = head = page_buffers(page);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index a64313868d3a..2958e7a81f9c 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -488,7 +488,7 @@ static int fanotify_find_path(int dfd, const char __user *filename,
488 } 488 }
489 489
490 /* you can only watch an inode if you have read permissions on it */ 490 /* you can only watch an inode if you have read permissions on it */
491 ret = inode_permission(path->dentry->d_inode, MAY_READ); 491 ret = inode_permission2(path->mnt, path->dentry->d_inode, MAY_READ);
492 if (ret) 492 if (ret)
493 path_put(path); 493 path_put(path);
494out: 494out:
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index db39de2dd4cb..a64adc2fced9 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -104,16 +104,20 @@ int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
104 if (unlikely(!fsnotify_inode_watches_children(p_inode))) 104 if (unlikely(!fsnotify_inode_watches_children(p_inode)))
105 __fsnotify_update_child_dentry_flags(p_inode); 105 __fsnotify_update_child_dentry_flags(p_inode);
106 else if (p_inode->i_fsnotify_mask & mask) { 106 else if (p_inode->i_fsnotify_mask & mask) {
107 struct name_snapshot name;
108
107 /* we are notifying a parent so come up with the new mask which 109 /* we are notifying a parent so come up with the new mask which
108 * specifies these are events which came from a child. */ 110 * specifies these are events which came from a child. */
109 mask |= FS_EVENT_ON_CHILD; 111 mask |= FS_EVENT_ON_CHILD;
110 112
113 take_dentry_name_snapshot(&name, dentry);
111 if (path) 114 if (path)
112 ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH, 115 ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
113 dentry->d_name.name, 0); 116 name.name, 0);
114 else 117 else
115 ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, 118 ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
116 dentry->d_name.name, 0); 119 name.name, 0);
120 release_dentry_name_snapshot(&name);
117 } 121 }
118 122
119 dput(parent); 123 dput(parent);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index e2893f17dde2..4c5b43d15e6e 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -337,7 +337,7 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
337 if (error) 337 if (error)
338 return error; 338 return error;
339 /* you can only watch an inode if you have read permissions on it */ 339 /* you can only watch an inode if you have read permissions on it */
340 error = inode_permission(path->dentry->d_inode, MAY_READ); 340 error = inode_permission2(path->mnt, path->dentry->d_inode, MAY_READ);
341 if (error) 341 if (error)
342 path_put(path); 342 path_put(path);
343 return error; 343 return error;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index e6795c7c76a8..e4184bd2a954 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1103,7 +1103,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
1103 int ret = 0; 1103 int ret = 0;
1104 struct buffer_head *head, *bh, *wait[2], **wait_bh = wait; 1104 struct buffer_head *head, *bh, *wait[2], **wait_bh = wait;
1105 unsigned int block_end, block_start; 1105 unsigned int block_end, block_start;
1106 unsigned int bsize = 1 << inode->i_blkbits; 1106 unsigned int bsize = i_blocksize(inode);
1107 1107
1108 if (!page_has_buffers(page)) 1108 if (!page_has_buffers(page))
1109 create_empty_buffers(page, bsize, 0); 1109 create_empty_buffers(page, bsize, 0);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 709fbbd44c65..acebc350e98d 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -2070,13 +2070,13 @@ unlock:
2070 spin_unlock(&o2hb_live_lock); 2070 spin_unlock(&o2hb_live_lock);
2071} 2071}
2072 2072
2073static ssize_t o2hb_heartbeat_group_threshold_show(struct config_item *item, 2073static ssize_t o2hb_heartbeat_group_dead_threshold_show(struct config_item *item,
2074 char *page) 2074 char *page)
2075{ 2075{
2076 return sprintf(page, "%u\n", o2hb_dead_threshold); 2076 return sprintf(page, "%u\n", o2hb_dead_threshold);
2077} 2077}
2078 2078
2079static ssize_t o2hb_heartbeat_group_threshold_store(struct config_item *item, 2079static ssize_t o2hb_heartbeat_group_dead_threshold_store(struct config_item *item,
2080 const char *page, size_t count) 2080 const char *page, size_t count)
2081{ 2081{
2082 unsigned long tmp; 2082 unsigned long tmp;
@@ -2125,11 +2125,11 @@ static ssize_t o2hb_heartbeat_group_mode_store(struct config_item *item,
2125 2125
2126} 2126}
2127 2127
2128CONFIGFS_ATTR(o2hb_heartbeat_group_, threshold); 2128CONFIGFS_ATTR(o2hb_heartbeat_group_, dead_threshold);
2129CONFIGFS_ATTR(o2hb_heartbeat_group_, mode); 2129CONFIGFS_ATTR(o2hb_heartbeat_group_, mode);
2130 2130
2131static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { 2131static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = {
2132 &o2hb_heartbeat_group_attr_threshold, 2132 &o2hb_heartbeat_group_attr_dead_threshold,
2133 &o2hb_heartbeat_group_attr_mode, 2133 &o2hb_heartbeat_group_attr_mode,
2134 NULL, 2134 NULL,
2135}; 2135};
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 56dd3957cc91..1d738723a41a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -808,7 +808,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
808 /* We know that zero_from is block aligned */ 808 /* We know that zero_from is block aligned */
809 for (block_start = zero_from; block_start < zero_to; 809 for (block_start = zero_from; block_start < zero_to;
810 block_start = block_end) { 810 block_start = block_end) {
811 block_end = block_start + (1 << inode->i_blkbits); 811 block_end = block_start + i_blocksize(inode);
812 812
813 /* 813 /*
814 * block_start is block-aligned. Bump it by one to force 814 * block_start is block-aligned. Bump it by one to force
diff --git a/fs/open.c b/fs/open.c
index 157b9940dd73..1fd96c5d3895 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -34,8 +34,8 @@
34 34
35#include "internal.h" 35#include "internal.h"
36 36
37int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, 37int do_truncate2(struct vfsmount *mnt, struct dentry *dentry, loff_t length,
38 struct file *filp) 38 unsigned int time_attrs, struct file *filp)
39{ 39{
40 int ret; 40 int ret;
41 struct iattr newattrs; 41 struct iattr newattrs;
@@ -60,17 +60,24 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
60 60
61 mutex_lock(&dentry->d_inode->i_mutex); 61 mutex_lock(&dentry->d_inode->i_mutex);
62 /* Note any delegations or leases have already been broken: */ 62 /* Note any delegations or leases have already been broken: */
63 ret = notify_change(dentry, &newattrs, NULL); 63 ret = notify_change2(mnt, dentry, &newattrs, NULL);
64 mutex_unlock(&dentry->d_inode->i_mutex); 64 mutex_unlock(&dentry->d_inode->i_mutex);
65 return ret; 65 return ret;
66} 66}
67int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
68 struct file *filp)
69{
70 return do_truncate2(NULL, dentry, length, time_attrs, filp);
71}
67 72
68long vfs_truncate(struct path *path, loff_t length) 73long vfs_truncate(struct path *path, loff_t length)
69{ 74{
70 struct inode *inode; 75 struct inode *inode;
76 struct vfsmount *mnt;
71 long error; 77 long error;
72 78
73 inode = path->dentry->d_inode; 79 inode = path->dentry->d_inode;
80 mnt = path->mnt;
74 81
75 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ 82 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
76 if (S_ISDIR(inode->i_mode)) 83 if (S_ISDIR(inode->i_mode))
@@ -82,7 +89,7 @@ long vfs_truncate(struct path *path, loff_t length)
82 if (error) 89 if (error)
83 goto out; 90 goto out;
84 91
85 error = inode_permission(inode, MAY_WRITE); 92 error = inode_permission2(mnt, inode, MAY_WRITE);
86 if (error) 93 if (error)
87 goto mnt_drop_write_and_out; 94 goto mnt_drop_write_and_out;
88 95
@@ -106,7 +113,7 @@ long vfs_truncate(struct path *path, loff_t length)
106 if (!error) 113 if (!error)
107 error = security_path_truncate(path); 114 error = security_path_truncate(path);
108 if (!error) 115 if (!error)
109 error = do_truncate(path->dentry, length, 0, NULL); 116 error = do_truncate2(mnt, path->dentry, length, 0, NULL);
110 117
111put_write_and_out: 118put_write_and_out:
112 put_write_access(inode); 119 put_write_access(inode);
@@ -155,6 +162,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
155{ 162{
156 struct inode *inode; 163 struct inode *inode;
157 struct dentry *dentry; 164 struct dentry *dentry;
165 struct vfsmount *mnt;
158 struct fd f; 166 struct fd f;
159 int error; 167 int error;
160 168
@@ -171,6 +179,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
171 small = 0; 179 small = 0;
172 180
173 dentry = f.file->f_path.dentry; 181 dentry = f.file->f_path.dentry;
182 mnt = f.file->f_path.mnt;
174 inode = dentry->d_inode; 183 inode = dentry->d_inode;
175 error = -EINVAL; 184 error = -EINVAL;
176 if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE)) 185 if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
@@ -190,7 +199,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
190 if (!error) 199 if (!error)
191 error = security_path_truncate(&f.file->f_path); 200 error = security_path_truncate(&f.file->f_path);
192 if (!error) 201 if (!error)
193 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file); 202 error = do_truncate2(mnt, dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
194 sb_end_write(inode->i_sb); 203 sb_end_write(inode->i_sb);
195out_putf: 204out_putf:
196 fdput(f); 205 fdput(f);
@@ -340,6 +349,7 @@ SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
340 struct cred *override_cred; 349 struct cred *override_cred;
341 struct path path; 350 struct path path;
342 struct inode *inode; 351 struct inode *inode;
352 struct vfsmount *mnt;
343 int res; 353 int res;
344 unsigned int lookup_flags = LOOKUP_FOLLOW; 354 unsigned int lookup_flags = LOOKUP_FOLLOW;
345 355
@@ -370,6 +380,7 @@ retry:
370 goto out; 380 goto out;
371 381
372 inode = d_backing_inode(path.dentry); 382 inode = d_backing_inode(path.dentry);
383 mnt = path.mnt;
373 384
374 if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) { 385 if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
375 /* 386 /*
@@ -381,7 +392,7 @@ retry:
381 goto out_path_release; 392 goto out_path_release;
382 } 393 }
383 394
384 res = inode_permission(inode, mode | MAY_ACCESS); 395 res = inode_permission2(mnt, inode, mode | MAY_ACCESS);
385 /* SuS v2 requires we report a read only fs too */ 396 /* SuS v2 requires we report a read only fs too */
386 if (res || !(mode & S_IWOTH) || special_file(inode->i_mode)) 397 if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
387 goto out_path_release; 398 goto out_path_release;
@@ -425,7 +436,7 @@ retry:
425 if (error) 436 if (error)
426 goto out; 437 goto out;
427 438
428 error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); 439 error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
429 if (error) 440 if (error)
430 goto dput_and_out; 441 goto dput_and_out;
431 442
@@ -445,6 +456,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
445{ 456{
446 struct fd f = fdget_raw(fd); 457 struct fd f = fdget_raw(fd);
447 struct inode *inode; 458 struct inode *inode;
459 struct vfsmount *mnt;
448 int error = -EBADF; 460 int error = -EBADF;
449 461
450 error = -EBADF; 462 error = -EBADF;
@@ -452,12 +464,13 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
452 goto out; 464 goto out;
453 465
454 inode = file_inode(f.file); 466 inode = file_inode(f.file);
467 mnt = f.file->f_path.mnt;
455 468
456 error = -ENOTDIR; 469 error = -ENOTDIR;
457 if (!S_ISDIR(inode->i_mode)) 470 if (!S_ISDIR(inode->i_mode))
458 goto out_putf; 471 goto out_putf;
459 472
460 error = inode_permission(inode, MAY_EXEC | MAY_CHDIR); 473 error = inode_permission2(mnt, inode, MAY_EXEC | MAY_CHDIR);
461 if (!error) 474 if (!error)
462 set_fs_pwd(current->fs, &f.file->f_path); 475 set_fs_pwd(current->fs, &f.file->f_path);
463out_putf: 476out_putf:
@@ -476,7 +489,7 @@ retry:
476 if (error) 489 if (error)
477 goto out; 490 goto out;
478 491
479 error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); 492 error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
480 if (error) 493 if (error)
481 goto dput_and_out; 494 goto dput_and_out;
482 495
@@ -516,7 +529,7 @@ retry_deleg:
516 goto out_unlock; 529 goto out_unlock;
517 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 530 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
518 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 531 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
519 error = notify_change(path->dentry, &newattrs, &delegated_inode); 532 error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode);
520out_unlock: 533out_unlock:
521 mutex_unlock(&inode->i_mutex); 534 mutex_unlock(&inode->i_mutex);
522 if (delegated_inode) { 535 if (delegated_inode) {
@@ -596,7 +609,7 @@ retry_deleg:
596 mutex_lock(&inode->i_mutex); 609 mutex_lock(&inode->i_mutex);
597 error = security_path_chown(path, uid, gid); 610 error = security_path_chown(path, uid, gid);
598 if (!error) 611 if (!error)
599 error = notify_change(path->dentry, &newattrs, &delegated_inode); 612 error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode);
600 mutex_unlock(&inode->i_mutex); 613 mutex_unlock(&inode->i_mutex);
601 if (delegated_inode) { 614 if (delegated_inode) {
602 error = break_deleg_wait(&delegated_inode); 615 error = break_deleg_wait(&delegated_inode);
@@ -885,6 +898,12 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
885 int lookup_flags = 0; 898 int lookup_flags = 0;
886 int acc_mode; 899 int acc_mode;
887 900
901 /*
902 * Clear out all open flags we don't know about so that we don't report
903 * them in fcntl(F_GETFD) or similar interfaces.
904 */
905 flags &= VALID_OPEN_FLAGS;
906
888 if (flags & (O_CREAT | __O_TMPFILE)) 907 if (flags & (O_CREAT | __O_TMPFILE))
889 op->mode = (mode & S_IALLUGO) | S_IFREG; 908 op->mode = (mode & S_IALLUGO) | S_IFREG;
890 else 909 else
diff --git a/fs/pnode.c b/fs/pnode.c
index 99899705b105..1cafb8c2bdb0 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -24,6 +24,11 @@ static inline struct mount *first_slave(struct mount *p)
24 return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave); 24 return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
25} 25}
26 26
27static inline struct mount *last_slave(struct mount *p)
28{
29 return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave);
30}
31
27static inline struct mount *next_slave(struct mount *p) 32static inline struct mount *next_slave(struct mount *p)
28{ 33{
29 return list_entry(p->mnt_slave.next, struct mount, mnt_slave); 34 return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
@@ -164,6 +169,19 @@ static struct mount *propagation_next(struct mount *m,
164 } 169 }
165} 170}
166 171
172static struct mount *skip_propagation_subtree(struct mount *m,
173 struct mount *origin)
174{
175 /*
176 * Advance m such that propagation_next will not return
177 * the slaves of m.
178 */
179 if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
180 m = last_slave(m);
181
182 return m;
183}
184
167static struct mount *next_group(struct mount *m, struct mount *origin) 185static struct mount *next_group(struct mount *m, struct mount *origin)
168{ 186{
169 while (1) { 187 while (1) {
@@ -259,7 +277,7 @@ static int propagate_one(struct mount *m)
259 read_sequnlock_excl(&mount_lock); 277 read_sequnlock_excl(&mount_lock);
260 } 278 }
261 hlist_add_head(&child->mnt_hash, list); 279 hlist_add_head(&child->mnt_hash, list);
262 return 0; 280 return count_mounts(m->mnt_ns, child);
263} 281}
264 282
265/* 283/*
@@ -324,6 +342,21 @@ out:
324 return ret; 342 return ret;
325} 343}
326 344
345static struct mount *find_topper(struct mount *mnt)
346{
347 /* If there is exactly one mount covering mnt completely return it. */
348 struct mount *child;
349
350 if (!list_is_singular(&mnt->mnt_mounts))
351 return NULL;
352
353 child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child);
354 if (child->mnt_mountpoint != mnt->mnt.mnt_root)
355 return NULL;
356
357 return child;
358}
359
327/* 360/*
328 * return true if the refcount is greater than count 361 * return true if the refcount is greater than count
329 */ 362 */
@@ -344,9 +377,8 @@ static inline int do_refcount_check(struct mount *mnt, int count)
344 */ 377 */
345int propagate_mount_busy(struct mount *mnt, int refcnt) 378int propagate_mount_busy(struct mount *mnt, int refcnt)
346{ 379{
347 struct mount *m, *child; 380 struct mount *m, *child, *topper;
348 struct mount *parent = mnt->mnt_parent; 381 struct mount *parent = mnt->mnt_parent;
349 int ret = 0;
350 382
351 if (mnt == parent) 383 if (mnt == parent)
352 return do_refcount_check(mnt, refcnt); 384 return do_refcount_check(mnt, refcnt);
@@ -361,12 +393,24 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
361 393
362 for (m = propagation_next(parent, parent); m; 394 for (m = propagation_next(parent, parent); m;
363 m = propagation_next(m, parent)) { 395 m = propagation_next(m, parent)) {
364 child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint); 396 int count = 1;
365 if (child && list_empty(&child->mnt_mounts) && 397 child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
366 (ret = do_refcount_check(child, 1))) 398 if (!child)
367 break; 399 continue;
400
401 /* Is there exactly one mount on the child that covers
402 * it completely whose reference should be ignored?
403 */
404 topper = find_topper(child);
405 if (topper)
406 count += 1;
407 else if (!list_empty(&child->mnt_mounts))
408 continue;
409
410 if (do_refcount_check(child, count))
411 return 1;
368 } 412 }
369 return ret; 413 return 0;
370} 414}
371 415
372/* 416/*
@@ -383,63 +427,113 @@ void propagate_mount_unlock(struct mount *mnt)
383 427
384 for (m = propagation_next(parent, parent); m; 428 for (m = propagation_next(parent, parent); m;
385 m = propagation_next(m, parent)) { 429 m = propagation_next(m, parent)) {
386 child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint); 430 child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
387 if (child) 431 if (child)
388 child->mnt.mnt_flags &= ~MNT_LOCKED; 432 child->mnt.mnt_flags &= ~MNT_LOCKED;
389 } 433 }
390} 434}
391 435
392/* 436static void umount_one(struct mount *mnt, struct list_head *to_umount)
393 * Mark all mounts that the MNT_LOCKED logic will allow to be unmounted.
394 */
395static void mark_umount_candidates(struct mount *mnt)
396{ 437{
397 struct mount *parent = mnt->mnt_parent; 438 CLEAR_MNT_MARK(mnt);
398 struct mount *m; 439 mnt->mnt.mnt_flags |= MNT_UMOUNT;
399 440 list_del_init(&mnt->mnt_child);
400 BUG_ON(parent == mnt); 441 list_del_init(&mnt->mnt_umounting);
401 442 list_move_tail(&mnt->mnt_list, to_umount);
402 for (m = propagation_next(parent, parent); m;
403 m = propagation_next(m, parent)) {
404 struct mount *child = __lookup_mnt_last(&m->mnt,
405 mnt->mnt_mountpoint);
406 if (child && (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m))) {
407 SET_MNT_MARK(child);
408 }
409 }
410} 443}
411 444
412/* 445/*
413 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its 446 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
414 * parent propagates to. 447 * parent propagates to.
415 */ 448 */
416static void __propagate_umount(struct mount *mnt) 449static bool __propagate_umount(struct mount *mnt,
450 struct list_head *to_umount,
451 struct list_head *to_restore)
417{ 452{
418 struct mount *parent = mnt->mnt_parent; 453 bool progress = false;
419 struct mount *m; 454 struct mount *child;
420
421 BUG_ON(parent == mnt);
422 455
423 for (m = propagation_next(parent, parent); m; 456 /*
424 m = propagation_next(m, parent)) { 457 * The state of the parent won't change if this mount is
458 * already unmounted or marked as without children.
459 */
460 if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED))
461 goto out;
425 462
426 struct mount *child = __lookup_mnt_last(&m->mnt, 463 /* Verify topper is the only grandchild that has not been
427 mnt->mnt_mountpoint); 464 * speculatively unmounted.
428 /* 465 */
429 * umount the child only if the child has no children 466 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
430 * and the child is marked safe to unmount. 467 if (child->mnt_mountpoint == mnt->mnt.mnt_root)
431 */ 468 continue;
432 if (!child || !IS_MNT_MARKED(child)) 469 if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child))
433 continue; 470 continue;
434 CLEAR_MNT_MARK(child); 471 /* Found a mounted child */
435 if (list_empty(&child->mnt_mounts)) { 472 goto children;
436 list_del_init(&child->mnt_child); 473 }
437 child->mnt.mnt_flags |= MNT_UMOUNT; 474
438 list_move_tail(&child->mnt_list, &mnt->mnt_list); 475 /* Mark mounts that can be unmounted if not locked */
476 SET_MNT_MARK(mnt);
477 progress = true;
478
479 /* If a mount is without children and not locked umount it. */
480 if (!IS_MNT_LOCKED(mnt)) {
481 umount_one(mnt, to_umount);
482 } else {
483children:
484 list_move_tail(&mnt->mnt_umounting, to_restore);
485 }
486out:
487 return progress;
488}
489
490static void umount_list(struct list_head *to_umount,
491 struct list_head *to_restore)
492{
493 struct mount *mnt, *child, *tmp;
494 list_for_each_entry(mnt, to_umount, mnt_list) {
495 list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) {
496 /* topper? */
497 if (child->mnt_mountpoint == mnt->mnt.mnt_root)
498 list_move_tail(&child->mnt_umounting, to_restore);
499 else
500 umount_one(child, to_umount);
439 } 501 }
440 } 502 }
441} 503}
442 504
505static void restore_mounts(struct list_head *to_restore)
506{
507 /* Restore mounts to a clean working state */
508 while (!list_empty(to_restore)) {
509 struct mount *mnt, *parent;
510 struct mountpoint *mp;
511
512 mnt = list_first_entry(to_restore, struct mount, mnt_umounting);
513 CLEAR_MNT_MARK(mnt);
514 list_del_init(&mnt->mnt_umounting);
515
516 /* Should this mount be reparented? */
517 mp = mnt->mnt_mp;
518 parent = mnt->mnt_parent;
519 while (parent->mnt.mnt_flags & MNT_UMOUNT) {
520 mp = parent->mnt_mp;
521 parent = parent->mnt_parent;
522 }
523 if (parent != mnt->mnt_parent)
524 mnt_change_mountpoint(parent, mp, mnt);
525 }
526}
527
528static void cleanup_umount_visitations(struct list_head *visited)
529{
530 while (!list_empty(visited)) {
531 struct mount *mnt =
532 list_first_entry(visited, struct mount, mnt_umounting);
533 list_del_init(&mnt->mnt_umounting);
534 }
535}
536
443/* 537/*
444 * collect all mounts that receive propagation from the mount in @list, 538 * collect all mounts that receive propagation from the mount in @list,
445 * and return these additional mounts in the same list. 539 * and return these additional mounts in the same list.
@@ -450,11 +544,97 @@ static void __propagate_umount(struct mount *mnt)
450int propagate_umount(struct list_head *list) 544int propagate_umount(struct list_head *list)
451{ 545{
452 struct mount *mnt; 546 struct mount *mnt;
547 LIST_HEAD(to_restore);
548 LIST_HEAD(to_umount);
549 LIST_HEAD(visited);
453 550
454 list_for_each_entry_reverse(mnt, list, mnt_list) 551 /* Find candidates for unmounting */
455 mark_umount_candidates(mnt); 552 list_for_each_entry_reverse(mnt, list, mnt_list) {
553 struct mount *parent = mnt->mnt_parent;
554 struct mount *m;
555
556 /*
557 * If this mount has already been visited it is known that it's
558 * entire peer group and all of their slaves in the propagation
559 * tree for the mountpoint has already been visited and there is
560 * no need to visit them again.
561 */
562 if (!list_empty(&mnt->mnt_umounting))
563 continue;
564
565 list_add_tail(&mnt->mnt_umounting, &visited);
566 for (m = propagation_next(parent, parent); m;
567 m = propagation_next(m, parent)) {
568 struct mount *child = __lookup_mnt(&m->mnt,
569 mnt->mnt_mountpoint);
570 if (!child)
571 continue;
572
573 if (!list_empty(&child->mnt_umounting)) {
574 /*
575 * If the child has already been visited it is
576 * know that it's entire peer group and all of
577 * their slaves in the propgation tree for the
578 * mountpoint has already been visited and there
579 * is no need to visit this subtree again.
580 */
581 m = skip_propagation_subtree(m, parent);
582 continue;
583 } else if (child->mnt.mnt_flags & MNT_UMOUNT) {
584 /*
585 * We have come accross an partially unmounted
586 * mount in list that has not been visited yet.
587 * Remember it has been visited and continue
588 * about our merry way.
589 */
590 list_add_tail(&child->mnt_umounting, &visited);
591 continue;
592 }
593
594 /* Check the child and parents while progress is made */
595 while (__propagate_umount(child,
596 &to_umount, &to_restore)) {
597 /* Is the parent a umount candidate? */
598 child = child->mnt_parent;
599 if (list_empty(&child->mnt_umounting))
600 break;
601 }
602 }
603 }
604
605 umount_list(&to_umount, &to_restore);
606 restore_mounts(&to_restore);
607 cleanup_umount_visitations(&visited);
608 list_splice_tail(&to_umount, list);
456 609
457 list_for_each_entry(mnt, list, mnt_list)
458 __propagate_umount(mnt);
459 return 0; 610 return 0;
460} 611}
612
613/*
614 * Iterates over all slaves, and slaves of slaves.
615 */
616static struct mount *next_descendent(struct mount *root, struct mount *cur)
617{
618 if (!IS_MNT_NEW(cur) && !list_empty(&cur->mnt_slave_list))
619 return first_slave(cur);
620 do {
621 if (cur->mnt_slave.next != &cur->mnt_master->mnt_slave_list)
622 return next_slave(cur);
623 cur = cur->mnt_master;
624 } while (cur != root);
625 return NULL;
626}
627
628void propagate_remount(struct mount *mnt)
629{
630 struct mount *m = mnt;
631 struct super_block *sb = mnt->mnt.mnt_sb;
632
633 if (sb->s_op->copy_mnt_data) {
634 m = next_descendent(mnt, m);
635 while (m) {
636 sb->s_op->copy_mnt_data(m->mnt.data, mnt->mnt.data);
637 m = next_descendent(mnt, m);
638 }
639 }
640}
diff --git a/fs/pnode.h b/fs/pnode.h
index 0fcdbe7ca648..a9a6576540ad 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -44,12 +44,16 @@ int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
44int propagate_umount(struct list_head *); 44int propagate_umount(struct list_head *);
45int propagate_mount_busy(struct mount *, int); 45int propagate_mount_busy(struct mount *, int);
46void propagate_mount_unlock(struct mount *); 46void propagate_mount_unlock(struct mount *);
47void propagate_remount(struct mount *);
47void mnt_release_group_id(struct mount *); 48void mnt_release_group_id(struct mount *);
48int get_dominating_id(struct mount *mnt, const struct path *root); 49int get_dominating_id(struct mount *mnt, const struct path *root);
49unsigned int mnt_get_count(struct mount *mnt); 50unsigned int mnt_get_count(struct mount *mnt);
50void mnt_set_mountpoint(struct mount *, struct mountpoint *, 51void mnt_set_mountpoint(struct mount *, struct mountpoint *,
51 struct mount *); 52 struct mount *);
53void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp,
54 struct mount *mnt);
52struct mount *copy_tree(struct mount *, struct dentry *, int); 55struct mount *copy_tree(struct mount *, struct dentry *, int);
53bool is_path_reachable(struct mount *, struct dentry *, 56bool is_path_reachable(struct mount *, struct dentry *,
54 const struct path *root); 57 const struct path *root);
58int count_mounts(struct mnt_namespace *ns, struct mount *mnt);
55#endif /* _LINUX_PNODE_H */ 59#endif /* _LINUX_PNODE_H */
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 0c9ea52ab399..deafb880368b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3145,6 +3145,8 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
3145 iter.tgid += 1, iter = next_tgid(ns, iter)) { 3145 iter.tgid += 1, iter = next_tgid(ns, iter)) {
3146 char name[PROC_NUMBUF]; 3146 char name[PROC_NUMBUF];
3147 int len; 3147 int len;
3148
3149 cond_resched();
3148 if (!has_pid_permissions(ns, iter.task, 2)) 3150 if (!has_pid_permissions(ns, iter.task, 2))
3149 continue; 3151 continue;
3150 3152
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index ff3ffc76a937..3773335791da 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -469,6 +469,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name)
469 ent->data = NULL; 469 ent->data = NULL;
470 ent->proc_fops = NULL; 470 ent->proc_fops = NULL;
471 ent->proc_iops = NULL; 471 ent->proc_iops = NULL;
472 parent->nlink++;
472 if (proc_register(parent, ent) < 0) { 473 if (proc_register(parent, ent) < 0) {
473 kfree(ent); 474 kfree(ent);
474 parent->nlink--; 475 parent->nlink--;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 200e3b29aa22..38be952b2fe7 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -345,11 +345,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
345 345
346 /* We don't show the stack guard page in /proc/maps */ 346 /* We don't show the stack guard page in /proc/maps */
347 start = vma->vm_start; 347 start = vma->vm_start;
348 if (stack_guard_page_start(vma, start))
349 start += PAGE_SIZE;
350 end = vma->vm_end; 348 end = vma->vm_end;
351 if (stack_guard_page_end(vma, end))
352 end -= PAGE_SIZE;
353 349
354 seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); 350 seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
355 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", 351 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
@@ -865,7 +861,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
865static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, 861static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
866 unsigned long addr, pmd_t *pmdp) 862 unsigned long addr, pmd_t *pmdp)
867{ 863{
868 pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp); 864 pmd_t pmd = *pmdp;
865
866 /* See comment in change_huge_pmd() */
867 pmdp_invalidate(vma, addr, pmdp);
868 if (pmd_dirty(*pmdp))
869 pmd = pmd_mkdirty(pmd);
870 if (pmd_young(*pmdp))
871 pmd = pmd_mkyoung(pmd);
869 872
870 pmd = pmd_wrprotect(pmd); 873 pmd = pmd_wrprotect(pmd);
871 pmd = pmd_clear_soft_dirty(pmd); 874 pmd = pmd_clear_soft_dirty(pmd);
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 87645955990d..961e597acfc6 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -118,7 +118,9 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
118 if (err) 118 if (err)
119 goto out; 119 goto out;
120 show_mnt_opts(m, mnt); 120 show_mnt_opts(m, mnt);
121 if (sb->s_op->show_options) 121 if (sb->s_op->show_options2)
122 err = sb->s_op->show_options2(mnt, m, mnt_path.dentry);
123 else if (sb->s_op->show_options)
122 err = sb->s_op->show_options(m, mnt_path.dentry); 124 err = sb->s_op->show_options(m, mnt_path.dentry);
123 seq_puts(m, " 0 0\n"); 125 seq_puts(m, " 0 0\n");
124out: 126out:
@@ -178,7 +180,9 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
178 err = show_sb_opts(m, sb); 180 err = show_sb_opts(m, sb);
179 if (err) 181 if (err)
180 goto out; 182 goto out;
181 if (sb->s_op->show_options) 183 if (sb->s_op->show_options2) {
184 err = sb->s_op->show_options2(mnt, m, mnt->mnt_root);
185 } else if (sb->s_op->show_options)
182 err = sb->s_op->show_options(m, mnt->mnt_root); 186 err = sb->s_op->show_options(m, mnt->mnt_root);
183 seq_putc(m, '\n'); 187 seq_putc(m, '\n');
184out: 188out:
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 8d1e5e2db6a1..c9e4bc47c79d 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -434,7 +434,7 @@ static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
434 for (i = 0; i < cxt->max_dump_cnt; i++) { 434 for (i = 0; i < cxt->max_dump_cnt; i++) {
435 cxt->przs[i] = persistent_ram_new(*paddr, cxt->record_size, 0, 435 cxt->przs[i] = persistent_ram_new(*paddr, cxt->record_size, 0,
436 &cxt->ecc_info, 436 &cxt->ecc_info,
437 cxt->memtype); 437 cxt->memtype, 0);
438 if (IS_ERR(cxt->przs[i])) { 438 if (IS_ERR(cxt->przs[i])) {
439 err = PTR_ERR(cxt->przs[i]); 439 err = PTR_ERR(cxt->przs[i]);
440 dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n", 440 dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n",
@@ -471,7 +471,8 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt,
471 return -ENOMEM; 471 return -ENOMEM;
472 } 472 }
473 473
474 *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, cxt->memtype); 474 *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info,
475 cxt->memtype, 0);
475 if (IS_ERR(*prz)) { 476 if (IS_ERR(*prz)) {
476 int err = PTR_ERR(*prz); 477 int err = PTR_ERR(*prz);
477 478
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 3975deec02f8..e11672aa4575 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -48,16 +48,15 @@ static inline size_t buffer_start(struct persistent_ram_zone *prz)
48 return atomic_read(&prz->buffer->start); 48 return atomic_read(&prz->buffer->start);
49} 49}
50 50
51static DEFINE_RAW_SPINLOCK(buffer_lock);
52
53/* increase and wrap the start pointer, returning the old value */ 51/* increase and wrap the start pointer, returning the old value */
54static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a) 52static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
55{ 53{
56 int old; 54 int old;
57 int new; 55 int new;
58 unsigned long flags; 56 unsigned long flags = 0;
59 57
60 raw_spin_lock_irqsave(&buffer_lock, flags); 58 if (!(prz->flags & PRZ_FLAG_NO_LOCK))
59 raw_spin_lock_irqsave(&prz->buffer_lock, flags);
61 60
62 old = atomic_read(&prz->buffer->start); 61 old = atomic_read(&prz->buffer->start);
63 new = old + a; 62 new = old + a;
@@ -65,7 +64,8 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
65 new -= prz->buffer_size; 64 new -= prz->buffer_size;
66 atomic_set(&prz->buffer->start, new); 65 atomic_set(&prz->buffer->start, new);
67 66
68 raw_spin_unlock_irqrestore(&buffer_lock, flags); 67 if (!(prz->flags & PRZ_FLAG_NO_LOCK))
68 raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
69 69
70 return old; 70 return old;
71} 71}
@@ -75,9 +75,10 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
75{ 75{
76 size_t old; 76 size_t old;
77 size_t new; 77 size_t new;
78 unsigned long flags; 78 unsigned long flags = 0;
79 79
80 raw_spin_lock_irqsave(&buffer_lock, flags); 80 if (!(prz->flags & PRZ_FLAG_NO_LOCK))
81 raw_spin_lock_irqsave(&prz->buffer_lock, flags);
81 82
82 old = atomic_read(&prz->buffer->size); 83 old = atomic_read(&prz->buffer->size);
83 if (old == prz->buffer_size) 84 if (old == prz->buffer_size)
@@ -89,7 +90,8 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
89 atomic_set(&prz->buffer->size, new); 90 atomic_set(&prz->buffer->size, new);
90 91
91exit: 92exit:
92 raw_spin_unlock_irqrestore(&buffer_lock, flags); 93 if (!(prz->flags & PRZ_FLAG_NO_LOCK))
94 raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
93} 95}
94 96
95static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz, 97static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz,
@@ -491,6 +493,7 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
491 prz->buffer->sig); 493 prz->buffer->sig);
492 } 494 }
493 495
496 /* Rewind missing or invalid memory area. */
494 prz->buffer->sig = sig; 497 prz->buffer->sig = sig;
495 persistent_ram_zap(prz); 498 persistent_ram_zap(prz);
496 499
@@ -517,7 +520,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
517 520
518struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, 521struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
519 u32 sig, struct persistent_ram_ecc_info *ecc_info, 522 u32 sig, struct persistent_ram_ecc_info *ecc_info,
520 unsigned int memtype) 523 unsigned int memtype, u32 flags)
521{ 524{
522 struct persistent_ram_zone *prz; 525 struct persistent_ram_zone *prz;
523 int ret = -ENOMEM; 526 int ret = -ENOMEM;
@@ -528,6 +531,10 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
528 goto err; 531 goto err;
529 } 532 }
530 533
534 /* Initialize general buffer state. */
535 raw_spin_lock_init(&prz->buffer_lock);
536 prz->flags = flags;
537
531 ret = persistent_ram_buffer_map(start, size, prz, memtype); 538 ret = persistent_ram_buffer_map(start, size, prz, memtype);
532 if (ret) 539 if (ret)
533 goto err; 540 goto err;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 8f5ccdf81c25..38187300a2b4 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -189,7 +189,7 @@ int reiserfs_commit_page(struct inode *inode, struct page *page,
189 int ret = 0; 189 int ret = 0;
190 190
191 th.t_trans_id = 0; 191 th.t_trans_id = 0;
192 blocksize = 1 << inode->i_blkbits; 192 blocksize = i_blocksize(inode);
193 193
194 if (logit) { 194 if (logit) {
195 reiserfs_write_lock(s); 195 reiserfs_write_lock(s);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 3d8e7e671d5b..60ba35087d12 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -524,7 +524,7 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
524 * referenced in convert_tail_for_hole() that may be called from 524 * referenced in convert_tail_for_hole() that may be called from
525 * reiserfs_get_block() 525 * reiserfs_get_block()
526 */ 526 */
527 bh_result->b_size = (1 << inode->i_blkbits); 527 bh_result->b_size = i_blocksize(inode);
528 528
529 ret = reiserfs_get_block(inode, iblock, bh_result, 529 ret = reiserfs_get_block(inode, iblock, bh_result,
530 create | GET_BLOCK_NO_DANGLE); 530 create | GET_BLOCK_NO_DANGLE);
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 268733cda397..5f4f1882dc7d 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -74,6 +74,7 @@
74#include <linux/highmem.h> 74#include <linux/highmem.h>
75#include <linux/pagemap.h> 75#include <linux/pagemap.h>
76#include <linux/uaccess.h> 76#include <linux/uaccess.h>
77#include <linux/major.h>
77#include "internal.h" 78#include "internal.h"
78 79
79static struct kmem_cache *romfs_inode_cachep; 80static struct kmem_cache *romfs_inode_cachep;
@@ -415,7 +416,22 @@ static void romfs_destroy_inode(struct inode *inode)
415static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) 416static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf)
416{ 417{
417 struct super_block *sb = dentry->d_sb; 418 struct super_block *sb = dentry->d_sb;
418 u64 id = huge_encode_dev(sb->s_bdev->bd_dev); 419 u64 id = 0;
420
421 /* When calling huge_encode_dev(),
422 * use sb->s_bdev->bd_dev when,
423 * - CONFIG_ROMFS_ON_BLOCK defined
424 * use sb->s_dev when,
425 * - CONFIG_ROMFS_ON_BLOCK undefined and
426 * - CONFIG_ROMFS_ON_MTD defined
427 * leave id as 0 when,
428 * - CONFIG_ROMFS_ON_BLOCK undefined and
429 * - CONFIG_ROMFS_ON_MTD undefined
430 */
431 if (sb->s_bdev)
432 id = huge_encode_dev(sb->s_bdev->bd_dev);
433 else if (sb->s_dev)
434 id = huge_encode_dev(sb->s_dev);
419 435
420 buf->f_type = ROMFS_MAGIC; 436 buf->f_type = ROMFS_MAGIC;
421 buf->f_namelen = ROMFS_MAXFN; 437 buf->f_namelen = ROMFS_MAXFN;
@@ -488,6 +504,11 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
488 sb->s_flags |= MS_RDONLY | MS_NOATIME; 504 sb->s_flags |= MS_RDONLY | MS_NOATIME;
489 sb->s_op = &romfs_super_ops; 505 sb->s_op = &romfs_super_ops;
490 506
507#ifdef CONFIG_ROMFS_ON_MTD
508 /* Use same dev ID from the underlying mtdblock device */
509 if (sb->s_mtd)
510 sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, sb->s_mtd->index);
511#endif
491 /* read the image superblock and check it */ 512 /* read the image superblock and check it */
492 rsb = kmalloc(512, GFP_KERNEL); 513 rsb = kmalloc(512, GFP_KERNEL);
493 if (!rsb) 514 if (!rsb)
diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c
index 971928ab6c21..7a19e77fce99 100644
--- a/fs/sdcardfs/dentry.c
+++ b/fs/sdcardfs/dentry.c
@@ -46,7 +46,8 @@ static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags)
46 spin_unlock(&dentry->d_lock); 46 spin_unlock(&dentry->d_lock);
47 47
48 /* check uninitialized obb_dentry and 48 /* check uninitialized obb_dentry and
49 * whether the base obbpath has been changed or not */ 49 * whether the base obbpath has been changed or not
50 */
50 if (is_obbpath_invalid(dentry)) { 51 if (is_obbpath_invalid(dentry)) {
51 d_drop(dentry); 52 d_drop(dentry);
52 return 0; 53 return 0;
@@ -59,6 +60,14 @@ static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags)
59 lower_dentry = lower_path.dentry; 60 lower_dentry = lower_path.dentry;
60 lower_cur_parent_dentry = dget_parent(lower_dentry); 61 lower_cur_parent_dentry = dget_parent(lower_dentry);
61 62
63 if ((lower_dentry->d_flags & DCACHE_OP_REVALIDATE)) {
64 err = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
65 if (err == 0) {
66 d_drop(dentry);
67 goto out;
68 }
69 }
70
62 spin_lock(&lower_dentry->d_lock); 71 spin_lock(&lower_dentry->d_lock);
63 if (d_unhashed(lower_dentry)) { 72 if (d_unhashed(lower_dentry)) {
64 spin_unlock(&lower_dentry->d_lock); 73 spin_unlock(&lower_dentry->d_lock);
@@ -76,17 +85,13 @@ static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags)
76 85
77 if (dentry < lower_dentry) { 86 if (dentry < lower_dentry) {
78 spin_lock(&dentry->d_lock); 87 spin_lock(&dentry->d_lock);
79 spin_lock(&lower_dentry->d_lock); 88 spin_lock_nested(&lower_dentry->d_lock, DENTRY_D_LOCK_NESTED);
80 } else { 89 } else {
81 spin_lock(&lower_dentry->d_lock); 90 spin_lock(&lower_dentry->d_lock);
82 spin_lock(&dentry->d_lock); 91 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
83 } 92 }
84 93
85 if (dentry->d_name.len != lower_dentry->d_name.len) { 94 if (!qstr_case_eq(&dentry->d_name, &lower_dentry->d_name)) {
86 __d_drop(dentry);
87 err = 0;
88 } else if (strncasecmp(dentry->d_name.name, lower_dentry->d_name.name,
89 dentry->d_name.len) != 0) {
90 __d_drop(dentry); 95 __d_drop(dentry);
91 err = 0; 96 err = 0;
92 } 97 }
@@ -110,12 +115,10 @@ out:
110static void sdcardfs_d_release(struct dentry *dentry) 115static void sdcardfs_d_release(struct dentry *dentry)
111{ 116{
112 /* release and reset the lower paths */ 117 /* release and reset the lower paths */
113 if(has_graft_path(dentry)) { 118 if (has_graft_path(dentry))
114 sdcardfs_put_reset_orig_path(dentry); 119 sdcardfs_put_reset_orig_path(dentry);
115 }
116 sdcardfs_put_reset_lower_path(dentry); 120 sdcardfs_put_reset_lower_path(dentry);
117 free_dentry_private_data(dentry); 121 free_dentry_private_data(dentry);
118 return;
119} 122}
120 123
121static int sdcardfs_hash_ci(const struct dentry *dentry, 124static int sdcardfs_hash_ci(const struct dentry *dentry,
@@ -132,12 +135,10 @@ static int sdcardfs_hash_ci(const struct dentry *dentry,
132 unsigned long hash; 135 unsigned long hash;
133 136
134 name = qstr->name; 137 name = qstr->name;
135 //len = vfat_striptail_len(qstr);
136 len = qstr->len; 138 len = qstr->len;
137 139
138 hash = init_name_hash(); 140 hash = init_name_hash();
139 while (len--) 141 while (len--)
140 //hash = partial_name_hash(nls_tolower(t, *name++), hash);
141 hash = partial_name_hash(tolower(*name++), hash); 142 hash = partial_name_hash(tolower(*name++), hash);
142 qstr->hash = end_name_hash(hash); 143 qstr->hash = end_name_hash(hash);
143 144
@@ -151,35 +152,25 @@ static int sdcardfs_cmp_ci(const struct dentry *parent,
151 const struct dentry *dentry, 152 const struct dentry *dentry,
152 unsigned int len, const char *str, const struct qstr *name) 153 unsigned int len, const char *str, const struct qstr *name)
153{ 154{
154 /* This function is copy of vfat_cmpi */ 155 /* FIXME Should we support national language? */
155 // FIXME Should we support national language?
156 //struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io;
157 //unsigned int alen, blen;
158 156
159 /* A filename cannot end in '.' or we treat it like it has none */
160 /*
161 alen = vfat_striptail_len(name);
162 blen = __vfat_striptail_len(len, str);
163 if (alen == blen) {
164 if (nls_strnicmp(t, name->name, str, alen) == 0)
165 return 0;
166 }
167 */
168 if (name->len == len) { 157 if (name->len == len) {
169 if (strncasecmp(name->name, str, len) == 0) 158 if (str_n_case_eq(name->name, str, len))
170 return 0; 159 return 0;
171 } 160 }
172 return 1; 161 return 1;
173} 162}
174 163
175static void sdcardfs_canonical_path(const struct path *path, struct path *actual_path) { 164static void sdcardfs_canonical_path(const struct path *path,
165 struct path *actual_path)
166{
176 sdcardfs_get_real_lower(path->dentry, actual_path); 167 sdcardfs_get_real_lower(path->dentry, actual_path);
177} 168}
178 169
179const struct dentry_operations sdcardfs_ci_dops = { 170const struct dentry_operations sdcardfs_ci_dops = {
180 .d_revalidate = sdcardfs_d_revalidate, 171 .d_revalidate = sdcardfs_d_revalidate,
181 .d_release = sdcardfs_d_release, 172 .d_release = sdcardfs_d_release,
182 .d_hash = sdcardfs_hash_ci, 173 .d_hash = sdcardfs_hash_ci,
183 .d_compare = sdcardfs_cmp_ci, 174 .d_compare = sdcardfs_cmp_ci,
184 .d_canonical_path = sdcardfs_canonical_path, 175 .d_canonical_path = sdcardfs_canonical_path,
185}; 176};
diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c
index 41e0e11b3c35..b4595aab5713 100644
--- a/fs/sdcardfs/derived_perm.c
+++ b/fs/sdcardfs/derived_perm.c
@@ -30,11 +30,15 @@ static void inherit_derived_state(struct inode *parent, struct inode *child)
30 ci->userid = pi->userid; 30 ci->userid = pi->userid;
31 ci->d_uid = pi->d_uid; 31 ci->d_uid = pi->d_uid;
32 ci->under_android = pi->under_android; 32 ci->under_android = pi->under_android;
33 ci->under_cache = pi->under_cache;
34 ci->under_obb = pi->under_obb;
35 set_top(ci, pi->top);
33} 36}
34 37
35/* helper function for derived state */ 38/* helper function for derived state */
36void setup_derived_state(struct inode *inode, perm_t perm, 39void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid,
37 userid_t userid, uid_t uid, bool under_android) 40 uid_t uid, bool under_android,
41 struct inode *top)
38{ 42{
39 struct sdcardfs_inode_info *info = SDCARDFS_I(inode); 43 struct sdcardfs_inode_info *info = SDCARDFS_I(inode);
40 44
@@ -42,84 +46,276 @@ void setup_derived_state(struct inode *inode, perm_t perm,
42 info->userid = userid; 46 info->userid = userid;
43 info->d_uid = uid; 47 info->d_uid = uid;
44 info->under_android = under_android; 48 info->under_android = under_android;
49 info->under_cache = false;
50 info->under_obb = false;
51 set_top(info, top);
45} 52}
46 53
47/* While renaming, there is a point where we want the path from dentry, but the name from newdentry */ 54/* While renaming, there is a point where we want the path from dentry,
48void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, struct dentry *newdentry) 55 * but the name from newdentry
56 */
57void get_derived_permission_new(struct dentry *parent, struct dentry *dentry,
58 const struct qstr *name)
49{ 59{
50 struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); 60 struct sdcardfs_inode_info *info = SDCARDFS_I(d_inode(dentry));
51 struct sdcardfs_inode_info *info = SDCARDFS_I(dentry->d_inode); 61 struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent));
52 struct sdcardfs_inode_info *parent_info= SDCARDFS_I(parent->d_inode);
53 appid_t appid; 62 appid_t appid;
63 unsigned long user_num;
64 int err;
65 struct qstr q_Android = QSTR_LITERAL("Android");
66 struct qstr q_data = QSTR_LITERAL("data");
67 struct qstr q_obb = QSTR_LITERAL("obb");
68 struct qstr q_media = QSTR_LITERAL("media");
69 struct qstr q_cache = QSTR_LITERAL("cache");
54 70
55 /* By default, each inode inherits from its parent. 71 /* By default, each inode inherits from its parent.
56 * the properties are maintained on its private fields 72 * the properties are maintained on its private fields
57 * because the inode attributes will be modified with that of 73 * because the inode attributes will be modified with that of
58 * its lower inode. 74 * its lower inode.
59 * The derived state will be updated on the last 75 * These values are used by our custom permission call instead
60 * stage of each system call by fix_derived_permission(inode). 76 * of using the inode permissions.
61 */ 77 */
62 78
63 inherit_derived_state(parent->d_inode, dentry->d_inode); 79 inherit_derived_state(d_inode(parent), d_inode(dentry));
64 80
81 /* Files don't get special labels */
82 if (!S_ISDIR(d_inode(dentry)->i_mode))
83 return;
65 /* Derive custom permissions based on parent and current node */ 84 /* Derive custom permissions based on parent and current node */
66 switch (parent_info->perm) { 85 switch (parent_info->perm) {
67 case PERM_INHERIT: 86 case PERM_INHERIT:
68 /* Already inherited above */ 87 case PERM_ANDROID_PACKAGE_CACHE:
69 break; 88 /* Already inherited above */
70 case PERM_PRE_ROOT: 89 break;
71 /* Legacy internal layout places users at top level */ 90 case PERM_PRE_ROOT:
72 info->perm = PERM_ROOT; 91 /* Legacy internal layout places users at top level */
73 info->userid = simple_strtoul(newdentry->d_name.name, NULL, 10); 92 info->perm = PERM_ROOT;
74 break; 93 err = kstrtoul(name->name, 10, &user_num);
75 case PERM_ROOT: 94 if (err)
76 /* Assume masked off by default. */ 95 info->userid = 0;
77 if (!strcasecmp(newdentry->d_name.name, "Android")) { 96 else
78 /* App-specific directories inside; let anyone traverse */ 97 info->userid = user_num;
79 info->perm = PERM_ANDROID; 98 set_top(info, &info->vfs_inode);
80 info->under_android = true; 99 break;
81 } 100 case PERM_ROOT:
82 break; 101 /* Assume masked off by default. */
83 case PERM_ANDROID: 102 if (qstr_case_eq(name, &q_Android)) {
84 if (!strcasecmp(newdentry->d_name.name, "data")) { 103 /* App-specific directories inside; let anyone traverse */
85 /* App-specific directories inside; let anyone traverse */ 104 info->perm = PERM_ANDROID;
86 info->perm = PERM_ANDROID_DATA; 105 info->under_android = true;
87 } else if (!strcasecmp(newdentry->d_name.name, "obb")) { 106 set_top(info, &info->vfs_inode);
88 /* App-specific directories inside; let anyone traverse */ 107 }
89 info->perm = PERM_ANDROID_OBB; 108 break;
90 /* Single OBB directory is always shared */ 109 case PERM_ANDROID:
91 } else if (!strcasecmp(newdentry->d_name.name, "media")) { 110 if (qstr_case_eq(name, &q_data)) {
92 /* App-specific directories inside; let anyone traverse */ 111 /* App-specific directories inside; let anyone traverse */
93 info->perm = PERM_ANDROID_MEDIA; 112 info->perm = PERM_ANDROID_DATA;
94 } 113 set_top(info, &info->vfs_inode);
95 break; 114 } else if (qstr_case_eq(name, &q_obb)) {
96 case PERM_ANDROID_DATA: 115 /* App-specific directories inside; let anyone traverse */
97 case PERM_ANDROID_OBB: 116 info->perm = PERM_ANDROID_OBB;
98 case PERM_ANDROID_MEDIA: 117 info->under_obb = true;
99 appid = get_appid(sbi->pkgl_id, newdentry->d_name.name); 118 set_top(info, &info->vfs_inode);
100 if (appid != 0) { 119 /* Single OBB directory is always shared */
101 info->d_uid = multiuser_get_uid(parent_info->userid, appid); 120 } else if (qstr_case_eq(name, &q_media)) {
102 } 121 /* App-specific directories inside; let anyone traverse */
103 break; 122 info->perm = PERM_ANDROID_MEDIA;
123 set_top(info, &info->vfs_inode);
124 }
125 break;
126 case PERM_ANDROID_OBB:
127 case PERM_ANDROID_DATA:
128 case PERM_ANDROID_MEDIA:
129 info->perm = PERM_ANDROID_PACKAGE;
130 appid = get_appid(name->name);
131 if (appid != 0 && !is_excluded(name->name, parent_info->userid))
132 info->d_uid = multiuser_get_uid(parent_info->userid, appid);
133 set_top(info, &info->vfs_inode);
134 break;
135 case PERM_ANDROID_PACKAGE:
136 if (qstr_case_eq(name, &q_cache)) {
137 info->perm = PERM_ANDROID_PACKAGE_CACHE;
138 info->under_cache = true;
139 }
140 break;
104 } 141 }
105} 142}
106 143
107void get_derived_permission(struct dentry *parent, struct dentry *dentry) 144void get_derived_permission(struct dentry *parent, struct dentry *dentry)
108{ 145{
109 get_derived_permission_new(parent, dentry, dentry); 146 get_derived_permission_new(parent, dentry, &dentry->d_name);
110} 147}
111 148
112void get_derive_permissions_recursive(struct dentry *parent) { 149static appid_t get_type(const char *name)
113 struct dentry *dentry; 150{
114 list_for_each_entry(dentry, &parent->d_subdirs, d_child) { 151 const char *ext = strrchr(name, '.');
115 if (dentry->d_inode) { 152 appid_t id;
116 mutex_lock(&dentry->d_inode->i_mutex); 153
117 get_derived_permission(parent, dentry); 154 if (ext && ext[0]) {
118 fix_derived_permission(dentry->d_inode); 155 ext = &ext[1];
119 get_derive_permissions_recursive(dentry); 156 id = get_ext_gid(ext);
120 mutex_unlock(&dentry->d_inode->i_mutex); 157 return id?:AID_MEDIA_RW;
158 }
159 return AID_MEDIA_RW;
160}
161
162void fixup_lower_ownership(struct dentry *dentry, const char *name)
163{
164 struct path path;
165 struct inode *inode;
166 struct inode *delegated_inode = NULL;
167 int error;
168 struct sdcardfs_inode_info *info;
169 struct sdcardfs_inode_info *info_top;
170 perm_t perm;
171 struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
172 uid_t uid = sbi->options.fs_low_uid;
173 gid_t gid = sbi->options.fs_low_gid;
174 struct iattr newattrs;
175
176 info = SDCARDFS_I(d_inode(dentry));
177 perm = info->perm;
178 if (info->under_obb) {
179 perm = PERM_ANDROID_OBB;
180 } else if (info->under_cache) {
181 perm = PERM_ANDROID_PACKAGE_CACHE;
182 } else if (perm == PERM_INHERIT) {
183 info_top = SDCARDFS_I(grab_top(info));
184 perm = info_top->perm;
185 release_top(info);
186 }
187
188 switch (perm) {
189 case PERM_ROOT:
190 case PERM_ANDROID:
191 case PERM_ANDROID_DATA:
192 case PERM_ANDROID_MEDIA:
193 case PERM_ANDROID_PACKAGE:
194 case PERM_ANDROID_PACKAGE_CACHE:
195 uid = multiuser_get_uid(info->userid, uid);
196 break;
197 case PERM_ANDROID_OBB:
198 uid = AID_MEDIA_OBB;
199 break;
200 case PERM_PRE_ROOT:
201 default:
202 break;
203 }
204 switch (perm) {
205 case PERM_ROOT:
206 case PERM_ANDROID:
207 case PERM_ANDROID_DATA:
208 case PERM_ANDROID_MEDIA:
209 if (S_ISDIR(d_inode(dentry)->i_mode))
210 gid = multiuser_get_uid(info->userid, AID_MEDIA_RW);
211 else
212 gid = multiuser_get_uid(info->userid, get_type(name));
213 break;
214 case PERM_ANDROID_OBB:
215 gid = AID_MEDIA_OBB;
216 break;
217 case PERM_ANDROID_PACKAGE:
218 if (uid_is_app(info->d_uid))
219 gid = multiuser_get_ext_gid(info->d_uid);
220 else
221 gid = multiuser_get_uid(info->userid, AID_MEDIA_RW);
222 break;
223 case PERM_ANDROID_PACKAGE_CACHE:
224 if (uid_is_app(info->d_uid))
225 gid = multiuser_get_ext_cache_gid(info->d_uid);
226 else
227 gid = multiuser_get_uid(info->userid, AID_MEDIA_RW);
228 break;
229 case PERM_PRE_ROOT:
230 default:
231 break;
232 }
233
234 sdcardfs_get_lower_path(dentry, &path);
235 inode = d_inode(path.dentry);
236 if (d_inode(path.dentry)->i_gid.val != gid || d_inode(path.dentry)->i_uid.val != uid) {
237retry_deleg:
238 newattrs.ia_valid = ATTR_GID | ATTR_UID | ATTR_FORCE;
239 newattrs.ia_uid = make_kuid(current_user_ns(), uid);
240 newattrs.ia_gid = make_kgid(current_user_ns(), gid);
241 if (!S_ISDIR(inode->i_mode))
242 newattrs.ia_valid |=
243 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
244 mutex_lock(&inode->i_mutex);
245 error = security_path_chown(&path, newattrs.ia_uid, newattrs.ia_gid);
246 if (!error)
247 error = notify_change2(path.mnt, path.dentry, &newattrs, &delegated_inode);
248 mutex_unlock(&inode->i_mutex);
249 if (delegated_inode) {
250 error = break_deleg_wait(&delegated_inode);
251 if (!error)
252 goto retry_deleg;
121 } 253 }
254 if (error)
255 pr_debug("sdcardfs: Failed to touch up lower fs gid/uid for %s\n", name);
122 } 256 }
257 sdcardfs_put_lower_path(dentry, &path);
258}
259
260static int descendant_may_need_fixup(struct sdcardfs_inode_info *info, struct limit_search *limit)
261{
262 if (info->perm == PERM_ROOT)
263 return (limit->flags & BY_USERID)?info->userid == limit->userid:1;
264 if (info->perm == PERM_PRE_ROOT || info->perm == PERM_ANDROID)
265 return 1;
266 return 0;
267}
268
269static int needs_fixup(perm_t perm)
270{
271 if (perm == PERM_ANDROID_DATA || perm == PERM_ANDROID_OBB
272 || perm == PERM_ANDROID_MEDIA)
273 return 1;
274 return 0;
275}
276
277static void __fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit, int depth)
278{
279 struct dentry *child;
280 struct sdcardfs_inode_info *info;
281
282 /*
283 * All paths will terminate their recursion on hitting PERM_ANDROID_OBB,
284 * PERM_ANDROID_MEDIA, or PERM_ANDROID_DATA. This happens at a depth of
285 * at most 3.
286 */
287 WARN(depth > 3, "%s: Max expected depth exceeded!\n", __func__);
288 spin_lock_nested(&dentry->d_lock, depth);
289 if (!d_inode(dentry)) {
290 spin_unlock(&dentry->d_lock);
291 return;
292 }
293 info = SDCARDFS_I(d_inode(dentry));
294
295 if (needs_fixup(info->perm)) {
296 list_for_each_entry(child, &dentry->d_subdirs, d_child) {
297 spin_lock_nested(&child->d_lock, depth + 1);
298 if (!(limit->flags & BY_NAME) || qstr_case_eq(&child->d_name, &limit->name)) {
299 if (d_inode(child)) {
300 get_derived_permission(dentry, child);
301 fixup_tmp_permissions(d_inode(child));
302 spin_unlock(&child->d_lock);
303 break;
304 }
305 }
306 spin_unlock(&child->d_lock);
307 }
308 } else if (descendant_may_need_fixup(info, limit)) {
309 list_for_each_entry(child, &dentry->d_subdirs, d_child) {
310 __fixup_perms_recursive(child, limit, depth + 1);
311 }
312 }
313 spin_unlock(&dentry->d_lock);
314}
315
316void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit)
317{
318 __fixup_perms_recursive(dentry, limit, 0);
123} 319}
124 320
125/* main function for updating derived permission */ 321/* main function for updating derived permission */
@@ -127,40 +323,37 @@ inline void update_derived_permission_lock(struct dentry *dentry)
127{ 323{
128 struct dentry *parent; 324 struct dentry *parent;
129 325
130 if(!dentry || !dentry->d_inode) { 326 if (!dentry || !d_inode(dentry)) {
131 printk(KERN_ERR "sdcardfs: %s: invalid dentry\n", __func__); 327 pr_err("sdcardfs: %s: invalid dentry\n", __func__);
132 return; 328 return;
133 } 329 }
134 /* FIXME: 330 /* FIXME:
135 * 1. need to check whether the dentry is updated or not 331 * 1. need to check whether the dentry is updated or not
136 * 2. remove the root dentry update 332 * 2. remove the root dentry update
137 */ 333 */
138 mutex_lock(&dentry->d_inode->i_mutex); 334 if (!IS_ROOT(dentry)) {
139 if(IS_ROOT(dentry)) {
140 //setup_default_pre_root_state(dentry->d_inode);
141 } else {
142 parent = dget_parent(dentry); 335 parent = dget_parent(dentry);
143 if(parent) { 336 if (parent) {
144 get_derived_permission(parent, dentry); 337 get_derived_permission(parent, dentry);
145 dput(parent); 338 dput(parent);
146 } 339 }
147 } 340 }
148 fix_derived_permission(dentry->d_inode); 341 fixup_tmp_permissions(d_inode(dentry));
149 mutex_unlock(&dentry->d_inode->i_mutex);
150} 342}
151 343
152int need_graft_path(struct dentry *dentry) 344int need_graft_path(struct dentry *dentry)
153{ 345{
154 int ret = 0; 346 int ret = 0;
155 struct dentry *parent = dget_parent(dentry); 347 struct dentry *parent = dget_parent(dentry);
156 struct sdcardfs_inode_info *parent_info= SDCARDFS_I(parent->d_inode); 348 struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent));
157 struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); 349 struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
350 struct qstr obb = QSTR_LITERAL("obb");
158 351
159 if(parent_info->perm == PERM_ANDROID && 352 if (parent_info->perm == PERM_ANDROID &&
160 !strcasecmp(dentry->d_name.name, "obb")) { 353 qstr_case_eq(&dentry->d_name, &obb)) {
161 354
162 /* /Android/obb is the base obbpath of DERIVED_UNIFIED */ 355 /* /Android/obb is the base obbpath of DERIVED_UNIFIED */
163 if(!(sbi->options.multiuser == false 356 if (!(sbi->options.multiuser == false
164 && parent_info->userid == 0)) { 357 && parent_info->userid == 0)) {
165 ret = 1; 358 ret = 1;
166 } 359 }
@@ -175,36 +368,40 @@ int is_obbpath_invalid(struct dentry *dent)
175 struct sdcardfs_dentry_info *di = SDCARDFS_D(dent); 368 struct sdcardfs_dentry_info *di = SDCARDFS_D(dent);
176 struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dent->d_sb); 369 struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dent->d_sb);
177 char *path_buf, *obbpath_s; 370 char *path_buf, *obbpath_s;
371 int need_put = 0;
372 struct path lower_path;
178 373
179 /* check the base obbpath has been changed. 374 /* check the base obbpath has been changed.
180 * this routine can check an uninitialized obb dentry as well. 375 * this routine can check an uninitialized obb dentry as well.
181 * regarding the uninitialized obb, refer to the sdcardfs_mkdir() */ 376 * regarding the uninitialized obb, refer to the sdcardfs_mkdir()
377 */
182 spin_lock(&di->lock); 378 spin_lock(&di->lock);
183 if(di->orig_path.dentry) { 379 if (di->orig_path.dentry) {
184 if(!di->lower_path.dentry) { 380 if (!di->lower_path.dentry) {
185 ret = 1; 381 ret = 1;
186 } else { 382 } else {
187 path_get(&di->lower_path); 383 path_get(&di->lower_path);
188 //lower_parent = lock_parent(lower_path->dentry);
189 384
190 path_buf = kmalloc(PATH_MAX, GFP_ATOMIC); 385 path_buf = kmalloc(PATH_MAX, GFP_ATOMIC);
191 if(!path_buf) { 386 if (!path_buf) {
192 ret = 1; 387 ret = 1;
193 printk(KERN_ERR "sdcardfs: fail to allocate path_buf in %s.\n", __func__); 388 pr_err("sdcardfs: fail to allocate path_buf in %s.\n", __func__);
194 } else { 389 } else {
195 obbpath_s = d_path(&di->lower_path, path_buf, PATH_MAX); 390 obbpath_s = d_path(&di->lower_path, path_buf, PATH_MAX);
196 if (d_unhashed(di->lower_path.dentry) || 391 if (d_unhashed(di->lower_path.dentry) ||
197 strcasecmp(sbi->obbpath_s, obbpath_s)) { 392 !str_case_eq(sbi->obbpath_s, obbpath_s)) {
198 ret = 1; 393 ret = 1;
199 } 394 }
200 kfree(path_buf); 395 kfree(path_buf);
201 } 396 }
202 397
203 //unlock_dir(lower_parent); 398 pathcpy(&lower_path, &di->lower_path);
204 path_put(&di->lower_path); 399 need_put = 1;
205 } 400 }
206 } 401 }
207 spin_unlock(&di->lock); 402 spin_unlock(&di->lock);
403 if (need_put)
404 path_put(&lower_path);
208 return ret; 405 return ret;
209} 406}
210 407
@@ -212,17 +409,18 @@ int is_base_obbpath(struct dentry *dentry)
212{ 409{
213 int ret = 0; 410 int ret = 0;
214 struct dentry *parent = dget_parent(dentry); 411 struct dentry *parent = dget_parent(dentry);
215 struct sdcardfs_inode_info *parent_info= SDCARDFS_I(parent->d_inode); 412 struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent));
216 struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); 413 struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
414 struct qstr q_obb = QSTR_LITERAL("obb");
217 415
218 spin_lock(&SDCARDFS_D(dentry)->lock); 416 spin_lock(&SDCARDFS_D(dentry)->lock);
219 if (sbi->options.multiuser) { 417 if (sbi->options.multiuser) {
220 if(parent_info->perm == PERM_PRE_ROOT && 418 if (parent_info->perm == PERM_PRE_ROOT &&
221 !strcasecmp(dentry->d_name.name, "obb")) { 419 qstr_case_eq(&dentry->d_name, &q_obb)) {
222 ret = 1; 420 ret = 1;
223 } 421 }
224 } else if (parent_info->perm == PERM_ANDROID && 422 } else if (parent_info->perm == PERM_ANDROID &&
225 !strcasecmp(dentry->d_name.name, "obb")) { 423 qstr_case_eq(&dentry->d_name, &q_obb)) {
226 ret = 1; 424 ret = 1;
227 } 425 }
228 spin_unlock(&SDCARDFS_D(dentry)->lock); 426 spin_unlock(&SDCARDFS_D(dentry)->lock);
@@ -232,7 +430,8 @@ int is_base_obbpath(struct dentry *dentry)
232/* The lower_path will be stored to the dentry's orig_path 430/* The lower_path will be stored to the dentry's orig_path
233 * and the base obbpath will be copyed to the lower_path variable. 431 * and the base obbpath will be copyed to the lower_path variable.
234 * if an error returned, there's no change in the lower_path 432 * if an error returned, there's no change in the lower_path
235 * returns: -ERRNO if error (0: no error) */ 433 * returns: -ERRNO if error (0: no error)
434 */
236int setup_obb_dentry(struct dentry *dentry, struct path *lower_path) 435int setup_obb_dentry(struct dentry *dentry, struct path *lower_path)
237{ 436{
238 int err = 0; 437 int err = 0;
@@ -241,23 +440,24 @@ int setup_obb_dentry(struct dentry *dentry, struct path *lower_path)
241 440
242 /* A local obb dentry must have its own orig_path to support rmdir 441 /* A local obb dentry must have its own orig_path to support rmdir
243 * and mkdir of itself. Usually, we expect that the sbi->obbpath 442 * and mkdir of itself. Usually, we expect that the sbi->obbpath
244 * is avaiable on this stage. */ 443 * is avaiable on this stage.
444 */
245 sdcardfs_set_orig_path(dentry, lower_path); 445 sdcardfs_set_orig_path(dentry, lower_path);
246 446
247 err = kern_path(sbi->obbpath_s, 447 err = kern_path(sbi->obbpath_s,
248 LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &obbpath); 448 LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &obbpath);
249 449
250 if(!err) { 450 if (!err) {
251 /* the obbpath base has been found */ 451 /* the obbpath base has been found */
252 printk(KERN_INFO "sdcardfs: the sbi->obbpath is found\n");
253 pathcpy(lower_path, &obbpath); 452 pathcpy(lower_path, &obbpath);
254 } else { 453 } else {
255 /* if the sbi->obbpath is not available, we can optionally 454 /* if the sbi->obbpath is not available, we can optionally
256 * setup the lower_path with its orig_path. 455 * setup the lower_path with its orig_path.
257 * but, the current implementation just returns an error 456 * but, the current implementation just returns an error
258 * because the sdcard daemon also regards this case as 457 * because the sdcard daemon also regards this case as
259 * a lookup fail. */ 458 * a lookup fail.
260 printk(KERN_INFO "sdcardfs: the sbi->obbpath is not available\n"); 459 */
460 pr_info("sdcardfs: the sbi->obbpath is not available\n");
261 } 461 }
262 return err; 462 return err;
263} 463}
diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c
index c249fa982d3c..6076c342dae6 100644
--- a/fs/sdcardfs/file.c
+++ b/fs/sdcardfs/file.c
@@ -65,7 +65,7 @@ static ssize_t sdcardfs_write(struct file *file, const char __user *buf,
65 65
66 /* check disk space */ 66 /* check disk space */
67 if (!check_min_free_space(dentry, count, 0)) { 67 if (!check_min_free_space(dentry, count, 0)) {
68 printk(KERN_INFO "No minimum free space.\n"); 68 pr_err("No minimum free space.\n");
69 return -ENOSPC; 69 return -ENOSPC;
70 } 70 }
71 71
@@ -113,6 +113,10 @@ static long sdcardfs_unlocked_ioctl(struct file *file, unsigned int cmd,
113 if (lower_file->f_op->unlocked_ioctl) 113 if (lower_file->f_op->unlocked_ioctl)
114 err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); 114 err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
115 115
116 /* some ioctls can change inode attributes (EXT2_IOC_SETFLAGS) */
117 if (!err)
118 sdcardfs_copy_and_fix_attrs(file_inode(file),
119 file_inode(lower_file));
116out: 120out:
117 return err; 121 return err;
118} 122}
@@ -160,8 +164,7 @@ static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma)
160 lower_file = sdcardfs_lower_file(file); 164 lower_file = sdcardfs_lower_file(file);
161 if (willwrite && !lower_file->f_mapping->a_ops->writepage) { 165 if (willwrite && !lower_file->f_mapping->a_ops->writepage) {
162 err = -EINVAL; 166 err = -EINVAL;
163 printk(KERN_ERR "sdcardfs: lower file system does not " 167 pr_err("sdcardfs: lower file system does not support writeable mmap\n");
164 "support writeable mmap\n");
165 goto out; 168 goto out;
166 } 169 }
167 170
@@ -173,16 +176,10 @@ static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma)
173 if (!SDCARDFS_F(file)->lower_vm_ops) { 176 if (!SDCARDFS_F(file)->lower_vm_ops) {
174 err = lower_file->f_op->mmap(lower_file, vma); 177 err = lower_file->f_op->mmap(lower_file, vma);
175 if (err) { 178 if (err) {
176 printk(KERN_ERR "sdcardfs: lower mmap failed %d\n", err); 179 pr_err("sdcardfs: lower mmap failed %d\n", err);
177 goto out; 180 goto out;
178 } 181 }
179 saved_vm_ops = vma->vm_ops; /* save: came from lower ->mmap */ 182 saved_vm_ops = vma->vm_ops; /* save: came from lower ->mmap */
180 err = do_munmap(current->mm, vma->vm_start,
181 vma->vm_end - vma->vm_start);
182 if (err) {
183 printk(KERN_ERR "sdcardfs: do_munmap failed %d\n", err);
184 goto out;
185 }
186 } 183 }
187 184
188 /* 185 /*
@@ -195,6 +192,9 @@ static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma)
195 file->f_mapping->a_ops = &sdcardfs_aops; /* set our aops */ 192 file->f_mapping->a_ops = &sdcardfs_aops; /* set our aops */
196 if (!SDCARDFS_F(file)->lower_vm_ops) /* save for our ->fault */ 193 if (!SDCARDFS_F(file)->lower_vm_ops) /* save for our ->fault */
197 SDCARDFS_F(file)->lower_vm_ops = saved_vm_ops; 194 SDCARDFS_F(file)->lower_vm_ops = saved_vm_ops;
195 vma->vm_private_data = file;
196 get_file(lower_file);
197 vma->vm_file = lower_file;
198 198
199out: 199out:
200 return err; 200 return err;
@@ -216,16 +216,13 @@ static int sdcardfs_open(struct inode *inode, struct file *file)
216 goto out_err; 216 goto out_err;
217 } 217 }
218 218
219 if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name)) { 219 if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) {
220 printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
221 " dentry: %s, task:%s\n",
222 __func__, dentry->d_name.name, current->comm);
223 err = -EACCES; 220 err = -EACCES;
224 goto out_err; 221 goto out_err;
225 } 222 }
226 223
227 /* save current_cred and override it */ 224 /* save current_cred and override it */
228 OVERRIDE_CRED(sbi, saved_cred); 225 OVERRIDE_CRED(sbi, saved_cred, SDCARDFS_I(inode));
229 226
230 file->private_data = 227 file->private_data =
231 kzalloc(sizeof(struct sdcardfs_file_info), GFP_KERNEL); 228 kzalloc(sizeof(struct sdcardfs_file_info), GFP_KERNEL);
@@ -251,9 +248,8 @@ static int sdcardfs_open(struct inode *inode, struct file *file)
251 248
252 if (err) 249 if (err)
253 kfree(SDCARDFS_F(file)); 250 kfree(SDCARDFS_F(file));
254 else { 251 else
255 sdcardfs_copy_and_fix_attrs(inode, sdcardfs_lower_inode(inode)); 252 sdcardfs_copy_and_fix_attrs(inode, sdcardfs_lower_inode(inode));
256 }
257 253
258out_revert_cred: 254out_revert_cred:
259 REVERT_CRED(saved_cred); 255 REVERT_CRED(saved_cred);
@@ -323,6 +319,85 @@ static int sdcardfs_fasync(int fd, struct file *file, int flag)
323 return err; 319 return err;
324} 320}
325 321
322/*
323 * Sdcardfs cannot use generic_file_llseek as ->llseek, because it would
324 * only set the offset of the upper file. So we have to implement our
325 * own method to set both the upper and lower file offsets
326 * consistently.
327 */
328static loff_t sdcardfs_file_llseek(struct file *file, loff_t offset, int whence)
329{
330 int err;
331 struct file *lower_file;
332
333 err = generic_file_llseek(file, offset, whence);
334 if (err < 0)
335 goto out;
336
337 lower_file = sdcardfs_lower_file(file);
338 err = generic_file_llseek(lower_file, offset, whence);
339
340out:
341 return err;
342}
343
344/*
345 * Sdcardfs read_iter, redirect modified iocb to lower read_iter
346 */
347ssize_t sdcardfs_read_iter(struct kiocb *iocb, struct iov_iter *iter)
348{
349 int err;
350 struct file *file = iocb->ki_filp, *lower_file;
351
352 lower_file = sdcardfs_lower_file(file);
353 if (!lower_file->f_op->read_iter) {
354 err = -EINVAL;
355 goto out;
356 }
357
358 get_file(lower_file); /* prevent lower_file from being released */
359 iocb->ki_filp = lower_file;
360 err = lower_file->f_op->read_iter(iocb, iter);
361 iocb->ki_filp = file;
362 fput(lower_file);
363 /* update upper inode atime as needed */
364 if (err >= 0 || err == -EIOCBQUEUED)
365 fsstack_copy_attr_atime(file->f_path.dentry->d_inode,
366 file_inode(lower_file));
367out:
368 return err;
369}
370
371/*
372 * Sdcardfs write_iter, redirect modified iocb to lower write_iter
373 */
374ssize_t sdcardfs_write_iter(struct kiocb *iocb, struct iov_iter *iter)
375{
376 int err;
377 struct file *file = iocb->ki_filp, *lower_file;
378
379 lower_file = sdcardfs_lower_file(file);
380 if (!lower_file->f_op->write_iter) {
381 err = -EINVAL;
382 goto out;
383 }
384
385 get_file(lower_file); /* prevent lower_file from being released */
386 iocb->ki_filp = lower_file;
387 err = lower_file->f_op->write_iter(iocb, iter);
388 iocb->ki_filp = file;
389 fput(lower_file);
390 /* update upper inode times/sizes as needed */
391 if (err >= 0 || err == -EIOCBQUEUED) {
392 fsstack_copy_inode_size(file->f_path.dentry->d_inode,
393 file_inode(lower_file));
394 fsstack_copy_attr_times(file->f_path.dentry->d_inode,
395 file_inode(lower_file));
396 }
397out:
398 return err;
399}
400
326const struct file_operations sdcardfs_main_fops = { 401const struct file_operations sdcardfs_main_fops = {
327 .llseek = generic_file_llseek, 402 .llseek = generic_file_llseek,
328 .read = sdcardfs_read, 403 .read = sdcardfs_read,
@@ -337,11 +412,13 @@ const struct file_operations sdcardfs_main_fops = {
337 .release = sdcardfs_file_release, 412 .release = sdcardfs_file_release,
338 .fsync = sdcardfs_fsync, 413 .fsync = sdcardfs_fsync,
339 .fasync = sdcardfs_fasync, 414 .fasync = sdcardfs_fasync,
415 .read_iter = sdcardfs_read_iter,
416 .write_iter = sdcardfs_write_iter,
340}; 417};
341 418
342/* trimmed directory options */ 419/* trimmed directory options */
343const struct file_operations sdcardfs_dir_fops = { 420const struct file_operations sdcardfs_dir_fops = {
344 .llseek = generic_file_llseek, 421 .llseek = sdcardfs_file_llseek,
345 .read = generic_read_dir, 422 .read = generic_read_dir,
346 .iterate = sdcardfs_readdir, 423 .iterate = sdcardfs_readdir,
347 .unlocked_ioctl = sdcardfs_unlocked_ioctl, 424 .unlocked_ioctl = sdcardfs_unlocked_ioctl,
diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c
index 2528da0d3ae1..f15cb11ca8fd 100644
--- a/fs/sdcardfs/inode.c
+++ b/fs/sdcardfs/inode.c
@@ -19,18 +19,25 @@
19 */ 19 */
20 20
21#include "sdcardfs.h" 21#include "sdcardfs.h"
22#include <linux/fs_struct.h>
23#include <linux/ratelimit.h>
22 24
23/* Do not directly use this function. Use OVERRIDE_CRED() instead. */ 25/* Do not directly use this function. Use OVERRIDE_CRED() instead. */
24const struct cred * override_fsids(struct sdcardfs_sb_info* sbi) 26const struct cred *override_fsids(struct sdcardfs_sb_info *sbi, struct sdcardfs_inode_info *info)
25{ 27{
26 struct cred * cred; 28 struct cred *cred;
27 const struct cred * old_cred; 29 const struct cred *old_cred;
30 uid_t uid;
28 31
29 cred = prepare_creds(); 32 cred = prepare_creds();
30 if (!cred) 33 if (!cred)
31 return NULL; 34 return NULL;
32 35
33 cred->fsuid = make_kuid(&init_user_ns, sbi->options.fs_low_uid); 36 if (info->under_obb)
37 uid = AID_MEDIA_OBB;
38 else
39 uid = multiuser_get_uid(info->userid, sbi->options.fs_low_uid);
40 cred->fsuid = make_kuid(&init_user_ns, uid);
34 cred->fsgid = make_kgid(&init_user_ns, sbi->options.fs_low_gid); 41 cred->fsgid = make_kgid(&init_user_ns, sbi->options.fs_low_gid);
35 42
36 old_cred = override_creds(cred); 43 old_cred = override_creds(cred);
@@ -39,9 +46,9 @@ const struct cred * override_fsids(struct sdcardfs_sb_info* sbi)
39} 46}
40 47
41/* Do not directly use this function, use REVERT_CRED() instead. */ 48/* Do not directly use this function, use REVERT_CRED() instead. */
42void revert_fsids(const struct cred * old_cred) 49void revert_fsids(const struct cred *old_cred)
43{ 50{
44 const struct cred * cur_cred; 51 const struct cred *cur_cred;
45 52
46 cur_cred = current->cred; 53 cur_cred = current->cred;
47 revert_creds(old_cred); 54 revert_creds(old_cred);
@@ -53,28 +60,39 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry,
53{ 60{
54 int err; 61 int err;
55 struct dentry *lower_dentry; 62 struct dentry *lower_dentry;
63 struct vfsmount *lower_dentry_mnt;
56 struct dentry *lower_parent_dentry = NULL; 64 struct dentry *lower_parent_dentry = NULL;
57 struct path lower_path; 65 struct path lower_path;
58 const struct cred *saved_cred = NULL; 66 const struct cred *saved_cred = NULL;
67 struct fs_struct *saved_fs;
68 struct fs_struct *copied_fs;
59 69
60 if(!check_caller_access_to_name(dir, dentry->d_name.name)) { 70 if (!check_caller_access_to_name(dir, &dentry->d_name)) {
61 printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
62 " dentry: %s, task:%s\n",
63 __func__, dentry->d_name.name, current->comm);
64 err = -EACCES; 71 err = -EACCES;
65 goto out_eacces; 72 goto out_eacces;
66 } 73 }
67 74
68 /* save current_cred and override it */ 75 /* save current_cred and override it */
69 OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred); 76 OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir));
70 77
71 sdcardfs_get_lower_path(dentry, &lower_path); 78 sdcardfs_get_lower_path(dentry, &lower_path);
72 lower_dentry = lower_path.dentry; 79 lower_dentry = lower_path.dentry;
80 lower_dentry_mnt = lower_path.mnt;
73 lower_parent_dentry = lock_parent(lower_dentry); 81 lower_parent_dentry = lock_parent(lower_dentry);
74 82
75 /* set last 16bytes of mode field to 0664 */ 83 /* set last 16bytes of mode field to 0664 */
76 mode = (mode & S_IFMT) | 00664; 84 mode = (mode & S_IFMT) | 00664;
77 err = vfs_create(d_inode(lower_parent_dentry), lower_dentry, mode, want_excl); 85
86 /* temporarily change umask for lower fs write */
87 saved_fs = current->fs;
88 copied_fs = copy_fs_struct(current->fs);
89 if (!copied_fs) {
90 err = -ENOMEM;
91 goto out_unlock;
92 }
93 current->fs = copied_fs;
94 current->fs->umask = 0;
95 err = vfs_create2(lower_dentry_mnt, d_inode(lower_parent_dentry), lower_dentry, mode, want_excl);
78 if (err) 96 if (err)
79 goto out; 97 goto out;
80 98
@@ -83,8 +101,12 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry,
83 goto out; 101 goto out;
84 fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); 102 fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
85 fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); 103 fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry));
104 fixup_lower_ownership(dentry, dentry->d_name.name);
86 105
87out: 106out:
107 current->fs = saved_fs;
108 free_fs_struct(copied_fs);
109out_unlock:
88 unlock_dir(lower_parent_dentry); 110 unlock_dir(lower_parent_dentry);
89 sdcardfs_put_lower_path(dentry, &lower_path); 111 sdcardfs_put_lower_path(dentry, &lower_path);
90 REVERT_CRED(saved_cred); 112 REVERT_CRED(saved_cred);
@@ -138,28 +160,27 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry)
138{ 160{
139 int err; 161 int err;
140 struct dentry *lower_dentry; 162 struct dentry *lower_dentry;
163 struct vfsmount *lower_mnt;
141 struct inode *lower_dir_inode = sdcardfs_lower_inode(dir); 164 struct inode *lower_dir_inode = sdcardfs_lower_inode(dir);
142 struct dentry *lower_dir_dentry; 165 struct dentry *lower_dir_dentry;
143 struct path lower_path; 166 struct path lower_path;
144 const struct cred *saved_cred = NULL; 167 const struct cred *saved_cred = NULL;
145 168
146 if(!check_caller_access_to_name(dir, dentry->d_name.name)) { 169 if (!check_caller_access_to_name(dir, &dentry->d_name)) {
147 printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
148 " dentry: %s, task:%s\n",
149 __func__, dentry->d_name.name, current->comm);
150 err = -EACCES; 170 err = -EACCES;
151 goto out_eacces; 171 goto out_eacces;
152 } 172 }
153 173
154 /* save current_cred and override it */ 174 /* save current_cred and override it */
155 OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred); 175 OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir));
156 176
157 sdcardfs_get_lower_path(dentry, &lower_path); 177 sdcardfs_get_lower_path(dentry, &lower_path);
158 lower_dentry = lower_path.dentry; 178 lower_dentry = lower_path.dentry;
179 lower_mnt = lower_path.mnt;
159 dget(lower_dentry); 180 dget(lower_dentry);
160 lower_dir_dentry = lock_parent(lower_dentry); 181 lower_dir_dentry = lock_parent(lower_dentry);
161 182
162 err = vfs_unlink(lower_dir_inode, lower_dentry, NULL); 183 err = vfs_unlink2(lower_mnt, lower_dir_inode, lower_dentry, NULL);
163 184
164 /* 185 /*
165 * Note: unlinking on top of NFS can cause silly-renamed files. 186 * Note: unlinking on top of NFS can cause silly-renamed files.
@@ -219,14 +240,15 @@ out:
219} 240}
220#endif 241#endif
221 242
222static int touch(char *abs_path, mode_t mode) { 243static int touch(char *abs_path, mode_t mode)
244{
223 struct file *filp = filp_open(abs_path, O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW, mode); 245 struct file *filp = filp_open(abs_path, O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW, mode);
246
224 if (IS_ERR(filp)) { 247 if (IS_ERR(filp)) {
225 if (PTR_ERR(filp) == -EEXIST) { 248 if (PTR_ERR(filp) == -EEXIST) {
226 return 0; 249 return 0;
227 } 250 } else {
228 else { 251 pr_err("sdcardfs: failed to open(%s): %ld\n",
229 printk(KERN_ERR "sdcardfs: failed to open(%s): %ld\n",
230 abs_path, PTR_ERR(filp)); 252 abs_path, PTR_ERR(filp));
231 return PTR_ERR(filp); 253 return PTR_ERR(filp);
232 } 254 }
@@ -240,31 +262,29 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
240 int err; 262 int err;
241 int make_nomedia_in_obb = 0; 263 int make_nomedia_in_obb = 0;
242 struct dentry *lower_dentry; 264 struct dentry *lower_dentry;
265 struct vfsmount *lower_mnt;
243 struct dentry *lower_parent_dentry = NULL; 266 struct dentry *lower_parent_dentry = NULL;
244 struct path lower_path; 267 struct path lower_path;
245 struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); 268 struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb);
246 const struct cred *saved_cred = NULL; 269 const struct cred *saved_cred = NULL;
247 struct sdcardfs_inode_info *pi = SDCARDFS_I(dir); 270 struct sdcardfs_inode_info *pi = SDCARDFS_I(dir);
248 char *page_buf;
249 char *nomedia_dir_name;
250 char *nomedia_fullpath;
251 int fullpath_namelen;
252 int touch_err = 0; 271 int touch_err = 0;
272 struct fs_struct *saved_fs;
273 struct fs_struct *copied_fs;
274 struct qstr q_obb = QSTR_LITERAL("obb");
275 struct qstr q_data = QSTR_LITERAL("data");
253 276
254 if(!check_caller_access_to_name(dir, dentry->d_name.name)) { 277 if (!check_caller_access_to_name(dir, &dentry->d_name)) {
255 printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
256 " dentry: %s, task:%s\n",
257 __func__, dentry->d_name.name, current->comm);
258 err = -EACCES; 278 err = -EACCES;
259 goto out_eacces; 279 goto out_eacces;
260 } 280 }
261 281
262 /* save current_cred and override it */ 282 /* save current_cred and override it */
263 OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred); 283 OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir));
264 284
265 /* check disk space */ 285 /* check disk space */
266 if (!check_min_free_space(dentry, 0, 1)) { 286 if (!check_min_free_space(dentry, 0, 1)) {
267 printk(KERN_INFO "sdcardfs: No minimum free space.\n"); 287 pr_err("sdcardfs: No minimum free space.\n");
268 err = -ENOSPC; 288 err = -ENOSPC;
269 goto out_revert; 289 goto out_revert;
270 } 290 }
@@ -272,87 +292,83 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
272 /* the lower_dentry is negative here */ 292 /* the lower_dentry is negative here */
273 sdcardfs_get_lower_path(dentry, &lower_path); 293 sdcardfs_get_lower_path(dentry, &lower_path);
274 lower_dentry = lower_path.dentry; 294 lower_dentry = lower_path.dentry;
295 lower_mnt = lower_path.mnt;
275 lower_parent_dentry = lock_parent(lower_dentry); 296 lower_parent_dentry = lock_parent(lower_dentry);
276 297
277 /* set last 16bytes of mode field to 0775 */ 298 /* set last 16bytes of mode field to 0775 */
278 mode = (mode & S_IFMT) | 00775; 299 mode = (mode & S_IFMT) | 00775;
279 err = vfs_mkdir(d_inode(lower_parent_dentry), lower_dentry, mode);
280 300
281 if (err) 301 /* temporarily change umask for lower fs write */
302 saved_fs = current->fs;
303 copied_fs = copy_fs_struct(current->fs);
304 if (!copied_fs) {
305 err = -ENOMEM;
306 unlock_dir(lower_parent_dentry);
307 goto out_unlock;
308 }
309 current->fs = copied_fs;
310 current->fs->umask = 0;
311 err = vfs_mkdir2(lower_mnt, d_inode(lower_parent_dentry), lower_dentry, mode);
312
313 if (err) {
314 unlock_dir(lower_parent_dentry);
282 goto out; 315 goto out;
316 }
283 317
284 /* if it is a local obb dentry, setup it with the base obbpath */ 318 /* if it is a local obb dentry, setup it with the base obbpath */
285 if(need_graft_path(dentry)) { 319 if (need_graft_path(dentry)) {
286 320
287 err = setup_obb_dentry(dentry, &lower_path); 321 err = setup_obb_dentry(dentry, &lower_path);
288 if(err) { 322 if (err) {
289 /* if the sbi->obbpath is not available, the lower_path won't be 323 /* if the sbi->obbpath is not available, the lower_path won't be
290 * changed by setup_obb_dentry() but the lower path is saved to 324 * changed by setup_obb_dentry() but the lower path is saved to
291 * its orig_path. this dentry will be revalidated later. 325 * its orig_path. this dentry will be revalidated later.
292 * but now, the lower_path should be NULL */ 326 * but now, the lower_path should be NULL
327 */
293 sdcardfs_put_reset_lower_path(dentry); 328 sdcardfs_put_reset_lower_path(dentry);
294 329
295 /* the newly created lower path which saved to its orig_path or 330 /* the newly created lower path which saved to its orig_path or
296 * the lower_path is the base obbpath. 331 * the lower_path is the base obbpath.
297 * therefore, an additional path_get is required */ 332 * therefore, an additional path_get is required
333 */
298 path_get(&lower_path); 334 path_get(&lower_path);
299 } else 335 } else
300 make_nomedia_in_obb = 1; 336 make_nomedia_in_obb = 1;
301 } 337 }
302 338
303 err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, pi->userid); 339 err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, pi->userid);
304 if (err) 340 if (err) {
341 unlock_dir(lower_parent_dentry);
305 goto out; 342 goto out;
343 }
306 344
307 fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); 345 fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir));
308 fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); 346 fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry));
309 /* update number of links on parent directory */ 347 /* update number of links on parent directory */
310 set_nlink(dir, sdcardfs_lower_inode(dir)->i_nlink); 348 set_nlink(dir, sdcardfs_lower_inode(dir)->i_nlink);
311 349 fixup_lower_ownership(dentry, dentry->d_name.name);
312 if ((!sbi->options.multiuser) && (!strcasecmp(dentry->d_name.name, "obb")) 350 unlock_dir(lower_parent_dentry);
351 if ((!sbi->options.multiuser) && (qstr_case_eq(&dentry->d_name, &q_obb))
313 && (pi->perm == PERM_ANDROID) && (pi->userid == 0)) 352 && (pi->perm == PERM_ANDROID) && (pi->userid == 0))
314 make_nomedia_in_obb = 1; 353 make_nomedia_in_obb = 1;
315 354
316 /* When creating /Android/data and /Android/obb, mark them as .nomedia */ 355 /* When creating /Android/data and /Android/obb, mark them as .nomedia */
317 if (make_nomedia_in_obb || 356 if (make_nomedia_in_obb ||
318 ((pi->perm == PERM_ANDROID) && (!strcasecmp(dentry->d_name.name, "data")))) { 357 ((pi->perm == PERM_ANDROID) && (qstr_case_eq(&dentry->d_name, &q_data)))) {
319 358 REVERT_CRED(saved_cred);
320 page_buf = (char *)__get_free_page(GFP_KERNEL); 359 OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(d_inode(dentry)));
321 if (!page_buf) { 360 set_fs_pwd(current->fs, &lower_path);
322 printk(KERN_ERR "sdcardfs: failed to allocate page buf\n"); 361 touch_err = touch(".nomedia", 0664);
323 goto out;
324 }
325
326 nomedia_dir_name = d_absolute_path(&lower_path, page_buf, PAGE_SIZE);
327 if (IS_ERR(nomedia_dir_name)) {
328 free_page((unsigned long)page_buf);
329 printk(KERN_ERR "sdcardfs: failed to get .nomedia dir name\n");
330 goto out;
331 }
332
333 fullpath_namelen = page_buf + PAGE_SIZE - nomedia_dir_name - 1;
334 fullpath_namelen += strlen("/.nomedia");
335 nomedia_fullpath = kzalloc(fullpath_namelen + 1, GFP_KERNEL);
336 if (!nomedia_fullpath) {
337 free_page((unsigned long)page_buf);
338 printk(KERN_ERR "sdcardfs: failed to allocate .nomedia fullpath buf\n");
339 goto out;
340 }
341
342 strcpy(nomedia_fullpath, nomedia_dir_name);
343 free_page((unsigned long)page_buf);
344 strcat(nomedia_fullpath, "/.nomedia");
345 touch_err = touch(nomedia_fullpath, 0664);
346 if (touch_err) { 362 if (touch_err) {
347 printk(KERN_ERR "sdcardfs: failed to touch(%s): %d\n", 363 pr_err("sdcardfs: failed to create .nomedia in %s: %d\n",
348 nomedia_fullpath, touch_err); 364 lower_path.dentry->d_name.name, touch_err);
349 kfree(nomedia_fullpath);
350 goto out; 365 goto out;
351 } 366 }
352 kfree(nomedia_fullpath);
353 } 367 }
354out: 368out:
355 unlock_dir(lower_parent_dentry); 369 current->fs = saved_fs;
370 free_fs_struct(copied_fs);
371out_unlock:
356 sdcardfs_put_lower_path(dentry, &lower_path); 372 sdcardfs_put_lower_path(dentry, &lower_path);
357out_revert: 373out_revert:
358 REVERT_CRED(saved_cred); 374 REVERT_CRED(saved_cred);
@@ -364,29 +380,29 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry)
364{ 380{
365 struct dentry *lower_dentry; 381 struct dentry *lower_dentry;
366 struct dentry *lower_dir_dentry; 382 struct dentry *lower_dir_dentry;
383 struct vfsmount *lower_mnt;
367 int err; 384 int err;
368 struct path lower_path; 385 struct path lower_path;
369 const struct cred *saved_cred = NULL; 386 const struct cred *saved_cred = NULL;
370 387
371 if(!check_caller_access_to_name(dir, dentry->d_name.name)) { 388 if (!check_caller_access_to_name(dir, &dentry->d_name)) {
372 printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
373 " dentry: %s, task:%s\n",
374 __func__, dentry->d_name.name, current->comm);
375 err = -EACCES; 389 err = -EACCES;
376 goto out_eacces; 390 goto out_eacces;
377 } 391 }
378 392
379 /* save current_cred and override it */ 393 /* save current_cred and override it */
380 OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred); 394 OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir));
381 395
382 /* sdcardfs_get_real_lower(): in case of remove an user's obb dentry 396 /* sdcardfs_get_real_lower(): in case of remove an user's obb dentry
383 * the dentry on the original path should be deleted. */ 397 * the dentry on the original path should be deleted.
398 */
384 sdcardfs_get_real_lower(dentry, &lower_path); 399 sdcardfs_get_real_lower(dentry, &lower_path);
385 400
386 lower_dentry = lower_path.dentry; 401 lower_dentry = lower_path.dentry;
402 lower_mnt = lower_path.mnt;
387 lower_dir_dentry = lock_parent(lower_dentry); 403 lower_dir_dentry = lock_parent(lower_dentry);
388 404
389 err = vfs_rmdir(d_inode(lower_dir_dentry), lower_dentry); 405 err = vfs_rmdir2(lower_mnt, d_inode(lower_dir_dentry), lower_dentry);
390 if (err) 406 if (err)
391 goto out; 407 goto out;
392 408
@@ -450,27 +466,25 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry,
450 struct dentry *lower_new_dentry = NULL; 466 struct dentry *lower_new_dentry = NULL;
451 struct dentry *lower_old_dir_dentry = NULL; 467 struct dentry *lower_old_dir_dentry = NULL;
452 struct dentry *lower_new_dir_dentry = NULL; 468 struct dentry *lower_new_dir_dentry = NULL;
469 struct vfsmount *lower_mnt = NULL;
453 struct dentry *trap = NULL; 470 struct dentry *trap = NULL;
454 struct dentry *new_parent = NULL;
455 struct path lower_old_path, lower_new_path; 471 struct path lower_old_path, lower_new_path;
456 const struct cred *saved_cred = NULL; 472 const struct cred *saved_cred = NULL;
457 473
458 if(!check_caller_access_to_name(old_dir, old_dentry->d_name.name) || 474 if (!check_caller_access_to_name(old_dir, &old_dentry->d_name) ||
459 !check_caller_access_to_name(new_dir, new_dentry->d_name.name)) { 475 !check_caller_access_to_name(new_dir, &new_dentry->d_name)) {
460 printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
461 " new_dentry: %s, task:%s\n",
462 __func__, new_dentry->d_name.name, current->comm);
463 err = -EACCES; 476 err = -EACCES;
464 goto out_eacces; 477 goto out_eacces;
465 } 478 }
466 479
467 /* save current_cred and override it */ 480 /* save current_cred and override it */
468 OVERRIDE_CRED(SDCARDFS_SB(old_dir->i_sb), saved_cred); 481 OVERRIDE_CRED(SDCARDFS_SB(old_dir->i_sb), saved_cred, SDCARDFS_I(new_dir));
469 482
470 sdcardfs_get_real_lower(old_dentry, &lower_old_path); 483 sdcardfs_get_real_lower(old_dentry, &lower_old_path);
471 sdcardfs_get_lower_path(new_dentry, &lower_new_path); 484 sdcardfs_get_lower_path(new_dentry, &lower_new_path);
472 lower_old_dentry = lower_old_path.dentry; 485 lower_old_dentry = lower_old_path.dentry;
473 lower_new_dentry = lower_new_path.dentry; 486 lower_new_dentry = lower_new_path.dentry;
487 lower_mnt = lower_old_path.mnt;
474 lower_old_dir_dentry = dget_parent(lower_old_dentry); 488 lower_old_dir_dentry = dget_parent(lower_old_dentry);
475 lower_new_dir_dentry = dget_parent(lower_new_dentry); 489 lower_new_dir_dentry = dget_parent(lower_new_dentry);
476 490
@@ -486,7 +500,8 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry,
486 goto out; 500 goto out;
487 } 501 }
488 502
489 err = vfs_rename(d_inode(lower_old_dir_dentry), lower_old_dentry, 503 err = vfs_rename2(lower_mnt,
504 d_inode(lower_old_dir_dentry), lower_old_dentry,
490 d_inode(lower_new_dir_dentry), lower_new_dentry, 505 d_inode(lower_new_dir_dentry), lower_new_dentry,
491 NULL, 0); 506 NULL, 0);
492 if (err) 507 if (err)
@@ -499,25 +514,11 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry,
499 if (new_dir != old_dir) { 514 if (new_dir != old_dir) {
500 sdcardfs_copy_and_fix_attrs(old_dir, d_inode(lower_old_dir_dentry)); 515 sdcardfs_copy_and_fix_attrs(old_dir, d_inode(lower_old_dir_dentry));
501 fsstack_copy_inode_size(old_dir, d_inode(lower_old_dir_dentry)); 516 fsstack_copy_inode_size(old_dir, d_inode(lower_old_dir_dentry));
502
503 /* update the derived permission of the old_dentry
504 * with its new parent
505 */
506 new_parent = dget_parent(new_dentry);
507 if(new_parent) {
508 if(d_inode(old_dentry)) {
509 update_derived_permission_lock(old_dentry);
510 }
511 dput(new_parent);
512 }
513 } 517 }
514 /* At this point, not all dentry information has been moved, so 518 get_derived_permission_new(new_dentry->d_parent, old_dentry, &new_dentry->d_name);
515 * we pass along new_dentry for the name.*/ 519 fixup_tmp_permissions(d_inode(old_dentry));
516 mutex_lock(&d_inode(old_dentry)->i_mutex); 520 fixup_lower_ownership(old_dentry, new_dentry->d_name.name);
517 get_derived_permission_new(new_dentry->d_parent, old_dentry, new_dentry); 521 d_invalidate(old_dentry); /* Can't fixup ownership recursively :( */
518 fix_derived_permission(d_inode(old_dentry));
519 get_derive_permissions_recursive(old_dentry);
520 mutex_unlock(&d_inode(old_dentry)->i_mutex);
521out: 522out:
522 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); 523 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
523 dput(lower_old_dir_dentry); 524 dput(lower_old_dir_dentry);
@@ -586,16 +587,63 @@ static const char *sdcardfs_follow_link(struct dentry *dentry, void **cookie)
586} 587}
587#endif 588#endif
588 589
589static int sdcardfs_permission(struct inode *inode, int mask) 590static int sdcardfs_permission_wrn(struct inode *inode, int mask)
591{
592 WARN_RATELIMIT(1, "sdcardfs does not support permission. Use permission2.\n");
593 return -EINVAL;
594}
595
596void copy_attrs(struct inode *dest, const struct inode *src)
597{
598 dest->i_mode = src->i_mode;
599 dest->i_uid = src->i_uid;
600 dest->i_gid = src->i_gid;
601 dest->i_rdev = src->i_rdev;
602 dest->i_atime = src->i_atime;
603 dest->i_mtime = src->i_mtime;
604 dest->i_ctime = src->i_ctime;
605 dest->i_blkbits = src->i_blkbits;
606 dest->i_flags = src->i_flags;
607#ifdef CONFIG_FS_POSIX_ACL
608 dest->i_acl = src->i_acl;
609#endif
610#ifdef CONFIG_SECURITY
611 dest->i_security = src->i_security;
612#endif
613}
614
615static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int mask)
590{ 616{
591 int err; 617 int err;
618 struct inode tmp;
619 struct inode *top = grab_top(SDCARDFS_I(inode));
620
621 if (!top) {
622 release_top(SDCARDFS_I(inode));
623 WARN(1, "Top value was null!\n");
624 return -EINVAL;
625 }
592 626
593 /* 627 /*
594 * Permission check on sdcardfs inode. 628 * Permission check on sdcardfs inode.
595 * Calling process should have AID_SDCARD_RW permission 629 * Calling process should have AID_SDCARD_RW permission
630 * Since generic_permission only needs i_mode, i_uid,
631 * i_gid, and i_sb, we can create a fake inode to pass
632 * this information down in.
633 *
634 * The underlying code may attempt to take locks in some
635 * cases for features we're not using, but if that changes,
636 * locks must be dealt with to avoid undefined behavior.
596 */ 637 */
597 err = generic_permission(inode, mask); 638 copy_attrs(&tmp, inode);
598 639 tmp.i_uid = make_kuid(&init_user_ns, SDCARDFS_I(top)->d_uid);
640 tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, SDCARDFS_I(top)));
641 tmp.i_mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(top));
642 release_top(SDCARDFS_I(inode));
643 tmp.i_sb = inode->i_sb;
644 if (IS_POSIXACL(inode))
645 pr_warn("%s: This may be undefined behavior...\n", __func__);
646 err = generic_permission(&tmp, mask);
599 /* XXX 647 /* XXX
600 * Original sdcardfs code calls inode_permission(lower_inode,.. ) 648 * Original sdcardfs code calls inode_permission(lower_inode,.. )
601 * for checking inode permission. But doing such things here seems 649 * for checking inode permission. But doing such things here seems
@@ -612,6 +660,7 @@ static int sdcardfs_permission(struct inode *inode, int mask)
612 * we check it with AID_MEDIA_RW permission 660 * we check it with AID_MEDIA_RW permission
613 */ 661 */
614 struct inode *lower_inode; 662 struct inode *lower_inode;
663
615 OVERRIDE_CRED(SDCARDFS_SB(inode->sb)); 664 OVERRIDE_CRED(SDCARDFS_SB(inode->sb));
616 665
617 lower_inode = sdcardfs_lower_inode(inode); 666 lower_inode = sdcardfs_lower_inode(inode);
@@ -624,47 +673,86 @@ static int sdcardfs_permission(struct inode *inode, int mask)
624 673
625} 674}
626 675
627static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia) 676static int sdcardfs_setattr_wrn(struct dentry *dentry, struct iattr *ia)
677{
678 WARN_RATELIMIT(1, "sdcardfs does not support setattr. User setattr2.\n");
679 return -EINVAL;
680}
681
682static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct iattr *ia)
628{ 683{
629 int err; 684 int err;
630 struct dentry *lower_dentry; 685 struct dentry *lower_dentry;
686 struct vfsmount *lower_mnt;
631 struct inode *inode; 687 struct inode *inode;
632 struct inode *lower_inode; 688 struct inode *lower_inode;
633 struct path lower_path; 689 struct path lower_path;
634 struct iattr lower_ia; 690 struct iattr lower_ia;
635 struct dentry *parent; 691 struct dentry *parent;
692 struct inode tmp;
693 struct inode *top;
694 const struct cred *saved_cred = NULL;
636 695
637 inode = d_inode(dentry); 696 inode = d_inode(dentry);
697 top = grab_top(SDCARDFS_I(inode));
698
699 if (!top) {
700 release_top(SDCARDFS_I(inode));
701 return -EINVAL;
702 }
703
704 /*
705 * Permission check on sdcardfs inode.
706 * Calling process should have AID_SDCARD_RW permission
707 * Since generic_permission only needs i_mode, i_uid,
708 * i_gid, and i_sb, we can create a fake inode to pass
709 * this information down in.
710 *
711 * The underlying code may attempt to take locks in some
712 * cases for features we're not using, but if that changes,
713 * locks must be dealt with to avoid undefined behavior.
714 *
715 */
716 copy_attrs(&tmp, inode);
717 tmp.i_uid = make_kuid(&init_user_ns, SDCARDFS_I(top)->d_uid);
718 tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, SDCARDFS_I(top)));
719 tmp.i_mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(top));
720 tmp.i_size = i_size_read(inode);
721 release_top(SDCARDFS_I(inode));
722 tmp.i_sb = inode->i_sb;
638 723
639 /* 724 /*
640 * Check if user has permission to change inode. We don't check if 725 * Check if user has permission to change inode. We don't check if
641 * this user can change the lower inode: that should happen when 726 * this user can change the lower inode: that should happen when
642 * calling notify_change on the lower inode. 727 * calling notify_change on the lower inode.
643 */ 728 */
644 err = inode_change_ok(inode, ia); 729 /* prepare our own lower struct iattr (with the lower file) */
730 memcpy(&lower_ia, ia, sizeof(lower_ia));
731 /* Allow touch updating timestamps. A previous permission check ensures
732 * we have write access. Changes to mode, owner, and group are ignored
733 */
734 ia->ia_valid |= ATTR_FORCE;
735 err = inode_change_ok(&tmp, ia);
645 736
646 /* no vfs_XXX operations required, cred overriding will be skipped. wj*/
647 if (!err) { 737 if (!err) {
648 /* check the Android group ID */ 738 /* check the Android group ID */
649 parent = dget_parent(dentry); 739 parent = dget_parent(dentry);
650 if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name)) { 740 if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name))
651 printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
652 " dentry: %s, task:%s\n",
653 __func__, dentry->d_name.name, current->comm);
654 err = -EACCES; 741 err = -EACCES;
655 }
656 dput(parent); 742 dput(parent);
657 } 743 }
658 744
659 if (err) 745 if (err)
660 goto out_err; 746 goto out_err;
661 747
748 /* save current_cred and override it */
749 OVERRIDE_CRED(SDCARDFS_SB(dentry->d_sb), saved_cred, SDCARDFS_I(inode));
750
662 sdcardfs_get_lower_path(dentry, &lower_path); 751 sdcardfs_get_lower_path(dentry, &lower_path);
663 lower_dentry = lower_path.dentry; 752 lower_dentry = lower_path.dentry;
753 lower_mnt = lower_path.mnt;
664 lower_inode = sdcardfs_lower_inode(inode); 754 lower_inode = sdcardfs_lower_inode(inode);
665 755
666 /* prepare our own lower struct iattr (with the lower file) */
667 memcpy(&lower_ia, ia, sizeof(lower_ia));
668 if (ia->ia_valid & ATTR_FILE) 756 if (ia->ia_valid & ATTR_FILE)
669 lower_ia.ia_file = sdcardfs_lower_file(ia->ia_file); 757 lower_ia.ia_file = sdcardfs_lower_file(ia->ia_file);
670 758
@@ -681,7 +769,7 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia)
681 if (current->mm) 769 if (current->mm)
682 down_write(&current->mm->mmap_sem); 770 down_write(&current->mm->mmap_sem);
683 if (ia->ia_valid & ATTR_SIZE) { 771 if (ia->ia_valid & ATTR_SIZE) {
684 err = inode_newsize_ok(inode, ia->ia_size); 772 err = inode_newsize_ok(&tmp, ia->ia_size);
685 if (err) { 773 if (err) {
686 if (current->mm) 774 if (current->mm)
687 up_write(&current->mm->mmap_sem); 775 up_write(&current->mm->mmap_sem);
@@ -704,7 +792,7 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia)
704 * tries to open(), unlink(), then ftruncate() a file. 792 * tries to open(), unlink(), then ftruncate() a file.
705 */ 793 */
706 mutex_lock(&d_inode(lower_dentry)->i_mutex); 794 mutex_lock(&d_inode(lower_dentry)->i_mutex);
707 err = notify_change(lower_dentry, &lower_ia, /* note: lower_ia */ 795 err = notify_change2(lower_mnt, lower_dentry, &lower_ia, /* note: lower_ia */
708 NULL); 796 NULL);
709 mutex_unlock(&d_inode(lower_dentry)->i_mutex); 797 mutex_unlock(&d_inode(lower_dentry)->i_mutex);
710 if (current->mm) 798 if (current->mm)
@@ -723,48 +811,68 @@ static int sdcardfs_setattr(struct dentry *dentry, struct iattr *ia)
723 811
724out: 812out:
725 sdcardfs_put_lower_path(dentry, &lower_path); 813 sdcardfs_put_lower_path(dentry, &lower_path);
814 REVERT_CRED(saved_cred);
726out_err: 815out_err:
727 return err; 816 return err;
728} 817}
729 818
819static int sdcardfs_fillattr(struct vfsmount *mnt,
820 struct inode *inode, struct kstat *stat)
821{
822 struct sdcardfs_inode_info *info = SDCARDFS_I(inode);
823 struct inode *top = grab_top(info);
824
825 if (!top)
826 return -EINVAL;
827
828 stat->dev = inode->i_sb->s_dev;
829 stat->ino = inode->i_ino;
830 stat->mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(top));
831 stat->nlink = inode->i_nlink;
832 stat->uid = make_kuid(&init_user_ns, SDCARDFS_I(top)->d_uid);
833 stat->gid = make_kgid(&init_user_ns, get_gid(mnt, SDCARDFS_I(top)));
834 stat->rdev = inode->i_rdev;
835 stat->size = i_size_read(inode);
836 stat->atime = inode->i_atime;
837 stat->mtime = inode->i_mtime;
838 stat->ctime = inode->i_ctime;
839 stat->blksize = (1 << inode->i_blkbits);
840 stat->blocks = inode->i_blocks;
841 release_top(info);
842 return 0;
843}
844
730static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, 845static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
731 struct kstat *stat) 846 struct kstat *stat)
732{ 847{
733 struct dentry *lower_dentry; 848 struct kstat lower_stat;
734 struct inode *inode;
735 struct inode *lower_inode;
736 struct path lower_path; 849 struct path lower_path;
737 struct dentry *parent; 850 struct dentry *parent;
851 int err;
738 852
739 parent = dget_parent(dentry); 853 parent = dget_parent(dentry);
740 if(!check_caller_access_to_name(d_inode(parent), dentry->d_name.name)) { 854 if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) {
741 printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
742 " dentry: %s, task:%s\n",
743 __func__, dentry->d_name.name, current->comm);
744 dput(parent); 855 dput(parent);
745 return -EACCES; 856 return -EACCES;
746 } 857 }
747 dput(parent); 858 dput(parent);
748 859
749 inode = d_inode(dentry);
750
751 sdcardfs_get_lower_path(dentry, &lower_path); 860 sdcardfs_get_lower_path(dentry, &lower_path);
752 lower_dentry = lower_path.dentry; 861 err = vfs_getattr(&lower_path, &lower_stat);
753 lower_inode = sdcardfs_lower_inode(inode); 862 if (err)
754 863 goto out;
755 864 sdcardfs_copy_and_fix_attrs(d_inode(dentry),
756 sdcardfs_copy_and_fix_attrs(inode, lower_inode); 865 d_inode(lower_path.dentry));
757 fsstack_copy_inode_size(inode, lower_inode); 866 err = sdcardfs_fillattr(mnt, d_inode(dentry), stat);
758 867 stat->blocks = lower_stat.blocks;
759 868out:
760 generic_fillattr(inode, stat);
761 sdcardfs_put_lower_path(dentry, &lower_path); 869 sdcardfs_put_lower_path(dentry, &lower_path);
762 return 0; 870 return err;
763} 871}
764 872
765const struct inode_operations sdcardfs_symlink_iops = { 873const struct inode_operations sdcardfs_symlink_iops = {
766 .permission = sdcardfs_permission, 874 .permission2 = sdcardfs_permission,
767 .setattr = sdcardfs_setattr, 875 .setattr2 = sdcardfs_setattr,
768 /* XXX Following operations are implemented, 876 /* XXX Following operations are implemented,
769 * but FUSE(sdcard) or FAT does not support them 877 * but FUSE(sdcard) or FAT does not support them
770 * These methods are *NOT* perfectly tested. 878 * These methods are *NOT* perfectly tested.
@@ -777,14 +885,14 @@ const struct inode_operations sdcardfs_symlink_iops = {
777const struct inode_operations sdcardfs_dir_iops = { 885const struct inode_operations sdcardfs_dir_iops = {
778 .create = sdcardfs_create, 886 .create = sdcardfs_create,
779 .lookup = sdcardfs_lookup, 887 .lookup = sdcardfs_lookup,
780#if 0 888 .permission = sdcardfs_permission_wrn,
781 .permission = sdcardfs_permission, 889 .permission2 = sdcardfs_permission,
782#endif
783 .unlink = sdcardfs_unlink, 890 .unlink = sdcardfs_unlink,
784 .mkdir = sdcardfs_mkdir, 891 .mkdir = sdcardfs_mkdir,
785 .rmdir = sdcardfs_rmdir, 892 .rmdir = sdcardfs_rmdir,
786 .rename = sdcardfs_rename, 893 .rename = sdcardfs_rename,
787 .setattr = sdcardfs_setattr, 894 .setattr = sdcardfs_setattr_wrn,
895 .setattr2 = sdcardfs_setattr,
788 .getattr = sdcardfs_getattr, 896 .getattr = sdcardfs_getattr,
789 /* XXX Following operations are implemented, 897 /* XXX Following operations are implemented,
790 * but FUSE(sdcard) or FAT does not support them 898 * but FUSE(sdcard) or FAT does not support them
@@ -796,7 +904,9 @@ const struct inode_operations sdcardfs_dir_iops = {
796}; 904};
797 905
798const struct inode_operations sdcardfs_main_iops = { 906const struct inode_operations sdcardfs_main_iops = {
799 .permission = sdcardfs_permission, 907 .permission = sdcardfs_permission_wrn,
800 .setattr = sdcardfs_setattr, 908 .permission2 = sdcardfs_permission,
909 .setattr = sdcardfs_setattr_wrn,
910 .setattr2 = sdcardfs_setattr,
801 .getattr = sdcardfs_getattr, 911 .getattr = sdcardfs_getattr,
802}; 912};
diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c
index a01b06a514fd..509d5fbcb472 100644
--- a/fs/sdcardfs/lookup.c
+++ b/fs/sdcardfs/lookup.c
@@ -36,8 +36,7 @@ int sdcardfs_init_dentry_cache(void)
36 36
37void sdcardfs_destroy_dentry_cache(void) 37void sdcardfs_destroy_dentry_cache(void)
38{ 38{
39 if (sdcardfs_dentry_cachep) 39 kmem_cache_destroy(sdcardfs_dentry_cachep);
40 kmem_cache_destroy(sdcardfs_dentry_cachep);
41} 40}
42 41
43void free_dentry_private_data(struct dentry *dentry) 42void free_dentry_private_data(struct dentry *dentry)
@@ -73,6 +72,7 @@ static int sdcardfs_inode_test(struct inode *inode, void *candidate_data/*void *
73{ 72{
74 struct inode *current_lower_inode = sdcardfs_lower_inode(inode); 73 struct inode *current_lower_inode = sdcardfs_lower_inode(inode);
75 userid_t current_userid = SDCARDFS_I(inode)->userid; 74 userid_t current_userid = SDCARDFS_I(inode)->userid;
75
76 if (current_lower_inode == ((struct inode_data *)candidate_data)->lower_inode && 76 if (current_lower_inode == ((struct inode_data *)candidate_data)->lower_inode &&
77 current_userid == ((struct inode_data *)candidate_data)->id) 77 current_userid == ((struct inode_data *)candidate_data)->id)
78 return 1; /* found a match */ 78 return 1; /* found a match */
@@ -91,7 +91,9 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, u
91 struct sdcardfs_inode_info *info; 91 struct sdcardfs_inode_info *info;
92 struct inode_data data; 92 struct inode_data data;
93 struct inode *inode; /* the new inode to return */ 93 struct inode *inode; /* the new inode to return */
94 int err; 94
95 if (!igrab(lower_inode))
96 return ERR_PTR(-ESTALE);
95 97
96 data.id = id; 98 data.id = id;
97 data.lower_inode = lower_inode; 99 data.lower_inode = lower_inode;
@@ -102,26 +104,23 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, u
102 * instead. 104 * instead.
103 */ 105 */
104 lower_inode->i_ino, /* hashval */ 106 lower_inode->i_ino, /* hashval */
105 sdcardfs_inode_test, /* inode comparison function */ 107 sdcardfs_inode_test, /* inode comparison function */
106 sdcardfs_inode_set, /* inode init function */ 108 sdcardfs_inode_set, /* inode init function */
107 &data); /* data passed to test+set fxns */ 109 &data); /* data passed to test+set fxns */
108 if (!inode) { 110 if (!inode) {
109 err = -EACCES;
110 iput(lower_inode); 111 iput(lower_inode);
111 return ERR_PTR(err); 112 return ERR_PTR(-ENOMEM);
112 } 113 }
113 /* if found a cached inode, then just return it */ 114 /* if found a cached inode, then just return it (after iput) */
114 if (!(inode->i_state & I_NEW)) 115 if (!(inode->i_state & I_NEW)) {
116 iput(lower_inode);
115 return inode; 117 return inode;
118 }
116 119
117 /* initialize new inode */ 120 /* initialize new inode */
118 info = SDCARDFS_I(inode); 121 info = SDCARDFS_I(inode);
119 122
120 inode->i_ino = lower_inode->i_ino; 123 inode->i_ino = lower_inode->i_ino;
121 if (!igrab(lower_inode)) {
122 err = -ESTALE;
123 return ERR_PTR(err);
124 }
125 sdcardfs_set_lower_inode(inode, lower_inode); 124 sdcardfs_set_lower_inode(inode, lower_inode);
126 125
127 inode->i_version++; 126 inode->i_version++;
@@ -164,27 +163,25 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, u
164} 163}
165 164
166/* 165/*
167 * Connect a sdcardfs inode dentry/inode with several lower ones. This is 166 * Helper interpose routine, called directly by ->lookup to handle
168 * the classic stackable file system "vnode interposition" action. 167 * spliced dentries.
169 *
170 * @dentry: sdcardfs's dentry which interposes on lower one
171 * @sb: sdcardfs's super_block
172 * @lower_path: the lower path (caller does path_get/put)
173 */ 168 */
174int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, 169static struct dentry *__sdcardfs_interpose(struct dentry *dentry,
175 struct path *lower_path, userid_t id) 170 struct super_block *sb,
171 struct path *lower_path,
172 userid_t id)
176{ 173{
177 int err = 0;
178 struct inode *inode; 174 struct inode *inode;
179 struct inode *lower_inode; 175 struct inode *lower_inode;
180 struct super_block *lower_sb; 176 struct super_block *lower_sb;
177 struct dentry *ret_dentry;
181 178
182 lower_inode = lower_path->dentry->d_inode; 179 lower_inode = d_inode(lower_path->dentry);
183 lower_sb = sdcardfs_lower_super(sb); 180 lower_sb = sdcardfs_lower_super(sb);
184 181
185 /* check that the lower file system didn't cross a mount point */ 182 /* check that the lower file system didn't cross a mount point */
186 if (lower_inode->i_sb != lower_sb) { 183 if (lower_inode->i_sb != lower_sb) {
187 err = -EXDEV; 184 ret_dentry = ERR_PTR(-EXDEV);
188 goto out; 185 goto out;
189 } 186 }
190 187
@@ -196,14 +193,54 @@ int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb,
196 /* inherit lower inode number for sdcardfs's inode */ 193 /* inherit lower inode number for sdcardfs's inode */
197 inode = sdcardfs_iget(sb, lower_inode, id); 194 inode = sdcardfs_iget(sb, lower_inode, id);
198 if (IS_ERR(inode)) { 195 if (IS_ERR(inode)) {
199 err = PTR_ERR(inode); 196 ret_dentry = ERR_CAST(inode);
200 goto out; 197 goto out;
201 } 198 }
202 199
203 d_add(dentry, inode); 200 ret_dentry = d_splice_alias(inode, dentry);
201 dentry = ret_dentry ?: dentry;
204 update_derived_permission_lock(dentry); 202 update_derived_permission_lock(dentry);
205out: 203out:
206 return err; 204 return ret_dentry;
205}
206
207/*
208 * Connect an sdcardfs inode dentry/inode with several lower ones. This is
209 * the classic stackable file system "vnode interposition" action.
210 *
211 * @dentry: sdcardfs's dentry which interposes on lower one
212 * @sb: sdcardfs's super_block
213 * @lower_path: the lower path (caller does path_get/put)
214 */
215int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb,
216 struct path *lower_path, userid_t id)
217{
218 struct dentry *ret_dentry;
219
220 ret_dentry = __sdcardfs_interpose(dentry, sb, lower_path, id);
221 return PTR_ERR(ret_dentry);
222}
223
224struct sdcardfs_name_data {
225 struct dir_context ctx;
226 const struct qstr *to_find;
227 char *name;
228 bool found;
229};
230
231static int sdcardfs_name_match(struct dir_context *ctx, const char *name,
232 int namelen, loff_t offset, u64 ino, unsigned int d_type)
233{
234 struct sdcardfs_name_data *buf = container_of(ctx, struct sdcardfs_name_data, ctx);
235 struct qstr candidate = QSTR_INIT(name, namelen);
236
237 if (qstr_case_eq(buf->to_find, &candidate)) {
238 memcpy(buf->name, name, namelen);
239 buf->name[namelen] = 0;
240 buf->found = true;
241 return 1;
242 }
243 return 0;
207} 244}
208 245
209/* 246/*
@@ -219,9 +256,10 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry,
219 struct vfsmount *lower_dir_mnt; 256 struct vfsmount *lower_dir_mnt;
220 struct dentry *lower_dir_dentry = NULL; 257 struct dentry *lower_dir_dentry = NULL;
221 struct dentry *lower_dentry; 258 struct dentry *lower_dentry;
222 const char *name; 259 const struct qstr *name;
223 struct path lower_path; 260 struct path lower_path;
224 struct qstr this; 261 struct qstr dname;
262 struct dentry *ret_dentry = NULL;
225 struct sdcardfs_sb_info *sbi; 263 struct sdcardfs_sb_info *sbi;
226 264
227 sbi = SDCARDFS_SB(dentry->d_sb); 265 sbi = SDCARDFS_SB(dentry->d_sb);
@@ -231,47 +269,90 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry,
231 if (IS_ROOT(dentry)) 269 if (IS_ROOT(dentry))
232 goto out; 270 goto out;
233 271
234 name = dentry->d_name.name; 272 name = &dentry->d_name;
235 273
236 /* now start the actual lookup procedure */ 274 /* now start the actual lookup procedure */
237 lower_dir_dentry = lower_parent_path->dentry; 275 lower_dir_dentry = lower_parent_path->dentry;
238 lower_dir_mnt = lower_parent_path->mnt; 276 lower_dir_mnt = lower_parent_path->mnt;
239 277
240 /* Use vfs_path_lookup to check if the dentry exists or not */ 278 /* Use vfs_path_lookup to check if the dentry exists or not */
241 err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0, 279 err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name->name, 0,
242 &lower_path); 280 &lower_path);
281 /* check for other cases */
282 if (err == -ENOENT) {
283 struct file *file;
284 const struct cred *cred = current_cred();
285
286 struct sdcardfs_name_data buffer = {
287 .ctx.actor = sdcardfs_name_match,
288 .to_find = name,
289 .name = __getname(),
290 .found = false,
291 };
292
293 if (!buffer.name) {
294 err = -ENOMEM;
295 goto out;
296 }
297 file = dentry_open(lower_parent_path, O_RDONLY, cred);
298 if (IS_ERR(file)) {
299 err = PTR_ERR(file);
300 goto put_name;
301 }
302 err = iterate_dir(file, &buffer.ctx);
303 fput(file);
304 if (err)
305 goto put_name;
306
307 if (buffer.found)
308 err = vfs_path_lookup(lower_dir_dentry,
309 lower_dir_mnt,
310 buffer.name, 0,
311 &lower_path);
312 else
313 err = -ENOENT;
314put_name:
315 __putname(buffer.name);
316 }
243 317
244 /* no error: handle positive dentries */ 318 /* no error: handle positive dentries */
245 if (!err) { 319 if (!err) {
246 /* check if the dentry is an obb dentry 320 /* check if the dentry is an obb dentry
247 * if true, the lower_inode must be replaced with 321 * if true, the lower_inode must be replaced with
248 * the inode of the graft path */ 322 * the inode of the graft path
323 */
249 324
250 if(need_graft_path(dentry)) { 325 if (need_graft_path(dentry)) {
251 326
252 /* setup_obb_dentry() 327 /* setup_obb_dentry()
253 * The lower_path will be stored to the dentry's orig_path 328 * The lower_path will be stored to the dentry's orig_path
254 * and the base obbpath will be copyed to the lower_path variable. 329 * and the base obbpath will be copyed to the lower_path variable.
255 * if an error returned, there's no change in the lower_path 330 * if an error returned, there's no change in the lower_path
256 * returns: -ERRNO if error (0: no error) */ 331 * returns: -ERRNO if error (0: no error)
332 */
257 err = setup_obb_dentry(dentry, &lower_path); 333 err = setup_obb_dentry(dentry, &lower_path);
258 334
259 if(err) { 335 if (err) {
260 /* if the sbi->obbpath is not available, we can optionally 336 /* if the sbi->obbpath is not available, we can optionally
261 * setup the lower_path with its orig_path. 337 * setup the lower_path with its orig_path.
262 * but, the current implementation just returns an error 338 * but, the current implementation just returns an error
263 * because the sdcard daemon also regards this case as 339 * because the sdcard daemon also regards this case as
264 * a lookup fail. */ 340 * a lookup fail.
265 printk(KERN_INFO "sdcardfs: base obbpath is not available\n"); 341 */
342 pr_info("sdcardfs: base obbpath is not available\n");
266 sdcardfs_put_reset_orig_path(dentry); 343 sdcardfs_put_reset_orig_path(dentry);
267 goto out; 344 goto out;
268 } 345 }
269 } 346 }
270 347
271 sdcardfs_set_lower_path(dentry, &lower_path); 348 sdcardfs_set_lower_path(dentry, &lower_path);
272 err = sdcardfs_interpose(dentry, dentry->d_sb, &lower_path, id); 349 ret_dentry =
273 if (err) /* path_put underlying path on error */ 350 __sdcardfs_interpose(dentry, dentry->d_sb, &lower_path, id);
351 if (IS_ERR(ret_dentry)) {
352 err = PTR_ERR(ret_dentry);
353 /* path_put underlying path on error */
274 sdcardfs_put_reset_lower_path(dentry); 354 sdcardfs_put_reset_lower_path(dentry);
355 }
275 goto out; 356 goto out;
276 } 357 }
277 358
@@ -283,21 +364,24 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry,
283 goto out; 364 goto out;
284 365
285 /* instatiate a new negative dentry */ 366 /* instatiate a new negative dentry */
286 this.name = name; 367 dname.name = name->name;
287 this.len = strlen(name); 368 dname.len = name->len;
288 this.hash = full_name_hash(this.name, this.len); 369
289 lower_dentry = d_lookup(lower_dir_dentry, &this); 370 /* See if the low-level filesystem might want
290 if (lower_dentry) 371 * to use its own hash
291 goto setup_lower; 372 */
292 373 lower_dentry = d_hash_and_lookup(lower_dir_dentry, &dname);
293 lower_dentry = d_alloc(lower_dir_dentry, &this); 374 if (IS_ERR(lower_dentry))
375 return lower_dentry;
294 if (!lower_dentry) { 376 if (!lower_dentry) {
295 err = -ENOMEM; 377 /* We called vfs_path_lookup earlier, and did not get a negative
378 * dentry then. Don't confuse the lower filesystem by forcing
379 * one on it now...
380 */
381 err = -ENOENT;
296 goto out; 382 goto out;
297 } 383 }
298 d_add(lower_dentry, NULL); /* instantiate and hash */
299 384
300setup_lower:
301 lower_path.dentry = lower_dentry; 385 lower_path.dentry = lower_dentry;
302 lower_path.mnt = mntget(lower_dir_mnt); 386 lower_path.mnt = mntget(lower_dir_mnt);
303 sdcardfs_set_lower_path(dentry, &lower_path); 387 sdcardfs_set_lower_path(dentry, &lower_path);
@@ -311,14 +395,16 @@ setup_lower:
311 err = 0; 395 err = 0;
312 396
313out: 397out:
314 return ERR_PTR(err); 398 if (err)
399 return ERR_PTR(err);
400 return ret_dentry;
315} 401}
316 402
317/* 403/*
318 * On success: 404 * On success:
319 * fills dentry object appropriate values and returns NULL. 405 * fills dentry object appropriate values and returns NULL.
320 * On fail (== error) 406 * On fail (== error)
321 * returns error ptr 407 * returns error ptr
322 * 408 *
323 * @dir : Parent inode. It is locked (dir->i_mutex) 409 * @dir : Parent inode. It is locked (dir->i_mutex)
324 * @dentry : Target dentry to lookup. we should set each of fields. 410 * @dentry : Target dentry to lookup. we should set each of fields.
@@ -335,16 +421,13 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
335 421
336 parent = dget_parent(dentry); 422 parent = dget_parent(dentry);
337 423
338 if(!check_caller_access_to_name(parent->d_inode, dentry->d_name.name)) { 424 if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) {
339 ret = ERR_PTR(-EACCES); 425 ret = ERR_PTR(-EACCES);
340 printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n"
341 " dentry: %s, task:%s\n",
342 __func__, dentry->d_name.name, current->comm);
343 goto out_err; 426 goto out_err;
344 } 427 }
345 428
346 /* save current_cred and override it */ 429 /* save current_cred and override it */
347 OVERRIDE_CRED_PTR(SDCARDFS_SB(dir->i_sb), saved_cred); 430 OVERRIDE_CRED_PTR(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir));
348 431
349 sdcardfs_get_lower_path(parent, &lower_parent_path); 432 sdcardfs_get_lower_path(parent, &lower_parent_path);
350 433
@@ -357,23 +440,20 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry,
357 440
358 ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path, SDCARDFS_I(dir)->userid); 441 ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path, SDCARDFS_I(dir)->userid);
359 if (IS_ERR(ret)) 442 if (IS_ERR(ret))
360 {
361 goto out; 443 goto out;
362 }
363 if (ret) 444 if (ret)
364 dentry = ret; 445 dentry = ret;
365 if (dentry->d_inode) { 446 if (d_inode(dentry)) {
366 fsstack_copy_attr_times(dentry->d_inode, 447 fsstack_copy_attr_times(d_inode(dentry),
367 sdcardfs_lower_inode(dentry->d_inode)); 448 sdcardfs_lower_inode(d_inode(dentry)));
368 /* get drived permission */ 449 /* get derived permission */
369 mutex_lock(&dentry->d_inode->i_mutex);
370 get_derived_permission(parent, dentry); 450 get_derived_permission(parent, dentry);
371 fix_derived_permission(dentry->d_inode); 451 fixup_tmp_permissions(d_inode(dentry));
372 mutex_unlock(&dentry->d_inode->i_mutex); 452 fixup_lower_ownership(dentry, dentry->d_name.name);
373 } 453 }
374 /* update parent directory's atime */ 454 /* update parent directory's atime */
375 fsstack_copy_attr_atime(parent->d_inode, 455 fsstack_copy_attr_atime(d_inode(parent),
376 sdcardfs_lower_inode(parent->d_inode)); 456 sdcardfs_lower_inode(d_inode(parent)));
377 457
378out: 458out:
379 sdcardfs_put_lower_path(parent, &lower_parent_path); 459 sdcardfs_put_lower_path(parent, &lower_parent_path);
diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c
index a6522286d731..953d2156d2e9 100644
--- a/fs/sdcardfs/main.c
+++ b/fs/sdcardfs/main.c
@@ -28,9 +28,8 @@ enum {
28 Opt_fsgid, 28 Opt_fsgid,
29 Opt_gid, 29 Opt_gid,
30 Opt_debug, 30 Opt_debug,
31 Opt_lower_fs,
32 Opt_mask, 31 Opt_mask,
33 Opt_multiuser, // May need? 32 Opt_multiuser,
34 Opt_userid, 33 Opt_userid,
35 Opt_reserved_mb, 34 Opt_reserved_mb,
36 Opt_err, 35 Opt_err,
@@ -49,7 +48,8 @@ static const match_table_t sdcardfs_tokens = {
49}; 48};
50 49
51static int parse_options(struct super_block *sb, char *options, int silent, 50static int parse_options(struct super_block *sb, char *options, int silent,
52 int *debug, struct sdcardfs_mount_options *opts) 51 int *debug, struct sdcardfs_vfsmount_options *vfsopts,
52 struct sdcardfs_mount_options *opts)
53{ 53{
54 char *p; 54 char *p;
55 substring_t args[MAX_OPT_ARGS]; 55 substring_t args[MAX_OPT_ARGS];
@@ -58,10 +58,10 @@ static int parse_options(struct super_block *sb, char *options, int silent,
58 /* by default, we use AID_MEDIA_RW as uid, gid */ 58 /* by default, we use AID_MEDIA_RW as uid, gid */
59 opts->fs_low_uid = AID_MEDIA_RW; 59 opts->fs_low_uid = AID_MEDIA_RW;
60 opts->fs_low_gid = AID_MEDIA_RW; 60 opts->fs_low_gid = AID_MEDIA_RW;
61 opts->mask = 0; 61 vfsopts->mask = 0;
62 opts->multiuser = false; 62 opts->multiuser = false;
63 opts->fs_user_id = 0; 63 opts->fs_user_id = 0;
64 opts->gid = 0; 64 vfsopts->gid = 0;
65 /* by default, 0MB is reserved */ 65 /* by default, 0MB is reserved */
66 opts->reserved_mb = 0; 66 opts->reserved_mb = 0;
67 67
@@ -72,6 +72,7 @@ static int parse_options(struct super_block *sb, char *options, int silent,
72 72
73 while ((p = strsep(&options, ",")) != NULL) { 73 while ((p = strsep(&options, ",")) != NULL) {
74 int token; 74 int token;
75
75 if (!*p) 76 if (!*p)
76 continue; 77 continue;
77 78
@@ -94,7 +95,7 @@ static int parse_options(struct super_block *sb, char *options, int silent,
94 case Opt_gid: 95 case Opt_gid:
95 if (match_int(&args[0], &option)) 96 if (match_int(&args[0], &option))
96 return 0; 97 return 0;
97 opts->gid = option; 98 vfsopts->gid = option;
98 break; 99 break;
99 case Opt_userid: 100 case Opt_userid:
100 if (match_int(&args[0], &option)) 101 if (match_int(&args[0], &option))
@@ -104,7 +105,7 @@ static int parse_options(struct super_block *sb, char *options, int silent,
104 case Opt_mask: 105 case Opt_mask:
105 if (match_int(&args[0], &option)) 106 if (match_int(&args[0], &option))
106 return 0; 107 return 0;
107 opts->mask = option; 108 vfsopts->mask = option;
108 break; 109 break;
109 case Opt_multiuser: 110 case Opt_multiuser:
110 opts->multiuser = true; 111 opts->multiuser = true;
@@ -116,25 +117,81 @@ static int parse_options(struct super_block *sb, char *options, int silent,
116 break; 117 break;
117 /* unknown option */ 118 /* unknown option */
118 default: 119 default:
119 if (!silent) { 120 if (!silent)
120 printk( KERN_ERR "Unrecognized mount option \"%s\" " 121 pr_err("Unrecognized mount option \"%s\" or missing value", p);
121 "or missing value", p);
122 }
123 return -EINVAL; 122 return -EINVAL;
124 } 123 }
125 } 124 }
126 125
127 if (*debug) { 126 if (*debug) {
128 printk( KERN_INFO "sdcardfs : options - debug:%d\n", *debug); 127 pr_info("sdcardfs : options - debug:%d\n", *debug);
129 printk( KERN_INFO "sdcardfs : options - uid:%d\n", 128 pr_info("sdcardfs : options - uid:%d\n",
130 opts->fs_low_uid); 129 opts->fs_low_uid);
131 printk( KERN_INFO "sdcardfs : options - gid:%d\n", 130 pr_info("sdcardfs : options - gid:%d\n",
132 opts->fs_low_gid); 131 opts->fs_low_gid);
133 } 132 }
134 133
135 return 0; 134 return 0;
136} 135}
137 136
137int parse_options_remount(struct super_block *sb, char *options, int silent,
138 struct sdcardfs_vfsmount_options *vfsopts)
139{
140 char *p;
141 substring_t args[MAX_OPT_ARGS];
142 int option;
143 int debug;
144
145 if (!options)
146 return 0;
147
148 while ((p = strsep(&options, ",")) != NULL) {
149 int token;
150
151 if (!*p)
152 continue;
153
154 token = match_token(p, sdcardfs_tokens, args);
155
156 switch (token) {
157 case Opt_debug:
158 debug = 1;
159 break;
160 case Opt_gid:
161 if (match_int(&args[0], &option))
162 return 0;
163 vfsopts->gid = option;
164
165 break;
166 case Opt_mask:
167 if (match_int(&args[0], &option))
168 return 0;
169 vfsopts->mask = option;
170 break;
171 case Opt_multiuser:
172 case Opt_userid:
173 case Opt_fsuid:
174 case Opt_fsgid:
175 case Opt_reserved_mb:
176 pr_warn("Option \"%s\" can't be changed during remount\n", p);
177 break;
178 /* unknown option */
179 default:
180 if (!silent)
181 pr_err("Unrecognized mount option \"%s\" or missing value", p);
182 return -EINVAL;
183 }
184 }
185
186 if (debug) {
187 pr_info("sdcardfs : options - debug:%d\n", debug);
188 pr_info("sdcardfs : options - gid:%d\n", vfsopts->gid);
189 pr_info("sdcardfs : options - mask:%d\n", vfsopts->mask);
190 }
191
192 return 0;
193}
194
138#if 0 195#if 0
139/* 196/*
140 * our custom d_alloc_root work-alike 197 * our custom d_alloc_root work-alike
@@ -164,57 +221,58 @@ static struct dentry *sdcardfs_d_alloc_root(struct super_block *sb)
164#endif 221#endif
165 222
166DEFINE_MUTEX(sdcardfs_super_list_lock); 223DEFINE_MUTEX(sdcardfs_super_list_lock);
167LIST_HEAD(sdcardfs_super_list);
168EXPORT_SYMBOL_GPL(sdcardfs_super_list_lock); 224EXPORT_SYMBOL_GPL(sdcardfs_super_list_lock);
225LIST_HEAD(sdcardfs_super_list);
169EXPORT_SYMBOL_GPL(sdcardfs_super_list); 226EXPORT_SYMBOL_GPL(sdcardfs_super_list);
170 227
171/* 228/*
172 * There is no need to lock the sdcardfs_super_info's rwsem as there is no 229 * There is no need to lock the sdcardfs_super_info's rwsem as there is no
173 * way anyone can have a reference to the superblock at this point in time. 230 * way anyone can have a reference to the superblock at this point in time.
174 */ 231 */
175static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, 232static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb,
176 void *raw_data, int silent) 233 const char *dev_name, void *raw_data, int silent)
177{ 234{
178 int err = 0; 235 int err = 0;
179 int debug; 236 int debug;
180 struct super_block *lower_sb; 237 struct super_block *lower_sb;
181 struct path lower_path; 238 struct path lower_path;
182 struct sdcardfs_sb_info *sb_info; 239 struct sdcardfs_sb_info *sb_info;
240 struct sdcardfs_vfsmount_options *mnt_opt = mnt->data;
183 struct inode *inode; 241 struct inode *inode;
184 242
185 printk(KERN_INFO "sdcardfs version 2.0\n"); 243 pr_info("sdcardfs version 2.0\n");
186 244
187 if (!dev_name) { 245 if (!dev_name) {
188 printk(KERN_ERR 246 pr_err("sdcardfs: read_super: missing dev_name argument\n");
189 "sdcardfs: read_super: missing dev_name argument\n");
190 err = -EINVAL; 247 err = -EINVAL;
191 goto out; 248 goto out;
192 } 249 }
193 250
194 printk(KERN_INFO "sdcardfs: dev_name -> %s\n", dev_name); 251 pr_info("sdcardfs: dev_name -> %s\n", dev_name);
195 printk(KERN_INFO "sdcardfs: options -> %s\n", (char *)raw_data); 252 pr_info("sdcardfs: options -> %s\n", (char *)raw_data);
253 pr_info("sdcardfs: mnt -> %p\n", mnt);
196 254
197 /* parse lower path */ 255 /* parse lower path */
198 err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, 256 err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
199 &lower_path); 257 &lower_path);
200 if (err) { 258 if (err) {
201 printk(KERN_ERR "sdcardfs: error accessing lower directory '%s'\n", dev_name); 259 pr_err("sdcardfs: error accessing lower directory '%s'\n", dev_name);
202 goto out; 260 goto out;
203 } 261 }
204 262
205 /* allocate superblock private data */ 263 /* allocate superblock private data */
206 sb->s_fs_info = kzalloc(sizeof(struct sdcardfs_sb_info), GFP_KERNEL); 264 sb->s_fs_info = kzalloc(sizeof(struct sdcardfs_sb_info), GFP_KERNEL);
207 if (!SDCARDFS_SB(sb)) { 265 if (!SDCARDFS_SB(sb)) {
208 printk(KERN_CRIT "sdcardfs: read_super: out of memory\n"); 266 pr_crit("sdcardfs: read_super: out of memory\n");
209 err = -ENOMEM; 267 err = -ENOMEM;
210 goto out_free; 268 goto out_free;
211 } 269 }
212 270
213 sb_info = sb->s_fs_info; 271 sb_info = sb->s_fs_info;
214 /* parse options */ 272 /* parse options */
215 err = parse_options(sb, raw_data, silent, &debug, &sb_info->options); 273 err = parse_options(sb, raw_data, silent, &debug, mnt_opt, &sb_info->options);
216 if (err) { 274 if (err) {
217 printk(KERN_ERR "sdcardfs: invalid options\n"); 275 pr_err("sdcardfs: invalid options\n");
218 goto out_freesbi; 276 goto out_freesbi;
219 } 277 }
220 278
@@ -236,7 +294,7 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name,
236 sb->s_op = &sdcardfs_sops; 294 sb->s_op = &sdcardfs_sops;
237 295
238 /* get a new inode and allocate our root dentry */ 296 /* get a new inode and allocate our root dentry */
239 inode = sdcardfs_iget(sb, lower_path.dentry->d_inode, 0); 297 inode = sdcardfs_iget(sb, d_inode(lower_path.dentry), 0);
240 if (IS_ERR(inode)) { 298 if (IS_ERR(inode)) {
241 err = PTR_ERR(inode); 299 err = PTR_ERR(inode);
242 goto out_sput; 300 goto out_sput;
@@ -267,23 +325,24 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name,
267 /* setup permission policy */ 325 /* setup permission policy */
268 sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); 326 sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL);
269 mutex_lock(&sdcardfs_super_list_lock); 327 mutex_lock(&sdcardfs_super_list_lock);
270 if(sb_info->options.multiuser) { 328 if (sb_info->options.multiuser) {
271 setup_derived_state(sb->s_root->d_inode, PERM_PRE_ROOT, sb_info->options.fs_user_id, AID_ROOT, false); 329 setup_derived_state(d_inode(sb->s_root), PERM_PRE_ROOT,
330 sb_info->options.fs_user_id, AID_ROOT,
331 false, d_inode(sb->s_root));
272 snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); 332 snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name);
273 /*err = prepare_dir(sb_info->obbpath_s,
274 sb_info->options.fs_low_uid,
275 sb_info->options.fs_low_gid, 00755);*/
276 } else { 333 } else {
277 setup_derived_state(sb->s_root->d_inode, PERM_ROOT, sb_info->options.fs_low_uid, AID_ROOT, false); 334 setup_derived_state(d_inode(sb->s_root), PERM_ROOT,
335 sb_info->options.fs_user_id, AID_ROOT,
336 false, d_inode(sb->s_root));
278 snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); 337 snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name);
279 } 338 }
280 fix_derived_permission(sb->s_root->d_inode); 339 fixup_tmp_permissions(d_inode(sb->s_root));
281 sb_info->sb = sb; 340 sb_info->sb = sb;
282 list_add(&sb_info->list, &sdcardfs_super_list); 341 list_add(&sb_info->list, &sdcardfs_super_list);
283 mutex_unlock(&sdcardfs_super_list_lock); 342 mutex_unlock(&sdcardfs_super_list_lock);
284 343
285 if (!silent) 344 if (!silent)
286 printk(KERN_INFO "sdcardfs: mounted on top of %s type %s\n", 345 pr_info("sdcardfs: mounted on top of %s type %s\n",
287 dev_name, lower_sb->s_type->name); 346 dev_name, lower_sb->s_type->name);
288 goto out; /* all is well */ 347 goto out; /* all is well */
289 348
@@ -306,9 +365,11 @@ out:
306} 365}
307 366
308/* A feature which supports mount_nodev() with options */ 367/* A feature which supports mount_nodev() with options */
309static struct dentry *mount_nodev_with_options(struct file_system_type *fs_type, 368static struct dentry *mount_nodev_with_options(struct vfsmount *mnt,
310 int flags, const char *dev_name, void *data, 369 struct file_system_type *fs_type, int flags,
311 int (*fill_super)(struct super_block *, const char *, void *, int)) 370 const char *dev_name, void *data,
371 int (*fill_super)(struct vfsmount *, struct super_block *,
372 const char *, void *, int))
312 373
313{ 374{
314 int error; 375 int error;
@@ -319,7 +380,7 @@ static struct dentry *mount_nodev_with_options(struct file_system_type *fs_type,
319 380
320 s->s_flags = flags; 381 s->s_flags = flags;
321 382
322 error = fill_super(s, dev_name, data, flags & MS_SILENT ? 1 : 0); 383 error = fill_super(mnt, s, dev_name, data, flags & MS_SILENT ? 1 : 0);
323 if (error) { 384 if (error) {
324 deactivate_locked_super(s); 385 deactivate_locked_super(s);
325 return ERR_PTR(error); 386 return ERR_PTR(error);
@@ -328,19 +389,34 @@ static struct dentry *mount_nodev_with_options(struct file_system_type *fs_type,
328 return dget(s->s_root); 389 return dget(s->s_root);
329} 390}
330 391
331struct dentry *sdcardfs_mount(struct file_system_type *fs_type, int flags, 392static struct dentry *sdcardfs_mount(struct vfsmount *mnt,
393 struct file_system_type *fs_type, int flags,
332 const char *dev_name, void *raw_data) 394 const char *dev_name, void *raw_data)
333{ 395{
334 /* 396 /*
335 * dev_name is a lower_path_name, 397 * dev_name is a lower_path_name,
336 * raw_data is a option string. 398 * raw_data is a option string.
337 */ 399 */
338 return mount_nodev_with_options(fs_type, flags, dev_name, 400 return mount_nodev_with_options(mnt, fs_type, flags, dev_name,
339 raw_data, sdcardfs_read_super); 401 raw_data, sdcardfs_read_super);
402}
403
404static struct dentry *sdcardfs_mount_wrn(struct file_system_type *fs_type,
405 int flags, const char *dev_name, void *raw_data)
406{
407 WARN(1, "sdcardfs does not support mount. Use mount2.\n");
408 return ERR_PTR(-EINVAL);
340} 409}
341 410
342void sdcardfs_kill_sb(struct super_block *sb) { 411void *sdcardfs_alloc_mnt_data(void)
412{
413 return kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL);
414}
415
416void sdcardfs_kill_sb(struct super_block *sb)
417{
343 struct sdcardfs_sb_info *sbi; 418 struct sdcardfs_sb_info *sbi;
419
344 if (sb->s_magic == SDCARDFS_SUPER_MAGIC) { 420 if (sb->s_magic == SDCARDFS_SUPER_MAGIC) {
345 sbi = SDCARDFS_SB(sb); 421 sbi = SDCARDFS_SB(sb);
346 mutex_lock(&sdcardfs_super_list_lock); 422 mutex_lock(&sdcardfs_super_list_lock);
@@ -353,10 +429,13 @@ void sdcardfs_kill_sb(struct super_block *sb) {
353static struct file_system_type sdcardfs_fs_type = { 429static struct file_system_type sdcardfs_fs_type = {
354 .owner = THIS_MODULE, 430 .owner = THIS_MODULE,
355 .name = SDCARDFS_NAME, 431 .name = SDCARDFS_NAME,
356 .mount = sdcardfs_mount, 432 .mount = sdcardfs_mount_wrn,
433 .mount2 = sdcardfs_mount,
434 .alloc_mnt_data = sdcardfs_alloc_mnt_data,
357 .kill_sb = sdcardfs_kill_sb, 435 .kill_sb = sdcardfs_kill_sb,
358 .fs_flags = 0, 436 .fs_flags = 0,
359}; 437};
438MODULE_ALIAS_FS(SDCARDFS_NAME);
360 439
361static int __init init_sdcardfs_fs(void) 440static int __init init_sdcardfs_fs(void)
362{ 441{
@@ -392,10 +471,15 @@ static void __exit exit_sdcardfs_fs(void)
392 pr_info("Completed sdcardfs module unload\n"); 471 pr_info("Completed sdcardfs module unload\n");
393} 472}
394 473
395MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University" 474/* Original wrapfs authors */
396 " (http://www.fsl.cs.sunysb.edu/)"); 475MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University (http://www.fsl.cs.sunysb.edu/)");
397MODULE_DESCRIPTION("Wrapfs " SDCARDFS_VERSION 476
398 " (http://wrapfs.filesystems.org/)"); 477/* Original sdcardfs authors */
478MODULE_AUTHOR("Woojoong Lee, Daeho Jeong, Kitae Lee, Yeongjin Gil System Memory Lab., Samsung Electronics");
479
480/* Current maintainer */
481MODULE_AUTHOR("Daniel Rosenberg, Google");
482MODULE_DESCRIPTION("Sdcardfs " SDCARDFS_VERSION);
399MODULE_LICENSE("GPL"); 483MODULE_LICENSE("GPL");
400 484
401module_init(init_sdcardfs_fs); 485module_init(init_sdcardfs_fs);
diff --git a/fs/sdcardfs/mmap.c b/fs/sdcardfs/mmap.c
index e21f64675a80..b61f82275e7d 100644
--- a/fs/sdcardfs/mmap.c
+++ b/fs/sdcardfs/mmap.c
@@ -23,28 +23,46 @@
23static int sdcardfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 23static int sdcardfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
24{ 24{
25 int err; 25 int err;
26 struct file *file, *lower_file; 26 struct file *file;
27 const struct vm_operations_struct *lower_vm_ops; 27 const struct vm_operations_struct *lower_vm_ops;
28 struct vm_area_struct lower_vma;
29 28
30 memcpy(&lower_vma, vma, sizeof(struct vm_area_struct)); 29 file = (struct file *)vma->vm_private_data;
31 file = lower_vma.vm_file;
32 lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops; 30 lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops;
33 BUG_ON(!lower_vm_ops); 31 BUG_ON(!lower_vm_ops);
34 32
35 lower_file = sdcardfs_lower_file(file); 33 err = lower_vm_ops->fault(vma, vmf);
36 /* 34 return err;
37 * XXX: vm_ops->fault may be called in parallel. Because we have to 35}
38 * resort to temporarily changing the vma->vm_file to point to the 36
39 * lower file, a concurrent invocation of sdcardfs_fault could see a 37static void sdcardfs_vm_open(struct vm_area_struct *vma)
40 * different value. In this workaround, we keep a different copy of 38{
41 * the vma structure in our stack, so we never expose a different 39 struct file *file = (struct file *)vma->vm_private_data;
42 * value of the vma->vm_file called to us, even temporarily. A 40
43 * better fix would be to change the calling semantics of ->fault to 41 get_file(file);
44 * take an explicit file pointer. 42}
45 */ 43
46 lower_vma.vm_file = lower_file; 44static void sdcardfs_vm_close(struct vm_area_struct *vma)
47 err = lower_vm_ops->fault(&lower_vma, vmf); 45{
46 struct file *file = (struct file *)vma->vm_private_data;
47
48 fput(file);
49}
50
51static int sdcardfs_page_mkwrite(struct vm_area_struct *vma,
52 struct vm_fault *vmf)
53{
54 int err = 0;
55 struct file *file;
56 const struct vm_operations_struct *lower_vm_ops;
57
58 file = (struct file *)vma->vm_private_data;
59 lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops;
60 BUG_ON(!lower_vm_ops);
61 if (!lower_vm_ops->page_mkwrite)
62 goto out;
63
64 err = lower_vm_ops->page_mkwrite(vma, vmf);
65out:
48 return err; 66 return err;
49} 67}
50 68
@@ -52,30 +70,20 @@ static ssize_t sdcardfs_direct_IO(struct kiocb *iocb,
52 struct iov_iter *iter, loff_t pos) 70 struct iov_iter *iter, loff_t pos)
53{ 71{
54 /* 72 /*
55 * This function returns zero on purpose in order to support direct IO. 73 * This function should never be called directly. We need it
56 * __dentry_open checks a_ops->direct_IO and returns EINVAL if it is null. 74 * to exist, to get past a check in open_check_o_direct(),
57 * 75 * which is called from do_last().
58 * However, this function won't be called by certain file operations
59 * including generic fs functions. * reads and writes are delivered to
60 * the lower file systems and the direct IOs will be handled by them.
61 *
62 * NOTE: exceptionally, on the recent kernels (since Linux 3.8.x),
63 * swap_writepage invokes this function directly.
64 */ 76 */
65 printk(KERN_INFO "%s, operation is not supported\n", __func__); 77 return -EINVAL;
66 return 0;
67} 78}
68 79
69/*
70 * XXX: the default address_space_ops for sdcardfs is empty. We cannot set
71 * our inode->i_mapping->a_ops to NULL because too many code paths expect
72 * the a_ops vector to be non-NULL.
73 */
74const struct address_space_operations sdcardfs_aops = { 80const struct address_space_operations sdcardfs_aops = {
75 /* empty on purpose */
76 .direct_IO = sdcardfs_direct_IO, 81 .direct_IO = sdcardfs_direct_IO,
77}; 82};
78 83
79const struct vm_operations_struct sdcardfs_vm_ops = { 84const struct vm_operations_struct sdcardfs_vm_ops = {
80 .fault = sdcardfs_fault, 85 .fault = sdcardfs_fault,
86 .page_mkwrite = sdcardfs_page_mkwrite,
87 .open = sdcardfs_vm_open,
88 .close = sdcardfs_vm_close,
81}; 89};
diff --git a/fs/sdcardfs/multiuser.h b/fs/sdcardfs/multiuser.h
index 923ba101dfa9..85341e753f8c 100644
--- a/fs/sdcardfs/multiuser.h
+++ b/fs/sdcardfs/multiuser.h
@@ -18,20 +18,36 @@
18 * General Public License. 18 * General Public License.
19 */ 19 */
20 20
21#define MULTIUSER_APP_PER_USER_RANGE 100000 21#define AID_USER_OFFSET 100000 /* offset for uid ranges for each user */
22#define AID_APP_START 10000 /* first app user */
23#define AID_APP_END 19999 /* last app user */
24#define AID_CACHE_GID_START 20000 /* start of gids for apps to mark cached data */
25#define AID_EXT_GID_START 30000 /* start of gids for apps to mark external data */
26#define AID_EXT_CACHE_GID_START 40000 /* start of gids for apps to mark external cached data */
27#define AID_EXT_CACHE_GID_END 49999 /* end of gids for apps to mark external cached data */
28#define AID_SHARED_GID_START 50000 /* start of gids for apps in each user to share */
22 29
23typedef uid_t userid_t; 30typedef uid_t userid_t;
24typedef uid_t appid_t; 31typedef uid_t appid_t;
25 32
26static inline userid_t multiuser_get_user_id(uid_t uid) { 33static inline uid_t multiuser_get_uid(userid_t user_id, appid_t app_id)
27 return uid / MULTIUSER_APP_PER_USER_RANGE; 34{
35 return (user_id * AID_USER_OFFSET) + (app_id % AID_USER_OFFSET);
28} 36}
29 37
30static inline appid_t multiuser_get_app_id(uid_t uid) { 38static inline bool uid_is_app(uid_t uid)
31 return uid % MULTIUSER_APP_PER_USER_RANGE; 39{
40 appid_t appid = uid % AID_USER_OFFSET;
41
42 return appid >= AID_APP_START && appid <= AID_APP_END;
32} 43}
33 44
34static inline uid_t multiuser_get_uid(userid_t userId, appid_t appId) { 45static inline gid_t multiuser_get_ext_cache_gid(uid_t uid)
35 return userId * MULTIUSER_APP_PER_USER_RANGE + (appId % MULTIUSER_APP_PER_USER_RANGE); 46{
47 return uid - AID_APP_START + AID_EXT_CACHE_GID_START;
36} 48}
37 49
50static inline gid_t multiuser_get_ext_gid(uid_t uid)
51{
52 return uid - AID_APP_START + AID_EXT_GID_START;
53}
diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c
index 9c3340528eee..89196e31073e 100644
--- a/fs/sdcardfs/packagelist.c
+++ b/fs/sdcardfs/packagelist.c
@@ -20,8 +20,10 @@
20 20
21#include "sdcardfs.h" 21#include "sdcardfs.h"
22#include <linux/hashtable.h> 22#include <linux/hashtable.h>
23#include <linux/ctype.h>
23#include <linux/delay.h> 24#include <linux/delay.h>
24 25#include <linux/radix-tree.h>
26#include <linux/dcache.h>
25 27
26#include <linux/init.h> 28#include <linux/init.h>
27#include <linux/module.h> 29#include <linux/module.h>
@@ -29,386 +31,835 @@
29 31
30#include <linux/configfs.h> 32#include <linux/configfs.h>
31 33
32#define STRING_BUF_SIZE (512)
33
34struct hashtable_entry { 34struct hashtable_entry {
35 struct hlist_node hlist; 35 struct hlist_node hlist;
36 void *key; 36 struct hlist_node dlist; /* for deletion cleanup */
37 unsigned int value; 37 struct qstr key;
38 atomic_t value;
38}; 39};
39 40
40struct sb_list { 41static DEFINE_HASHTABLE(package_to_appid, 8);
41 struct super_block *sb; 42static DEFINE_HASHTABLE(package_to_userid, 8);
42 struct list_head list; 43static DEFINE_HASHTABLE(ext_to_groupid, 8);
43};
44 44
45struct packagelist_data {
46 DECLARE_HASHTABLE(package_to_appid,8);
47 struct mutex hashtable_lock;
48 45
49}; 46static struct kmem_cache *hashtable_entry_cachep;
50 47
51static struct packagelist_data *pkgl_data_all; 48static unsigned int full_name_case_hash(const unsigned char *name, unsigned int len)
49{
50 unsigned long hash = init_name_hash();
52 51
53static struct kmem_cache *hashtable_entry_cachep; 52 while (len--)
53 hash = partial_name_hash(tolower(*name++), hash);
54 return end_name_hash(hash);
55}
54 56
55static unsigned int str_hash(const char *key) { 57static inline void qstr_init(struct qstr *q, const char *name)
56 int i; 58{
57 unsigned int h = strlen(key); 59 q->name = name;
58 char *data = (char *)key; 60 q->len = strlen(q->name);
61 q->hash = full_name_case_hash(q->name, q->len);
62}
63
64static inline int qstr_copy(const struct qstr *src, struct qstr *dest)
65{
66 dest->name = kstrdup(src->name, GFP_KERNEL);
67 dest->hash_len = src->hash_len;
68 return !!dest->name;
69}
70
71
72static appid_t __get_appid(const struct qstr *key)
73{
74 struct hashtable_entry *hash_cur;
75 unsigned int hash = key->hash;
76 appid_t ret_id;
59 77
60 for (i = 0; i < strlen(key); i++) { 78 rcu_read_lock();
61 h = h * 31 + *data; 79 hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) {
62 data++; 80 if (qstr_case_eq(key, &hash_cur->key)) {
81 ret_id = atomic_read(&hash_cur->value);
82 rcu_read_unlock();
83 return ret_id;
84 }
63 } 85 }
64 return h; 86 rcu_read_unlock();
87 return 0;
88}
89
90appid_t get_appid(const char *key)
91{
92 struct qstr q;
93
94 qstr_init(&q, key);
95 return __get_appid(&q);
65} 96}
66 97
67appid_t get_appid(void *pkgl_id, const char *app_name) 98static appid_t __get_ext_gid(const struct qstr *key)
68{ 99{
69 struct packagelist_data *pkgl_dat = pkgl_data_all;
70 struct hashtable_entry *hash_cur; 100 struct hashtable_entry *hash_cur;
71 unsigned int hash = str_hash(app_name); 101 unsigned int hash = key->hash;
72 appid_t ret_id; 102 appid_t ret_id;
73 103
74 mutex_lock(&pkgl_dat->hashtable_lock); 104 rcu_read_lock();
75 hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { 105 hash_for_each_possible_rcu(ext_to_groupid, hash_cur, hlist, hash) {
76 if (!strcasecmp(app_name, hash_cur->key)) { 106 if (qstr_case_eq(key, &hash_cur->key)) {
77 ret_id = (appid_t)hash_cur->value; 107 ret_id = atomic_read(&hash_cur->value);
78 mutex_unlock(&pkgl_dat->hashtable_lock); 108 rcu_read_unlock();
79 return ret_id; 109 return ret_id;
80 } 110 }
81 } 111 }
82 mutex_unlock(&pkgl_dat->hashtable_lock); 112 rcu_read_unlock();
113 return 0;
114}
115
116appid_t get_ext_gid(const char *key)
117{
118 struct qstr q;
119
120 qstr_init(&q, key);
121 return __get_ext_gid(&q);
122}
123
124static appid_t __is_excluded(const struct qstr *app_name, userid_t user)
125{
126 struct hashtable_entry *hash_cur;
127 unsigned int hash = app_name->hash;
128
129 rcu_read_lock();
130 hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
131 if (atomic_read(&hash_cur->value) == user &&
132 qstr_case_eq(app_name, &hash_cur->key)) {
133 rcu_read_unlock();
134 return 1;
135 }
136 }
137 rcu_read_unlock();
83 return 0; 138 return 0;
84} 139}
85 140
141appid_t is_excluded(const char *key, userid_t user)
142{
143 struct qstr q;
144 qstr_init(&q, key);
145 return __is_excluded(&q, user);
146}
147
86/* Kernel has already enforced everything we returned through 148/* Kernel has already enforced everything we returned through
87 * derive_permissions_locked(), so this is used to lock down access 149 * derive_permissions_locked(), so this is used to lock down access
88 * even further, such as enforcing that apps hold sdcard_rw. */ 150 * even further, such as enforcing that apps hold sdcard_rw.
89int check_caller_access_to_name(struct inode *parent_node, const char* name) { 151 */
152int check_caller_access_to_name(struct inode *parent_node, const struct qstr *name)
153{
154 struct qstr q_autorun = QSTR_LITERAL("autorun.inf");
155 struct qstr q__android_secure = QSTR_LITERAL(".android_secure");
156 struct qstr q_android_secure = QSTR_LITERAL("android_secure");
90 157
91 /* Always block security-sensitive files at root */ 158 /* Always block security-sensitive files at root */
92 if (parent_node && SDCARDFS_I(parent_node)->perm == PERM_ROOT) { 159 if (parent_node && SDCARDFS_I(parent_node)->perm == PERM_ROOT) {
93 if (!strcasecmp(name, "autorun.inf") 160 if (qstr_case_eq(name, &q_autorun)
94 || !strcasecmp(name, ".android_secure") 161 || qstr_case_eq(name, &q__android_secure)
95 || !strcasecmp(name, "android_secure")) { 162 || qstr_case_eq(name, &q_android_secure)) {
96 return 0; 163 return 0;
97 } 164 }
98 } 165 }
99 166
100 /* Root always has access; access for any other UIDs should always 167 /* Root always has access; access for any other UIDs should always
101 * be controlled through packages.list. */ 168 * be controlled through packages.list.
102 if (from_kuid(&init_user_ns, current_fsuid()) == 0) { 169 */
170 if (from_kuid(&init_user_ns, current_fsuid()) == 0)
103 return 1; 171 return 1;
104 }
105 172
106 /* No extra permissions to enforce */ 173 /* No extra permissions to enforce */
107 return 1; 174 return 1;
108} 175}
109 176
110/* This function is used when file opening. The open flags must be 177/* This function is used when file opening. The open flags must be
111 * checked before calling check_caller_access_to_name() */ 178 * checked before calling check_caller_access_to_name()
112int open_flags_to_access_mode(int open_flags) { 179 */
113 if((open_flags & O_ACCMODE) == O_RDONLY) { 180int open_flags_to_access_mode(int open_flags)
181{
182 if ((open_flags & O_ACCMODE) == O_RDONLY)
114 return 0; /* R_OK */ 183 return 0; /* R_OK */
115 } else if ((open_flags & O_ACCMODE) == O_WRONLY) { 184 if ((open_flags & O_ACCMODE) == O_WRONLY)
116 return 1; /* W_OK */ 185 return 1; /* W_OK */
117 } else { 186 /* Probably O_RDRW, but treat as default to be safe */
118 /* Probably O_RDRW, but treat as default to be safe */
119 return 1; /* R_OK | W_OK */ 187 return 1; /* R_OK | W_OK */
188}
189
190static struct hashtable_entry *alloc_hashtable_entry(const struct qstr *key,
191 appid_t value)
192{
193 struct hashtable_entry *ret = kmem_cache_alloc(hashtable_entry_cachep,
194 GFP_KERNEL);
195 if (!ret)
196 return NULL;
197 INIT_HLIST_NODE(&ret->dlist);
198 INIT_HLIST_NODE(&ret->hlist);
199
200 if (!qstr_copy(key, &ret->key)) {
201 kmem_cache_free(hashtable_entry_cachep, ret);
202 return NULL;
120 } 203 }
204
205 atomic_set(&ret->value, value);
206 return ret;
121} 207}
122 208
123static int insert_str_to_int_lock(struct packagelist_data *pkgl_dat, char *key, 209static int insert_packagelist_appid_entry_locked(const struct qstr *key, appid_t value)
124 unsigned int value)
125{ 210{
126 struct hashtable_entry *hash_cur; 211 struct hashtable_entry *hash_cur;
127 struct hashtable_entry *new_entry; 212 struct hashtable_entry *new_entry;
128 unsigned int hash = str_hash(key); 213 unsigned int hash = key->hash;
129 214
130 hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { 215 hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) {
131 if (!strcasecmp(key, hash_cur->key)) { 216 if (qstr_case_eq(key, &hash_cur->key)) {
132 hash_cur->value = value; 217 atomic_set(&hash_cur->value, value);
133 return 0; 218 return 0;
134 } 219 }
135 } 220 }
136 new_entry = kmem_cache_alloc(hashtable_entry_cachep, GFP_KERNEL); 221 new_entry = alloc_hashtable_entry(key, value);
137 if (!new_entry) 222 if (!new_entry)
138 return -ENOMEM; 223 return -ENOMEM;
139 new_entry->key = kstrdup(key, GFP_KERNEL); 224 hash_add_rcu(package_to_appid, &new_entry->hlist, hash);
140 new_entry->value = value;
141 hash_add(pkgl_dat->package_to_appid, &new_entry->hlist, hash);
142 return 0; 225 return 0;
143} 226}
144 227
145static void fixup_perms(struct super_block *sb) { 228static int insert_ext_gid_entry_locked(const struct qstr *key, appid_t value)
146 if (sb && sb->s_magic == SDCARDFS_SUPER_MAGIC) { 229{
147 mutex_lock(&sb->s_root->d_inode->i_mutex); 230 struct hashtable_entry *hash_cur;
148 get_derive_permissions_recursive(sb->s_root); 231 struct hashtable_entry *new_entry;
149 mutex_unlock(&sb->s_root->d_inode->i_mutex); 232 unsigned int hash = key->hash;
233
234 /* An extension can only belong to one gid */
235 hash_for_each_possible_rcu(ext_to_groupid, hash_cur, hlist, hash) {
236 if (qstr_case_eq(key, &hash_cur->key))
237 return -EINVAL;
150 } 238 }
239 new_entry = alloc_hashtable_entry(key, value);
240 if (!new_entry)
241 return -ENOMEM;
242 hash_add_rcu(ext_to_groupid, &new_entry->hlist, hash);
243 return 0;
151} 244}
152 245
153static int insert_str_to_int(struct packagelist_data *pkgl_dat, char *key, 246static int insert_userid_exclude_entry_locked(const struct qstr *key, userid_t value)
154 unsigned int value) { 247{
155 int ret; 248 struct hashtable_entry *hash_cur;
249 struct hashtable_entry *new_entry;
250 unsigned int hash = key->hash;
251
252 /* Only insert if not already present */
253 hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
254 if (atomic_read(&hash_cur->value) == value &&
255 qstr_case_eq(key, &hash_cur->key))
256 return 0;
257 }
258 new_entry = alloc_hashtable_entry(key, value);
259 if (!new_entry)
260 return -ENOMEM;
261 hash_add_rcu(package_to_userid, &new_entry->hlist, hash);
262 return 0;
263}
264
265static void fixup_all_perms_name(const struct qstr *key)
266{
156 struct sdcardfs_sb_info *sbinfo; 267 struct sdcardfs_sb_info *sbinfo;
157 mutex_lock(&sdcardfs_super_list_lock); 268 struct limit_search limit = {
158 mutex_lock(&pkgl_dat->hashtable_lock); 269 .flags = BY_NAME,
159 ret = insert_str_to_int_lock(pkgl_dat, key, value); 270 .name = QSTR_INIT(key->name, key->len),
160 mutex_unlock(&pkgl_dat->hashtable_lock); 271 };
272 list_for_each_entry(sbinfo, &sdcardfs_super_list, list) {
273 if (sbinfo_has_sdcard_magic(sbinfo))
274 fixup_perms_recursive(sbinfo->sb->s_root, &limit);
275 }
276}
161 277
278static void fixup_all_perms_name_userid(const struct qstr *key, userid_t userid)
279{
280 struct sdcardfs_sb_info *sbinfo;
281 struct limit_search limit = {
282 .flags = BY_NAME | BY_USERID,
283 .name = QSTR_INIT(key->name, key->len),
284 .userid = userid,
285 };
162 list_for_each_entry(sbinfo, &sdcardfs_super_list, list) { 286 list_for_each_entry(sbinfo, &sdcardfs_super_list, list) {
163 if (sbinfo) { 287 if (sbinfo_has_sdcard_magic(sbinfo))
164 fixup_perms(sbinfo->sb); 288 fixup_perms_recursive(sbinfo->sb->s_root, &limit);
165 }
166 } 289 }
290}
291
292static void fixup_all_perms_userid(userid_t userid)
293{
294 struct sdcardfs_sb_info *sbinfo;
295 struct limit_search limit = {
296 .flags = BY_USERID,
297 .userid = userid,
298 };
299 list_for_each_entry(sbinfo, &sdcardfs_super_list, list) {
300 if (sbinfo_has_sdcard_magic(sbinfo))
301 fixup_perms_recursive(sbinfo->sb->s_root, &limit);
302 }
303}
304
305static int insert_packagelist_entry(const struct qstr *key, appid_t value)
306{
307 int err;
308
309 mutex_lock(&sdcardfs_super_list_lock);
310 err = insert_packagelist_appid_entry_locked(key, value);
311 if (!err)
312 fixup_all_perms_name(key);
167 mutex_unlock(&sdcardfs_super_list_lock); 313 mutex_unlock(&sdcardfs_super_list_lock);
168 return ret; 314
315 return err;
169} 316}
170 317
171static void remove_str_to_int_lock(struct hashtable_entry *h_entry) { 318static int insert_ext_gid_entry(const struct qstr *key, appid_t value)
172 kfree(h_entry->key); 319{
173 hash_del(&h_entry->hlist); 320 int err;
174 kmem_cache_free(hashtable_entry_cachep, h_entry); 321
322 mutex_lock(&sdcardfs_super_list_lock);
323 err = insert_ext_gid_entry_locked(key, value);
324 mutex_unlock(&sdcardfs_super_list_lock);
325
326 return err;
175} 327}
176 328
177static void remove_str_to_int(struct packagelist_data *pkgl_dat, const char *key) 329static int insert_userid_exclude_entry(const struct qstr *key, userid_t value)
178{ 330{
179 struct sdcardfs_sb_info *sbinfo; 331 int err;
180 struct hashtable_entry *hash_cur; 332
181 unsigned int hash = str_hash(key);
182 mutex_lock(&sdcardfs_super_list_lock); 333 mutex_lock(&sdcardfs_super_list_lock);
183 mutex_lock(&pkgl_dat->hashtable_lock); 334 err = insert_userid_exclude_entry_locked(key, value);
184 hash_for_each_possible(pkgl_dat->package_to_appid, hash_cur, hlist, hash) { 335 if (!err)
185 if (!strcasecmp(key, hash_cur->key)) { 336 fixup_all_perms_name_userid(key, value);
186 remove_str_to_int_lock(hash_cur); 337 mutex_unlock(&sdcardfs_super_list_lock);
338
339 return err;
340}
341
342static void free_hashtable_entry(struct hashtable_entry *entry)
343{
344 kfree(entry->key.name);
345 kmem_cache_free(hashtable_entry_cachep, entry);
346}
347
348static void remove_packagelist_entry_locked(const struct qstr *key)
349{
350 struct hashtable_entry *hash_cur;
351 unsigned int hash = key->hash;
352 struct hlist_node *h_t;
353 HLIST_HEAD(free_list);
354
355 hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
356 if (qstr_case_eq(key, &hash_cur->key)) {
357 hash_del_rcu(&hash_cur->hlist);
358 hlist_add_head(&hash_cur->dlist, &free_list);
359 }
360 }
361 hash_for_each_possible_rcu(package_to_appid, hash_cur, hlist, hash) {
362 if (qstr_case_eq(key, &hash_cur->key)) {
363 hash_del_rcu(&hash_cur->hlist);
364 hlist_add_head(&hash_cur->dlist, &free_list);
187 break; 365 break;
188 } 366 }
189 } 367 }
190 mutex_unlock(&pkgl_dat->hashtable_lock); 368 synchronize_rcu();
191 list_for_each_entry(sbinfo, &sdcardfs_super_list, list) { 369 hlist_for_each_entry_safe(hash_cur, h_t, &free_list, dlist)
192 if (sbinfo) { 370 free_hashtable_entry(hash_cur);
193 fixup_perms(sbinfo->sb); 371}
372
373static void remove_packagelist_entry(const struct qstr *key)
374{
375 mutex_lock(&sdcardfs_super_list_lock);
376 remove_packagelist_entry_locked(key);
377 fixup_all_perms_name(key);
378 mutex_unlock(&sdcardfs_super_list_lock);
379}
380
381static void remove_ext_gid_entry_locked(const struct qstr *key, gid_t group)
382{
383 struct hashtable_entry *hash_cur;
384 unsigned int hash = key->hash;
385
386 hash_for_each_possible_rcu(ext_to_groupid, hash_cur, hlist, hash) {
387 if (qstr_case_eq(key, &hash_cur->key) && atomic_read(&hash_cur->value) == group) {
388 hash_del_rcu(&hash_cur->hlist);
389 synchronize_rcu();
390 free_hashtable_entry(hash_cur);
391 break;
194 } 392 }
195 } 393 }
394}
395
396static void remove_ext_gid_entry(const struct qstr *key, gid_t group)
397{
398 mutex_lock(&sdcardfs_super_list_lock);
399 remove_ext_gid_entry_locked(key, group);
196 mutex_unlock(&sdcardfs_super_list_lock); 400 mutex_unlock(&sdcardfs_super_list_lock);
197 return;
198} 401}
199 402
200static void remove_all_hashentrys(struct packagelist_data *pkgl_dat) 403static void remove_userid_all_entry_locked(userid_t userid)
201{ 404{
202 struct hashtable_entry *hash_cur; 405 struct hashtable_entry *hash_cur;
203 struct hlist_node *h_t; 406 struct hlist_node *h_t;
407 HLIST_HEAD(free_list);
204 int i; 408 int i;
205 mutex_lock(&pkgl_dat->hashtable_lock); 409
206 hash_for_each_safe(pkgl_dat->package_to_appid, i, h_t, hash_cur, hlist) 410 hash_for_each_rcu(package_to_userid, i, hash_cur, hlist) {
207 remove_str_to_int_lock(hash_cur); 411 if (atomic_read(&hash_cur->value) == userid) {
208 mutex_unlock(&pkgl_dat->hashtable_lock); 412 hash_del_rcu(&hash_cur->hlist);
209 hash_init(pkgl_dat->package_to_appid); 413 hlist_add_head(&hash_cur->dlist, &free_list);
414 }
415 }
416 synchronize_rcu();
417 hlist_for_each_entry_safe(hash_cur, h_t, &free_list, dlist) {
418 free_hashtable_entry(hash_cur);
419 }
210} 420}
211 421
212static struct packagelist_data * packagelist_create(void) 422static void remove_userid_all_entry(userid_t userid)
213{ 423{
214 struct packagelist_data *pkgl_dat; 424 mutex_lock(&sdcardfs_super_list_lock);
425 remove_userid_all_entry_locked(userid);
426 fixup_all_perms_userid(userid);
427 mutex_unlock(&sdcardfs_super_list_lock);
428}
215 429
216 pkgl_dat = kmalloc(sizeof(*pkgl_dat), GFP_KERNEL | __GFP_ZERO); 430static void remove_userid_exclude_entry_locked(const struct qstr *key, userid_t userid)
217 if (!pkgl_dat) { 431{
218 printk(KERN_ERR "sdcardfs: Failed to create hash\n"); 432 struct hashtable_entry *hash_cur;
219 return ERR_PTR(-ENOMEM); 433 unsigned int hash = key->hash;
434
435 hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
436 if (qstr_case_eq(key, &hash_cur->key) &&
437 atomic_read(&hash_cur->value) == userid) {
438 hash_del_rcu(&hash_cur->hlist);
439 synchronize_rcu();
440 free_hashtable_entry(hash_cur);
441 break;
442 }
220 } 443 }
444}
221 445
222 mutex_init(&pkgl_dat->hashtable_lock); 446static void remove_userid_exclude_entry(const struct qstr *key, userid_t userid)
223 hash_init(pkgl_dat->package_to_appid); 447{
224 448 mutex_lock(&sdcardfs_super_list_lock);
225 return pkgl_dat; 449 remove_userid_exclude_entry_locked(key, userid);
450 fixup_all_perms_name_userid(key, userid);
451 mutex_unlock(&sdcardfs_super_list_lock);
226} 452}
227 453
228static void packagelist_destroy(struct packagelist_data *pkgl_dat) 454static void packagelist_destroy(void)
229{ 455{
230 remove_all_hashentrys(pkgl_dat); 456 struct hashtable_entry *hash_cur;
231 printk(KERN_INFO "sdcardfs: destroyed packagelist pkgld\n"); 457 struct hlist_node *h_t;
232 kfree(pkgl_dat); 458 HLIST_HEAD(free_list);
459 int i;
460
461 mutex_lock(&sdcardfs_super_list_lock);
462 hash_for_each_rcu(package_to_appid, i, hash_cur, hlist) {
463 hash_del_rcu(&hash_cur->hlist);
464 hlist_add_head(&hash_cur->dlist, &free_list);
465 }
466 hash_for_each_rcu(package_to_userid, i, hash_cur, hlist) {
467 hash_del_rcu(&hash_cur->hlist);
468 hlist_add_head(&hash_cur->dlist, &free_list);
469 }
470 synchronize_rcu();
471 hlist_for_each_entry_safe(hash_cur, h_t, &free_list, dlist)
472 free_hashtable_entry(hash_cur);
473 mutex_unlock(&sdcardfs_super_list_lock);
474 pr_info("sdcardfs: destroyed packagelist pkgld\n");
475}
476
477#define SDCARDFS_CONFIGFS_ATTR(_pfx, _name) \
478static struct configfs_attribute _pfx##attr_##_name = { \
479 .ca_name = __stringify(_name), \
480 .ca_mode = S_IRUGO | S_IWUGO, \
481 .ca_owner = THIS_MODULE, \
482 .show = _pfx##_name##_show, \
483 .store = _pfx##_name##_store, \
484}
485
486#define SDCARDFS_CONFIGFS_ATTR_RO(_pfx, _name) \
487static struct configfs_attribute _pfx##attr_##_name = { \
488 .ca_name = __stringify(_name), \
489 .ca_mode = S_IRUGO, \
490 .ca_owner = THIS_MODULE, \
491 .show = _pfx##_name##_show, \
492}
493
494#define SDCARDFS_CONFIGFS_ATTR_WO(_pfx, _name) \
495static struct configfs_attribute _pfx##attr_##_name = { \
496 .ca_name = __stringify(_name), \
497 .ca_mode = S_IWUGO, \
498 .ca_owner = THIS_MODULE, \
499 .store = _pfx##_name##_store, \
233} 500}
234 501
235struct package_appid { 502struct package_details {
236 struct config_item item; 503 struct config_item item;
237 int add_pid; 504 struct qstr name;
238}; 505};
239 506
240static inline struct package_appid *to_package_appid(struct config_item *item) 507static inline struct package_details *to_package_details(struct config_item *item)
241{ 508{
242 return item ? container_of(item, struct package_appid, item) : NULL; 509 return item ? container_of(item, struct package_details, item) : NULL;
510}
511
512static ssize_t package_details_appid_show(struct config_item *item, char *page)
513{
514 return scnprintf(page, PAGE_SIZE, "%u\n", __get_appid(&to_package_details(item)->name));
515}
516
517static ssize_t package_details_appid_store(struct config_item *item,
518 const char *page, size_t count)
519{
520 unsigned int tmp;
521 int ret;
522
523 ret = kstrtouint(page, 10, &tmp);
524 if (ret)
525 return ret;
526
527 ret = insert_packagelist_entry(&to_package_details(item)->name, tmp);
528
529 if (ret)
530 return ret;
531
532 return count;
243} 533}
244 534
245static ssize_t package_appid_attr_show(struct config_item *item, 535static ssize_t package_details_excluded_userids_show(struct config_item *item,
246 char *page) 536 char *page)
247{ 537{
248 ssize_t count; 538 struct package_details *package_details = to_package_details(item);
249 count = sprintf(page, "%d\n", get_appid(pkgl_data_all, item->ci_name)); 539 struct hashtable_entry *hash_cur;
540 unsigned int hash = package_details->name.hash;
541 int count = 0;
542
543 rcu_read_lock();
544 hash_for_each_possible_rcu(package_to_userid, hash_cur, hlist, hash) {
545 if (qstr_case_eq(&package_details->name, &hash_cur->key))
546 count += scnprintf(page + count, PAGE_SIZE - count,
547 "%d ", atomic_read(&hash_cur->value));
548 }
549 rcu_read_unlock();
550 if (count)
551 count--;
552 count += scnprintf(page + count, PAGE_SIZE - count, "\n");
250 return count; 553 return count;
251} 554}
252 555
253static ssize_t package_appid_attr_store(struct config_item *item, 556static ssize_t package_details_excluded_userids_store(struct config_item *item,
254 const char *page, size_t count) 557 const char *page, size_t count)
255{ 558{
256 struct package_appid *package_appid = to_package_appid(item); 559 unsigned int tmp;
257 unsigned long tmp;
258 char *p = (char *) page;
259 int ret; 560 int ret;
260 561
261 tmp = simple_strtoul(p, &p, 10); 562 ret = kstrtouint(page, 10, &tmp);
262 if (!p || (*p && (*p != '\n'))) 563 if (ret)
263 return -EINVAL; 564 return ret;
565
566 ret = insert_userid_exclude_entry(&to_package_details(item)->name, tmp);
264 567
265 if (tmp > INT_MAX)
266 return -ERANGE;
267 ret = insert_str_to_int(pkgl_data_all, item->ci_name, (unsigned int)tmp);
268 package_appid->add_pid = tmp;
269 if (ret) 568 if (ret)
270 return ret; 569 return ret;
271 570
272 return count; 571 return count;
273} 572}
274 573
275static struct configfs_attribute package_appid_attr_add_pid = { 574static ssize_t package_details_clear_userid_store(struct config_item *item,
276 .ca_owner = THIS_MODULE, 575 const char *page, size_t count)
277 .ca_name = "appid", 576{
278 .ca_mode = S_IRUGO | S_IWUGO, 577 unsigned int tmp;
279 .show = package_appid_attr_show, 578 int ret;
280 .store = package_appid_attr_store,
281};
282 579
283static struct configfs_attribute *package_appid_attrs[] = { 580 ret = kstrtouint(page, 10, &tmp);
284 &package_appid_attr_add_pid, 581 if (ret)
285 NULL, 582 return ret;
286}; 583 remove_userid_exclude_entry(&to_package_details(item)->name, tmp);
584 return count;
585}
287 586
288static void package_appid_release(struct config_item *item) 587static void package_details_release(struct config_item *item)
289{ 588{
290 printk(KERN_INFO "sdcardfs: removing %s\n", item->ci_dentry->d_name.name); 589 struct package_details *package_details = to_package_details(item);
291 /* item->ci_name is freed already, so we rely on the dentry */ 590
292 remove_str_to_int(pkgl_data_all, item->ci_dentry->d_name.name); 591 pr_info("sdcardfs: removing %s\n", package_details->name.name);
293 kfree(to_package_appid(item)); 592 remove_packagelist_entry(&package_details->name);
593 kfree(package_details->name.name);
594 kfree(package_details);
294} 595}
295 596
296static struct configfs_item_operations package_appid_item_ops = { 597SDCARDFS_CONFIGFS_ATTR(package_details_, appid);
297 .release = package_appid_release, 598SDCARDFS_CONFIGFS_ATTR(package_details_, excluded_userids);
599SDCARDFS_CONFIGFS_ATTR_WO(package_details_, clear_userid);
600
601static struct configfs_attribute *package_details_attrs[] = {
602 &package_details_attr_appid,
603 &package_details_attr_excluded_userids,
604 &package_details_attr_clear_userid,
605 NULL,
606};
607
608static struct configfs_item_operations package_details_item_ops = {
609 .release = package_details_release,
298}; 610};
299 611
300static struct config_item_type package_appid_type = { 612static struct config_item_type package_appid_type = {
301 .ct_item_ops = &package_appid_item_ops, 613 .ct_item_ops = &package_details_item_ops,
302 .ct_attrs = package_appid_attrs, 614 .ct_attrs = package_details_attrs,
303 .ct_owner = THIS_MODULE, 615 .ct_owner = THIS_MODULE,
304}; 616};
305 617
306 618struct extensions_value {
307struct sdcardfs_packages {
308 struct config_group group; 619 struct config_group group;
620 unsigned int num;
621};
622
623struct extension_details {
624 struct config_item item;
625 struct qstr name;
626 unsigned int num;
627};
628
629static inline struct extensions_value *to_extensions_value(struct config_item *item)
630{
631 return item ? container_of(to_config_group(item), struct extensions_value, group) : NULL;
632}
633
634static inline struct extension_details *to_extension_details(struct config_item *item)
635{
636 return item ? container_of(item, struct extension_details, item) : NULL;
637}
638
639static void extension_details_release(struct config_item *item)
640{
641 struct extension_details *extension_details = to_extension_details(item);
642
643 pr_info("sdcardfs: No longer mapping %s files to gid %d\n",
644 extension_details->name.name, extension_details->num);
645 remove_ext_gid_entry(&extension_details->name, extension_details->num);
646 kfree(extension_details->name.name);
647 kfree(extension_details);
648}
649
650static struct configfs_item_operations extension_details_item_ops = {
651 .release = extension_details_release,
652};
653
654static struct config_item_type extension_details_type = {
655 .ct_item_ops = &extension_details_item_ops,
656 .ct_owner = THIS_MODULE,
309}; 657};
310 658
311static inline struct sdcardfs_packages *to_sdcardfs_packages(struct config_item *item) 659static struct config_item *extension_details_make_item(struct config_group *group, const char *name)
312{ 660{
313 return item ? container_of(to_config_group(item), struct sdcardfs_packages, group) : NULL; 661 struct extensions_value *extensions_value = to_extensions_value(&group->cg_item);
662 struct extension_details *extension_details = kzalloc(sizeof(struct extension_details), GFP_KERNEL);
663 const char *tmp;
664 int ret;
665
666 if (!extension_details)
667 return ERR_PTR(-ENOMEM);
668
669 tmp = kstrdup(name, GFP_KERNEL);
670 if (!tmp) {
671 kfree(extension_details);
672 return ERR_PTR(-ENOMEM);
673 }
674 qstr_init(&extension_details->name, tmp);
675 ret = insert_ext_gid_entry(&extension_details->name, extensions_value->num);
676
677 if (ret) {
678 kfree(extension_details->name.name);
679 kfree(extension_details);
680 return ERR_PTR(ret);
681 }
682 config_item_init_type_name(&extension_details->item, name, &extension_details_type);
683
684 return &extension_details->item;
314} 685}
315 686
316static struct config_item *sdcardfs_packages_make_item(struct config_group *group, const char *name) 687static struct configfs_group_operations extensions_value_group_ops = {
688 .make_item = extension_details_make_item,
689};
690
691static struct config_item_type extensions_name_type = {
692 .ct_group_ops = &extensions_value_group_ops,
693 .ct_owner = THIS_MODULE,
694};
695
696static struct config_group *extensions_make_group(struct config_group *group, const char *name)
317{ 697{
318 struct package_appid *package_appid; 698 struct extensions_value *extensions_value;
699 unsigned int tmp;
700 int ret;
319 701
320 package_appid = kzalloc(sizeof(struct package_appid), GFP_KERNEL); 702 extensions_value = kzalloc(sizeof(struct extensions_value), GFP_KERNEL);
321 if (!package_appid) 703 if (!extensions_value)
322 return ERR_PTR(-ENOMEM); 704 return ERR_PTR(-ENOMEM);
705 ret = kstrtouint(name, 10, &tmp);
706 if (ret) {
707 kfree(extensions_value);
708 return ERR_PTR(ret);
709 }
710
711 extensions_value->num = tmp;
712 config_group_init_type_name(&extensions_value->group, name,
713 &extensions_name_type);
714 return &extensions_value->group;
715}
716
717static void extensions_drop_group(struct config_group *group, struct config_item *item)
718{
719 struct extensions_value *value = to_extensions_value(item);
720
721 pr_info("sdcardfs: No longer mapping any files to gid %d\n", value->num);
722 kfree(value);
723}
724
725static struct configfs_group_operations extensions_group_ops = {
726 .make_group = extensions_make_group,
727 .drop_item = extensions_drop_group,
728};
323 729
324 config_item_init_type_name(&package_appid->item, name, 730static struct config_item_type extensions_type = {
325 &package_appid_type); 731 .ct_group_ops = &extensions_group_ops,
732 .ct_owner = THIS_MODULE,
733};
326 734
327 package_appid->add_pid = 0; 735struct config_group extension_group = {
736 .cg_item = {
737 .ci_namebuf = "extensions",
738 .ci_type = &extensions_type,
739 },
740};
328 741
329 return &package_appid->item; 742static struct config_item *packages_make_item(struct config_group *group, const char *name)
743{
744 struct package_details *package_details;
745 const char *tmp;
746
747 package_details = kzalloc(sizeof(struct package_details), GFP_KERNEL);
748 if (!package_details)
749 return ERR_PTR(-ENOMEM);
750 tmp = kstrdup(name, GFP_KERNEL);
751 if (!tmp) {
752 kfree(package_details);
753 return ERR_PTR(-ENOMEM);
754 }
755 qstr_init(&package_details->name, tmp);
756 config_item_init_type_name(&package_details->item, name,
757 &package_appid_type);
758
759 return &package_details->item;
330} 760}
331 761
332static ssize_t packages_attr_show(struct config_item *item, 762static ssize_t packages_list_show(struct config_item *item, char *page)
333 char *page)
334{ 763{
335 struct hashtable_entry *hash_cur; 764 struct hashtable_entry *hash_cur_app;
336 struct hlist_node *h_t; 765 struct hashtable_entry *hash_cur_user;
337 int i; 766 int i;
338 int count = 0, written = 0; 767 int count = 0, written = 0;
339 char errormsg[] = "<truncated>\n"; 768 const char errormsg[] = "<truncated>\n";
340 769 unsigned int hash;
341 mutex_lock(&pkgl_data_all->hashtable_lock); 770
342 hash_for_each_safe(pkgl_data_all->package_to_appid, i, h_t, hash_cur, hlist) { 771 rcu_read_lock();
343 written = scnprintf(page + count, PAGE_SIZE - sizeof(errormsg) - count, "%s %d\n", (char *)hash_cur->key, hash_cur->value); 772 hash_for_each_rcu(package_to_appid, i, hash_cur_app, hlist) {
344 if (count + written == PAGE_SIZE - sizeof(errormsg)) { 773 written = scnprintf(page + count, PAGE_SIZE - sizeof(errormsg) - count, "%s %d\n",
774 hash_cur_app->key.name, atomic_read(&hash_cur_app->value));
775 hash = hash_cur_app->key.hash;
776 hash_for_each_possible_rcu(package_to_userid, hash_cur_user, hlist, hash) {
777 if (qstr_case_eq(&hash_cur_app->key, &hash_cur_user->key)) {
778 written += scnprintf(page + count + written - 1,
779 PAGE_SIZE - sizeof(errormsg) - count - written + 1,
780 " %d\n", atomic_read(&hash_cur_user->value)) - 1;
781 }
782 }
783 if (count + written == PAGE_SIZE - sizeof(errormsg) - 1) {
345 count += scnprintf(page + count, PAGE_SIZE - count, errormsg); 784 count += scnprintf(page + count, PAGE_SIZE - count, errormsg);
346 break; 785 break;
347 } 786 }
348 count += written; 787 count += written;
349 } 788 }
350 mutex_unlock(&pkgl_data_all->hashtable_lock); 789 rcu_read_unlock();
351 790
352 return count; 791 return count;
353} 792}
354 793
355static struct configfs_attribute sdcardfs_packages_attr_description = { 794static ssize_t packages_remove_userid_store(struct config_item *item,
356 .ca_owner = THIS_MODULE, 795 const char *page, size_t count)
357 .ca_name = "packages_gid.list",
358 .ca_mode = S_IRUGO,
359 .show = packages_attr_show,
360};
361
362static struct configfs_attribute *sdcardfs_packages_attrs[] = {
363 &sdcardfs_packages_attr_description,
364 NULL,
365};
366
367static void sdcardfs_packages_release(struct config_item *item)
368{ 796{
797 unsigned int tmp;
798 int ret;
369 799
370 printk(KERN_INFO "sdcardfs: destroyed something?\n"); 800 ret = kstrtouint(page, 10, &tmp);
371 kfree(to_sdcardfs_packages(item)); 801 if (ret)
802 return ret;
803 remove_userid_all_entry(tmp);
804 return count;
372} 805}
373 806
374static struct configfs_item_operations sdcardfs_packages_item_ops = { 807static struct configfs_attribute packages_attr_packages_gid_list = {
375 .release = sdcardfs_packages_release, 808 .ca_name = "packages_gid.list",
809 .ca_mode = S_IRUGO,
810 .ca_owner = THIS_MODULE,
811 .show = packages_list_show,
812};
813
814SDCARDFS_CONFIGFS_ATTR_WO(packages_, remove_userid);
815
816static struct configfs_attribute *packages_attrs[] = {
817 &packages_attr_packages_gid_list,
818 &packages_attr_remove_userid,
819 NULL,
376}; 820};
377 821
378/* 822/*
379 * Note that, since no extra work is required on ->drop_item(), 823 * Note that, since no extra work is required on ->drop_item(),
380 * no ->drop_item() is provided. 824 * no ->drop_item() is provided.
381 */ 825 */
382static struct configfs_group_operations sdcardfs_packages_group_ops = { 826static struct configfs_group_operations packages_group_ops = {
383 .make_item = sdcardfs_packages_make_item, 827 .make_item = packages_make_item,
384}; 828};
385 829
386static struct config_item_type sdcardfs_packages_type = { 830static struct config_item_type packages_type = {
387 .ct_item_ops = &sdcardfs_packages_item_ops, 831 .ct_group_ops = &packages_group_ops,
388 .ct_group_ops = &sdcardfs_packages_group_ops, 832 .ct_attrs = packages_attrs,
389 .ct_attrs = sdcardfs_packages_attrs,
390 .ct_owner = THIS_MODULE, 833 .ct_owner = THIS_MODULE,
391}; 834};
392 835
393static struct configfs_subsystem sdcardfs_packages_subsys = { 836struct config_group *sd_default_groups[] = {
837 &extension_group,
838 NULL,
839};
840
841static struct configfs_subsystem sdcardfs_packages = {
394 .su_group = { 842 .su_group = {
395 .cg_item = { 843 .cg_item = {
396 .ci_namebuf = "sdcardfs", 844 .ci_namebuf = "sdcardfs",
397 .ci_type = &sdcardfs_packages_type, 845 .ci_type = &packages_type,
398 }, 846 },
847 .default_groups = sd_default_groups,
399 }, 848 },
400}; 849};
401 850
402static int configfs_sdcardfs_init(void) 851static int configfs_sdcardfs_init(void)
403{ 852{
404 int ret; 853 int ret, i;
405 struct configfs_subsystem *subsys = &sdcardfs_packages_subsys; 854 struct configfs_subsystem *subsys = &sdcardfs_packages;
406 855
856 for (i = 0; sd_default_groups[i]; i++)
857 config_group_init(sd_default_groups[i]);
407 config_group_init(&subsys->su_group); 858 config_group_init(&subsys->su_group);
408 mutex_init(&subsys->su_mutex); 859 mutex_init(&subsys->su_mutex);
409 ret = configfs_register_subsystem(subsys); 860 ret = configfs_register_subsystem(subsys);
410 if (ret) { 861 if (ret) {
411 printk(KERN_ERR "Error %d while registering subsystem %s\n", 862 pr_err("Error %d while registering subsystem %s\n",
412 ret, 863 ret,
413 subsys->su_group.cg_item.ci_namebuf); 864 subsys->su_group.cg_item.ci_namebuf);
414 } 865 }
@@ -417,7 +868,7 @@ static int configfs_sdcardfs_init(void)
417 868
418static void configfs_sdcardfs_exit(void) 869static void configfs_sdcardfs_exit(void)
419{ 870{
420 configfs_unregister_subsystem(&sdcardfs_packages_subsys); 871 configfs_unregister_subsystem(&sdcardfs_packages);
421} 872}
422 873
423int packagelist_init(void) 874int packagelist_init(void)
@@ -426,19 +877,17 @@ int packagelist_init(void)
426 kmem_cache_create("packagelist_hashtable_entry", 877 kmem_cache_create("packagelist_hashtable_entry",
427 sizeof(struct hashtable_entry), 0, 0, NULL); 878 sizeof(struct hashtable_entry), 0, 0, NULL);
428 if (!hashtable_entry_cachep) { 879 if (!hashtable_entry_cachep) {
429 printk(KERN_ERR "sdcardfs: failed creating pkgl_hashtable entry slab cache\n"); 880 pr_err("sdcardfs: failed creating pkgl_hashtable entry slab cache\n");
430 return -ENOMEM; 881 return -ENOMEM;
431 } 882 }
432 883
433 pkgl_data_all = packagelist_create();
434 configfs_sdcardfs_init(); 884 configfs_sdcardfs_init();
435 return 0; 885 return 0;
436} 886}
437 887
438void packagelist_exit(void) 888void packagelist_exit(void)
439{ 889{
440 configfs_sdcardfs_exit(); 890 configfs_sdcardfs_exit();
441 packagelist_destroy(pkgl_data_all); 891 packagelist_destroy();
442 if (hashtable_entry_cachep) 892 kmem_cache_destroy(hashtable_entry_cachep);
443 kmem_cache_destroy(hashtable_entry_cachep);
444} 893}
diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h
index f111f898b630..2b67b9a8ef9f 100644
--- a/fs/sdcardfs/sdcardfs.h
+++ b/fs/sdcardfs/sdcardfs.h
@@ -29,6 +29,7 @@
29#include <linux/dcache.h> 29#include <linux/dcache.h>
30#include <linux/file.h> 30#include <linux/file.h>
31#include <linux/fs.h> 31#include <linux/fs.h>
32#include <linux/aio.h>
32#include <linux/mm.h> 33#include <linux/mm.h>
33#include <linux/mount.h> 34#include <linux/mount.h>
34#include <linux/namei.h> 35#include <linux/namei.h>
@@ -52,7 +53,7 @@
52#define SDCARDFS_ROOT_INO 1 53#define SDCARDFS_ROOT_INO 1
53 54
54/* useful for tracking code reachability */ 55/* useful for tracking code reachability */
55#define UDBG printk(KERN_DEFAULT "DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__) 56#define UDBG pr_default("DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__)
56 57
57#define SDCARDFS_DIRENT_SIZE 256 58#define SDCARDFS_DIRENT_SIZE 256
58 59
@@ -65,71 +66,87 @@
65#define AID_SDCARD_PICS 1033 /* external storage photos access */ 66#define AID_SDCARD_PICS 1033 /* external storage photos access */
66#define AID_SDCARD_AV 1034 /* external storage audio/video access */ 67#define AID_SDCARD_AV 1034 /* external storage audio/video access */
67#define AID_SDCARD_ALL 1035 /* access all users external storage */ 68#define AID_SDCARD_ALL 1035 /* access all users external storage */
69#define AID_MEDIA_OBB 1059 /* obb files */
70
71#define AID_SDCARD_IMAGE 1057
68 72
69#define AID_PACKAGE_INFO 1027 73#define AID_PACKAGE_INFO 1027
70 74
71#define fix_derived_permission(x) \ 75
76/*
77 * Permissions are handled by our permission function.
78 * We don't want anyone who happens to look at our inode value to prematurely
79 * block access, so store more permissive values. These are probably never
80 * used.
81 */
82#define fixup_tmp_permissions(x) \
72 do { \ 83 do { \
73 (x)->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(x)->d_uid); \ 84 (x)->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(x)->d_uid); \
74 (x)->i_gid = make_kgid(&init_user_ns, get_gid(SDCARDFS_I(x))); \ 85 (x)->i_gid = make_kgid(&init_user_ns, AID_SDCARD_RW); \
75 (x)->i_mode = ((x)->i_mode & S_IFMT) | get_mode(SDCARDFS_I(x));\ 86 (x)->i_mode = ((x)->i_mode & S_IFMT) | 0775;\
76 } while (0) 87 } while (0)
77 88
78
79/* OVERRIDE_CRED() and REVERT_CRED() 89/* OVERRIDE_CRED() and REVERT_CRED()
80 * OVERRID_CRED() 90 * OVERRIDE_CRED()
81 * backup original task->cred 91 * backup original task->cred
82 * and modifies task->cred->fsuid/fsgid to specified value. 92 * and modifies task->cred->fsuid/fsgid to specified value.
83 * REVERT_CRED() 93 * REVERT_CRED()
84 * restore original task->cred->fsuid/fsgid. 94 * restore original task->cred->fsuid/fsgid.
85 * These two macro should be used in pair, and OVERRIDE_CRED() should be 95 * These two macro should be used in pair, and OVERRIDE_CRED() should be
86 * placed at the beginning of a function, right after variable declaration. 96 * placed at the beginning of a function, right after variable declaration.
87 */ 97 */
88#define OVERRIDE_CRED(sdcardfs_sbi, saved_cred) \ 98#define OVERRIDE_CRED(sdcardfs_sbi, saved_cred, info) \
89 saved_cred = override_fsids(sdcardfs_sbi); \ 99 do { \
90 if (!saved_cred) { return -ENOMEM; } 100 saved_cred = override_fsids(sdcardfs_sbi, info); \
101 if (!saved_cred) \
102 return -ENOMEM; \
103 } while (0)
91 104
92#define OVERRIDE_CRED_PTR(sdcardfs_sbi, saved_cred) \ 105#define OVERRIDE_CRED_PTR(sdcardfs_sbi, saved_cred, info) \
93 saved_cred = override_fsids(sdcardfs_sbi); \ 106 do { \
94 if (!saved_cred) { return ERR_PTR(-ENOMEM); } 107 saved_cred = override_fsids(sdcardfs_sbi, info); \
108 if (!saved_cred) \
109 return ERR_PTR(-ENOMEM); \
110 } while (0)
95 111
96#define REVERT_CRED(saved_cred) revert_fsids(saved_cred) 112#define REVERT_CRED(saved_cred) revert_fsids(saved_cred)
97 113
98#define DEBUG_CRED() \
99 printk("KAKJAGI: %s:%d fsuid %d fsgid %d\n", \
100 __FUNCTION__, __LINE__, \
101 (int)current->cred->fsuid, \
102 (int)current->cred->fsgid);
103
104/* Android 5.0 support */ 114/* Android 5.0 support */
105 115
106/* Permission mode for a specific node. Controls how file permissions 116/* Permission mode for a specific node. Controls how file permissions
107 * are derived for children nodes. */ 117 * are derived for children nodes.
118 */
108typedef enum { 119typedef enum {
109 /* Nothing special; this node should just inherit from its parent. */ 120 /* Nothing special; this node should just inherit from its parent. */
110 PERM_INHERIT, 121 PERM_INHERIT,
111 /* This node is one level above a normal root; used for legacy layouts 122 /* This node is one level above a normal root; used for legacy layouts
112 * which use the first level to represent user_id. */ 123 * which use the first level to represent user_id.
113 PERM_PRE_ROOT, 124 */
114 /* This node is "/" */ 125 PERM_PRE_ROOT,
115 PERM_ROOT, 126 /* This node is "/" */
116 /* This node is "/Android" */ 127 PERM_ROOT,
117 PERM_ANDROID, 128 /* This node is "/Android" */
118 /* This node is "/Android/data" */ 129 PERM_ANDROID,
119 PERM_ANDROID_DATA, 130 /* This node is "/Android/data" */
120 /* This node is "/Android/obb" */ 131 PERM_ANDROID_DATA,
121 PERM_ANDROID_OBB, 132 /* This node is "/Android/obb" */
122 /* This node is "/Android/media" */ 133 PERM_ANDROID_OBB,
123 PERM_ANDROID_MEDIA, 134 /* This node is "/Android/media" */
135 PERM_ANDROID_MEDIA,
136 /* This node is "/Android/[data|media|obb]/[package]" */
137 PERM_ANDROID_PACKAGE,
138 /* This node is "/Android/[data|media|obb]/[package]/cache" */
139 PERM_ANDROID_PACKAGE_CACHE,
124} perm_t; 140} perm_t;
125 141
126struct sdcardfs_sb_info; 142struct sdcardfs_sb_info;
127struct sdcardfs_mount_options; 143struct sdcardfs_mount_options;
144struct sdcardfs_inode_info;
128 145
129/* Do not directly use this function. Use OVERRIDE_CRED() instead. */ 146/* Do not directly use this function. Use OVERRIDE_CRED() instead. */
130const struct cred * override_fsids(struct sdcardfs_sb_info* sbi); 147const struct cred *override_fsids(struct sdcardfs_sb_info *sbi, struct sdcardfs_inode_info *info);
131/* Do not directly use this function, use REVERT_CRED() instead. */ 148/* Do not directly use this function, use REVERT_CRED() instead. */
132void revert_fsids(const struct cred * old_cred); 149void revert_fsids(const struct cred *old_cred);
133 150
134/* operations vectors defined in specific files */ 151/* operations vectors defined in specific files */
135extern const struct file_operations sdcardfs_main_fops; 152extern const struct file_operations sdcardfs_main_fops;
@@ -169,6 +186,10 @@ struct sdcardfs_inode_info {
169 userid_t userid; 186 userid_t userid;
170 uid_t d_uid; 187 uid_t d_uid;
171 bool under_android; 188 bool under_android;
189 bool under_cache;
190 bool under_obb;
191 /* top folder for ownership */
192 struct inode *top;
172 193
173 struct inode vfs_inode; 194 struct inode vfs_inode;
174}; 195};
@@ -185,18 +206,25 @@ struct sdcardfs_mount_options {
185 uid_t fs_low_uid; 206 uid_t fs_low_uid;
186 gid_t fs_low_gid; 207 gid_t fs_low_gid;
187 userid_t fs_user_id; 208 userid_t fs_user_id;
188 gid_t gid;
189 mode_t mask;
190 bool multiuser; 209 bool multiuser;
191 unsigned int reserved_mb; 210 unsigned int reserved_mb;
192}; 211};
193 212
213struct sdcardfs_vfsmount_options {
214 gid_t gid;
215 mode_t mask;
216};
217
218extern int parse_options_remount(struct super_block *sb, char *options, int silent,
219 struct sdcardfs_vfsmount_options *vfsopts);
220
194/* sdcardfs super-block data in memory */ 221/* sdcardfs super-block data in memory */
195struct sdcardfs_sb_info { 222struct sdcardfs_sb_info {
196 struct super_block *sb; 223 struct super_block *sb;
197 struct super_block *lower_sb; 224 struct super_block *lower_sb;
198 /* derived perm policy : some of options have been added 225 /* derived perm policy : some of options have been added
199 * to sdcardfs_mount_options (Android 4.4 support) */ 226 * to sdcardfs_mount_options (Android 4.4 support)
227 */
200 struct sdcardfs_mount_options options; 228 struct sdcardfs_mount_options options;
201 spinlock_t lock; /* protects obbpath */ 229 spinlock_t lock; /* protects obbpath */
202 char *obbpath_s; 230 char *obbpath_s;
@@ -307,7 +335,7 @@ static inline void sdcardfs_put_reset_##pname(const struct dentry *dent) \
307{ \ 335{ \
308 struct path pname; \ 336 struct path pname; \
309 spin_lock(&SDCARDFS_D(dent)->lock); \ 337 spin_lock(&SDCARDFS_D(dent)->lock); \
310 if(SDCARDFS_D(dent)->pname.dentry) { \ 338 if (SDCARDFS_D(dent)->pname.dentry) { \
311 pathcpy(&pname, &SDCARDFS_D(dent)->pname); \ 339 pathcpy(&pname, &SDCARDFS_D(dent)->pname); \
312 SDCARDFS_D(dent)->pname.dentry = NULL; \ 340 SDCARDFS_D(dent)->pname.dentry = NULL; \
313 SDCARDFS_D(dent)->pname.mnt = NULL; \ 341 SDCARDFS_D(dent)->pname.mnt = NULL; \
@@ -321,38 +349,78 @@ static inline void sdcardfs_put_reset_##pname(const struct dentry *dent) \
321SDCARDFS_DENT_FUNC(lower_path) 349SDCARDFS_DENT_FUNC(lower_path)
322SDCARDFS_DENT_FUNC(orig_path) 350SDCARDFS_DENT_FUNC(orig_path)
323 351
324static inline int get_gid(struct sdcardfs_inode_info *info) { 352static inline bool sbinfo_has_sdcard_magic(struct sdcardfs_sb_info *sbinfo)
325 struct sdcardfs_sb_info *sb_info = SDCARDFS_SB(info->vfs_inode.i_sb); 353{
326 if (sb_info->options.gid == AID_SDCARD_RW) { 354 return sbinfo && sbinfo->sb && sbinfo->sb->s_magic == SDCARDFS_SUPER_MAGIC;
355}
356
357/* grab a refererence if we aren't linking to ourself */
358static inline void set_top(struct sdcardfs_inode_info *info, struct inode *top)
359{
360 struct inode *old_top = NULL;
361
362 BUG_ON(IS_ERR_OR_NULL(top));
363 if (info->top && info->top != &info->vfs_inode)
364 old_top = info->top;
365 if (top != &info->vfs_inode)
366 igrab(top);
367 info->top = top;
368 iput(old_top);
369}
370
371static inline struct inode *grab_top(struct sdcardfs_inode_info *info)
372{
373 struct inode *top = info->top;
374
375 if (top)
376 return igrab(top);
377 else
378 return NULL;
379}
380
381static inline void release_top(struct sdcardfs_inode_info *info)
382{
383 iput(info->top);
384}
385
386static inline int get_gid(struct vfsmount *mnt, struct sdcardfs_inode_info *info)
387{
388 struct sdcardfs_vfsmount_options *opts = mnt->data;
389
390 if (opts->gid == AID_SDCARD_RW)
327 /* As an optimization, certain trusted system components only run 391 /* As an optimization, certain trusted system components only run
328 * as owner but operate across all users. Since we're now handing 392 * as owner but operate across all users. Since we're now handing
329 * out the sdcard_rw GID only to trusted apps, we're okay relaxing 393 * out the sdcard_rw GID only to trusted apps, we're okay relaxing
330 * the user boundary enforcement for the default view. The UIDs 394 * the user boundary enforcement for the default view. The UIDs
331 * assigned to app directories are still multiuser aware. */ 395 * assigned to app directories are still multiuser aware.
396 */
332 return AID_SDCARD_RW; 397 return AID_SDCARD_RW;
333 } else { 398 else
334 return multiuser_get_uid(info->userid, sb_info->options.gid); 399 return multiuser_get_uid(info->userid, opts->gid);
335 }
336} 400}
337static inline int get_mode(struct sdcardfs_inode_info *info) { 401
402static inline int get_mode(struct vfsmount *mnt, struct sdcardfs_inode_info *info)
403{
338 int owner_mode; 404 int owner_mode;
339 int filtered_mode; 405 int filtered_mode;
340 struct sdcardfs_sb_info *sb_info = SDCARDFS_SB(info->vfs_inode.i_sb); 406 struct sdcardfs_vfsmount_options *opts = mnt->data;
341 int visible_mode = 0775 & ~sb_info->options.mask; 407 int visible_mode = 0775 & ~opts->mask;
408
342 409
343 if (info->perm == PERM_PRE_ROOT) { 410 if (info->perm == PERM_PRE_ROOT) {
344 /* Top of multi-user view should always be visible to ensure 411 /* Top of multi-user view should always be visible to ensure
345 * secondary users can traverse inside. */ 412 * secondary users can traverse inside.
413 */
346 visible_mode = 0711; 414 visible_mode = 0711;
347 } else if (info->under_android) { 415 } else if (info->under_android) {
348 /* Block "other" access to Android directories, since only apps 416 /* Block "other" access to Android directories, since only apps
349 * belonging to a specific user should be in there; we still 417 * belonging to a specific user should be in there; we still
350 * leave +x open for the default view. */ 418 * leave +x open for the default view.
351 if (sb_info->options.gid == AID_SDCARD_RW) { 419 */
420 if (opts->gid == AID_SDCARD_RW)
352 visible_mode = visible_mode & ~0006; 421 visible_mode = visible_mode & ~0006;
353 } else { 422 else
354 visible_mode = visible_mode & ~0007; 423 visible_mode = visible_mode & ~0007;
355 }
356 } 424 }
357 owner_mode = info->lower_inode->i_mode & 0700; 425 owner_mode = info->lower_inode->i_mode & 0700;
358 filtered_mode = visible_mode & (owner_mode | (owner_mode >> 3) | (owner_mode >> 6)); 426 filtered_mode = visible_mode & (owner_mode | (owner_mode >> 3) | (owner_mode >> 6));
@@ -377,7 +445,7 @@ static inline void sdcardfs_get_real_lower(const struct dentry *dent,
377 /* in case of a local obb dentry 445 /* in case of a local obb dentry
378 * the orig_path should be returned 446 * the orig_path should be returned
379 */ 447 */
380 if(has_graft_path(dent)) 448 if (has_graft_path(dent))
381 sdcardfs_get_orig_path(dent, real_lower); 449 sdcardfs_get_orig_path(dent, real_lower);
382 else 450 else
383 sdcardfs_get_lower_path(dent, real_lower); 451 sdcardfs_get_lower_path(dent, real_lower);
@@ -386,7 +454,7 @@ static inline void sdcardfs_get_real_lower(const struct dentry *dent,
386static inline void sdcardfs_put_real_lower(const struct dentry *dent, 454static inline void sdcardfs_put_real_lower(const struct dentry *dent,
387 struct path *real_lower) 455 struct path *real_lower)
388{ 456{
389 if(has_graft_path(dent)) 457 if (has_graft_path(dent))
390 sdcardfs_put_orig_path(dent, real_lower); 458 sdcardfs_put_orig_path(dent, real_lower);
391 else 459 else
392 sdcardfs_put_lower_path(dent, real_lower); 460 sdcardfs_put_lower_path(dent, real_lower);
@@ -396,20 +464,31 @@ extern struct mutex sdcardfs_super_list_lock;
396extern struct list_head sdcardfs_super_list; 464extern struct list_head sdcardfs_super_list;
397 465
398/* for packagelist.c */ 466/* for packagelist.c */
399extern appid_t get_appid(void *pkgl_id, const char *app_name); 467extern appid_t get_appid(const char *app_name);
400extern int check_caller_access_to_name(struct inode *parent_node, const char* name); 468extern appid_t get_ext_gid(const char *app_name);
469extern appid_t is_excluded(const char *app_name, userid_t userid);
470extern int check_caller_access_to_name(struct inode *parent_node, const struct qstr *name);
401extern int open_flags_to_access_mode(int open_flags); 471extern int open_flags_to_access_mode(int open_flags);
402extern int packagelist_init(void); 472extern int packagelist_init(void);
403extern void packagelist_exit(void); 473extern void packagelist_exit(void);
404 474
405/* for derived_perm.c */ 475/* for derived_perm.c */
406extern void setup_derived_state(struct inode *inode, perm_t perm, 476#define BY_NAME (1 << 0)
407 userid_t userid, uid_t uid, bool under_android); 477#define BY_USERID (1 << 1)
478struct limit_search {
479 unsigned int flags;
480 struct qstr name;
481 userid_t userid;
482};
483
484extern void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid,
485 uid_t uid, bool under_android, struct inode *top);
408extern void get_derived_permission(struct dentry *parent, struct dentry *dentry); 486extern void get_derived_permission(struct dentry *parent, struct dentry *dentry);
409extern void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, struct dentry *newdentry); 487extern void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, const struct qstr *name);
410extern void get_derive_permissions_recursive(struct dentry *parent); 488extern void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit);
411 489
412extern void update_derived_permission_lock(struct dentry *dentry); 490extern void update_derived_permission_lock(struct dentry *dentry);
491void fixup_lower_ownership(struct dentry *dentry, const char *name);
413extern int need_graft_path(struct dentry *dentry); 492extern int need_graft_path(struct dentry *dentry);
414extern int is_base_obbpath(struct dentry *dentry); 493extern int is_base_obbpath(struct dentry *dentry);
415extern int is_obbpath_invalid(struct dentry *dentry); 494extern int is_obbpath_invalid(struct dentry *dentry);
@@ -419,6 +498,7 @@ extern int setup_obb_dentry(struct dentry *dentry, struct path *lower_path);
419static inline struct dentry *lock_parent(struct dentry *dentry) 498static inline struct dentry *lock_parent(struct dentry *dentry)
420{ 499{
421 struct dentry *dir = dget_parent(dentry); 500 struct dentry *dir = dget_parent(dentry);
501
422 mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT); 502 mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
423 return dir; 503 return dir;
424} 504}
@@ -444,7 +524,7 @@ static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t m
444 goto out_unlock; 524 goto out_unlock;
445 } 525 }
446 526
447 err = vfs_mkdir(d_inode(parent.dentry), dent, mode); 527 err = vfs_mkdir2(parent.mnt, d_inode(parent.dentry), dent, mode);
448 if (err) { 528 if (err) {
449 if (err == -EEXIST) 529 if (err == -EEXIST)
450 err = 0; 530 err = 0;
@@ -455,7 +535,7 @@ static inline int prepare_dir(const char *path_s, uid_t uid, gid_t gid, mode_t m
455 attrs.ia_gid = make_kgid(&init_user_ns, gid); 535 attrs.ia_gid = make_kgid(&init_user_ns, gid);
456 attrs.ia_valid = ATTR_UID | ATTR_GID; 536 attrs.ia_valid = ATTR_UID | ATTR_GID;
457 mutex_lock(&d_inode(dent)->i_mutex); 537 mutex_lock(&d_inode(dent)->i_mutex);
458 notify_change(dent, &attrs, NULL); 538 notify_change2(parent.mnt, dent, &attrs, NULL);
459 mutex_unlock(&d_inode(dent)->i_mutex); 539 mutex_unlock(&d_inode(dent)->i_mutex);
460 540
461out_dput: 541out_dput:
@@ -513,12 +593,16 @@ static inline int check_min_free_space(struct dentry *dentry, size_t size, int d
513 return 1; 593 return 1;
514} 594}
515 595
516/* Copies attrs and maintains sdcardfs managed attrs */ 596/*
597 * Copies attrs and maintains sdcardfs managed attrs
598 * Since our permission check handles all special permissions, set those to be open
599 */
517static inline void sdcardfs_copy_and_fix_attrs(struct inode *dest, const struct inode *src) 600static inline void sdcardfs_copy_and_fix_attrs(struct inode *dest, const struct inode *src)
518{ 601{
519 dest->i_mode = (src->i_mode & S_IFMT) | get_mode(SDCARDFS_I(dest)); 602 dest->i_mode = (src->i_mode & S_IFMT) | S_IRWXU | S_IRWXG |
603 S_IROTH | S_IXOTH; /* 0775 */
520 dest->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(dest)->d_uid); 604 dest->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(dest)->d_uid);
521 dest->i_gid = make_kgid(&init_user_ns, get_gid(SDCARDFS_I(dest))); 605 dest->i_gid = make_kgid(&init_user_ns, AID_SDCARD_RW);
522 dest->i_rdev = src->i_rdev; 606 dest->i_rdev = src->i_rdev;
523 dest->i_atime = src->i_atime; 607 dest->i_atime = src->i_atime;
524 dest->i_mtime = src->i_mtime; 608 dest->i_mtime = src->i_mtime;
@@ -527,4 +611,22 @@ static inline void sdcardfs_copy_and_fix_attrs(struct inode *dest, const struct
527 dest->i_flags = src->i_flags; 611 dest->i_flags = src->i_flags;
528 set_nlink(dest, src->i_nlink); 612 set_nlink(dest, src->i_nlink);
529} 613}
614
615static inline bool str_case_eq(const char *s1, const char *s2)
616{
617 return !strcasecmp(s1, s2);
618}
619
620static inline bool str_n_case_eq(const char *s1, const char *s2, size_t len)
621{
622 return !strncasecmp(s1, s2, len);
623}
624
625static inline bool qstr_case_eq(const struct qstr *q1, const struct qstr *q2)
626{
627 return q1->len == q2->len && str_case_eq(q1->name, q2->name);
628}
629
630#define QSTR_LITERAL(string) QSTR_INIT(string, sizeof(string)-1)
631
530#endif /* not _SDCARDFS_H_ */ 632#endif /* not _SDCARDFS_H_ */
diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c
index 1d6490128c99..a3393e959c63 100644
--- a/fs/sdcardfs/super.c
+++ b/fs/sdcardfs/super.c
@@ -36,7 +36,7 @@ static void sdcardfs_put_super(struct super_block *sb)
36 if (!spd) 36 if (!spd)
37 return; 37 return;
38 38
39 if(spd->obbpath_s) { 39 if (spd->obbpath_s) {
40 kfree(spd->obbpath_s); 40 kfree(spd->obbpath_s);
41 path_put(&spd->obbpath); 41 path_put(&spd->obbpath);
42 } 42 }
@@ -64,7 +64,7 @@ static int sdcardfs_statfs(struct dentry *dentry, struct kstatfs *buf)
64 if (sbi->options.reserved_mb) { 64 if (sbi->options.reserved_mb) {
65 /* Invalid statfs informations. */ 65 /* Invalid statfs informations. */
66 if (buf->f_bsize == 0) { 66 if (buf->f_bsize == 0) {
67 printk(KERN_ERR "Returned block size is zero.\n"); 67 pr_err("Returned block size is zero.\n");
68 return -EINVAL; 68 return -EINVAL;
69 } 69 }
70 70
@@ -100,8 +100,7 @@ static int sdcardfs_remount_fs(struct super_block *sb, int *flags, char *options
100 * SILENT, but anything else left over is an error. 100 * SILENT, but anything else left over is an error.
101 */ 101 */
102 if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT)) != 0) { 102 if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT)) != 0) {
103 printk(KERN_ERR 103 pr_err("sdcardfs: remount flags 0x%x unsupported\n", *flags);
104 "sdcardfs: remount flags 0x%x unsupported\n", *flags);
105 err = -EINVAL; 104 err = -EINVAL;
106 } 105 }
107 106
@@ -109,6 +108,54 @@ static int sdcardfs_remount_fs(struct super_block *sb, int *flags, char *options
109} 108}
110 109
111/* 110/*
111 * @mnt: mount point we are remounting
112 * @sb: superblock we are remounting
113 * @flags: numeric mount options
114 * @options: mount options string
115 */
116static int sdcardfs_remount_fs2(struct vfsmount *mnt, struct super_block *sb,
117 int *flags, char *options)
118{
119 int err = 0;
120
121 /*
122 * The VFS will take care of "ro" and "rw" flags among others. We
123 * can safely accept a few flags (RDONLY, MANDLOCK), and honor
124 * SILENT, but anything else left over is an error.
125 */
126 if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT | MS_REMOUNT)) != 0) {
127 pr_err("sdcardfs: remount flags 0x%x unsupported\n", *flags);
128 err = -EINVAL;
129 }
130 pr_info("Remount options were %s for vfsmnt %p.\n", options, mnt);
131 err = parse_options_remount(sb, options, *flags & ~MS_SILENT, mnt->data);
132
133
134 return err;
135}
136
137static void *sdcardfs_clone_mnt_data(void *data)
138{
139 struct sdcardfs_vfsmount_options *opt = kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL);
140 struct sdcardfs_vfsmount_options *old = data;
141
142 if (!opt)
143 return NULL;
144 opt->gid = old->gid;
145 opt->mask = old->mask;
146 return opt;
147}
148
149static void sdcardfs_copy_mnt_data(void *data, void *newdata)
150{
151 struct sdcardfs_vfsmount_options *old = data;
152 struct sdcardfs_vfsmount_options *new = newdata;
153
154 old->gid = new->gid;
155 old->mask = new->mask;
156}
157
158/*
112 * Called by iput() when the inode reference count reached zero 159 * Called by iput() when the inode reference count reached zero
113 * and the inode is not hashed anywhere. Used to clear anything 160 * and the inode is not hashed anywhere. Used to clear anything
114 * that needs to be, before the inode is completely destroyed and put 161 * that needs to be, before the inode is completely destroyed and put
@@ -126,6 +173,7 @@ static void sdcardfs_evict_inode(struct inode *inode)
126 */ 173 */
127 lower_inode = sdcardfs_lower_inode(inode); 174 lower_inode = sdcardfs_lower_inode(inode);
128 sdcardfs_set_lower_inode(inode, NULL); 175 sdcardfs_set_lower_inode(inode, NULL);
176 set_top(SDCARDFS_I(inode), inode);
129 iput(lower_inode); 177 iput(lower_inode);
130} 178}
131 179
@@ -173,8 +221,7 @@ int sdcardfs_init_inode_cache(void)
173/* sdcardfs inode cache destructor */ 221/* sdcardfs inode cache destructor */
174void sdcardfs_destroy_inode_cache(void) 222void sdcardfs_destroy_inode_cache(void)
175{ 223{
176 if (sdcardfs_inode_cachep) 224 kmem_cache_destroy(sdcardfs_inode_cachep);
177 kmem_cache_destroy(sdcardfs_inode_cachep);
178} 225}
179 226
180/* 227/*
@@ -190,19 +237,25 @@ static void sdcardfs_umount_begin(struct super_block *sb)
190 lower_sb->s_op->umount_begin(lower_sb); 237 lower_sb->s_op->umount_begin(lower_sb);
191} 238}
192 239
193static int sdcardfs_show_options(struct seq_file *m, struct dentry *root) 240static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m,
241 struct dentry *root)
194{ 242{
195 struct sdcardfs_sb_info *sbi = SDCARDFS_SB(root->d_sb); 243 struct sdcardfs_sb_info *sbi = SDCARDFS_SB(root->d_sb);
196 struct sdcardfs_mount_options *opts = &sbi->options; 244 struct sdcardfs_mount_options *opts = &sbi->options;
245 struct sdcardfs_vfsmount_options *vfsopts = mnt->data;
197 246
198 if (opts->fs_low_uid != 0) 247 if (opts->fs_low_uid != 0)
199 seq_printf(m, ",uid=%u", opts->fs_low_uid); 248 seq_printf(m, ",fsuid=%u", opts->fs_low_uid);
200 if (opts->fs_low_gid != 0) 249 if (opts->fs_low_gid != 0)
201 seq_printf(m, ",gid=%u", opts->fs_low_gid); 250 seq_printf(m, ",fsgid=%u", opts->fs_low_gid);
202 251 if (vfsopts->gid != 0)
252 seq_printf(m, ",gid=%u", vfsopts->gid);
203 if (opts->multiuser) 253 if (opts->multiuser)
204 seq_printf(m, ",multiuser"); 254 seq_puts(m, ",multiuser");
205 255 if (vfsopts->mask)
256 seq_printf(m, ",mask=%u", vfsopts->mask);
257 if (opts->fs_user_id)
258 seq_printf(m, ",userid=%u", opts->fs_user_id);
206 if (opts->reserved_mb != 0) 259 if (opts->reserved_mb != 0)
207 seq_printf(m, ",reserved=%uMB", opts->reserved_mb); 260 seq_printf(m, ",reserved=%uMB", opts->reserved_mb);
208 261
@@ -213,9 +266,12 @@ const struct super_operations sdcardfs_sops = {
213 .put_super = sdcardfs_put_super, 266 .put_super = sdcardfs_put_super,
214 .statfs = sdcardfs_statfs, 267 .statfs = sdcardfs_statfs,
215 .remount_fs = sdcardfs_remount_fs, 268 .remount_fs = sdcardfs_remount_fs,
269 .remount_fs2 = sdcardfs_remount_fs2,
270 .clone_mnt_data = sdcardfs_clone_mnt_data,
271 .copy_mnt_data = sdcardfs_copy_mnt_data,
216 .evict_inode = sdcardfs_evict_inode, 272 .evict_inode = sdcardfs_evict_inode,
217 .umount_begin = sdcardfs_umount_begin, 273 .umount_begin = sdcardfs_umount_begin,
218 .show_options = sdcardfs_show_options, 274 .show_options2 = sdcardfs_show_options,
219 .alloc_inode = sdcardfs_alloc_inode, 275 .alloc_inode = sdcardfs_alloc_inode,
220 .destroy_inode = sdcardfs_destroy_inode, 276 .destroy_inode = sdcardfs_destroy_inode,
221 .drop_inode = generic_delete_inode, 277 .drop_inode = generic_delete_inode,
diff --git a/fs/seq_file.c b/fs/seq_file.c
index d672e2fec459..6dc4296eed62 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -72,9 +72,10 @@ int seq_open(struct file *file, const struct seq_operations *op)
72 72
73 mutex_init(&p->lock); 73 mutex_init(&p->lock);
74 p->op = op; 74 p->op = op;
75#ifdef CONFIG_USER_NS 75
76 p->user_ns = file->f_cred->user_ns; 76 // No refcounting: the lifetime of 'p' is constrained
77#endif 77 // to the lifetime of the file.
78 p->file = file;
78 79
79 /* 80 /*
80 * Wrappers around seq_open(e.g. swaps_open) need to be 81 * Wrappers around seq_open(e.g. swaps_open) need to be
diff --git a/fs/splice.c b/fs/splice.c
index 0f77e9682857..8398974e1538 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -211,6 +211,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
211 buf->len = spd->partial[page_nr].len; 211 buf->len = spd->partial[page_nr].len;
212 buf->private = spd->partial[page_nr].private; 212 buf->private = spd->partial[page_nr].private;
213 buf->ops = spd->ops; 213 buf->ops = spd->ops;
214 buf->flags = 0;
214 if (spd->flags & SPLICE_F_GIFT) 215 if (spd->flags & SPLICE_F_GIFT)
215 buf->flags |= PIPE_BUF_FLAG_GIFT; 216 buf->flags |= PIPE_BUF_FLAG_GIFT;
216 217
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index ffb093e72b6c..6dd158a216f4 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -26,34 +26,6 @@ config SQUASHFS
26 If unsure, say N. 26 If unsure, say N.
27 27
28choice 28choice
29 prompt "File decompression options"
30 depends on SQUASHFS
31 help
32 Squashfs now supports two options for decompressing file
33 data. Traditionally Squashfs has decompressed into an
34 intermediate buffer and then memcopied it into the page cache.
35 Squashfs now supports the ability to decompress directly into
36 the page cache.
37
38 If unsure, select "Decompress file data into an intermediate buffer"
39
40config SQUASHFS_FILE_CACHE
41 bool "Decompress file data into an intermediate buffer"
42 help
43 Decompress file data into an intermediate buffer and then
44 memcopy it into the page cache.
45
46config SQUASHFS_FILE_DIRECT
47 bool "Decompress files directly into the page cache"
48 help
49 Directly decompress file data into the page cache.
50 Doing so can significantly improve performance because
51 it eliminates a memcpy and it also removes the lock contention
52 on the single buffer.
53
54endchoice
55
56choice
57 prompt "Decompressor parallelisation options" 29 prompt "Decompressor parallelisation options"
58 depends on SQUASHFS 30 depends on SQUASHFS
59 help 31 help
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index 246a6f329d89..fe51f1507ed1 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -5,8 +5,7 @@
5obj-$(CONFIG_SQUASHFS) += squashfs.o 5obj-$(CONFIG_SQUASHFS) += squashfs.o
6squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o 6squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
7squashfs-y += namei.o super.o symlink.o decompressor.o 7squashfs-y += namei.o super.o symlink.o decompressor.o
8squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o 8squashfs-y += file_direct.o page_actor.o
9squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o
10squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o 9squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
11squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o 10squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
12squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o 11squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 0cea9b9236d0..2eb66decc5ab 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -28,9 +28,12 @@
28 28
29#include <linux/fs.h> 29#include <linux/fs.h>
30#include <linux/vfs.h> 30#include <linux/vfs.h>
31#include <linux/bio.h>
31#include <linux/slab.h> 32#include <linux/slab.h>
32#include <linux/string.h> 33#include <linux/string.h>
34#include <linux/pagemap.h>
33#include <linux/buffer_head.h> 35#include <linux/buffer_head.h>
36#include <linux/workqueue.h>
34 37
35#include "squashfs_fs.h" 38#include "squashfs_fs.h"
36#include "squashfs_fs_sb.h" 39#include "squashfs_fs_sb.h"
@@ -38,177 +41,434 @@
38#include "decompressor.h" 41#include "decompressor.h"
39#include "page_actor.h" 42#include "page_actor.h"
40 43
41/* 44static struct workqueue_struct *squashfs_read_wq;
42 * Read the metadata block length, this is stored in the first two 45
43 * bytes of the metadata block. 46struct squashfs_read_request {
44 */ 47 struct super_block *sb;
45static struct buffer_head *get_block_length(struct super_block *sb, 48 u64 index;
46 u64 *cur_index, int *offset, int *length) 49 int length;
50 int compressed;
51 int offset;
52 u64 read_end;
53 struct squashfs_page_actor *output;
54 enum {
55 SQUASHFS_COPY,
56 SQUASHFS_DECOMPRESS,
57 SQUASHFS_METADATA,
58 } data_processing;
59 bool synchronous;
60
61 /*
62 * If the read is synchronous, it is possible to retrieve information
63 * about the request by setting these pointers.
64 */
65 int *res;
66 int *bytes_read;
67 int *bytes_uncompressed;
68
69 int nr_buffers;
70 struct buffer_head **bh;
71 struct work_struct offload;
72};
73
74struct squashfs_bio_request {
75 struct buffer_head **bh;
76 int nr_buffers;
77};
78
79static int squashfs_bio_submit(struct squashfs_read_request *req);
80
81int squashfs_init_read_wq(void)
47{ 82{
48 struct squashfs_sb_info *msblk = sb->s_fs_info; 83 squashfs_read_wq = create_workqueue("SquashFS read wq");
49 struct buffer_head *bh; 84 return !!squashfs_read_wq;
85}
86
87void squashfs_destroy_read_wq(void)
88{
89 flush_workqueue(squashfs_read_wq);
90 destroy_workqueue(squashfs_read_wq);
91}
92
93static void free_read_request(struct squashfs_read_request *req, int error)
94{
95 if (!req->synchronous)
96 squashfs_page_actor_free(req->output, error);
97 if (req->res)
98 *(req->res) = error;
99 kfree(req->bh);
100 kfree(req);
101}
102
103static void squashfs_process_blocks(struct squashfs_read_request *req)
104{
105 int error = 0;
106 int bytes, i, length;
107 struct squashfs_sb_info *msblk = req->sb->s_fs_info;
108 struct squashfs_page_actor *actor = req->output;
109 struct buffer_head **bh = req->bh;
110 int nr_buffers = req->nr_buffers;
111
112 for (i = 0; i < nr_buffers; ++i) {
113 if (!bh[i])
114 continue;
115 wait_on_buffer(bh[i]);
116 if (!buffer_uptodate(bh[i]))
117 error = -EIO;
118 }
119 if (error)
120 goto cleanup;
121
122 if (req->data_processing == SQUASHFS_METADATA) {
123 /* Extract the length of the metadata block */
124 if (req->offset != msblk->devblksize - 1)
125 length = *((u16 *)(bh[0]->b_data + req->offset));
126 else {
127 length = bh[0]->b_data[req->offset];
128 length |= bh[1]->b_data[0] << 8;
129 }
130 req->compressed = SQUASHFS_COMPRESSED(length);
131 req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS
132 : SQUASHFS_COPY;
133 length = SQUASHFS_COMPRESSED_SIZE(length);
134 if (req->index + length + 2 > req->read_end) {
135 for (i = 0; i < nr_buffers; ++i)
136 put_bh(bh[i]);
137 kfree(bh);
138 req->length = length;
139 req->index += 2;
140 squashfs_bio_submit(req);
141 return;
142 }
143 req->length = length;
144 req->offset = (req->offset + 2) % PAGE_SIZE;
145 if (req->offset < 2) {
146 put_bh(bh[0]);
147 ++bh;
148 --nr_buffers;
149 }
150 }
151 if (req->bytes_read)
152 *(req->bytes_read) = req->length;
50 153
51 bh = sb_bread(sb, *cur_index); 154 if (req->data_processing == SQUASHFS_COPY) {
52 if (bh == NULL) 155 squashfs_bh_to_actor(bh, nr_buffers, req->output, req->offset,
53 return NULL; 156 req->length, msblk->devblksize);
54 157 } else if (req->data_processing == SQUASHFS_DECOMPRESS) {
55 if (msblk->devblksize - *offset == 1) { 158 req->length = squashfs_decompress(msblk, bh, nr_buffers,
56 *length = (unsigned char) bh->b_data[*offset]; 159 req->offset, req->length, actor);
57 put_bh(bh); 160 if (req->length < 0) {
58 bh = sb_bread(sb, ++(*cur_index)); 161 error = -EIO;
59 if (bh == NULL) 162 goto cleanup;
60 return NULL;
61 *length |= (unsigned char) bh->b_data[0] << 8;
62 *offset = 1;
63 } else {
64 *length = (unsigned char) bh->b_data[*offset] |
65 (unsigned char) bh->b_data[*offset + 1] << 8;
66 *offset += 2;
67
68 if (*offset == msblk->devblksize) {
69 put_bh(bh);
70 bh = sb_bread(sb, ++(*cur_index));
71 if (bh == NULL)
72 return NULL;
73 *offset = 0;
74 } 163 }
75 } 164 }
76 165
77 return bh; 166 /* Last page may have trailing bytes not filled */
167 bytes = req->length % PAGE_SIZE;
168 if (bytes && actor->page[actor->pages - 1])
169 zero_user_segment(actor->page[actor->pages - 1], bytes,
170 PAGE_SIZE);
171
172cleanup:
173 if (req->bytes_uncompressed)
174 *(req->bytes_uncompressed) = req->length;
175 if (error) {
176 for (i = 0; i < nr_buffers; ++i)
177 if (bh[i])
178 put_bh(bh[i]);
179 }
180 free_read_request(req, error);
78} 181}
79 182
183static void read_wq_handler(struct work_struct *work)
184{
185 squashfs_process_blocks(container_of(work,
186 struct squashfs_read_request, offload));
187}
80 188
81/* 189static void squashfs_bio_end_io(struct bio *bio)
82 * Read and decompress a metadata block or datablock. Length is non-zero
83 * if a datablock is being read (the size is stored elsewhere in the
84 * filesystem), otherwise the length is obtained from the first two bytes of
85 * the metadata block. A bit in the length field indicates if the block
86 * is stored uncompressed in the filesystem (usually because compression
87 * generated a larger block - this does occasionally happen with compression
88 * algorithms).
89 */
90int squashfs_read_data(struct super_block *sb, u64 index, int length,
91 u64 *next_index, struct squashfs_page_actor *output)
92{ 190{
93 struct squashfs_sb_info *msblk = sb->s_fs_info; 191 int i;
94 struct buffer_head **bh; 192 int error = bio->bi_error;
95 int offset = index & ((1 << msblk->devblksize_log2) - 1); 193 struct squashfs_bio_request *bio_req = bio->bi_private;
96 u64 cur_index = index >> msblk->devblksize_log2; 194
97 int bytes, compressed, b = 0, k = 0, avail, i; 195 bio_put(bio);
196
197 for (i = 0; i < bio_req->nr_buffers; ++i) {
198 if (!bio_req->bh[i])
199 continue;
200 if (!error)
201 set_buffer_uptodate(bio_req->bh[i]);
202 else
203 clear_buffer_uptodate(bio_req->bh[i]);
204 unlock_buffer(bio_req->bh[i]);
205 }
206 kfree(bio_req);
207}
208
209static int bh_is_optional(struct squashfs_read_request *req, int idx)
210{
211 int start_idx, end_idx;
212 struct squashfs_sb_info *msblk = req->sb->s_fs_info;
98 213
99 bh = kcalloc(((output->length + msblk->devblksize - 1) 214 start_idx = (idx * msblk->devblksize - req->offset) / PAGE_CACHE_SIZE;
100 >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); 215 end_idx = ((idx + 1) * msblk->devblksize - req->offset + 1) / PAGE_CACHE_SIZE;
101 if (bh == NULL) 216 if (start_idx >= req->output->pages)
217 return 1;
218 if (start_idx < 0)
219 start_idx = end_idx;
220 if (end_idx >= req->output->pages)
221 end_idx = start_idx;
222 return !req->output->page[start_idx] && !req->output->page[end_idx];
223}
224
225static int actor_getblks(struct squashfs_read_request *req, u64 block)
226{
227 int i;
228
229 req->bh = kmalloc_array(req->nr_buffers, sizeof(*(req->bh)), GFP_NOIO);
230 if (!req->bh)
102 return -ENOMEM; 231 return -ENOMEM;
103 232
104 if (length) { 233 for (i = 0; i < req->nr_buffers; ++i) {
105 /* 234 /*
106 * Datablock. 235 * When dealing with an uncompressed block, the actor may
236 * contains NULL pages. There's no need to read the buffers
237 * associated with these pages.
107 */ 238 */
108 bytes = -offset; 239 if (!req->compressed && bh_is_optional(req, i)) {
109 compressed = SQUASHFS_COMPRESSED_BLOCK(length); 240 req->bh[i] = NULL;
110 length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length); 241 continue;
111 if (next_index)
112 *next_index = index + length;
113
114 TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
115 index, compressed ? "" : "un", length, output->length);
116
117 if (length < 0 || length > output->length ||
118 (index + length) > msblk->bytes_used)
119 goto read_failure;
120
121 for (b = 0; bytes < length; b++, cur_index++) {
122 bh[b] = sb_getblk(sb, cur_index);
123 if (bh[b] == NULL)
124 goto block_release;
125 bytes += msblk->devblksize;
126 } 242 }
127 ll_rw_block(READ, b, bh); 243 req->bh[i] = sb_getblk(req->sb, block + i);
128 } else { 244 if (!req->bh[i]) {
129 /* 245 while (--i) {
130 * Metadata block. 246 if (req->bh[i])
131 */ 247 put_bh(req->bh[i]);
132 if ((index + 2) > msblk->bytes_used) 248 }
133 goto read_failure; 249 return -1;
250 }
251 }
252 return 0;
253}
134 254
135 bh[0] = get_block_length(sb, &cur_index, &offset, &length); 255static int squashfs_bio_submit(struct squashfs_read_request *req)
136 if (bh[0] == NULL) 256{
137 goto read_failure; 257 struct bio *bio = NULL;
138 b = 1; 258 struct buffer_head *bh;
259 struct squashfs_bio_request *bio_req = NULL;
260 int b = 0, prev_block = 0;
261 struct squashfs_sb_info *msblk = req->sb->s_fs_info;
139 262
140 bytes = msblk->devblksize - offset; 263 u64 read_start = round_down(req->index, msblk->devblksize);
141 compressed = SQUASHFS_COMPRESSED(length); 264 u64 read_end = round_up(req->index + req->length, msblk->devblksize);
142 length = SQUASHFS_COMPRESSED_SIZE(length); 265 sector_t block = read_start >> msblk->devblksize_log2;
143 if (next_index) 266 sector_t block_end = read_end >> msblk->devblksize_log2;
144 *next_index = index + length + 2; 267 int offset = read_start - round_down(req->index, PAGE_SIZE);
268 int nr_buffers = block_end - block;
269 int blksz = msblk->devblksize;
270 int bio_max_pages = nr_buffers > BIO_MAX_PAGES ? BIO_MAX_PAGES
271 : nr_buffers;
145 272
146 TRACE("Block @ 0x%llx, %scompressed size %d\n", index, 273 /* Setup the request */
147 compressed ? "" : "un", length); 274 req->read_end = read_end;
275 req->offset = req->index - read_start;
276 req->nr_buffers = nr_buffers;
277 if (actor_getblks(req, block) < 0)
278 goto getblk_failed;
148 279
149 if (length < 0 || length > output->length || 280 /* Create and submit the BIOs */
150 (index + length) > msblk->bytes_used) 281 for (b = 0; b < nr_buffers; ++b, offset += blksz) {
151 goto block_release; 282 bh = req->bh[b];
283 if (!bh || !trylock_buffer(bh))
284 continue;
285 if (buffer_uptodate(bh)) {
286 unlock_buffer(bh);
287 continue;
288 }
289 offset %= PAGE_SIZE;
152 290
153 for (; bytes < length; b++) { 291 /* Append the buffer to the current BIO if it is contiguous */
154 bh[b] = sb_getblk(sb, ++cur_index); 292 if (bio && bio_req && prev_block + 1 == b) {
155 if (bh[b] == NULL) 293 if (bio_add_page(bio, bh->b_page, blksz, offset)) {
156 goto block_release; 294 bio_req->nr_buffers += 1;
157 bytes += msblk->devblksize; 295 prev_block = b;
296 continue;
297 }
158 } 298 }
159 ll_rw_block(READ, b - 1, bh + 1); 299
300 /* Otherwise, submit the current BIO and create a new one */
301 if (bio)
302 submit_bio(READ, bio);
303 bio_req = kcalloc(1, sizeof(struct squashfs_bio_request),
304 GFP_NOIO);
305 if (!bio_req)
306 goto req_alloc_failed;
307 bio_req->bh = &req->bh[b];
308 bio = bio_alloc(GFP_NOIO, bio_max_pages);
309 if (!bio)
310 goto bio_alloc_failed;
311 bio->bi_bdev = req->sb->s_bdev;
312 bio->bi_iter.bi_sector = (block + b)
313 << (msblk->devblksize_log2 - 9);
314 bio->bi_private = bio_req;
315 bio->bi_end_io = squashfs_bio_end_io;
316
317 bio_add_page(bio, bh->b_page, blksz, offset);
318 bio_req->nr_buffers += 1;
319 prev_block = b;
160 } 320 }
321 if (bio)
322 submit_bio(READ, bio);
161 323
162 for (i = 0; i < b; i++) { 324 if (req->synchronous)
163 wait_on_buffer(bh[i]); 325 squashfs_process_blocks(req);
164 if (!buffer_uptodate(bh[i])) 326 else {
165 goto block_release; 327 INIT_WORK(&req->offload, read_wq_handler);
328 schedule_work(&req->offload);
166 } 329 }
330 return 0;
167 331
168 if (compressed) { 332bio_alloc_failed:
169 length = squashfs_decompress(msblk, bh, b, offset, length, 333 kfree(bio_req);
170 output); 334req_alloc_failed:
171 if (length < 0) 335 unlock_buffer(bh);
172 goto read_failure; 336 while (--nr_buffers >= b)
173 } else { 337 if (req->bh[nr_buffers])
174 /* 338 put_bh(req->bh[nr_buffers]);
175 * Block is uncompressed. 339 while (--b >= 0)
176 */ 340 if (req->bh[b])
177 int in, pg_offset = 0; 341 wait_on_buffer(req->bh[b]);
178 void *data = squashfs_first_page(output); 342getblk_failed:
179 343 free_read_request(req, -ENOMEM);
180 for (bytes = length; k < b; k++) { 344 return -ENOMEM;
181 in = min(bytes, msblk->devblksize - offset); 345}
182 bytes -= in; 346
183 while (in) { 347static int read_metadata_block(struct squashfs_read_request *req,
184 if (pg_offset == PAGE_CACHE_SIZE) { 348 u64 *next_index)
185 data = squashfs_next_page(output); 349{
186 pg_offset = 0; 350 int ret, error, bytes_read = 0, bytes_uncompressed = 0;
187 } 351 struct squashfs_sb_info *msblk = req->sb->s_fs_info;
188 avail = min_t(int, in, PAGE_CACHE_SIZE - 352
189 pg_offset); 353 if (req->index + 2 > msblk->bytes_used) {
190 memcpy(data + pg_offset, bh[k]->b_data + offset, 354 free_read_request(req, -EINVAL);
191 avail); 355 return -EINVAL;
192 in -= avail; 356 }
193 pg_offset += avail; 357 req->length = 2;
194 offset += avail; 358
195 } 359 /* Do not read beyond the end of the device */
196 offset = 0; 360 if (req->index + req->length > msblk->bytes_used)
197 put_bh(bh[k]); 361 req->length = msblk->bytes_used - req->index;
198 } 362 req->data_processing = SQUASHFS_METADATA;
199 squashfs_finish_page(output); 363
364 /*
365 * Reading metadata is always synchronous because we don't know the
366 * length in advance and the function is expected to update
367 * 'next_index' and return the length.
368 */
369 req->synchronous = true;
370 req->res = &error;
371 req->bytes_read = &bytes_read;
372 req->bytes_uncompressed = &bytes_uncompressed;
373
374 TRACE("Metadata block @ 0x%llx, %scompressed size %d, src size %d\n",
375 req->index, req->compressed ? "" : "un", bytes_read,
376 req->output->length);
377
378 ret = squashfs_bio_submit(req);
379 if (ret)
380 return ret;
381 if (error)
382 return error;
383 if (next_index)
384 *next_index += 2 + bytes_read;
385 return bytes_uncompressed;
386}
387
388static int read_data_block(struct squashfs_read_request *req, int length,
389 u64 *next_index, bool synchronous)
390{
391 int ret, error = 0, bytes_uncompressed = 0, bytes_read = 0;
392
393 req->compressed = SQUASHFS_COMPRESSED_BLOCK(length);
394 req->length = length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
395 req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS
396 : SQUASHFS_COPY;
397
398 req->synchronous = synchronous;
399 if (synchronous) {
400 req->res = &error;
401 req->bytes_read = &bytes_read;
402 req->bytes_uncompressed = &bytes_uncompressed;
403 }
404
405 TRACE("Data block @ 0x%llx, %scompressed size %d, src size %d\n",
406 req->index, req->compressed ? "" : "un", req->length,
407 req->output->length);
408
409 ret = squashfs_bio_submit(req);
410 if (ret)
411 return ret;
412 if (synchronous)
413 ret = error ? error : bytes_uncompressed;
414 if (next_index)
415 *next_index += length;
416 return ret;
417}
418
419/*
420 * Read and decompress a metadata block or datablock. Length is non-zero
421 * if a datablock is being read (the size is stored elsewhere in the
422 * filesystem), otherwise the length is obtained from the first two bytes of
423 * the metadata block. A bit in the length field indicates if the block
424 * is stored uncompressed in the filesystem (usually because compression
425 * generated a larger block - this does occasionally happen with compression
426 * algorithms).
427 */
428static int __squashfs_read_data(struct super_block *sb, u64 index, int length,
429 u64 *next_index, struct squashfs_page_actor *output, bool sync)
430{
431 struct squashfs_read_request *req;
432
433 req = kcalloc(1, sizeof(struct squashfs_read_request), GFP_KERNEL);
434 if (!req) {
435 if (!sync)
436 squashfs_page_actor_free(output, -ENOMEM);
437 return -ENOMEM;
438 }
439
440 req->sb = sb;
441 req->index = index;
442 req->output = output;
443
444 if (next_index)
445 *next_index = index;
446
447 if (length)
448 length = read_data_block(req, length, next_index, sync);
449 else
450 length = read_metadata_block(req, next_index);
451
452 if (length < 0) {
453 ERROR("squashfs_read_data failed to read block 0x%llx\n",
454 (unsigned long long)index);
455 return -EIO;
200 } 456 }
201 457
202 kfree(bh);
203 return length; 458 return length;
459}
204 460
205block_release: 461int squashfs_read_data(struct super_block *sb, u64 index, int length,
206 for (; k < b; k++) 462 u64 *next_index, struct squashfs_page_actor *output)
207 put_bh(bh[k]); 463{
464 return __squashfs_read_data(sb, index, length, next_index, output,
465 true);
466}
467
468int squashfs_read_data_async(struct super_block *sb, u64 index, int length,
469 u64 *next_index, struct squashfs_page_actor *output)
470{
208 471
209read_failure: 472 return __squashfs_read_data(sb, index, length, next_index, output,
210 ERROR("squashfs_read_data failed to read block 0x%llx\n", 473 false);
211 (unsigned long long) index);
212 kfree(bh);
213 return -EIO;
214} 474}
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 1cb70a0b2168..6785d086ab38 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -209,17 +209,14 @@ void squashfs_cache_put(struct squashfs_cache_entry *entry)
209 */ 209 */
210void squashfs_cache_delete(struct squashfs_cache *cache) 210void squashfs_cache_delete(struct squashfs_cache *cache)
211{ 211{
212 int i, j; 212 int i;
213 213
214 if (cache == NULL) 214 if (cache == NULL)
215 return; 215 return;
216 216
217 for (i = 0; i < cache->entries; i++) { 217 for (i = 0; i < cache->entries; i++) {
218 if (cache->entry[i].data) { 218 if (cache->entry[i].page)
219 for (j = 0; j < cache->pages; j++) 219 free_page_array(cache->entry[i].page, cache->pages);
220 kfree(cache->entry[i].data[j]);
221 kfree(cache->entry[i].data);
222 }
223 kfree(cache->entry[i].actor); 220 kfree(cache->entry[i].actor);
224 } 221 }
225 222
@@ -236,7 +233,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache)
236struct squashfs_cache *squashfs_cache_init(char *name, int entries, 233struct squashfs_cache *squashfs_cache_init(char *name, int entries,
237 int block_size) 234 int block_size)
238{ 235{
239 int i, j; 236 int i;
240 struct squashfs_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL); 237 struct squashfs_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL);
241 238
242 if (cache == NULL) { 239 if (cache == NULL) {
@@ -268,22 +265,13 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries,
268 init_waitqueue_head(&cache->entry[i].wait_queue); 265 init_waitqueue_head(&cache->entry[i].wait_queue);
269 entry->cache = cache; 266 entry->cache = cache;
270 entry->block = SQUASHFS_INVALID_BLK; 267 entry->block = SQUASHFS_INVALID_BLK;
271 entry->data = kcalloc(cache->pages, sizeof(void *), GFP_KERNEL); 268 entry->page = alloc_page_array(cache->pages, GFP_KERNEL);
272 if (entry->data == NULL) { 269 if (!entry->page) {
273 ERROR("Failed to allocate %s cache entry\n", name); 270 ERROR("Failed to allocate %s cache entry\n", name);
274 goto cleanup; 271 goto cleanup;
275 } 272 }
276 273 entry->actor = squashfs_page_actor_init(entry->page,
277 for (j = 0; j < cache->pages; j++) { 274 cache->pages, 0, NULL);
278 entry->data[j] = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
279 if (entry->data[j] == NULL) {
280 ERROR("Failed to allocate %s buffer\n", name);
281 goto cleanup;
282 }
283 }
284
285 entry->actor = squashfs_page_actor_init(entry->data,
286 cache->pages, 0);
287 if (entry->actor == NULL) { 275 if (entry->actor == NULL) {
288 ERROR("Failed to allocate %s cache entry\n", name); 276 ERROR("Failed to allocate %s cache entry\n", name);
289 goto cleanup; 277 goto cleanup;
@@ -314,18 +302,20 @@ int squashfs_copy_data(void *buffer, struct squashfs_cache_entry *entry,
314 return min(length, entry->length - offset); 302 return min(length, entry->length - offset);
315 303
316 while (offset < entry->length) { 304 while (offset < entry->length) {
317 void *buff = entry->data[offset / PAGE_CACHE_SIZE] 305 void *buff = kmap_atomic(entry->page[offset / PAGE_CACHE_SIZE])
318 + (offset % PAGE_CACHE_SIZE); 306 + (offset % PAGE_CACHE_SIZE);
319 int bytes = min_t(int, entry->length - offset, 307 int bytes = min_t(int, entry->length - offset,
320 PAGE_CACHE_SIZE - (offset % PAGE_CACHE_SIZE)); 308 PAGE_CACHE_SIZE - (offset % PAGE_CACHE_SIZE));
321 309
322 if (bytes >= remaining) { 310 if (bytes >= remaining) {
323 memcpy(buffer, buff, remaining); 311 memcpy(buffer, buff, remaining);
312 kunmap_atomic(buff);
324 remaining = 0; 313 remaining = 0;
325 break; 314 break;
326 } 315 }
327 316
328 memcpy(buffer, buff, bytes); 317 memcpy(buffer, buff, bytes);
318 kunmap_atomic(buff);
329 buffer += bytes; 319 buffer += bytes;
330 remaining -= bytes; 320 remaining -= bytes;
331 offset += bytes; 321 offset += bytes;
@@ -416,43 +406,38 @@ struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *sb,
416void *squashfs_read_table(struct super_block *sb, u64 block, int length) 406void *squashfs_read_table(struct super_block *sb, u64 block, int length)
417{ 407{
418 int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 408 int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
419 int i, res; 409 struct page **page;
420 void *table, *buffer, **data; 410 void *buff;
411 int res;
421 struct squashfs_page_actor *actor; 412 struct squashfs_page_actor *actor;
422 413
423 table = buffer = kmalloc(length, GFP_KERNEL); 414 page = alloc_page_array(pages, GFP_KERNEL);
424 if (table == NULL) 415 if (!page)
425 return ERR_PTR(-ENOMEM); 416 return ERR_PTR(-ENOMEM);
426 417
427 data = kcalloc(pages, sizeof(void *), GFP_KERNEL); 418 actor = squashfs_page_actor_init(page, pages, length, NULL);
428 if (data == NULL) {
429 res = -ENOMEM;
430 goto failed;
431 }
432
433 actor = squashfs_page_actor_init(data, pages, length);
434 if (actor == NULL) { 419 if (actor == NULL) {
435 res = -ENOMEM; 420 res = -ENOMEM;
436 goto failed2; 421 goto failed;
437 } 422 }
438 423
439 for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
440 data[i] = buffer;
441
442 res = squashfs_read_data(sb, block, length | 424 res = squashfs_read_data(sb, block, length |
443 SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor); 425 SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor);
444 426
445 kfree(data);
446 kfree(actor);
447
448 if (res < 0) 427 if (res < 0)
449 goto failed; 428 goto failed2;
450 429
451 return table; 430 buff = kmalloc(length, GFP_KERNEL);
431 if (!buff)
432 goto failed2;
433 squashfs_actor_to_buf(actor, buff, length);
434 squashfs_page_actor_free(actor, 0);
435 free_page_array(page, pages);
436 return buff;
452 437
453failed2: 438failed2:
454 kfree(data); 439 squashfs_page_actor_free(actor, 0);
455failed: 440failed:
456 kfree(table); 441 free_page_array(page, pages);
457 return ERR_PTR(res); 442 return ERR_PTR(res);
458} 443}
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index e9034bf6e5ae..7de35bf297aa 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -24,7 +24,8 @@
24#include <linux/types.h> 24#include <linux/types.h>
25#include <linux/mutex.h> 25#include <linux/mutex.h>
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/buffer_head.h> 27#include <linux/highmem.h>
28#include <linux/fs.h>
28 29
29#include "squashfs_fs.h" 30#include "squashfs_fs.h"
30#include "squashfs_fs_sb.h" 31#include "squashfs_fs_sb.h"
@@ -94,40 +95,44 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
94static void *get_comp_opts(struct super_block *sb, unsigned short flags) 95static void *get_comp_opts(struct super_block *sb, unsigned short flags)
95{ 96{
96 struct squashfs_sb_info *msblk = sb->s_fs_info; 97 struct squashfs_sb_info *msblk = sb->s_fs_info;
97 void *buffer = NULL, *comp_opts; 98 void *comp_opts, *buffer = NULL;
99 struct page *page;
98 struct squashfs_page_actor *actor = NULL; 100 struct squashfs_page_actor *actor = NULL;
99 int length = 0; 101 int length = 0;
100 102
103 if (!SQUASHFS_COMP_OPTS(flags))
104 return squashfs_comp_opts(msblk, buffer, length);
105
101 /* 106 /*
102 * Read decompressor specific options from file system if present 107 * Read decompressor specific options from file system if present
103 */ 108 */
104 if (SQUASHFS_COMP_OPTS(flags)) { 109
105 buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); 110 page = alloc_page(GFP_KERNEL);
106 if (buffer == NULL) { 111 if (!page)
107 comp_opts = ERR_PTR(-ENOMEM); 112 return ERR_PTR(-ENOMEM);
108 goto out; 113
109 } 114 actor = squashfs_page_actor_init(&page, 1, 0, NULL);
110 115 if (actor == NULL) {
111 actor = squashfs_page_actor_init(&buffer, 1, 0); 116 comp_opts = ERR_PTR(-ENOMEM);
112 if (actor == NULL) { 117 goto actor_error;
113 comp_opts = ERR_PTR(-ENOMEM); 118 }
114 goto out; 119
115 } 120 length = squashfs_read_data(sb,
116 121 sizeof(struct squashfs_super_block), 0, NULL, actor);
117 length = squashfs_read_data(sb, 122
118 sizeof(struct squashfs_super_block), 0, NULL, actor); 123 if (length < 0) {
119 124 comp_opts = ERR_PTR(length);
120 if (length < 0) { 125 goto read_error;
121 comp_opts = ERR_PTR(length);
122 goto out;
123 }
124 } 126 }
125 127
128 buffer = kmap_atomic(page);
126 comp_opts = squashfs_comp_opts(msblk, buffer, length); 129 comp_opts = squashfs_comp_opts(msblk, buffer, length);
130 kunmap_atomic(buffer);
127 131
128out: 132read_error:
129 kfree(actor); 133 squashfs_page_actor_free(actor, 0);
130 kfree(buffer); 134actor_error:
135 __free_page(page);
131 return comp_opts; 136 return comp_opts;
132} 137}
133 138
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index e5c9689062ba..6f5ef8d7e55a 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -47,12 +47,16 @@
47#include <linux/string.h> 47#include <linux/string.h>
48#include <linux/pagemap.h> 48#include <linux/pagemap.h>
49#include <linux/mutex.h> 49#include <linux/mutex.h>
50#include <linux/mm_inline.h>
50 51
51#include "squashfs_fs.h" 52#include "squashfs_fs.h"
52#include "squashfs_fs_sb.h" 53#include "squashfs_fs_sb.h"
53#include "squashfs_fs_i.h" 54#include "squashfs_fs_i.h"
54#include "squashfs.h" 55#include "squashfs.h"
55 56
57// Backported from 4.5
58#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
59
56/* 60/*
57 * Locate cache slot in range [offset, index] for specified inode. If 61 * Locate cache slot in range [offset, index] for specified inode. If
58 * there's more than one return the slot closest to index. 62 * there's more than one return the slot closest to index.
@@ -438,6 +442,21 @@ static int squashfs_readpage_fragment(struct page *page)
438 return res; 442 return res;
439} 443}
440 444
445static int squashfs_readpages_fragment(struct page *page,
446 struct list_head *readahead_pages, struct address_space *mapping)
447{
448 if (!page) {
449 page = lru_to_page(readahead_pages);
450 list_del(&page->lru);
451 if (add_to_page_cache_lru(page, mapping, page->index,
452 mapping_gfp_constraint(mapping, GFP_KERNEL))) {
453 put_page(page);
454 return 0;
455 }
456 }
457 return squashfs_readpage_fragment(page);
458}
459
441static int squashfs_readpage_sparse(struct page *page, int index, int file_end) 460static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
442{ 461{
443 struct inode *inode = page->mapping->host; 462 struct inode *inode = page->mapping->host;
@@ -450,54 +469,105 @@ static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
450 return 0; 469 return 0;
451} 470}
452 471
453static int squashfs_readpage(struct file *file, struct page *page) 472static int squashfs_readpages_sparse(struct page *page,
473 struct list_head *readahead_pages, int index, int file_end,
474 struct address_space *mapping)
454{ 475{
455 struct inode *inode = page->mapping->host; 476 if (!page) {
477 page = lru_to_page(readahead_pages);
478 list_del(&page->lru);
479 if (add_to_page_cache_lru(page, mapping, page->index,
480 mapping_gfp_constraint(mapping, GFP_KERNEL))) {
481 put_page(page);
482 return 0;
483 }
484 }
485 return squashfs_readpage_sparse(page, index, file_end);
486}
487
488static int __squashfs_readpages(struct file *file, struct page *page,
489 struct list_head *readahead_pages, unsigned int nr_pages,
490 struct address_space *mapping)
491{
492 struct inode *inode = mapping->host;
456 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 493 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
457 int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
458 int file_end = i_size_read(inode) >> msblk->block_log; 494 int file_end = i_size_read(inode) >> msblk->block_log;
459 int res; 495 int res;
460 void *pageaddr;
461 496
462 TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", 497 do {
463 page->index, squashfs_i(inode)->start); 498 struct page *cur_page = page ? page
499 : lru_to_page(readahead_pages);
500 int page_index = cur_page->index;
501 int index = page_index >> (msblk->block_log - PAGE_CACHE_SHIFT);
502
503 if (page_index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
504 PAGE_CACHE_SHIFT))
505 return 1;
506
507 if (index < file_end || squashfs_i(inode)->fragment_block ==
508 SQUASHFS_INVALID_BLK) {
509 u64 block = 0;
510 int bsize = read_blocklist(inode, index, &block);
511
512 if (bsize < 0)
513 return -1;
514
515 if (bsize == 0) {
516 res = squashfs_readpages_sparse(page,
517 readahead_pages, index, file_end,
518 mapping);
519 } else {
520 res = squashfs_readpages_block(page,
521 readahead_pages, &nr_pages, mapping,
522 page_index, block, bsize);
523 }
524 } else {
525 res = squashfs_readpages_fragment(page,
526 readahead_pages, mapping);
527 }
528 if (res)
529 return 0;
530 page = NULL;
531 } while (readahead_pages && !list_empty(readahead_pages));
464 532
465 if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 533 return 0;
466 PAGE_CACHE_SHIFT)) 534}
467 goto out;
468 535
469 if (index < file_end || squashfs_i(inode)->fragment_block == 536static int squashfs_readpage(struct file *file, struct page *page)
470 SQUASHFS_INVALID_BLK) { 537{
471 u64 block = 0; 538 int ret;
472 int bsize = read_blocklist(inode, index, &block);
473 if (bsize < 0)
474 goto error_out;
475 539
476 if (bsize == 0) 540 TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
477 res = squashfs_readpage_sparse(page, index, file_end); 541 page->index, squashfs_i(page->mapping->host)->start);
542
543 get_page(page);
544
545 ret = __squashfs_readpages(file, page, NULL, 1, page->mapping);
546 if (ret) {
547 flush_dcache_page(page);
548 if (ret < 0)
549 SetPageError(page);
478 else 550 else
479 res = squashfs_readpage_block(page, block, bsize); 551 SetPageUptodate(page);
480 } else 552 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
481 res = squashfs_readpage_fragment(page); 553 unlock_page(page);
482 554 put_page(page);
483 if (!res) 555 }
484 return 0;
485
486error_out:
487 SetPageError(page);
488out:
489 pageaddr = kmap_atomic(page);
490 memset(pageaddr, 0, PAGE_CACHE_SIZE);
491 kunmap_atomic(pageaddr);
492 flush_dcache_page(page);
493 if (!PageError(page))
494 SetPageUptodate(page);
495 unlock_page(page);
496 556
497 return 0; 557 return 0;
498} 558}
499 559
560static int squashfs_readpages(struct file *file, struct address_space *mapping,
561 struct list_head *pages, unsigned int nr_pages)
562{
563 TRACE("Entered squashfs_readpages, %u pages, first page index %lx\n",
564 nr_pages, lru_to_page(pages)->index);
565 __squashfs_readpages(file, NULL, pages, nr_pages, mapping);
566 return 0;
567}
568
500 569
501const struct address_space_operations squashfs_aops = { 570const struct address_space_operations squashfs_aops = {
502 .readpage = squashfs_readpage 571 .readpage = squashfs_readpage,
572 .readpages = squashfs_readpages,
503}; 573};
diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c
deleted file mode 100644
index f2310d2a2019..000000000000
--- a/fs/squashfs/file_cache.c
+++ /dev/null
@@ -1,38 +0,0 @@
1/*
2 * Copyright (c) 2013
3 * Phillip Lougher <phillip@squashfs.org.uk>
4 *
5 * This work is licensed under the terms of the GNU GPL, version 2. See
6 * the COPYING file in the top-level directory.
7 */
8
9#include <linux/fs.h>
10#include <linux/vfs.h>
11#include <linux/kernel.h>
12#include <linux/slab.h>
13#include <linux/string.h>
14#include <linux/pagemap.h>
15#include <linux/mutex.h>
16
17#include "squashfs_fs.h"
18#include "squashfs_fs_sb.h"
19#include "squashfs_fs_i.h"
20#include "squashfs.h"
21
22/* Read separately compressed datablock and memcopy into page cache */
23int squashfs_readpage_block(struct page *page, u64 block, int bsize)
24{
25 struct inode *i = page->mapping->host;
26 struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
27 block, bsize);
28 int res = buffer->error;
29
30 if (res)
31 ERROR("Unable to read page, block %llx, size %x\n", block,
32 bsize);
33 else
34 squashfs_copy_cache(page, buffer, buffer->length, 0);
35
36 squashfs_cache_put(buffer);
37 return res;
38}
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index 43e7a7eddac0..c97af4c6ccd0 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -13,6 +13,7 @@
13#include <linux/string.h> 13#include <linux/string.h>
14#include <linux/pagemap.h> 14#include <linux/pagemap.h>
15#include <linux/mutex.h> 15#include <linux/mutex.h>
16#include <linux/mm_inline.h>
16 17
17#include "squashfs_fs.h" 18#include "squashfs_fs.h"
18#include "squashfs_fs_sb.h" 19#include "squashfs_fs_sb.h"
@@ -20,157 +21,139 @@
20#include "squashfs.h" 21#include "squashfs.h"
21#include "page_actor.h" 22#include "page_actor.h"
22 23
23static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, 24// Backported from 4.5
24 int pages, struct page **page); 25#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
25
26/* Read separately compressed datablock directly into page cache */
27int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
28 26
27static void release_actor_pages(struct page **page, int pages, int error)
29{ 28{
30 struct inode *inode = target_page->mapping->host; 29 int i;
31 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
32 30
33 int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 31 for (i = 0; i < pages; i++) {
34 int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; 32 if (!page[i])
35 int start_index = target_page->index & ~mask; 33 continue;
36 int end_index = start_index | mask; 34 flush_dcache_page(page[i]);
37 int i, n, pages, missing_pages, bytes, res = -ENOMEM; 35 if (!error)
36 SetPageUptodate(page[i]);
37 else {
38 SetPageError(page[i]);
39 zero_user_segment(page[i], 0, PAGE_CACHE_SIZE);
40 }
41 unlock_page(page[i]);
42 put_page(page[i]);
43 }
44 kfree(page);
45}
46
47/*
48 * Create a "page actor" which will kmap and kunmap the
49 * page cache pages appropriately within the decompressor
50 */
51static struct squashfs_page_actor *actor_from_page_cache(
52 unsigned int actor_pages, struct page *target_page,
53 struct list_head *rpages, unsigned int *nr_pages, int start_index,
54 struct address_space *mapping)
55{
38 struct page **page; 56 struct page **page;
39 struct squashfs_page_actor *actor; 57 struct squashfs_page_actor *actor;
40 void *pageaddr; 58 int i, n;
41 59 gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
42 if (end_index > file_end) 60
43 end_index = file_end; 61 page = kmalloc_array(actor_pages, sizeof(void *), GFP_KERNEL);
44 62 if (!page)
45 pages = end_index - start_index + 1; 63 return NULL;
46 64
47 page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL); 65 for (i = 0, n = start_index; i < actor_pages; i++, n++) {
48 if (page == NULL) 66 if (target_page == NULL && rpages && !list_empty(rpages)) {
49 return res; 67 struct page *cur_page = lru_to_page(rpages);
50 68
51 /* 69 if (cur_page->index < start_index + actor_pages) {
52 * Create a "page actor" which will kmap and kunmap the 70 list_del(&cur_page->lru);
53 * page cache pages appropriately within the decompressor 71 --(*nr_pages);
54 */ 72 if (add_to_page_cache_lru(cur_page, mapping,
55 actor = squashfs_page_actor_init_special(page, pages, 0); 73 cur_page->index, gfp))
56 if (actor == NULL) 74 put_page(cur_page);
57 goto out; 75 else
58 76 target_page = cur_page;
59 /* Try to grab all the pages covered by the Squashfs block */ 77 } else
60 for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) { 78 rpages = NULL;
61 page[i] = (n == target_page->index) ? target_page : 79 }
62 grab_cache_page_nowait(target_page->mapping, n);
63 80
64 if (page[i] == NULL) { 81 if (target_page && target_page->index == n) {
65 missing_pages++; 82 page[i] = target_page;
66 continue; 83 target_page = NULL;
84 } else {
85 page[i] = grab_cache_page_nowait(mapping, n);
86 if (page[i] == NULL)
87 continue;
67 } 88 }
68 89
69 if (PageUptodate(page[i])) { 90 if (PageUptodate(page[i])) {
70 unlock_page(page[i]); 91 unlock_page(page[i]);
71 page_cache_release(page[i]); 92 put_page(page[i]);
72 page[i] = NULL; 93 page[i] = NULL;
73 missing_pages++;
74 } 94 }
75 } 95 }
76 96
77 if (missing_pages) { 97 actor = squashfs_page_actor_init(page, actor_pages, 0,
78 /* 98 release_actor_pages);
79 * Couldn't get one or more pages, this page has either 99 if (!actor) {
80 * been VM reclaimed, but others are still in the page cache 100 release_actor_pages(page, actor_pages, -ENOMEM);
81 * and uptodate, or we're racing with another thread in 101 kfree(page);
82 * squashfs_readpage also trying to grab them. Fall back to 102 return NULL;
83 * using an intermediate buffer.
84 */
85 res = squashfs_read_cache(target_page, block, bsize, pages,
86 page);
87 if (res < 0)
88 goto mark_errored;
89
90 goto out;
91 } 103 }
92 104 return actor;
93 /* Decompress directly into the page cache buffers */
94 res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
95 if (res < 0)
96 goto mark_errored;
97
98 /* Last page may have trailing bytes not filled */
99 bytes = res % PAGE_CACHE_SIZE;
100 if (bytes) {
101 pageaddr = kmap_atomic(page[pages - 1]);
102 memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
103 kunmap_atomic(pageaddr);
104 }
105
106 /* Mark pages as uptodate, unlock and release */
107 for (i = 0; i < pages; i++) {
108 flush_dcache_page(page[i]);
109 SetPageUptodate(page[i]);
110 unlock_page(page[i]);
111 if (page[i] != target_page)
112 page_cache_release(page[i]);
113 }
114
115 kfree(actor);
116 kfree(page);
117
118 return 0;
119
120mark_errored:
121 /* Decompression failed, mark pages as errored. Target_page is
122 * dealt with by the caller
123 */
124 for (i = 0; i < pages; i++) {
125 if (page[i] == NULL || page[i] == target_page)
126 continue;
127 flush_dcache_page(page[i]);
128 SetPageError(page[i]);
129 unlock_page(page[i]);
130 page_cache_release(page[i]);
131 }
132
133out:
134 kfree(actor);
135 kfree(page);
136 return res;
137} 105}
138 106
107int squashfs_readpages_block(struct page *target_page,
108 struct list_head *readahead_pages,
109 unsigned int *nr_pages,
110 struct address_space *mapping,
111 int page_index, u64 block, int bsize)
139 112
140static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
141 int pages, struct page **page)
142{ 113{
143 struct inode *i = target_page->mapping->host; 114 struct squashfs_page_actor *actor;
144 struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, 115 struct inode *inode = mapping->host;
145 block, bsize); 116 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
146 int bytes = buffer->length, res = buffer->error, n, offset = 0; 117 int start_index, end_index, file_end, actor_pages, res;
147 void *pageaddr; 118 int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
148
149 if (res) {
150 ERROR("Unable to read page, block %llx, size %x\n", block,
151 bsize);
152 goto out;
153 }
154
155 for (n = 0; n < pages && bytes > 0; n++,
156 bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
157 int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
158
159 if (page[n] == NULL)
160 continue;
161 119
162 pageaddr = kmap_atomic(page[n]); 120 /*
163 squashfs_copy_data(pageaddr, buffer, offset, avail); 121 * If readpage() is called on an uncompressed datablock, we can just
164 memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); 122 * read the pages instead of fetching the whole block.
165 kunmap_atomic(pageaddr); 123 * This greatly improves the performance when a process keep doing
166 flush_dcache_page(page[n]); 124 * random reads because we only fetch the necessary data.
167 SetPageUptodate(page[n]); 125 * The readahead algorithm will take care of doing speculative reads
168 unlock_page(page[n]); 126 * if necessary.
169 if (page[n] != target_page) 127 * We can't read more than 1 block even if readahead provides use more
170 page_cache_release(page[n]); 128 * pages because we don't know yet if the next block is compressed or
129 * not.
130 */
131 if (bsize && !SQUASHFS_COMPRESSED_BLOCK(bsize)) {
132 u64 block_end = block + msblk->block_size;
133
134 block += (page_index & mask) * PAGE_CACHE_SIZE;
135 actor_pages = (block_end - block) / PAGE_CACHE_SIZE;
136 if (*nr_pages < actor_pages)
137 actor_pages = *nr_pages;
138 start_index = page_index;
139 bsize = min_t(int, bsize, (PAGE_CACHE_SIZE * actor_pages)
140 | SQUASHFS_COMPRESSED_BIT_BLOCK);
141 } else {
142 file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
143 start_index = page_index & ~mask;
144 end_index = start_index | mask;
145 if (end_index > file_end)
146 end_index = file_end;
147 actor_pages = end_index - start_index + 1;
171 } 148 }
172 149
173out: 150 actor = actor_from_page_cache(actor_pages, target_page,
174 squashfs_cache_put(buffer); 151 readahead_pages, nr_pages, start_index,
175 return res; 152 mapping);
153 if (!actor)
154 return -ENOMEM;
155
156 res = squashfs_read_data_async(inode->i_sb, block, bsize, NULL,
157 actor);
158 return res < 0 ? res : 0;
176} 159}
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index c31e2bc9c081..df4fa3c7ddd0 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c
@@ -94,39 +94,17 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
94 struct buffer_head **bh, int b, int offset, int length, 94 struct buffer_head **bh, int b, int offset, int length,
95 struct squashfs_page_actor *output) 95 struct squashfs_page_actor *output)
96{ 96{
97 struct squashfs_lz4 *stream = strm; 97 int res;
98 void *buff = stream->input, *data;
99 int avail, i, bytes = length, res;
100 size_t dest_len = output->length; 98 size_t dest_len = output->length;
99 struct squashfs_lz4 *stream = strm;
101 100
102 for (i = 0; i < b; i++) { 101 squashfs_bh_to_buf(bh, b, stream->input, offset, length,
103 avail = min(bytes, msblk->devblksize - offset); 102 msblk->devblksize);
104 memcpy(buff, bh[i]->b_data + offset, avail);
105 buff += avail;
106 bytes -= avail;
107 offset = 0;
108 put_bh(bh[i]);
109 }
110
111 res = lz4_decompress_unknownoutputsize(stream->input, length, 103 res = lz4_decompress_unknownoutputsize(stream->input, length,
112 stream->output, &dest_len); 104 stream->output, &dest_len);
113 if (res) 105 if (res)
114 return -EIO; 106 return -EIO;
115 107 squashfs_buf_to_actor(stream->output, output, dest_len);
116 bytes = dest_len;
117 data = squashfs_first_page(output);
118 buff = stream->output;
119 while (data) {
120 if (bytes <= PAGE_CACHE_SIZE) {
121 memcpy(data, buff, bytes);
122 break;
123 }
124 memcpy(data, buff, PAGE_CACHE_SIZE);
125 buff += PAGE_CACHE_SIZE;
126 bytes -= PAGE_CACHE_SIZE;
127 data = squashfs_next_page(output);
128 }
129 squashfs_finish_page(output);
130 108
131 return dest_len; 109 return dest_len;
132} 110}
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 244b9fbfff7b..2c844d53a59e 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -79,45 +79,19 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
79 struct buffer_head **bh, int b, int offset, int length, 79 struct buffer_head **bh, int b, int offset, int length,
80 struct squashfs_page_actor *output) 80 struct squashfs_page_actor *output)
81{ 81{
82 struct squashfs_lzo *stream = strm; 82 int res;
83 void *buff = stream->input, *data;
84 int avail, i, bytes = length, res;
85 size_t out_len = output->length; 83 size_t out_len = output->length;
84 struct squashfs_lzo *stream = strm;
86 85
87 for (i = 0; i < b; i++) { 86 squashfs_bh_to_buf(bh, b, stream->input, offset, length,
88 avail = min(bytes, msblk->devblksize - offset); 87 msblk->devblksize);
89 memcpy(buff, bh[i]->b_data + offset, avail);
90 buff += avail;
91 bytes -= avail;
92 offset = 0;
93 put_bh(bh[i]);
94 }
95
96 res = lzo1x_decompress_safe(stream->input, (size_t)length, 88 res = lzo1x_decompress_safe(stream->input, (size_t)length,
97 stream->output, &out_len); 89 stream->output, &out_len);
98 if (res != LZO_E_OK) 90 if (res != LZO_E_OK)
99 goto failed; 91 return -EIO;
92 squashfs_buf_to_actor(stream->output, output, out_len);
100 93
101 res = bytes = (int)out_len; 94 return out_len;
102 data = squashfs_first_page(output);
103 buff = stream->output;
104 while (data) {
105 if (bytes <= PAGE_CACHE_SIZE) {
106 memcpy(data, buff, bytes);
107 break;
108 } else {
109 memcpy(data, buff, PAGE_CACHE_SIZE);
110 buff += PAGE_CACHE_SIZE;
111 bytes -= PAGE_CACHE_SIZE;
112 data = squashfs_next_page(output);
113 }
114 }
115 squashfs_finish_page(output);
116
117 return res;
118
119failed:
120 return -EIO;
121} 95}
122 96
123const struct squashfs_decompressor squashfs_lzo_comp_ops = { 97const struct squashfs_decompressor squashfs_lzo_comp_ops = {
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
index 5a1c11f56441..53863508e400 100644
--- a/fs/squashfs/page_actor.c
+++ b/fs/squashfs/page_actor.c
@@ -9,39 +9,11 @@
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/buffer_head.h>
12#include "page_actor.h" 13#include "page_actor.h"
13 14
14/* 15struct squashfs_page_actor *squashfs_page_actor_init(struct page **page,
15 * This file contains implementations of page_actor for decompressing into 16 int pages, int length, void (*release_pages)(struct page **, int, int))
16 * an intermediate buffer, and for decompressing directly into the
17 * page cache.
18 *
19 * Calling code should avoid sleeping between calls to squashfs_first_page()
20 * and squashfs_finish_page().
21 */
22
23/* Implementation of page_actor for decompressing into intermediate buffer */
24static void *cache_first_page(struct squashfs_page_actor *actor)
25{
26 actor->next_page = 1;
27 return actor->buffer[0];
28}
29
30static void *cache_next_page(struct squashfs_page_actor *actor)
31{
32 if (actor->next_page == actor->pages)
33 return NULL;
34
35 return actor->buffer[actor->next_page++];
36}
37
38static void cache_finish_page(struct squashfs_page_actor *actor)
39{
40 /* empty */
41}
42
43struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
44 int pages, int length)
45{ 17{
46 struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); 18 struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
47 19
@@ -49,52 +21,133 @@ struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
49 return NULL; 21 return NULL;
50 22
51 actor->length = length ? : pages * PAGE_CACHE_SIZE; 23 actor->length = length ? : pages * PAGE_CACHE_SIZE;
52 actor->buffer = buffer; 24 actor->page = page;
53 actor->pages = pages; 25 actor->pages = pages;
54 actor->next_page = 0; 26 actor->next_page = 0;
55 actor->squashfs_first_page = cache_first_page; 27 actor->pageaddr = NULL;
56 actor->squashfs_next_page = cache_next_page; 28 actor->release_pages = release_pages;
57 actor->squashfs_finish_page = cache_finish_page;
58 return actor; 29 return actor;
59} 30}
60 31
61/* Implementation of page_actor for decompressing directly into page cache. */ 32void squashfs_page_actor_free(struct squashfs_page_actor *actor, int error)
62static void *direct_first_page(struct squashfs_page_actor *actor) 33{
34 if (!actor)
35 return;
36
37 if (actor->release_pages)
38 actor->release_pages(actor->page, actor->pages, error);
39 kfree(actor);
40}
41
42void squashfs_actor_to_buf(struct squashfs_page_actor *actor, void *buf,
43 int length)
63{ 44{
64 actor->next_page = 1; 45 void *pageaddr;
65 return actor->pageaddr = kmap_atomic(actor->page[0]); 46 int pos = 0, avail, i;
47
48 for (i = 0; i < actor->pages && pos < length; ++i) {
49 avail = min_t(int, length - pos, PAGE_CACHE_SIZE);
50 if (actor->page[i]) {
51 pageaddr = kmap_atomic(actor->page[i]);
52 memcpy(buf + pos, pageaddr, avail);
53 kunmap_atomic(pageaddr);
54 }
55 pos += avail;
56 }
66} 57}
67 58
68static void *direct_next_page(struct squashfs_page_actor *actor) 59void squashfs_buf_to_actor(void *buf, struct squashfs_page_actor *actor,
60 int length)
69{ 61{
70 if (actor->pageaddr) 62 void *pageaddr;
71 kunmap_atomic(actor->pageaddr); 63 int pos = 0, avail, i;
64
65 for (i = 0; i < actor->pages && pos < length; ++i) {
66 avail = min_t(int, length - pos, PAGE_CACHE_SIZE);
67 if (actor->page[i]) {
68 pageaddr = kmap_atomic(actor->page[i]);
69 memcpy(pageaddr, buf + pos, avail);
70 kunmap_atomic(pageaddr);
71 }
72 pos += avail;
73 }
74}
72 75
73 return actor->pageaddr = actor->next_page == actor->pages ? NULL : 76void squashfs_bh_to_actor(struct buffer_head **bh, int nr_buffers,
74 kmap_atomic(actor->page[actor->next_page++]); 77 struct squashfs_page_actor *actor, int offset, int length, int blksz)
78{
79 void *kaddr = NULL;
80 int bytes = 0, pgoff = 0, b = 0, p = 0, avail, i;
81
82 while (bytes < length) {
83 if (actor->page[p]) {
84 kaddr = kmap_atomic(actor->page[p]);
85 while (pgoff < PAGE_CACHE_SIZE && bytes < length) {
86 avail = min_t(int, blksz - offset,
87 PAGE_CACHE_SIZE - pgoff);
88 memcpy(kaddr + pgoff, bh[b]->b_data + offset,
89 avail);
90 pgoff += avail;
91 bytes += avail;
92 offset = (offset + avail) % blksz;
93 if (!offset) {
94 put_bh(bh[b]);
95 ++b;
96 }
97 }
98 kunmap_atomic(kaddr);
99 pgoff = 0;
100 } else {
101 for (i = 0; i < PAGE_CACHE_SIZE / blksz; ++i) {
102 if (bh[b])
103 put_bh(bh[b]);
104 ++b;
105 }
106 bytes += PAGE_CACHE_SIZE;
107 }
108 ++p;
109 }
75} 110}
76 111
77static void direct_finish_page(struct squashfs_page_actor *actor) 112void squashfs_bh_to_buf(struct buffer_head **bh, int nr_buffers, void *buf,
113 int offset, int length, int blksz)
78{ 114{
79 if (actor->pageaddr) 115 int i, avail, bytes = 0;
80 kunmap_atomic(actor->pageaddr); 116
117 for (i = 0; i < nr_buffers && bytes < length; ++i) {
118 avail = min_t(int, length - bytes, blksz - offset);
119 if (bh[i]) {
120 memcpy(buf + bytes, bh[i]->b_data + offset, avail);
121 put_bh(bh[i]);
122 }
123 bytes += avail;
124 offset = 0;
125 }
81} 126}
82 127
83struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page, 128void free_page_array(struct page **page, int nr_pages)
84 int pages, int length)
85{ 129{
86 struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); 130 int i;
87 131
88 if (actor == NULL) 132 for (i = 0; i < nr_pages; ++i)
89 return NULL; 133 __free_page(page[i]);
134 kfree(page);
135}
90 136
91 actor->length = length ? : pages * PAGE_CACHE_SIZE; 137struct page **alloc_page_array(int nr_pages, int gfp_mask)
92 actor->page = page; 138{
93 actor->pages = pages; 139 int i;
94 actor->next_page = 0; 140 struct page **page;
95 actor->pageaddr = NULL; 141
96 actor->squashfs_first_page = direct_first_page; 142 page = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
97 actor->squashfs_next_page = direct_next_page; 143 if (!page)
98 actor->squashfs_finish_page = direct_finish_page; 144 return NULL;
99 return actor; 145 for (i = 0; i < nr_pages; ++i) {
146 page[i] = alloc_page(gfp_mask);
147 if (!page[i]) {
148 free_page_array(page, i);
149 return NULL;
150 }
151 }
152 return page;
100} 153}
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
index 26dd82008b82..aa1ed790b5a3 100644
--- a/fs/squashfs/page_actor.h
+++ b/fs/squashfs/page_actor.h
@@ -5,77 +5,61 @@
5 * Phillip Lougher <phillip@squashfs.org.uk> 5 * Phillip Lougher <phillip@squashfs.org.uk>
6 * 6 *
7 * This work is licensed under the terms of the GNU GPL, version 2. See 7 * This work is licensed under the terms of the GNU GPL, version 2. See
8 * the COPYING file in the top-level directory. 8 * the COPYING file in the top-level squashfsory.
9 */ 9 */
10 10
11#ifndef CONFIG_SQUASHFS_FILE_DIRECT
12struct squashfs_page_actor { 11struct squashfs_page_actor {
13 void **page; 12 struct page **page;
13 void *pageaddr;
14 int pages; 14 int pages;
15 int length; 15 int length;
16 int next_page; 16 int next_page;
17 void (*release_pages)(struct page **, int, int);
17}; 18};
18 19
19static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page, 20extern struct squashfs_page_actor *squashfs_page_actor_init(struct page **,
20 int pages, int length) 21 int, int, void (*)(struct page **, int, int));
21{ 22extern void squashfs_page_actor_free(struct squashfs_page_actor *, int);
22 struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
23
24 if (actor == NULL)
25 return NULL;
26 23
27 actor->length = length ? : pages * PAGE_CACHE_SIZE; 24extern void squashfs_actor_to_buf(struct squashfs_page_actor *, void *, int);
28 actor->page = page; 25extern void squashfs_buf_to_actor(void *, struct squashfs_page_actor *, int);
29 actor->pages = pages; 26extern void squashfs_bh_to_actor(struct buffer_head **, int,
30 actor->next_page = 0; 27 struct squashfs_page_actor *, int, int, int);
31 return actor; 28extern void squashfs_bh_to_buf(struct buffer_head **, int, void *, int, int,
32} 29 int);
33 30
31/*
32 * Calling code should avoid sleeping between calls to squashfs_first_page()
33 * and squashfs_finish_page().
34 */
34static inline void *squashfs_first_page(struct squashfs_page_actor *actor) 35static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
35{ 36{
36 actor->next_page = 1; 37 actor->next_page = 1;
37 return actor->page[0]; 38 return actor->pageaddr = actor->page[0] ? kmap_atomic(actor->page[0])
39 : NULL;
38} 40}
39 41
40static inline void *squashfs_next_page(struct squashfs_page_actor *actor) 42static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
41{ 43{
42 return actor->next_page == actor->pages ? NULL : 44 if (!IS_ERR_OR_NULL(actor->pageaddr))
43 actor->page[actor->next_page++]; 45 kunmap_atomic(actor->pageaddr);
44}
45 46
46static inline void squashfs_finish_page(struct squashfs_page_actor *actor) 47 if (actor->next_page == actor->pages)
47{ 48 return actor->pageaddr = ERR_PTR(-ENODATA);
48 /* empty */
49}
50#else
51struct squashfs_page_actor {
52 union {
53 void **buffer;
54 struct page **page;
55 };
56 void *pageaddr;
57 void *(*squashfs_first_page)(struct squashfs_page_actor *);
58 void *(*squashfs_next_page)(struct squashfs_page_actor *);
59 void (*squashfs_finish_page)(struct squashfs_page_actor *);
60 int pages;
61 int length;
62 int next_page;
63};
64 49
65extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int); 50 actor->pageaddr = actor->page[actor->next_page] ?
66extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page 51 kmap_atomic(actor->page[actor->next_page]) : NULL;
67 **, int, int); 52 ++actor->next_page;
68static inline void *squashfs_first_page(struct squashfs_page_actor *actor) 53 return actor->pageaddr;
69{
70 return actor->squashfs_first_page(actor);
71}
72static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
73{
74 return actor->squashfs_next_page(actor);
75} 54}
55
76static inline void squashfs_finish_page(struct squashfs_page_actor *actor) 56static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
77{ 57{
78 actor->squashfs_finish_page(actor); 58 if (!IS_ERR_OR_NULL(actor->pageaddr))
59 kunmap_atomic(actor->pageaddr);
79} 60}
80#endif 61
62extern struct page **alloc_page_array(int, int);
63extern void free_page_array(struct page **, int);
64
81#endif 65#endif
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 887d6d270080..6093579c6c5d 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -28,8 +28,14 @@
28#define WARNING(s, args...) pr_warn("SQUASHFS: "s, ## args) 28#define WARNING(s, args...) pr_warn("SQUASHFS: "s, ## args)
29 29
30/* block.c */ 30/* block.c */
31extern int squashfs_init_read_wq(void);
32extern void squashfs_destroy_read_wq(void);
31extern int squashfs_read_data(struct super_block *, u64, int, u64 *, 33extern int squashfs_read_data(struct super_block *, u64, int, u64 *,
32 struct squashfs_page_actor *); 34 struct squashfs_page_actor *);
35extern int squashfs_read_data(struct super_block *, u64, int, u64 *,
36 struct squashfs_page_actor *);
37extern int squashfs_read_data_async(struct super_block *, u64, int, u64 *,
38 struct squashfs_page_actor *);
33 39
34/* cache.c */ 40/* cache.c */
35extern struct squashfs_cache *squashfs_cache_init(char *, int, int); 41extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
@@ -70,8 +76,9 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *,
70void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, 76void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int,
71 int); 77 int);
72 78
73/* file_xxx.c */ 79/* file_direct.c */
74extern int squashfs_readpage_block(struct page *, u64, int); 80extern int squashfs_readpages_block(struct page *, struct list_head *,
81 unsigned int *, struct address_space *, int, u64, int);
75 82
76/* id.c */ 83/* id.c */
77extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); 84extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *);
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 1da565cb50c3..8a6995de0277 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -49,7 +49,7 @@ struct squashfs_cache_entry {
49 int num_waiters; 49 int num_waiters;
50 wait_queue_head_t wait_queue; 50 wait_queue_head_t wait_queue;
51 struct squashfs_cache *cache; 51 struct squashfs_cache *cache;
52 void **data; 52 struct page **page;
53 struct squashfs_page_actor *actor; 53 struct squashfs_page_actor *actor;
54}; 54};
55 55
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 5056babe00df..61cd0b39ed0e 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -444,9 +444,15 @@ static int __init init_squashfs_fs(void)
444 if (err) 444 if (err)
445 return err; 445 return err;
446 446
447 if (!squashfs_init_read_wq()) {
448 destroy_inodecache();
449 return -ENOMEM;
450 }
451
447 err = register_filesystem(&squashfs_fs_type); 452 err = register_filesystem(&squashfs_fs_type);
448 if (err) { 453 if (err) {
449 destroy_inodecache(); 454 destroy_inodecache();
455 squashfs_destroy_read_wq();
450 return err; 456 return err;
451 } 457 }
452 458
@@ -460,6 +466,7 @@ static void __exit exit_squashfs_fs(void)
460{ 466{
461 unregister_filesystem(&squashfs_fs_type); 467 unregister_filesystem(&squashfs_fs_type);
462 destroy_inodecache(); 468 destroy_inodecache();
469 squashfs_destroy_read_wq();
463} 470}
464 471
465 472
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index c609624e4b8a..14cd373e1897 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -55,7 +55,7 @@ static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk,
55 struct comp_opts *opts; 55 struct comp_opts *opts;
56 int err = 0, n; 56 int err = 0, n;
57 57
58 opts = kmalloc(sizeof(*opts), GFP_KERNEL); 58 opts = kmalloc(sizeof(*opts), GFP_ATOMIC);
59 if (opts == NULL) { 59 if (opts == NULL) {
60 err = -ENOMEM; 60 err = -ENOMEM;
61 goto out2; 61 goto out2;
@@ -136,6 +136,7 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
136 enum xz_ret xz_err; 136 enum xz_ret xz_err;
137 int avail, total = 0, k = 0; 137 int avail, total = 0, k = 0;
138 struct squashfs_xz *stream = strm; 138 struct squashfs_xz *stream = strm;
139 void *buf = NULL;
139 140
140 xz_dec_reset(stream->state); 141 xz_dec_reset(stream->state);
141 stream->buf.in_pos = 0; 142 stream->buf.in_pos = 0;
@@ -156,12 +157,20 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
156 157
157 if (stream->buf.out_pos == stream->buf.out_size) { 158 if (stream->buf.out_pos == stream->buf.out_size) {
158 stream->buf.out = squashfs_next_page(output); 159 stream->buf.out = squashfs_next_page(output);
159 if (stream->buf.out != NULL) { 160 if (!IS_ERR(stream->buf.out)) {
160 stream->buf.out_pos = 0; 161 stream->buf.out_pos = 0;
161 total += PAGE_CACHE_SIZE; 162 total += PAGE_CACHE_SIZE;
162 } 163 }
163 } 164 }
164 165
166 if (!stream->buf.out) {
167 if (!buf) {
168 buf = kmalloc(PAGE_CACHE_SIZE, GFP_ATOMIC);
169 if (!buf)
170 goto out;
171 }
172 stream->buf.out = buf;
173 }
165 xz_err = xz_dec_run(stream->state, &stream->buf); 174 xz_err = xz_dec_run(stream->state, &stream->buf);
166 175
167 if (stream->buf.in_pos == stream->buf.in_size && k < b) 176 if (stream->buf.in_pos == stream->buf.in_size && k < b)
@@ -173,11 +182,13 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
173 if (xz_err != XZ_STREAM_END || k < b) 182 if (xz_err != XZ_STREAM_END || k < b)
174 goto out; 183 goto out;
175 184
185 kfree(buf);
176 return total + stream->buf.out_pos; 186 return total + stream->buf.out_pos;
177 187
178out: 188out:
179 for (; k < b; k++) 189 for (; k < b; k++)
180 put_bh(bh[k]); 190 put_bh(bh[k]);
191 kfree(buf);
181 192
182 return -EIO; 193 return -EIO;
183} 194}
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 8727caba6882..09c892b5308e 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -66,6 +66,7 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
66 struct buffer_head **bh, int b, int offset, int length, 66 struct buffer_head **bh, int b, int offset, int length,
67 struct squashfs_page_actor *output) 67 struct squashfs_page_actor *output)
68{ 68{
69 void *buf = NULL;
69 int zlib_err, zlib_init = 0, k = 0; 70 int zlib_err, zlib_init = 0, k = 0;
70 z_stream *stream = strm; 71 z_stream *stream = strm;
71 72
@@ -84,10 +85,19 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
84 85
85 if (stream->avail_out == 0) { 86 if (stream->avail_out == 0) {
86 stream->next_out = squashfs_next_page(output); 87 stream->next_out = squashfs_next_page(output);
87 if (stream->next_out != NULL) 88 if (!IS_ERR(stream->next_out))
88 stream->avail_out = PAGE_CACHE_SIZE; 89 stream->avail_out = PAGE_CACHE_SIZE;
89 } 90 }
90 91
92 if (!stream->next_out) {
93 if (!buf) {
94 buf = kmalloc(PAGE_CACHE_SIZE, GFP_ATOMIC);
95 if (!buf)
96 goto out;
97 }
98 stream->next_out = buf;
99 }
100
91 if (!zlib_init) { 101 if (!zlib_init) {
92 zlib_err = zlib_inflateInit(stream); 102 zlib_err = zlib_inflateInit(stream);
93 if (zlib_err != Z_OK) { 103 if (zlib_err != Z_OK) {
@@ -115,11 +125,13 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
115 if (k < b) 125 if (k < b)
116 goto out; 126 goto out;
117 127
128 kfree(buf);
118 return stream->total_out; 129 return stream->total_out;
119 130
120out: 131out:
121 for (; k < b; k++) 132 for (; k < b; k++)
122 put_bh(bh[k]); 133 put_bh(bh[k]);
134 kfree(buf);
123 135
124 return -EIO; 136 return -EIO;
125} 137}
diff --git a/fs/stat.c b/fs/stat.c
index d4a61d8dc021..004dd77c3b93 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -31,7 +31,7 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
31 stat->atime = inode->i_atime; 31 stat->atime = inode->i_atime;
32 stat->mtime = inode->i_mtime; 32 stat->mtime = inode->i_mtime;
33 stat->ctime = inode->i_ctime; 33 stat->ctime = inode->i_ctime;
34 stat->blksize = (1 << inode->i_blkbits); 34 stat->blksize = i_blocksize(inode);
35 stat->blocks = inode->i_blocks; 35 stat->blocks = inode->i_blocks;
36} 36}
37 37
@@ -454,6 +454,7 @@ void __inode_add_bytes(struct inode *inode, loff_t bytes)
454 inode->i_bytes -= 512; 454 inode->i_bytes -= 512;
455 } 455 }
456} 456}
457EXPORT_SYMBOL(__inode_add_bytes);
457 458
458void inode_add_bytes(struct inode *inode, loff_t bytes) 459void inode_add_bytes(struct inode *inode, loff_t bytes)
459{ 460{
diff --git a/fs/super.c b/fs/super.c
index b938b14f6041..c96434ea71e2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -703,7 +703,8 @@ rescan:
703} 703}
704 704
705/** 705/**
706 * do_remount_sb - asks filesystem to change mount options. 706 * do_remount_sb2 - asks filesystem to change mount options.
707 * @mnt: mount we are looking at
707 * @sb: superblock in question 708 * @sb: superblock in question
708 * @flags: numeric part of options 709 * @flags: numeric part of options
709 * @data: the rest of options 710 * @data: the rest of options
@@ -711,7 +712,7 @@ rescan:
711 * 712 *
712 * Alters the mount options of a mounted file system. 713 * Alters the mount options of a mounted file system.
713 */ 714 */
714int do_remount_sb(struct super_block *sb, int flags, void *data, int force) 715int do_remount_sb2(struct vfsmount *mnt, struct super_block *sb, int flags, void *data, int force)
715{ 716{
716 int retval; 717 int retval;
717 int remount_ro; 718 int remount_ro;
@@ -753,7 +754,16 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
753 } 754 }
754 } 755 }
755 756
756 if (sb->s_op->remount_fs) { 757 if (mnt && sb->s_op->remount_fs2) {
758 retval = sb->s_op->remount_fs2(mnt, sb, &flags, data);
759 if (retval) {
760 if (!force)
761 goto cancel_readonly;
762 /* If forced remount, go ahead despite any errors */
763 WARN(1, "forced remount of a %s fs returned %i\n",
764 sb->s_type->name, retval);
765 }
766 } else if (sb->s_op->remount_fs) {
757 retval = sb->s_op->remount_fs(sb, &flags, data); 767 retval = sb->s_op->remount_fs(sb, &flags, data);
758 if (retval) { 768 if (retval) {
759 if (!force) 769 if (!force)
@@ -785,6 +795,11 @@ cancel_readonly:
785 return retval; 795 return retval;
786} 796}
787 797
798int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
799{
800 return do_remount_sb2(NULL, sb, flags, data, force);
801}
802
788static void do_emergency_remount(struct work_struct *work) 803static void do_emergency_remount(struct work_struct *work)
789{ 804{
790 struct super_block *sb, *p = NULL; 805 struct super_block *sb, *p = NULL;
@@ -1104,7 +1119,7 @@ struct dentry *mount_single(struct file_system_type *fs_type,
1104EXPORT_SYMBOL(mount_single); 1119EXPORT_SYMBOL(mount_single);
1105 1120
1106struct dentry * 1121struct dentry *
1107mount_fs(struct file_system_type *type, int flags, const char *name, void *data) 1122mount_fs(struct file_system_type *type, int flags, const char *name, struct vfsmount *mnt, void *data)
1108{ 1123{
1109 struct dentry *root; 1124 struct dentry *root;
1110 struct super_block *sb; 1125 struct super_block *sb;
@@ -1121,7 +1136,10 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
1121 goto out_free_secdata; 1136 goto out_free_secdata;
1122 } 1137 }
1123 1138
1124 root = type->mount(type, flags, name, data); 1139 if (type->mount2)
1140 root = type->mount2(mnt, type, flags, name, data);
1141 else
1142 root = type->mount(type, flags, name, data);
1125 if (IS_ERR(root)) { 1143 if (IS_ERR(root)) {
1126 error = PTR_ERR(root); 1144 error = PTR_ERR(root);
1127 goto out_free_secdata; 1145 goto out_free_secdata;
diff --git a/fs/sync.c b/fs/sync.c
index dd5d1711c7ac..452179e31c39 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -218,6 +218,7 @@ static int do_fsync(unsigned int fd, int datasync)
218 if (f.file) { 218 if (f.file) {
219 ret = vfs_fsync(f.file, datasync); 219 ret = vfs_fsync(f.file, datasync);
220 fdput(f); 220 fdput(f);
221 inc_syscfs(current);
221 } 222 }
222 return ret; 223 return ret;
223} 224}
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index b803213d1307..39c75a86c67f 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -108,7 +108,7 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf,
108{ 108{
109 const struct sysfs_ops *ops = sysfs_file_ops(of->kn); 109 const struct sysfs_ops *ops = sysfs_file_ops(of->kn);
110 struct kobject *kobj = of->kn->parent->priv; 110 struct kobject *kobj = of->kn->parent->priv;
111 size_t len; 111 ssize_t len;
112 112
113 /* 113 /*
114 * If buf != of->prealloc_buf, we don't know how 114 * If buf != of->prealloc_buf, we don't know how
@@ -117,13 +117,15 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf,
117 if (WARN_ON_ONCE(buf != of->prealloc_buf)) 117 if (WARN_ON_ONCE(buf != of->prealloc_buf))
118 return 0; 118 return 0;
119 len = ops->show(kobj, of->kn->priv, buf); 119 len = ops->show(kobj, of->kn->priv, buf);
120 if (len < 0)
121 return len;
120 if (pos) { 122 if (pos) {
121 if (len <= pos) 123 if (len <= pos)
122 return 0; 124 return 0;
123 len -= pos; 125 len -= pos;
124 memmove(buf, buf + pos, len); 126 memmove(buf, buf + pos, len);
125 } 127 }
126 return min(count, len); 128 return min_t(ssize_t, count, len);
127} 129}
128 130
129/* kernfs write callback for regular sysfs files */ 131/* kernfs write callback for regular sysfs files */
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 053818dd6c18..1327a02ec778 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -40,6 +40,7 @@ struct timerfd_ctx {
40 short unsigned settime_flags; /* to show in fdinfo */ 40 short unsigned settime_flags; /* to show in fdinfo */
41 struct rcu_head rcu; 41 struct rcu_head rcu;
42 struct list_head clist; 42 struct list_head clist;
43 spinlock_t cancel_lock;
43 bool might_cancel; 44 bool might_cancel;
44}; 45};
45 46
@@ -112,7 +113,7 @@ void timerfd_clock_was_set(void)
112 rcu_read_unlock(); 113 rcu_read_unlock();
113} 114}
114 115
115static void timerfd_remove_cancel(struct timerfd_ctx *ctx) 116static void __timerfd_remove_cancel(struct timerfd_ctx *ctx)
116{ 117{
117 if (ctx->might_cancel) { 118 if (ctx->might_cancel) {
118 ctx->might_cancel = false; 119 ctx->might_cancel = false;
@@ -122,6 +123,13 @@ static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
122 } 123 }
123} 124}
124 125
126static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
127{
128 spin_lock(&ctx->cancel_lock);
129 __timerfd_remove_cancel(ctx);
130 spin_unlock(&ctx->cancel_lock);
131}
132
125static bool timerfd_canceled(struct timerfd_ctx *ctx) 133static bool timerfd_canceled(struct timerfd_ctx *ctx)
126{ 134{
127 if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX) 135 if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX)
@@ -132,6 +140,7 @@ static bool timerfd_canceled(struct timerfd_ctx *ctx)
132 140
133static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) 141static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
134{ 142{
143 spin_lock(&ctx->cancel_lock);
135 if ((ctx->clockid == CLOCK_REALTIME || 144 if ((ctx->clockid == CLOCK_REALTIME ||
136 ctx->clockid == CLOCK_REALTIME_ALARM) && 145 ctx->clockid == CLOCK_REALTIME_ALARM) &&
137 (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { 146 (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) {
@@ -141,9 +150,10 @@ static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
141 list_add_rcu(&ctx->clist, &cancel_list); 150 list_add_rcu(&ctx->clist, &cancel_list);
142 spin_unlock(&cancel_lock); 151 spin_unlock(&cancel_lock);
143 } 152 }
144 } else if (ctx->might_cancel) { 153 } else {
145 timerfd_remove_cancel(ctx); 154 __timerfd_remove_cancel(ctx);
146 } 155 }
156 spin_unlock(&ctx->cancel_lock);
147} 157}
148 158
149static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) 159static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
@@ -395,6 +405,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
395 return -ENOMEM; 405 return -ENOMEM;
396 406
397 init_waitqueue_head(&ctx->wqh); 407 init_waitqueue_head(&ctx->wqh);
408 spin_lock_init(&ctx->cancel_lock);
398 ctx->clockid = clockid; 409 ctx->clockid = clockid;
399 410
400 if (isalarm(ctx)) 411 if (isalarm(ctx))
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 566df9b5a6cb..0e659d9c69a1 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1206,7 +1206,7 @@ int udf_setsize(struct inode *inode, loff_t newsize)
1206{ 1206{
1207 int err; 1207 int err;
1208 struct udf_inode_info *iinfo; 1208 struct udf_inode_info *iinfo;
1209 int bsize = 1 << inode->i_blkbits; 1209 int bsize = i_blocksize(inode);
1210 1210
1211 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 1211 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1212 S_ISLNK(inode->i_mode))) 1212 S_ISLNK(inode->i_mode)))
@@ -1235,8 +1235,8 @@ int udf_setsize(struct inode *inode, loff_t newsize)
1235 return err; 1235 return err;
1236 } 1236 }
1237set_size: 1237set_size:
1238 truncate_setsize(inode, newsize);
1239 up_write(&iinfo->i_data_sem); 1238 up_write(&iinfo->i_data_sem);
1239 truncate_setsize(inode, newsize);
1240 } else { 1240 } else {
1241 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 1241 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
1242 down_write(&iinfo->i_data_sem); 1242 down_write(&iinfo->i_data_sem);
@@ -1253,9 +1253,9 @@ set_size:
1253 udf_get_block); 1253 udf_get_block);
1254 if (err) 1254 if (err)
1255 return err; 1255 return err;
1256 truncate_setsize(inode, newsize);
1256 down_write(&iinfo->i_data_sem); 1257 down_write(&iinfo->i_data_sem);
1257 udf_clear_extent_cache(inode); 1258 udf_clear_extent_cache(inode);
1258 truncate_setsize(inode, newsize);
1259 udf_truncate_extents(inode); 1259 udf_truncate_extents(inode);
1260 up_write(&iinfo->i_data_sem); 1260 up_write(&iinfo->i_data_sem);
1261 } 1261 }
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index dc5fae601c24..637e17cb0edd 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -81,7 +81,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
81 ufs_error (sb, "ufs_free_fragments", 81 ufs_error (sb, "ufs_free_fragments",
82 "bit already cleared for fragment %u", i); 82 "bit already cleared for fragment %u", i);
83 } 83 }
84 84
85 inode_sub_bytes(inode, count << uspi->s_fshift);
85 fs32_add(sb, &ucg->cg_cs.cs_nffree, count); 86 fs32_add(sb, &ucg->cg_cs.cs_nffree, count);
86 uspi->cs_total.cs_nffree += count; 87 uspi->cs_total.cs_nffree += count;
87 fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count); 88 fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count);
@@ -183,6 +184,7 @@ do_more:
183 ufs_error(sb, "ufs_free_blocks", "freeing free fragment"); 184 ufs_error(sb, "ufs_free_blocks", "freeing free fragment");
184 } 185 }
185 ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); 186 ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
187 inode_sub_bytes(inode, uspi->s_fpb << uspi->s_fshift);
186 if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) 188 if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
187 ufs_clusteracct (sb, ucpi, blkno, 1); 189 ufs_clusteracct (sb, ucpi, blkno, 1);
188 190
@@ -494,6 +496,20 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
494 return 0; 496 return 0;
495} 497}
496 498
499static bool try_add_frags(struct inode *inode, unsigned frags)
500{
501 unsigned size = frags * i_blocksize(inode);
502 spin_lock(&inode->i_lock);
503 __inode_add_bytes(inode, size);
504 if (unlikely((u32)inode->i_blocks != inode->i_blocks)) {
505 __inode_sub_bytes(inode, size);
506 spin_unlock(&inode->i_lock);
507 return false;
508 }
509 spin_unlock(&inode->i_lock);
510 return true;
511}
512
497static u64 ufs_add_fragments(struct inode *inode, u64 fragment, 513static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
498 unsigned oldcount, unsigned newcount) 514 unsigned oldcount, unsigned newcount)
499{ 515{
@@ -530,6 +546,9 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
530 for (i = oldcount; i < newcount; i++) 546 for (i = oldcount; i < newcount; i++)
531 if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i)) 547 if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i))
532 return 0; 548 return 0;
549
550 if (!try_add_frags(inode, count))
551 return 0;
533 /* 552 /*
534 * Block can be extended 553 * Block can be extended
535 */ 554 */
@@ -647,6 +666,7 @@ cg_found:
647 ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i); 666 ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i);
648 i = uspi->s_fpb - count; 667 i = uspi->s_fpb - count;
649 668
669 inode_sub_bytes(inode, i << uspi->s_fshift);
650 fs32_add(sb, &ucg->cg_cs.cs_nffree, i); 670 fs32_add(sb, &ucg->cg_cs.cs_nffree, i);
651 uspi->cs_total.cs_nffree += i; 671 uspi->cs_total.cs_nffree += i;
652 fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, i); 672 fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, i);
@@ -657,6 +677,8 @@ cg_found:
657 result = ufs_bitmap_search (sb, ucpi, goal, allocsize); 677 result = ufs_bitmap_search (sb, ucpi, goal, allocsize);
658 if (result == INVBLOCK) 678 if (result == INVBLOCK)
659 return 0; 679 return 0;
680 if (!try_add_frags(inode, count))
681 return 0;
660 for (i = 0; i < count; i++) 682 for (i = 0; i < count; i++)
661 ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, result + i); 683 ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, result + i);
662 684
@@ -716,6 +738,8 @@ norot:
716 return INVBLOCK; 738 return INVBLOCK;
717 ucpi->c_rotor = result; 739 ucpi->c_rotor = result;
718gotit: 740gotit:
741 if (!try_add_frags(inode, uspi->s_fpb))
742 return 0;
719 blkno = ufs_fragstoblks(result); 743 blkno = ufs_fragstoblks(result);
720 ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); 744 ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
721 if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) 745 if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index a064cf44b143..1f69bb9b1e9d 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -235,7 +235,8 @@ ufs_extend_tail(struct inode *inode, u64 writes_to,
235 235
236 p = ufs_get_direct_data_ptr(uspi, ufsi, block); 236 p = ufs_get_direct_data_ptr(uspi, ufsi, block);
237 tmp = ufs_new_fragments(inode, p, lastfrag, ufs_data_ptr_to_cpu(sb, p), 237 tmp = ufs_new_fragments(inode, p, lastfrag, ufs_data_ptr_to_cpu(sb, p),
238 new_size, err, locked_page); 238 new_size - (lastfrag & uspi->s_fpbmask), err,
239 locked_page);
239 return tmp != 0; 240 return tmp != 0;
240} 241}
241 242
@@ -284,7 +285,7 @@ ufs_inode_getfrag(struct inode *inode, unsigned index,
284 goal += uspi->s_fpb; 285 goal += uspi->s_fpb;
285 } 286 }
286 tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment), 287 tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment),
287 goal, uspi->s_fpb, err, locked_page); 288 goal, nfrags, err, locked_page);
288 289
289 if (!tmp) { 290 if (!tmp) {
290 *err = -ENOSPC; 291 *err = -ENOSPC;
@@ -402,7 +403,9 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff
402 403
403 if (!create) { 404 if (!create) {
404 phys64 = ufs_frag_map(inode, offsets, depth); 405 phys64 = ufs_frag_map(inode, offsets, depth);
405 goto out; 406 if (phys64)
407 map_bh(bh_result, sb, phys64 + frag);
408 return 0;
406 } 409 }
407 410
408 /* This code entered only while writing ....? */ 411 /* This code entered only while writing ....? */
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index f6390eec02ca..10f364490833 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -746,6 +746,23 @@ static void ufs_put_super(struct super_block *sb)
746 return; 746 return;
747} 747}
748 748
749static u64 ufs_max_bytes(struct super_block *sb)
750{
751 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
752 int bits = uspi->s_apbshift;
753 u64 res;
754
755 if (bits > 21)
756 res = ~0ULL;
757 else
758 res = UFS_NDADDR + (1LL << bits) + (1LL << (2*bits)) +
759 (1LL << (3*bits));
760
761 if (res >= (MAX_LFS_FILESIZE >> uspi->s_bshift))
762 return MAX_LFS_FILESIZE;
763 return res << uspi->s_bshift;
764}
765
749static int ufs_fill_super(struct super_block *sb, void *data, int silent) 766static int ufs_fill_super(struct super_block *sb, void *data, int silent)
750{ 767{
751 struct ufs_sb_info * sbi; 768 struct ufs_sb_info * sbi;
@@ -1212,6 +1229,7 @@ magic_found:
1212 "fast symlink size (%u)\n", uspi->s_maxsymlinklen); 1229 "fast symlink size (%u)\n", uspi->s_maxsymlinklen);
1213 uspi->s_maxsymlinklen = maxsymlen; 1230 uspi->s_maxsymlinklen = maxsymlen;
1214 } 1231 }
1232 sb->s_maxbytes = ufs_max_bytes(sb);
1215 sb->s_max_links = UFS_LINK_MAX; 1233 sb->s_max_links = UFS_LINK_MAX;
1216 1234
1217 inode = ufs_iget(sb, UFS_ROOTINO); 1235 inode = ufs_iget(sb, UFS_ROOTINO);
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 954175928240..3f9463f8cf2f 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -473,15 +473,19 @@ static inline unsigned _ubh_find_last_zero_bit_(
473static inline int _ubh_isblockset_(struct ufs_sb_private_info * uspi, 473static inline int _ubh_isblockset_(struct ufs_sb_private_info * uspi,
474 struct ufs_buffer_head * ubh, unsigned begin, unsigned block) 474 struct ufs_buffer_head * ubh, unsigned begin, unsigned block)
475{ 475{
476 u8 mask;
476 switch (uspi->s_fpb) { 477 switch (uspi->s_fpb) {
477 case 8: 478 case 8:
478 return (*ubh_get_addr (ubh, begin + block) == 0xff); 479 return (*ubh_get_addr (ubh, begin + block) == 0xff);
479 case 4: 480 case 4:
480 return (*ubh_get_addr (ubh, begin + (block >> 1)) == (0x0f << ((block & 0x01) << 2))); 481 mask = 0x0f << ((block & 0x01) << 2);
482 return (*ubh_get_addr (ubh, begin + (block >> 1)) & mask) == mask;
481 case 2: 483 case 2:
482 return (*ubh_get_addr (ubh, begin + (block >> 2)) == (0x03 << ((block & 0x03) << 1))); 484 mask = 0x03 << ((block & 0x03) << 1);
485 return (*ubh_get_addr (ubh, begin + (block >> 2)) & mask) == mask;
483 case 1: 486 case 1:
484 return (*ubh_get_addr (ubh, begin + (block >> 3)) == (0x01 << (block & 0x07))); 487 mask = 0x01 << (block & 0x07);
488 return (*ubh_get_addr (ubh, begin + (block >> 3)) & mask) == mask;
485 } 489 }
486 return 0; 490 return 0;
487} 491}
diff --git a/fs/utimes.c b/fs/utimes.c
index cb771c30d102..a35e909cf8e3 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -91,7 +91,7 @@ static int utimes_common(struct path *path, struct timespec *times)
91 } 91 }
92retry_deleg: 92retry_deleg:
93 mutex_lock(&inode->i_mutex); 93 mutex_lock(&inode->i_mutex);
94 error = notify_change(path->dentry, &newattrs, &delegated_inode); 94 error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode);
95 mutex_unlock(&inode->i_mutex); 95 mutex_unlock(&inode->i_mutex);
96 if (delegated_inode) { 96 if (delegated_inode) {
97 error = break_deleg_wait(&delegated_inode); 97 error = break_deleg_wait(&delegated_inode);
diff --git a/fs/xattr.c b/fs/xattr.c
index 9b932b95d74e..f0da9d24e9ca 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -442,7 +442,7 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
442 size = XATTR_SIZE_MAX; 442 size = XATTR_SIZE_MAX;
443 kvalue = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); 443 kvalue = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
444 if (!kvalue) { 444 if (!kvalue) {
445 vvalue = vmalloc(size); 445 vvalue = vzalloc(size);
446 if (!vvalue) 446 if (!vvalue)
447 return -ENOMEM; 447 return -ENOMEM;
448 kvalue = vvalue; 448 kvalue = vvalue;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 119c2422aac7..75884aecf920 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -2179,8 +2179,10 @@ xfs_bmap_add_extent_delay_real(
2179 } 2179 }
2180 temp = xfs_bmap_worst_indlen(bma->ip, temp); 2180 temp = xfs_bmap_worst_indlen(bma->ip, temp);
2181 temp2 = xfs_bmap_worst_indlen(bma->ip, temp2); 2181 temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
2182 diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - 2182 diff = (int)(temp + temp2 -
2183 (bma->cur ? bma->cur->bc_private.b.allocated : 0)); 2183 (startblockval(PREV.br_startblock) -
2184 (bma->cur ?
2185 bma->cur->bc_private.b.allocated : 0)));
2184 if (diff > 0) { 2186 if (diff > 0) {
2185 error = xfs_mod_fdblocks(bma->ip->i_mount, 2187 error = xfs_mod_fdblocks(bma->ip->i_mount,
2186 -((int64_t)diff), false); 2188 -((int64_t)diff), false);
@@ -2232,7 +2234,6 @@ xfs_bmap_add_extent_delay_real(
2232 temp = da_new; 2234 temp = da_new;
2233 if (bma->cur) 2235 if (bma->cur)
2234 temp += bma->cur->bc_private.b.allocated; 2236 temp += bma->cur->bc_private.b.allocated;
2235 ASSERT(temp <= da_old);
2236 if (temp < da_old) 2237 if (temp < da_old)
2237 xfs_mod_fdblocks(bma->ip->i_mount, 2238 xfs_mod_fdblocks(bma->ip->i_mount,
2238 (int64_t)(da_old - temp), false); 2239 (int64_t)(da_old - temp), false);
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index af1bbee5586e..28bc5e78b110 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4064,7 +4064,7 @@ xfs_btree_change_owner(
4064 xfs_btree_readahead_ptr(cur, ptr, 1); 4064 xfs_btree_readahead_ptr(cur, ptr, 1);
4065 4065
4066 /* save for the next iteration of the loop */ 4066 /* save for the next iteration of the loop */
4067 lptr = *ptr; 4067 xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
4068 } 4068 }
4069 4069
4070 /* for each buffer in the level */ 4070 /* for each buffer in the level */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 1aabfda669b0..7183b7ea065b 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -299,6 +299,14 @@ xfs_dinode_verify(
299 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) 299 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
300 return false; 300 return false;
301 301
302 /* don't allow invalid i_size */
303 if (be64_to_cpu(dip->di_size) & (1ULL << 63))
304 return false;
305
306 /* No zero-length symlinks. */
307 if (S_ISLNK(be16_to_cpu(dip->di_mode)) && dip->di_size == 0)
308 return false;
309
302 /* only version 3 or greater inodes are extensively verified here */ 310 /* only version 3 or greater inodes are extensively verified here */
303 if (dip->di_version < 3) 311 if (dip->di_version < 3)
304 return true; 312 return true;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 29e7e5dd5178..a9063ac50c4e 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -288,7 +288,7 @@ xfs_map_blocks(
288{ 288{
289 struct xfs_inode *ip = XFS_I(inode); 289 struct xfs_inode *ip = XFS_I(inode);
290 struct xfs_mount *mp = ip->i_mount; 290 struct xfs_mount *mp = ip->i_mount;
291 ssize_t count = 1 << inode->i_blkbits; 291 ssize_t count = i_blocksize(inode);
292 xfs_fileoff_t offset_fsb, end_fsb; 292 xfs_fileoff_t offset_fsb, end_fsb;
293 int error = 0; 293 int error = 0;
294 int bmapi_flags = XFS_BMAPI_ENTIRE; 294 int bmapi_flags = XFS_BMAPI_ENTIRE;
@@ -921,7 +921,7 @@ xfs_aops_discard_page(
921 break; 921 break;
922 } 922 }
923next_buffer: 923next_buffer:
924 offset += 1 << inode->i_blkbits; 924 offset += i_blocksize(inode);
925 925
926 } while ((bh = bh->b_this_page) != head); 926 } while ((bh = bh->b_this_page) != head);
927 927
@@ -1363,7 +1363,7 @@ xfs_map_trim_size(
1363 offset + mapping_size >= i_size_read(inode)) { 1363 offset + mapping_size >= i_size_read(inode)) {
1364 /* limit mapping to block that spans EOF */ 1364 /* limit mapping to block that spans EOF */
1365 mapping_size = roundup_64(i_size_read(inode) - offset, 1365 mapping_size = roundup_64(i_size_read(inode) - offset,
1366 1 << inode->i_blkbits); 1366 i_blocksize(inode));
1367 } 1367 }
1368 if (mapping_size > LONG_MAX) 1368 if (mapping_size > LONG_MAX)
1369 mapping_size = LONG_MAX; 1369 mapping_size = LONG_MAX;
@@ -1395,7 +1395,7 @@ __xfs_get_blocks(
1395 return -EIO; 1395 return -EIO;
1396 1396
1397 offset = (xfs_off_t)iblock << inode->i_blkbits; 1397 offset = (xfs_off_t)iblock << inode->i_blkbits;
1398 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); 1398 ASSERT(bh_result->b_size >= i_blocksize(inode));
1399 size = bh_result->b_size; 1399 size = bh_result->b_size;
1400 1400
1401 if (!create && direct && offset >= i_size_read(inode)) 1401 if (!create && direct && offset >= i_size_read(inode))
@@ -1426,6 +1426,26 @@ __xfs_get_blocks(
1426 if (error) 1426 if (error)
1427 goto out_unlock; 1427 goto out_unlock;
1428 1428
1429 /*
1430 * The only time we can ever safely find delalloc blocks on direct I/O
1431 * is a dio write to post-eof speculative preallocation. All other
1432 * scenarios are indicative of a problem or misuse (such as mixing
1433 * direct and mapped I/O).
1434 *
1435 * The file may be unmapped by the time we get here so we cannot
1436 * reliably fail the I/O based on mapping. Instead, fail the I/O if this
1437 * is a read or a write within eof. Otherwise, carry on but warn as a
1438 * precuation if the file happens to be mapped.
1439 */
1440 if (direct && imap.br_startblock == DELAYSTARTBLOCK) {
1441 if (!create || offset < i_size_read(VFS_I(ip))) {
1442 WARN_ON_ONCE(1);
1443 error = -EIO;
1444 goto out_unlock;
1445 }
1446 WARN_ON_ONCE(mapping_mapped(VFS_I(ip)->i_mapping));
1447 }
1448
1429 /* for DAX, we convert unwritten extents directly */ 1449 /* for DAX, we convert unwritten extents directly */
1430 if (create && 1450 if (create &&
1431 (!nimaps || 1451 (!nimaps ||
@@ -1525,7 +1545,6 @@ __xfs_get_blocks(
1525 set_buffer_new(bh_result); 1545 set_buffer_new(bh_result);
1526 1546
1527 if (imap.br_startblock == DELAYSTARTBLOCK) { 1547 if (imap.br_startblock == DELAYSTARTBLOCK) {
1528 BUG_ON(direct);
1529 if (create) { 1548 if (create) {
1530 set_buffer_uptodate(bh_result); 1549 set_buffer_uptodate(bh_result);
1531 set_buffer_mapped(bh_result); 1550 set_buffer_mapped(bh_result);
@@ -1968,7 +1987,7 @@ xfs_vm_set_page_dirty(
1968 if (offset < end_offset) 1987 if (offset < end_offset)
1969 set_buffer_dirty(bh); 1988 set_buffer_dirty(bh);
1970 bh = bh->b_this_page; 1989 bh = bh->b_this_page;
1971 offset += 1 << inode->i_blkbits; 1990 offset += i_blocksize(inode);
1972 } while (bh != head); 1991 } while (bh != head);
1973 } 1992 }
1974 /* 1993 /*
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index dd4824589470..234331227c0c 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -112,6 +112,7 @@ typedef struct attrlist_cursor_kern {
112 *========================================================================*/ 112 *========================================================================*/
113 113
114 114
115/* Return 0 on success, or -errno; other state communicated via *context */
115typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int, 116typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int,
116 unsigned char *, int, int, unsigned char *); 117 unsigned char *, int, int, unsigned char *);
117 118
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 4fa14820e2e2..c8be331a3196 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -108,16 +108,14 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
108 (int)sfe->namelen, 108 (int)sfe->namelen,
109 (int)sfe->valuelen, 109 (int)sfe->valuelen,
110 &sfe->nameval[sfe->namelen]); 110 &sfe->nameval[sfe->namelen]);
111 111 if (error)
112 return error;
112 /* 113 /*
113 * Either search callback finished early or 114 * Either search callback finished early or
114 * didn't fit it all in the buffer after all. 115 * didn't fit it all in the buffer after all.
115 */ 116 */
116 if (context->seen_enough) 117 if (context->seen_enough)
117 break; 118 break;
118
119 if (error)
120 return error;
121 sfe = XFS_ATTR_SF_NEXTENTRY(sfe); 119 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
122 } 120 }
123 trace_xfs_attr_list_sf_all(context); 121 trace_xfs_attr_list_sf_all(context);
@@ -581,7 +579,7 @@ xfs_attr_put_listent(
581 trace_xfs_attr_list_full(context); 579 trace_xfs_attr_list_full(context);
582 alist->al_more = 1; 580 alist->al_more = 1;
583 context->seen_enough = 1; 581 context->seen_enough = 1;
584 return 1; 582 return 0;
585 } 583 }
586 584
587 aep = (attrlist_ent_t *)&context->alist[context->firstu]; 585 aep = (attrlist_ent_t *)&context->alist[context->firstu];
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index dbae6490a79a..863e1bff403b 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -682,7 +682,7 @@ xfs_getbmap(
682 * extents. 682 * extents.
683 */ 683 */
684 if (map[i].br_startblock == DELAYSTARTBLOCK && 684 if (map[i].br_startblock == DELAYSTARTBLOCK &&
685 map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) 685 map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
686 ASSERT((iflags & BMV_IF_DELALLOC) != 0); 686 ASSERT((iflags & BMV_IF_DELALLOC) != 0);
687 687
688 if (map[i].br_startblock == HOLESTARTBLOCK && 688 if (map[i].br_startblock == HOLESTARTBLOCK &&
@@ -1713,6 +1713,7 @@ xfs_swap_extents(
1713 xfs_trans_t *tp; 1713 xfs_trans_t *tp;
1714 xfs_bstat_t *sbp = &sxp->sx_stat; 1714 xfs_bstat_t *sbp = &sxp->sx_stat;
1715 xfs_ifork_t *tempifp, *ifp, *tifp; 1715 xfs_ifork_t *tempifp, *ifp, *tifp;
1716 xfs_extnum_t nextents;
1716 int src_log_flags, target_log_flags; 1717 int src_log_flags, target_log_flags;
1717 int error = 0; 1718 int error = 0;
1718 int aforkblks = 0; 1719 int aforkblks = 0;
@@ -1899,7 +1900,8 @@ xfs_swap_extents(
1899 * pointer. Otherwise it's already NULL or 1900 * pointer. Otherwise it's already NULL or
1900 * pointing to the extent. 1901 * pointing to the extent.
1901 */ 1902 */
1902 if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { 1903 nextents = ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1904 if (nextents <= XFS_INLINE_EXTS) {
1903 ifp->if_u1.if_extents = 1905 ifp->if_u1.if_extents =
1904 ifp->if_u2.if_inline_ext; 1906 ifp->if_u2.if_inline_ext;
1905 } 1907 }
@@ -1918,7 +1920,8 @@ xfs_swap_extents(
1918 * pointer. Otherwise it's already NULL or 1920 * pointer. Otherwise it's already NULL or
1919 * pointing to the extent. 1921 * pointing to the extent.
1920 */ 1922 */
1921 if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { 1923 nextents = tip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1924 if (nextents <= XFS_INLINE_EXTS) {
1922 tifp->if_u1.if_extents = 1925 tifp->if_u1.if_extents =
1923 tifp->if_u2.if_inline_ext; 1926 tifp->if_u2.if_inline_ext;
1924 } 1927 }
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index eb1b8c8acfcb..dcb70969ff1c 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -375,6 +375,7 @@ retry:
375out_free_pages: 375out_free_pages:
376 for (i = 0; i < bp->b_page_count; i++) 376 for (i = 0; i < bp->b_page_count; i++)
377 __free_page(bp->b_pages[i]); 377 __free_page(bp->b_pages[i]);
378 bp->b_flags &= ~_XBF_PAGES;
378 return error; 379 return error;
379} 380}
380 381
@@ -978,6 +979,8 @@ void
978xfs_buf_unlock( 979xfs_buf_unlock(
979 struct xfs_buf *bp) 980 struct xfs_buf *bp)
980{ 981{
982 ASSERT(xfs_buf_islocked(bp));
983
981 XB_CLEAR_OWNER(bp); 984 XB_CLEAR_OWNER(bp);
982 up(&bp->b_sema); 985 up(&bp->b_sema);
983 986
@@ -1712,6 +1715,28 @@ error:
1712} 1715}
1713 1716
1714/* 1717/*
1718 * Cancel a delayed write list.
1719 *
1720 * Remove each buffer from the list, clear the delwri queue flag and drop the
1721 * associated buffer reference.
1722 */
1723void
1724xfs_buf_delwri_cancel(
1725 struct list_head *list)
1726{
1727 struct xfs_buf *bp;
1728
1729 while (!list_empty(list)) {
1730 bp = list_first_entry(list, struct xfs_buf, b_list);
1731
1732 xfs_buf_lock(bp);
1733 bp->b_flags &= ~_XBF_DELWRI_Q;
1734 list_del_init(&bp->b_list);
1735 xfs_buf_relse(bp);
1736 }
1737}
1738
1739/*
1715 * Add a buffer to the delayed write list. 1740 * Add a buffer to the delayed write list.
1716 * 1741 *
1717 * This queues a buffer for writeout if it hasn't already been. Note that 1742 * This queues a buffer for writeout if it hasn't already been. Note that
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index c75721acd867..149bbd451731 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -304,6 +304,7 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
304extern void *xfs_buf_offset(struct xfs_buf *, size_t); 304extern void *xfs_buf_offset(struct xfs_buf *, size_t);
305 305
306/* Delayed Write Buffer Routines */ 306/* Delayed Write Buffer Routines */
307extern void xfs_buf_delwri_cancel(struct list_head *);
307extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); 308extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
308extern int xfs_buf_delwri_submit(struct list_head *); 309extern int xfs_buf_delwri_submit(struct list_head *);
309extern int xfs_buf_delwri_submit_nowait(struct list_head *); 310extern int xfs_buf_delwri_submit_nowait(struct list_head *);
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 642d55d10075..2fbf643fa10a 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -406,6 +406,7 @@ xfs_dir2_leaf_readbuf(
406 406
407 /* 407 /*
408 * Do we need more readahead? 408 * Do we need more readahead?
409 * Each loop tries to process 1 full dir blk; last may be partial.
409 */ 410 */
410 blk_start_plug(&plug); 411 blk_start_plug(&plug);
411 for (mip->ra_index = mip->ra_offset = i = 0; 412 for (mip->ra_index = mip->ra_offset = i = 0;
@@ -416,7 +417,8 @@ xfs_dir2_leaf_readbuf(
416 * Read-ahead a contiguous directory block. 417 * Read-ahead a contiguous directory block.
417 */ 418 */
418 if (i > mip->ra_current && 419 if (i > mip->ra_current &&
419 map[mip->ra_index].br_blockcount >= geo->fsbcount) { 420 (map[mip->ra_index].br_blockcount - mip->ra_offset) >=
421 geo->fsbcount) {
420 xfs_dir3_data_readahead(dp, 422 xfs_dir3_data_readahead(dp,
421 map[mip->ra_index].br_startoff + mip->ra_offset, 423 map[mip->ra_index].br_startoff + mip->ra_offset,
422 XFS_FSB_TO_DADDR(dp->i_mount, 424 XFS_FSB_TO_DADDR(dp->i_mount,
@@ -437,14 +439,19 @@ xfs_dir2_leaf_readbuf(
437 } 439 }
438 440
439 /* 441 /*
440 * Advance offset through the mapping table. 442 * Advance offset through the mapping table, processing a full
443 * dir block even if it is fragmented into several extents.
444 * But stop if we have consumed all valid mappings, even if
445 * it's not yet a full directory block.
441 */ 446 */
442 for (j = 0; j < geo->fsbcount; j += length ) { 447 for (j = 0;
448 j < geo->fsbcount && mip->ra_index < mip->map_valid;
449 j += length ) {
443 /* 450 /*
444 * The rest of this extent but not more than a dir 451 * The rest of this extent but not more than a dir
445 * block. 452 * block.
446 */ 453 */
447 length = min_t(int, geo->fsbcount, 454 length = min_t(int, geo->fsbcount - j,
448 map[mip->ra_index].br_blockcount - 455 map[mip->ra_index].br_blockcount -
449 mip->ra_offset); 456 mip->ra_offset);
450 mip->ra_offset += length; 457 mip->ra_offset += length;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f5392ab2def1..3dd47307363f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -947,7 +947,7 @@ xfs_file_fallocate(
947 if (error) 947 if (error)
948 goto out_unlock; 948 goto out_unlock;
949 } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { 949 } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
950 unsigned blksize_mask = (1 << inode->i_blkbits) - 1; 950 unsigned int blksize_mask = i_blocksize(inode) - 1;
951 951
952 if (offset & blksize_mask || len & blksize_mask) { 952 if (offset & blksize_mask || len & blksize_mask) {
953 error = -EINVAL; 953 error = -EINVAL;
@@ -969,7 +969,7 @@ xfs_file_fallocate(
969 if (error) 969 if (error)
970 goto out_unlock; 970 goto out_unlock;
971 } else if (mode & FALLOC_FL_INSERT_RANGE) { 971 } else if (mode & FALLOC_FL_INSERT_RANGE) {
972 unsigned blksize_mask = (1 << inode->i_blkbits) - 1; 972 unsigned int blksize_mask = i_blocksize(inode) - 1;
973 973
974 new_size = i_size_read(inode) + len; 974 new_size = i_size_read(inode) + len;
975 if (offset & blksize_mask || len & blksize_mask) { 975 if (offset & blksize_mask || len & blksize_mask) {
@@ -1208,7 +1208,7 @@ xfs_find_get_desired_pgoff(
1208 unsigned nr_pages; 1208 unsigned nr_pages;
1209 unsigned int i; 1209 unsigned int i;
1210 1210
1211 want = min_t(pgoff_t, end - index, PAGEVEC_SIZE); 1211 want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
1212 nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, 1212 nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
1213 want); 1213 want);
1214 /* 1214 /*
@@ -1235,17 +1235,6 @@ xfs_find_get_desired_pgoff(
1235 break; 1235 break;
1236 } 1236 }
1237 1237
1238 /*
1239 * At lease we found one page. If this is the first time we
1240 * step into the loop, and if the first page index offset is
1241 * greater than the given search offset, a hole was found.
1242 */
1243 if (type == HOLE_OFF && lastoff == startoff &&
1244 lastoff < page_offset(pvec.pages[0])) {
1245 found = true;
1246 break;
1247 }
1248
1249 for (i = 0; i < nr_pages; i++) { 1238 for (i = 0; i < nr_pages; i++) {
1250 struct page *page = pvec.pages[i]; 1239 struct page *page = pvec.pages[i];
1251 loff_t b_offset; 1240 loff_t b_offset;
@@ -1257,18 +1246,18 @@ xfs_find_get_desired_pgoff(
1257 * file mapping. However, page->index will not change 1246 * file mapping. However, page->index will not change
1258 * because we have a reference on the page. 1247 * because we have a reference on the page.
1259 * 1248 *
1260 * Searching done if the page index is out of range. 1249 * If current page offset is beyond where we've ended,
1261 * If the current offset is not reaches the end of 1250 * we've found a hole.
1262 * the specified search range, there should be a hole
1263 * between them.
1264 */ 1251 */
1265 if (page->index > end) { 1252 if (type == HOLE_OFF && lastoff < endoff &&
1266 if (type == HOLE_OFF && lastoff < endoff) { 1253 lastoff < page_offset(pvec.pages[i])) {
1267 *offset = lastoff; 1254 found = true;
1268 found = true; 1255 *offset = lastoff;
1269 }
1270 goto out; 1256 goto out;
1271 } 1257 }
1258 /* Searching done if the page index is out of range. */
1259 if (page->index > end)
1260 goto out;
1272 1261
1273 lock_page(page); 1262 lock_page(page);
1274 /* 1263 /*
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index d7a490f24ead..adbc1f59969a 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -210,14 +210,17 @@ xfs_iget_cache_hit(
210 210
211 error = inode_init_always(mp->m_super, inode); 211 error = inode_init_always(mp->m_super, inode);
212 if (error) { 212 if (error) {
213 bool wake;
213 /* 214 /*
214 * Re-initializing the inode failed, and we are in deep 215 * Re-initializing the inode failed, and we are in deep
215 * trouble. Try to re-add it to the reclaim list. 216 * trouble. Try to re-add it to the reclaim list.
216 */ 217 */
217 rcu_read_lock(); 218 rcu_read_lock();
218 spin_lock(&ip->i_flags_lock); 219 spin_lock(&ip->i_flags_lock);
219 220 wake = !!__xfs_iflags_test(ip, XFS_INEW);
220 ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM); 221 ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
222 if (wake)
223 wake_up_bit(&ip->i_flags, __XFS_INEW_BIT);
221 ASSERT(ip->i_flags & XFS_IRECLAIMABLE); 224 ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
222 trace_xfs_iget_reclaim_fail(ip); 225 trace_xfs_iget_reclaim_fail(ip);
223 goto out_error; 226 goto out_error;
@@ -363,6 +366,22 @@ out_destroy:
363 return error; 366 return error;
364} 367}
365 368
369static void
370xfs_inew_wait(
371 struct xfs_inode *ip)
372{
373 wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_INEW_BIT);
374 DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT);
375
376 do {
377 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
378 if (!xfs_iflags_test(ip, XFS_INEW))
379 break;
380 schedule();
381 } while (true);
382 finish_wait(wq, &wait.wait);
383}
384
366/* 385/*
367 * Look up an inode by number in the given file system. 386 * Look up an inode by number in the given file system.
368 * The inode is looked up in the cache held in each AG. 387 * The inode is looked up in the cache held in each AG.
@@ -467,9 +486,11 @@ out_error_or_again:
467 486
468STATIC int 487STATIC int
469xfs_inode_ag_walk_grab( 488xfs_inode_ag_walk_grab(
470 struct xfs_inode *ip) 489 struct xfs_inode *ip,
490 int flags)
471{ 491{
472 struct inode *inode = VFS_I(ip); 492 struct inode *inode = VFS_I(ip);
493 bool newinos = !!(flags & XFS_AGITER_INEW_WAIT);
473 494
474 ASSERT(rcu_read_lock_held()); 495 ASSERT(rcu_read_lock_held());
475 496
@@ -487,7 +508,8 @@ xfs_inode_ag_walk_grab(
487 goto out_unlock_noent; 508 goto out_unlock_noent;
488 509
489 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ 510 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
490 if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) 511 if ((!newinos && __xfs_iflags_test(ip, XFS_INEW)) ||
512 __xfs_iflags_test(ip, XFS_IRECLAIMABLE | XFS_IRECLAIM))
491 goto out_unlock_noent; 513 goto out_unlock_noent;
492 spin_unlock(&ip->i_flags_lock); 514 spin_unlock(&ip->i_flags_lock);
493 515
@@ -515,7 +537,8 @@ xfs_inode_ag_walk(
515 void *args), 537 void *args),
516 int flags, 538 int flags,
517 void *args, 539 void *args,
518 int tag) 540 int tag,
541 int iter_flags)
519{ 542{
520 uint32_t first_index; 543 uint32_t first_index;
521 int last_error = 0; 544 int last_error = 0;
@@ -557,7 +580,7 @@ restart:
557 for (i = 0; i < nr_found; i++) { 580 for (i = 0; i < nr_found; i++) {
558 struct xfs_inode *ip = batch[i]; 581 struct xfs_inode *ip = batch[i];
559 582
560 if (done || xfs_inode_ag_walk_grab(ip)) 583 if (done || xfs_inode_ag_walk_grab(ip, iter_flags))
561 batch[i] = NULL; 584 batch[i] = NULL;
562 585
563 /* 586 /*
@@ -585,6 +608,9 @@ restart:
585 for (i = 0; i < nr_found; i++) { 608 for (i = 0; i < nr_found; i++) {
586 if (!batch[i]) 609 if (!batch[i])
587 continue; 610 continue;
611 if ((iter_flags & XFS_AGITER_INEW_WAIT) &&
612 xfs_iflags_test(batch[i], XFS_INEW))
613 xfs_inew_wait(batch[i]);
588 error = execute(batch[i], flags, args); 614 error = execute(batch[i], flags, args);
589 IRELE(batch[i]); 615 IRELE(batch[i]);
590 if (error == -EAGAIN) { 616 if (error == -EAGAIN) {
@@ -637,12 +663,13 @@ xfs_eofblocks_worker(
637} 663}
638 664
639int 665int
640xfs_inode_ag_iterator( 666xfs_inode_ag_iterator_flags(
641 struct xfs_mount *mp, 667 struct xfs_mount *mp,
642 int (*execute)(struct xfs_inode *ip, int flags, 668 int (*execute)(struct xfs_inode *ip, int flags,
643 void *args), 669 void *args),
644 int flags, 670 int flags,
645 void *args) 671 void *args,
672 int iter_flags)
646{ 673{
647 struct xfs_perag *pag; 674 struct xfs_perag *pag;
648 int error = 0; 675 int error = 0;
@@ -652,7 +679,8 @@ xfs_inode_ag_iterator(
652 ag = 0; 679 ag = 0;
653 while ((pag = xfs_perag_get(mp, ag))) { 680 while ((pag = xfs_perag_get(mp, ag))) {
654 ag = pag->pag_agno + 1; 681 ag = pag->pag_agno + 1;
655 error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1); 682 error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1,
683 iter_flags);
656 xfs_perag_put(pag); 684 xfs_perag_put(pag);
657 if (error) { 685 if (error) {
658 last_error = error; 686 last_error = error;
@@ -664,6 +692,17 @@ xfs_inode_ag_iterator(
664} 692}
665 693
666int 694int
695xfs_inode_ag_iterator(
696 struct xfs_mount *mp,
697 int (*execute)(struct xfs_inode *ip, int flags,
698 void *args),
699 int flags,
700 void *args)
701{
702 return xfs_inode_ag_iterator_flags(mp, execute, flags, args, 0);
703}
704
705int
667xfs_inode_ag_iterator_tag( 706xfs_inode_ag_iterator_tag(
668 struct xfs_mount *mp, 707 struct xfs_mount *mp,
669 int (*execute)(struct xfs_inode *ip, int flags, 708 int (*execute)(struct xfs_inode *ip, int flags,
@@ -680,7 +719,8 @@ xfs_inode_ag_iterator_tag(
680 ag = 0; 719 ag = 0;
681 while ((pag = xfs_perag_get_tag(mp, ag, tag))) { 720 while ((pag = xfs_perag_get_tag(mp, ag, tag))) {
682 ag = pag->pag_agno + 1; 721 ag = pag->pag_agno + 1;
683 error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag); 722 error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag,
723 0);
684 xfs_perag_put(pag); 724 xfs_perag_put(pag);
685 if (error) { 725 if (error) {
686 last_error = error; 726 last_error = error;
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 62f1f91c32cb..147a79212e63 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -48,6 +48,11 @@ struct xfs_eofblocks {
48#define XFS_IGET_UNTRUSTED 0x2 48#define XFS_IGET_UNTRUSTED 0x2
49#define XFS_IGET_DONTCACHE 0x4 49#define XFS_IGET_DONTCACHE 0x4
50 50
51/*
52 * flags for AG inode iterator
53 */
54#define XFS_AGITER_INEW_WAIT 0x1 /* wait on new inodes */
55
51int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino, 56int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
52 uint flags, uint lock_flags, xfs_inode_t **ipp); 57 uint flags, uint lock_flags, xfs_inode_t **ipp);
53 58
@@ -72,6 +77,9 @@ void xfs_eofblocks_worker(struct work_struct *);
72int xfs_inode_ag_iterator(struct xfs_mount *mp, 77int xfs_inode_ag_iterator(struct xfs_mount *mp,
73 int (*execute)(struct xfs_inode *ip, int flags, void *args), 78 int (*execute)(struct xfs_inode *ip, int flags, void *args),
74 int flags, void *args); 79 int flags, void *args);
80int xfs_inode_ag_iterator_flags(struct xfs_mount *mp,
81 int (*execute)(struct xfs_inode *ip, int flags, void *args),
82 int flags, void *args, int iter_flags);
75int xfs_inode_ag_iterator_tag(struct xfs_mount *mp, 83int xfs_inode_ag_iterator_tag(struct xfs_mount *mp,
76 int (*execute)(struct xfs_inode *ip, int flags, void *args), 84 int (*execute)(struct xfs_inode *ip, int flags, void *args),
77 int flags, void *args, int tag); 85 int flags, void *args, int tag);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index ca9e11989cbd..ae1a49845744 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -208,7 +208,8 @@ xfs_get_initial_prid(struct xfs_inode *dp)
208#define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */ 208#define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */
209#define XFS_ISTALE (1 << 1) /* inode has been staled */ 209#define XFS_ISTALE (1 << 1) /* inode has been staled */
210#define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */ 210#define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */
211#define XFS_INEW (1 << 3) /* inode has just been allocated */ 211#define __XFS_INEW_BIT 3 /* inode has just been allocated */
212#define XFS_INEW (1 << __XFS_INEW_BIT)
212#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ 213#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */
213#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ 214#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */
214#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */ 215#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */
@@ -453,6 +454,7 @@ static inline void xfs_finish_inode_setup(struct xfs_inode *ip)
453 xfs_iflags_clear(ip, XFS_INEW); 454 xfs_iflags_clear(ip, XFS_INEW);
454 barrier(); 455 barrier();
455 unlock_new_inode(VFS_I(ip)); 456 unlock_new_inode(VFS_I(ip));
457 wake_up_bit(&ip->i_flags, __XFS_INEW_BIT);
456} 458}
457 459
458static inline void xfs_setup_existing_inode(struct xfs_inode *ip) 460static inline void xfs_setup_existing_inode(struct xfs_inode *ip)
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index d42738deec6d..e4a4f82ea13f 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -403,6 +403,7 @@ xfs_attrlist_by_handle(
403{ 403{
404 int error = -ENOMEM; 404 int error = -ENOMEM;
405 attrlist_cursor_kern_t *cursor; 405 attrlist_cursor_kern_t *cursor;
406 struct xfs_fsop_attrlist_handlereq __user *p = arg;
406 xfs_fsop_attrlist_handlereq_t al_hreq; 407 xfs_fsop_attrlist_handlereq_t al_hreq;
407 struct dentry *dentry; 408 struct dentry *dentry;
408 char *kbuf; 409 char *kbuf;
@@ -435,6 +436,11 @@ xfs_attrlist_by_handle(
435 if (error) 436 if (error)
436 goto out_kfree; 437 goto out_kfree;
437 438
439 if (copy_to_user(&p->pos, cursor, sizeof(attrlist_cursor_kern_t))) {
440 error = -EFAULT;
441 goto out_kfree;
442 }
443
438 if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen)) 444 if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
439 error = -EFAULT; 445 error = -EFAULT;
440 446
@@ -1379,10 +1385,11 @@ xfs_ioc_getbmap(
1379 unsigned int cmd, 1385 unsigned int cmd,
1380 void __user *arg) 1386 void __user *arg)
1381{ 1387{
1382 struct getbmapx bmx; 1388 struct getbmapx bmx = { 0 };
1383 int error; 1389 int error;
1384 1390
1385 if (copy_from_user(&bmx, arg, sizeof(struct getbmapx))) 1391 /* struct getbmap is a strict subset of struct getbmapx. */
1392 if (copy_from_user(&bmx, arg, offsetof(struct getbmapx, bmv_iflags)))
1386 return -EFAULT; 1393 return -EFAULT;
1387 1394
1388 if (bmx.bmv_count < 2) 1395 if (bmx.bmv_count < 2)
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 532ab79d38fe..572b64a135b3 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1355,12 +1355,7 @@ xfs_qm_quotacheck(
1355 mp->m_qflags |= flags; 1355 mp->m_qflags |= flags;
1356 1356
1357 error_return: 1357 error_return:
1358 while (!list_empty(&buffer_list)) { 1358 xfs_buf_delwri_cancel(&buffer_list);
1359 struct xfs_buf *bp =
1360 list_first_entry(&buffer_list, struct xfs_buf, b_list);
1361 list_del_init(&bp->b_list);
1362 xfs_buf_relse(bp);
1363 }
1364 1359
1365 if (error) { 1360 if (error) {
1366 xfs_warn(mp, 1361 xfs_warn(mp,
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 3640c6e896af..4d334440bd94 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -764,5 +764,6 @@ xfs_qm_dqrele_all_inodes(
764 uint flags) 764 uint flags)
765{ 765{
766 ASSERT(mp->m_quotainfo); 766 ASSERT(mp->m_quotainfo);
767 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, NULL); 767 xfs_inode_ag_iterator_flags(mp, xfs_dqrele_inode, flags, NULL,
768 XFS_AGITER_INEW_WAIT);
768} 769}
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 839b35ca21c6..9beaf192b4bb 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -180,7 +180,8 @@ xfs_xattr_put_listent(
180 arraytop = context->count + prefix_len + namelen + 1; 180 arraytop = context->count + prefix_len + namelen + 1;
181 if (arraytop > context->firstu) { 181 if (arraytop > context->firstu) {
182 context->count = -1; /* insufficient space */ 182 context->count = -1; /* insufficient space */
183 return 1; 183 context->seen_enough = 1;
184 return 0;
184 } 185 }
185 offset = (char *)context->alist + context->count; 186 offset = (char *)context->alist + context->count;
186 strncpy(offset, xfs_xattr_prefix(flags), prefix_len); 187 strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
@@ -222,12 +223,15 @@ list_one_attr(const char *name, const size_t len, void *data,
222} 223}
223 224
224ssize_t 225ssize_t
225xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) 226xfs_vn_listxattr(
227 struct dentry *dentry,
228 char *data,
229 size_t size)
226{ 230{
227 struct xfs_attr_list_context context; 231 struct xfs_attr_list_context context;
228 struct attrlist_cursor_kern cursor = { 0 }; 232 struct attrlist_cursor_kern cursor = { 0 };
229 struct inode *inode = d_inode(dentry); 233 struct inode *inode = d_inode(dentry);
230 int error; 234 int error;
231 235
232 /* 236 /*
233 * First read the regular on-disk attributes. 237 * First read the regular on-disk attributes.
@@ -245,7 +249,9 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
245 else 249 else
246 context.put_listent = xfs_xattr_put_listent_sizes; 250 context.put_listent = xfs_xattr_put_listent_sizes;
247 251
248 xfs_attr_list_int(&context); 252 error = xfs_attr_list_int(&context);
253 if (error)
254 return error;
249 if (context.count < 0) 255 if (context.count < 0)
250 return -ERANGE; 256 return -ERANGE;
251 257