aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Eric W. Biederman 2016-12-14 06:24:51 -0600
committer Sasha Levin 2016-12-23 07:56:35 -0600
commit 1171afc4a34e2926e6e8e27c896cf328c8825ac3 (patch)
tree 16c6e981770b3e664fcc5c5c1bd341bced63b3e5
parent 62fa696b7b435e93ed114dd6a23aa0881d7f81b9 (diff)
download ti-linux-kernel-1171afc4a34e2926e6e8e27c896cf328c8825ac3.tar.gz
ti-linux-kernel-1171afc4a34e2926e6e8e27c896cf328c8825ac3.tar.xz
ti-linux-kernel-1171afc4a34e2926e6e8e27c896cf328c8825ac3.zip
mnt: Add a per mount namespace limit on the number of mounts
[ Upstream commit d29216842a85c7970c536108e093963f02714498 ]

CAI Qian <caiqian@redhat.com> pointed out that the semantics of shared subtrees make it possible to create an exponentially increasing number of mounts in a mount namespace.

    mkdir /tmp/1 /tmp/2
    mount --make-rshared /
    for i in $(seq 1 20) ; do mount --bind /tmp/1 /tmp/2 ; done

Will create 2^20 or 1048576 mounts, which is a practical problem as some people have managed to hit this by accident. As such CVE-2016-6213 was assigned.

Ian Kent <raven@themaw.net> described the situation for autofs users as follows:

> The number of mounts for direct mount maps is usually not very large because of
> the way they are implemented, large direct mount maps can have performance
> problems. There can be anywhere from a few (likely case a few hundred) to less
> than 10000, plus mounts that have been triggered and not yet expired.
>
> Indirect mounts have one autofs mount at the root plus the number of mounts that
> have been triggered and not yet expired.
>
> The number of autofs indirect map entries can range from a few to the common
> case of several thousand and in rare cases up to between 30000 and 50000. I've
> not heard of people with maps larger than 50000 entries.
>
> The larger the number of map entries the greater the possibility for a large
> number of active mounts so it's not hard to expect cases of a 1000 or somewhat
> more active mounts.

So I am setting the default number of mounts allowed per mount namespace at 100,000. This is more than enough for any use case I know of, but small enough to quickly stop an exponential increase in mounts. Which should be perfect to catch misconfigurations and malfunctioning programs.

For anyone who needs a higher limit this can be changed by writing to the new /proc/sys/fs/mount-max sysctl.

Tested-by: CAI Qian <caiqian@redhat.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>

Conflicts:
    fs/namespace.c
    kernel/sysctl.c

Signed-off-by: Philipp Hahn <hahn@univention.de>
Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
-rw-r--r-- Documentation/sysctl/fs.txt 7
-rw-r--r-- fs/mount.h 2
-rw-r--r-- fs/namespace.c 49
-rw-r--r-- fs/pnode.c 2
-rw-r--r-- fs/pnode.h 1
-rw-r--r-- include/linux/mount.h 2
-rw-r--r-- kernel/sysctl.c 9
7 files changed, 70 insertions, 2 deletions
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 302b5ed616a6..35e17f748ca7 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -265,6 +265,13 @@ aio-nr can grow to.
265 265
266============================================================== 266==============================================================
267 267
268mount-max:
269
270This denotes the maximum number of mounts that may exist
271in a mount namespace.
272
273==============================================================
274
268 275
2692. /proc/sys/fs/binfmt_misc 2762. /proc/sys/fs/binfmt_misc
270---------------------------------------------------------- 277----------------------------------------------------------
diff --git a/fs/mount.h b/fs/mount.h
index 6a61c2b3e385..2152c16ddf74 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -13,6 +13,8 @@ struct mnt_namespace {
13 u64 seq; /* Sequence number to prevent loops */ 13 u64 seq; /* Sequence number to prevent loops */
14 wait_queue_head_t poll; 14 wait_queue_head_t poll;
15 u64 event; 15 u64 event;
16 unsigned int mounts; /* # of mounts in the namespace */
17 unsigned int pending_mounts;
16}; 18};
17 19
18struct mnt_pcp { 20struct mnt_pcp {
diff --git a/fs/namespace.c b/fs/namespace.c
index 556721fb0cf6..f853aaf92ec9 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -27,6 +27,9 @@
27#include "pnode.h" 27#include "pnode.h"
28#include "internal.h" 28#include "internal.h"
29 29
30/* Maximum number of mounts in a mount namespace */
31unsigned int sysctl_mount_max __read_mostly = 100000;
32
30static unsigned int m_hash_mask __read_mostly; 33static unsigned int m_hash_mask __read_mostly;
31static unsigned int m_hash_shift __read_mostly; 34static unsigned int m_hash_shift __read_mostly;
32static unsigned int mp_hash_mask __read_mostly; 35static unsigned int mp_hash_mask __read_mostly;
@@ -888,6 +891,9 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
888 891
889 list_splice(&head, n->list.prev); 892 list_splice(&head, n->list.prev);
890 893
894 n->mounts += n->pending_mounts;
895 n->pending_mounts = 0;
896
891 attach_shadowed(mnt, parent, shadows); 897 attach_shadowed(mnt, parent, shadows);
892 touch_mnt_namespace(n); 898 touch_mnt_namespace(n);
893} 899}
@@ -1408,11 +1414,16 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
1408 propagate_umount(&tmp_list); 1414 propagate_umount(&tmp_list);
1409 1415
1410 while (!list_empty(&tmp_list)) { 1416 while (!list_empty(&tmp_list)) {
1417 struct mnt_namespace *ns;
1411 bool disconnect; 1418 bool disconnect;
1412 p = list_first_entry(&tmp_list, struct mount, mnt_list); 1419 p = list_first_entry(&tmp_list, struct mount, mnt_list);
1413 list_del_init(&p->mnt_expire); 1420 list_del_init(&p->mnt_expire);
1414 list_del_init(&p->mnt_list); 1421 list_del_init(&p->mnt_list);
1415 __touch_mnt_namespace(p->mnt_ns); 1422 ns = p->mnt_ns;
1423 if (ns) {
1424 ns->mounts--;
1425 __touch_mnt_namespace(ns);
1426 }
1416 p->mnt_ns = NULL; 1427 p->mnt_ns = NULL;
1417 if (how & UMOUNT_SYNC) 1428 if (how & UMOUNT_SYNC)
1418 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; 1429 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
@@ -1821,6 +1832,28 @@ static int invent_group_ids(struct mount *mnt, bool recurse)
1821 return 0; 1832 return 0;
1822} 1833}
1823 1834
/*
 * count_mounts - account the mounts reachable from @mnt against @ns's limit.
 * @ns:  mount namespace whose counters are checked and updated
 * @mnt: first mount of the set to be counted
 *
 * Counts every mount visited by next_mnt() starting at @mnt, then checks
 * that adding them to the namespace's committed (ns->mounts) and already
 * reserved (ns->pending_mounts) totals stays within sysctl_mount_max.
 *
 * Returns 0 and adds the count to ns->pending_mounts on success, or
 * -ENOSPC (leaving the counters untouched) when the limit would be
 * exceeded or the counters would overflow.
 */
1835int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
1836{
/* Read the limit once so every check below uses the same value. */
1837 unsigned int max = READ_ONCE(sysctl_mount_max);
1838 unsigned int mounts = 0, old, pending, sum;
1839 struct mount *p;
1840
/* Count the mounts visited by next_mnt(), beginning with @mnt itself. */
1841 for (p = mnt; p; p = next_mnt(p, mnt))
1842 mounts++;
1843
1844 old = ns->mounts;
1845 pending = ns->pending_mounts;
1846 sum = old + pending;
/*
 * Reject when:
 *  - old + pending wrapped around (either operand exceeds the sum),
 *  - the namespace is already over the limit, or
 *  - the new mounts do not fit in the room that is left.
 * Testing max < sum first keeps max - sum from underflowing.
 */
1847 if ((old > sum) ||
1848 (pending > sum) ||
1849 (max < sum) ||
1850 (mounts > (max - sum)))
1851 return -ENOSPC;
1852
/* Record the new mounts in the pending counter only; ns->mounts is not touched here. */
1853 ns->pending_mounts = pending + mounts;
1854 return 0;
1855}
1856
1824/* 1857/*
1825 * @source_mnt : mount tree to be attached 1858 * @source_mnt : mount tree to be attached
1826 * @nd : place the mount tree @source_mnt is attached 1859 * @nd : place the mount tree @source_mnt is attached
@@ -1890,10 +1923,18 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1890 struct path *parent_path) 1923 struct path *parent_path)
1891{ 1924{
1892 HLIST_HEAD(tree_list); 1925 HLIST_HEAD(tree_list);
1926 struct mnt_namespace *ns = dest_mnt->mnt_ns;
1893 struct mount *child, *p; 1927 struct mount *child, *p;
1894 struct hlist_node *n; 1928 struct hlist_node *n;
1895 int err; 1929 int err;
1896 1930
1931 /* Is there space to add these mounts to the mount namespace? */
1932 if (!parent_path) {
1933 err = count_mounts(ns, source_mnt);
1934 if (err)
1935 goto out;
1936 }
1937
1897 if (IS_MNT_SHARED(dest_mnt)) { 1938 if (IS_MNT_SHARED(dest_mnt)) {
1898 err = invent_group_ids(source_mnt, true); 1939 err = invent_group_ids(source_mnt, true);
1899 if (err) 1940 if (err)
@@ -1930,11 +1971,13 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1930 out_cleanup_ids: 1971 out_cleanup_ids:
1931 while (!hlist_empty(&tree_list)) { 1972 while (!hlist_empty(&tree_list)) {
1932 child = hlist_entry(tree_list.first, struct mount, mnt_hash); 1973 child = hlist_entry(tree_list.first, struct mount, mnt_hash);
1974 child->mnt_parent->mnt_ns->pending_mounts = 0;
1933 umount_tree(child, UMOUNT_SYNC); 1975 umount_tree(child, UMOUNT_SYNC);
1934 } 1976 }
1935 unlock_mount_hash(); 1977 unlock_mount_hash();
1936 cleanup_group_ids(source_mnt, NULL); 1978 cleanup_group_ids(source_mnt, NULL);
1937 out: 1979 out:
1980 ns->pending_mounts = 0;
1938 return err; 1981 return err;
1939} 1982}
1940 1983
@@ -2758,6 +2801,8 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
2758 init_waitqueue_head(&new_ns->poll); 2801 init_waitqueue_head(&new_ns->poll);
2759 new_ns->event = 0; 2802 new_ns->event = 0;
2760 new_ns->user_ns = get_user_ns(user_ns); 2803 new_ns->user_ns = get_user_ns(user_ns);
2804 new_ns->mounts = 0;
2805 new_ns->pending_mounts = 0;
2761 return new_ns; 2806 return new_ns;
2762} 2807}
2763 2808
@@ -2807,6 +2852,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2807 q = new; 2852 q = new;
2808 while (p) { 2853 while (p) {
2809 q->mnt_ns = new_ns; 2854 q->mnt_ns = new_ns;
2855 new_ns->mounts++;
2810 if (new_fs) { 2856 if (new_fs) {
2811 if (&p->mnt == new_fs->root.mnt) { 2857 if (&p->mnt == new_fs->root.mnt) {
2812 new_fs->root.mnt = mntget(&q->mnt); 2858 new_fs->root.mnt = mntget(&q->mnt);
@@ -2845,6 +2891,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
2845 struct mount *mnt = real_mount(m); 2891 struct mount *mnt = real_mount(m);
2846 mnt->mnt_ns = new_ns; 2892 mnt->mnt_ns = new_ns;
2847 new_ns->root = mnt; 2893 new_ns->root = mnt;
2894 new_ns->mounts++;
2848 list_add(&mnt->mnt_list, &new_ns->list); 2895 list_add(&mnt->mnt_list, &new_ns->list);
2849 } else { 2896 } else {
2850 mntput(m); 2897 mntput(m);
diff --git a/fs/pnode.c b/fs/pnode.c
index 99899705b105..234a9ac49958 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -259,7 +259,7 @@ static int propagate_one(struct mount *m)
259 read_sequnlock_excl(&mount_lock); 259 read_sequnlock_excl(&mount_lock);
260 } 260 }
261 hlist_add_head(&child->mnt_hash, list); 261 hlist_add_head(&child->mnt_hash, list);
262 return 0; 262 return count_mounts(m->mnt_ns, child);
263} 263}
264 264
265/* 265/*
diff --git a/fs/pnode.h b/fs/pnode.h
index 0fcdbe7ca648..550f5a8b4fcf 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -52,4 +52,5 @@ void mnt_set_mountpoint(struct mount *, struct mountpoint *,
52struct mount *copy_tree(struct mount *, struct dentry *, int); 52struct mount *copy_tree(struct mount *, struct dentry *, int);
53bool is_path_reachable(struct mount *, struct dentry *, 53bool is_path_reachable(struct mount *, struct dentry *,
54 const struct path *root); 54 const struct path *root);
55int count_mounts(struct mnt_namespace *ns, struct mount *mnt);
55#endif /* _LINUX_PNODE_H */ 56#endif /* _LINUX_PNODE_H */
diff --git a/include/linux/mount.h b/include/linux/mount.h
index f822c3c11377..dc6cd800cd5d 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -95,4 +95,6 @@ extern void mark_mounts_for_expiry(struct list_head *mounts);
95 95
96extern dev_t name_to_dev_t(const char *name); 96extern dev_t name_to_dev_t(const char *name);
97 97
98extern unsigned int sysctl_mount_max;
99
98#endif /* _LINUX_MOUNT_H */ 100#endif /* _LINUX_MOUNT_H */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7d4900404c94..cebbff5f34fe 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -64,6 +64,7 @@
64#include <linux/binfmts.h> 64#include <linux/binfmts.h>
65#include <linux/sched/sysctl.h> 65#include <linux/sched/sysctl.h>
66#include <linux/kexec.h> 66#include <linux/kexec.h>
67#include <linux/mount.h>
67 68
68#include <asm/uaccess.h> 69#include <asm/uaccess.h>
69#include <asm/processor.h> 70#include <asm/processor.h>
@@ -1709,6 +1710,14 @@ static struct ctl_table fs_table[] = {
1709 .mode = 0644, 1710 .mode = 0644,
1710 .proc_handler = proc_doulongvec_minmax, 1711 .proc_handler = proc_doulongvec_minmax,
1711 }, 1712 },
1713 {
1714 .procname = "mount-max",
1715 .data = &sysctl_mount_max,
1716 .maxlen = sizeof(unsigned int),
1717 .mode = 0644,
1718 .proc_handler = proc_dointvec_minmax,
1719 .extra1 = &one,
1720 },
1712 { } 1721 { }
1713}; 1722};
1714 1723