aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author: Hugh Dickins, 2013-01-02 04:01:33 -0600
committer: Greg Kroah-Hartman, 2013-01-11 11:03:36 -0600
commit: 51afc625ccd2e38f76f690ccce93e11a21d9543b (patch)
tree: 054b3a7d31c3d1b5740502a4e57336404bf6fd99 /mm
parent: 8fafe011ed532384b845e53fec7b9392310e56e6 (diff)
download: kernel-common-51afc625ccd2e38f76f690ccce93e11a21d9543b.tar.gz
kernel-common-51afc625ccd2e38f76f690ccce93e11a21d9543b.tar.xz
kernel-common-51afc625ccd2e38f76f690ccce93e11a21d9543b.zip
tmpfs mempolicy: fix /proc/mounts corrupting memory
commit f2a07f40dbc603c15f8b06e6ec7f768af67b424f upstream.

Recently I suggested using "mount -o remount,mpol=local /tmp" in NUMA mempolicy testing. Very nasty. Reading /proc/mounts, /proc/pid/mounts or /proc/pid/mountinfo may then corrupt one bit of kernel memory, often in a page table (causing "Bad swap" or "Bad page map" warning or "Bad pagetable" oops), sometimes in a vm_area_struct or rbnode or somewhere worse. "mpol=prefer" and "mpol=prefer:Node" are equally toxic.

Recent NUMA enhancements are not to blame: this dates back to 2.6.35, when commit e17f74af351c "mempolicy: don't call mpol_set_nodemask() when no_context" skipped mpol_parse_str()'s call to mpol_set_nodemask(), which used to initialize v.preferred_node, or set MPOL_F_LOCAL in flags. With slab poisoning, you can then rely on mpol_to_str() to set the bit for node 0x6b6b, probably in the next page above the caller's stack.

mpol_parse_str() is only called from shmem_parse_options(): no_context is always true, so call it unused for now, and remove !no_context code. Set v.nodes or v.preferred_node or MPOL_F_LOCAL as mpol_to_str() might expect. Then mpol_to_str() can ignore its no_context argument also, the mpol being appropriately initialized whether contextualized or not. Rename its no_context unused too, and let subsequent patch remove them (that's not needed for stable backporting, which would involve rejects).

I don't understand why MPOL_LOCAL is described as a pseudo-policy: it's a reasonable policy which suffers from a confusing implementation in terms of MPOL_PREFERRED with MPOL_F_LOCAL. I believe this would be much more robust if MPOL_LOCAL were recognized in switch statements throughout, MPOL_F_LOCAL deleted, and MPOL_PREFERRED use the (possibly empty) nodes mask like everyone else, instead of its preferred_node variant (I presume an optimization from the days before MPOL_LOCAL). But that would take me too long to get right and fully tested.
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/mempolicy.c | 64
1 files changed, 26 insertions, 38 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 04282baf96b..0367beb1b79 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2308,8 +2308,7 @@ void numa_default_policy(void)
2308 */ 2308 */
2309 2309
2310/* 2310/*
2311 * "local" is pseudo-policy: MPOL_PREFERRED with MPOL_F_LOCAL flag 2311 * "local" is implemented internally by MPOL_PREFERRED with MPOL_F_LOCAL flag.
2312 * Used only for mpol_parse_str() and mpol_to_str()
2313 */ 2312 */
2314#define MPOL_LOCAL MPOL_MAX 2313#define MPOL_LOCAL MPOL_MAX
2315static const char * const policy_modes[] = 2314static const char * const policy_modes[] =
@@ -2324,28 +2323,21 @@ static const char * const policy_modes[] =
2324 2323
2325#ifdef CONFIG_TMPFS 2324#ifdef CONFIG_TMPFS
2326/** 2325/**
2327 * mpol_parse_str - parse string to mempolicy 2326 * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option.
2328 * @str: string containing mempolicy to parse 2327 * @str: string containing mempolicy to parse
2329 * @mpol: pointer to struct mempolicy pointer, returned on success. 2328 * @mpol: pointer to struct mempolicy pointer, returned on success.
2330 * @no_context: flag whether to "contextualize" the mempolicy 2329 * @unused: redundant argument, to be removed later.
2331 * 2330 *
2332 * Format of input: 2331 * Format of input:
2333 * <mode>[=<flags>][:<nodelist>] 2332 * <mode>[=<flags>][:<nodelist>]
2334 * 2333 *
2335 * if @no_context is true, save the input nodemask in w.user_nodemask in
2336 * the returned mempolicy. This will be used to "clone" the mempolicy in
2337 * a specific context [cpuset] at a later time. Used to parse tmpfs mpol
2338 * mount option. Note that if 'static' or 'relative' mode flags were
2339 * specified, the input nodemask will already have been saved. Saving
2340 * it again is redundant, but safe.
2341 *
2342 * On success, returns 0, else 1 2334 * On success, returns 0, else 1
2343 */ 2335 */
2344int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) 2336int mpol_parse_str(char *str, struct mempolicy **mpol, int unused)
2345{ 2337{
2346 struct mempolicy *new = NULL; 2338 struct mempolicy *new = NULL;
2347 unsigned short mode; 2339 unsigned short mode;
2348 unsigned short uninitialized_var(mode_flags); 2340 unsigned short mode_flags;
2349 nodemask_t nodes; 2341 nodemask_t nodes;
2350 char *nodelist = strchr(str, ':'); 2342 char *nodelist = strchr(str, ':');
2351 char *flags = strchr(str, '='); 2343 char *flags = strchr(str, '=');
@@ -2433,24 +2425,23 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
2433 if (IS_ERR(new)) 2425 if (IS_ERR(new))
2434 goto out; 2426 goto out;
2435 2427
2436 if (no_context) { 2428 /*
2437 /* save for contextualization */ 2429 * Save nodes for mpol_to_str() to show the tmpfs mount options
2438 new->w.user_nodemask = nodes; 2430 * for /proc/mounts, /proc/pid/mounts and /proc/pid/mountinfo.
2439 } else { 2431 */
2440 int ret; 2432 if (mode != MPOL_PREFERRED)
2441 NODEMASK_SCRATCH(scratch); 2433 new->v.nodes = nodes;
2442 if (scratch) { 2434 else if (nodelist)
2443 task_lock(current); 2435 new->v.preferred_node = first_node(nodes);
2444 ret = mpol_set_nodemask(new, &nodes, scratch); 2436 else
2445 task_unlock(current); 2437 new->flags |= MPOL_F_LOCAL;
2446 } else 2438
2447 ret = -ENOMEM; 2439 /*
2448 NODEMASK_SCRATCH_FREE(scratch); 2440 * Save nodes for contextualization: this will be used to "clone"
2449 if (ret) { 2441 * the mempolicy in a specific context [cpuset] at a later time.
2450 mpol_put(new); 2442 */
2451 goto out; 2443 new->w.user_nodemask = nodes;
2452 } 2444
2453 }
2454 err = 0; 2445 err = 0;
2455 2446
2456out: 2447out:
@@ -2470,13 +2461,13 @@ out:
2470 * @buffer: to contain formatted mempolicy string 2461 * @buffer: to contain formatted mempolicy string
2471 * @maxlen: length of @buffer 2462 * @maxlen: length of @buffer
2472 * @pol: pointer to mempolicy to be formatted 2463 * @pol: pointer to mempolicy to be formatted
2473 * @no_context: "context free" mempolicy - use nodemask in w.user_nodemask 2464 * @unused: redundant argument, to be removed later.
2474 * 2465 *
2475 * Convert a mempolicy into a string. 2466 * Convert a mempolicy into a string.
2476 * Returns the number of characters in buffer (if positive) 2467 * Returns the number of characters in buffer (if positive)
2477 * or an error (negative) 2468 * or an error (negative)
2478 */ 2469 */
2479int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context) 2470int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int unused)
2480{ 2471{
2481 char *p = buffer; 2472 char *p = buffer;
2482 int l; 2473 int l;
@@ -2502,7 +2493,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
2502 case MPOL_PREFERRED: 2493 case MPOL_PREFERRED:
2503 nodes_clear(nodes); 2494 nodes_clear(nodes);
2504 if (flags & MPOL_F_LOCAL) 2495 if (flags & MPOL_F_LOCAL)
2505 mode = MPOL_LOCAL; /* pseudo-policy */ 2496 mode = MPOL_LOCAL;
2506 else 2497 else
2507 node_set(pol->v.preferred_node, nodes); 2498 node_set(pol->v.preferred_node, nodes);
2508 break; 2499 break;
@@ -2510,10 +2501,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
2510 case MPOL_BIND: 2501 case MPOL_BIND:
2511 /* Fall through */ 2502 /* Fall through */
2512 case MPOL_INTERLEAVE: 2503 case MPOL_INTERLEAVE:
2513 if (no_context) 2504 nodes = pol->v.nodes;
2514 nodes = pol->w.user_nodemask;
2515 else
2516 nodes = pol->v.nodes;
2517 break; 2505 break;
2518 2506
2519 default: 2507 default: