diff options
author | Hugh Dickins | 2013-01-02 04:01:33 -0600 |
---|---|---|
committer | Greg Kroah-Hartman | 2013-01-11 11:03:36 -0600 |
commit | 51afc625ccd2e38f76f690ccce93e11a21d9543b (patch) | |
tree | 054b3a7d31c3d1b5740502a4e57336404bf6fd99 /mm | |
parent | 8fafe011ed532384b845e53fec7b9392310e56e6 (diff) | |
download | kernel-common-51afc625ccd2e38f76f690ccce93e11a21d9543b.tar.gz kernel-common-51afc625ccd2e38f76f690ccce93e11a21d9543b.tar.xz kernel-common-51afc625ccd2e38f76f690ccce93e11a21d9543b.zip |
tmpfs mempolicy: fix /proc/mounts corrupting memory
commit f2a07f40dbc603c15f8b06e6ec7f768af67b424f upstream.
Recently I suggested using "mount -o remount,mpol=local /tmp" in NUMA
mempolicy testing. Very nasty. Reading /proc/mounts, /proc/pid/mounts
or /proc/pid/mountinfo may then corrupt one bit of kernel memory, often
in a page table (causing "Bad swap" or "Bad page map" warning or "Bad
pagetable" oops), sometimes in a vm_area_struct or rbnode or somewhere
worse. "mpol=prefer" and "mpol=prefer:Node" are equally toxic.
Recent NUMA enhancements are not to blame: this dates back to 2.6.35,
when commit e17f74af351c "mempolicy: don't call mpol_set_nodemask() when
no_context" skipped mpol_parse_str()'s call to mpol_set_nodemask(),
which used to initialize v.preferred_node, or set MPOL_F_LOCAL in flags.
With slab poisoning, you can then rely on mpol_to_str() to set the bit
for node 0x6b6b, probably in the next page above the caller's stack.
mpol_parse_str() is only called from shmem_parse_options(): no_context
is always true, so rename it to "unused" for now, and remove the !no_context code.
Set v.nodes or v.preferred_node or MPOL_F_LOCAL as mpol_to_str() might
expect. Then mpol_to_str() can ignore its no_context argument also,
the mpol being appropriately initialized whether contextualized or not.
Rename its no_context to "unused" too, and let a subsequent patch remove both
unused arguments (that's not needed for stable backporting, which would involve rejects).
I don't understand why MPOL_LOCAL is described as a pseudo-policy:
it's a reasonable policy which suffers from a confusing implementation
in terms of MPOL_PREFERRED with MPOL_F_LOCAL. I believe this would be
much more robust if MPOL_LOCAL were recognized in switch statements
throughout, MPOL_F_LOCAL deleted, and MPOL_PREFERRED use the (possibly
empty) nodes mask like everyone else, instead of its preferred_node
variant (I presume an optimization from the days before MPOL_LOCAL).
But that would take me too long to get right and fully tested.
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/mempolicy.c | 64 |
1 file changed, 26 insertions, 38 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 04282baf96b..0367beb1b79 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -2308,8 +2308,7 @@ void numa_default_policy(void) | |||
2308 | */ | 2308 | */ |
2309 | 2309 | ||
2310 | /* | 2310 | /* |
2311 | * "local" is pseudo-policy: MPOL_PREFERRED with MPOL_F_LOCAL flag | 2311 | * "local" is implemented internally by MPOL_PREFERRED with MPOL_F_LOCAL flag. |
2312 | * Used only for mpol_parse_str() and mpol_to_str() | ||
2313 | */ | 2312 | */ |
2314 | #define MPOL_LOCAL MPOL_MAX | 2313 | #define MPOL_LOCAL MPOL_MAX |
2315 | static const char * const policy_modes[] = | 2314 | static const char * const policy_modes[] = |
@@ -2324,28 +2323,21 @@ static const char * const policy_modes[] = | |||
2324 | 2323 | ||
2325 | #ifdef CONFIG_TMPFS | 2324 | #ifdef CONFIG_TMPFS |
2326 | /** | 2325 | /** |
2327 | * mpol_parse_str - parse string to mempolicy | 2326 | * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option. |
2328 | * @str: string containing mempolicy to parse | 2327 | * @str: string containing mempolicy to parse |
2329 | * @mpol: pointer to struct mempolicy pointer, returned on success. | 2328 | * @mpol: pointer to struct mempolicy pointer, returned on success. |
2330 | * @no_context: flag whether to "contextualize" the mempolicy | 2329 | * @unused: redundant argument, to be removed later. |
2331 | * | 2330 | * |
2332 | * Format of input: | 2331 | * Format of input: |
2333 | * <mode>[=<flags>][:<nodelist>] | 2332 | * <mode>[=<flags>][:<nodelist>] |
2334 | * | 2333 | * |
2335 | * if @no_context is true, save the input nodemask in w.user_nodemask in | ||
2336 | * the returned mempolicy. This will be used to "clone" the mempolicy in | ||
2337 | * a specific context [cpuset] at a later time. Used to parse tmpfs mpol | ||
2338 | * mount option. Note that if 'static' or 'relative' mode flags were | ||
2339 | * specified, the input nodemask will already have been saved. Saving | ||
2340 | * it again is redundant, but safe. | ||
2341 | * | ||
2342 | * On success, returns 0, else 1 | 2334 | * On success, returns 0, else 1 |
2343 | */ | 2335 | */ |
2344 | int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) | 2336 | int mpol_parse_str(char *str, struct mempolicy **mpol, int unused) |
2345 | { | 2337 | { |
2346 | struct mempolicy *new = NULL; | 2338 | struct mempolicy *new = NULL; |
2347 | unsigned short mode; | 2339 | unsigned short mode; |
2348 | unsigned short uninitialized_var(mode_flags); | 2340 | unsigned short mode_flags; |
2349 | nodemask_t nodes; | 2341 | nodemask_t nodes; |
2350 | char *nodelist = strchr(str, ':'); | 2342 | char *nodelist = strchr(str, ':'); |
2351 | char *flags = strchr(str, '='); | 2343 | char *flags = strchr(str, '='); |
@@ -2433,24 +2425,23 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) | |||
2433 | if (IS_ERR(new)) | 2425 | if (IS_ERR(new)) |
2434 | goto out; | 2426 | goto out; |
2435 | 2427 | ||
2436 | if (no_context) { | 2428 | /* |
2437 | /* save for contextualization */ | 2429 | * Save nodes for mpol_to_str() to show the tmpfs mount options |
2438 | new->w.user_nodemask = nodes; | 2430 | * for /proc/mounts, /proc/pid/mounts and /proc/pid/mountinfo. |
2439 | } else { | 2431 | */ |
2440 | int ret; | 2432 | if (mode != MPOL_PREFERRED) |
2441 | NODEMASK_SCRATCH(scratch); | 2433 | new->v.nodes = nodes; |
2442 | if (scratch) { | 2434 | else if (nodelist) |
2443 | task_lock(current); | 2435 | new->v.preferred_node = first_node(nodes); |
2444 | ret = mpol_set_nodemask(new, &nodes, scratch); | 2436 | else |
2445 | task_unlock(current); | 2437 | new->flags |= MPOL_F_LOCAL; |
2446 | } else | 2438 | |
2447 | ret = -ENOMEM; | 2439 | /* |
2448 | NODEMASK_SCRATCH_FREE(scratch); | 2440 | * Save nodes for contextualization: this will be used to "clone" |
2449 | if (ret) { | 2441 | * the mempolicy in a specific context [cpuset] at a later time. |
2450 | mpol_put(new); | 2442 | */ |
2451 | goto out; | 2443 | new->w.user_nodemask = nodes; |
2452 | } | 2444 | |
2453 | } | ||
2454 | err = 0; | 2445 | err = 0; |
2455 | 2446 | ||
2456 | out: | 2447 | out: |
@@ -2470,13 +2461,13 @@ out: | |||
2470 | * @buffer: to contain formatted mempolicy string | 2461 | * @buffer: to contain formatted mempolicy string |
2471 | * @maxlen: length of @buffer | 2462 | * @maxlen: length of @buffer |
2472 | * @pol: pointer to mempolicy to be formatted | 2463 | * @pol: pointer to mempolicy to be formatted |
2473 | * @no_context: "context free" mempolicy - use nodemask in w.user_nodemask | 2464 | * @unused: redundant argument, to be removed later. |
2474 | * | 2465 | * |
2475 | * Convert a mempolicy into a string. | 2466 | * Convert a mempolicy into a string. |
2476 | * Returns the number of characters in buffer (if positive) | 2467 | * Returns the number of characters in buffer (if positive) |
2477 | * or an error (negative) | 2468 | * or an error (negative) |
2478 | */ | 2469 | */ |
2479 | int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context) | 2470 | int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int unused) |
2480 | { | 2471 | { |
2481 | char *p = buffer; | 2472 | char *p = buffer; |
2482 | int l; | 2473 | int l; |
@@ -2502,7 +2493,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context) | |||
2502 | case MPOL_PREFERRED: | 2493 | case MPOL_PREFERRED: |
2503 | nodes_clear(nodes); | 2494 | nodes_clear(nodes); |
2504 | if (flags & MPOL_F_LOCAL) | 2495 | if (flags & MPOL_F_LOCAL) |
2505 | mode = MPOL_LOCAL; /* pseudo-policy */ | 2496 | mode = MPOL_LOCAL; |
2506 | else | 2497 | else |
2507 | node_set(pol->v.preferred_node, nodes); | 2498 | node_set(pol->v.preferred_node, nodes); |
2508 | break; | 2499 | break; |
@@ -2510,10 +2501,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context) | |||
2510 | case MPOL_BIND: | 2501 | case MPOL_BIND: |
2511 | /* Fall through */ | 2502 | /* Fall through */ |
2512 | case MPOL_INTERLEAVE: | 2503 | case MPOL_INTERLEAVE: |
2513 | if (no_context) | 2504 | nodes = pol->v.nodes; |
2514 | nodes = pol->w.user_nodemask; | ||
2515 | else | ||
2516 | nodes = pol->v.nodes; | ||
2517 | break; | 2505 | break; |
2518 | 2506 | ||
2519 | default: | 2507 | default: |