Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit_watch.c         |  12
-rw-r--r--  kernel/bpf/verifier.c        |   5
-rw-r--r--  kernel/cgroup.c              |   9
-rw-r--r--  kernel/cpuset.c              |   5
-rw-r--r--  kernel/events/core.c         | 106
-rw-r--r--  kernel/extable.c             |   2
-rw-r--r--  kernel/fork.c                |  25
-rw-r--r--  kernel/futex.c               |  24
-rw-r--r--  kernel/irq/chip.c            |   2
-rw-r--r--  kernel/irq/manage.c          |   4
-rw-r--r--  kernel/kprobes.c             |   2
-rw-r--r--  kernel/kthread.c             |   3
-rw-r--r--  kernel/membarrier.c          |   4
-rw-r--r--  kernel/padata.c              |   7
-rw-r--r--  kernel/panic.c               |   2
-rw-r--r--  kernel/pid.c                 |  11
-rw-r--r--  kernel/pid_namespace.c       |   2
-rw-r--r--  kernel/power/hibernate.c     |  20
-rw-r--r--  kernel/power/main.c          |  11
-rw-r--r--  kernel/power/power.h         |   2
-rw-r--r--  kernel/power/suspend.c       |  10
-rw-r--r--  kernel/power/user.c          |  14
-rw-r--r--  kernel/printk/printk.c       |   2
-rw-r--r--  kernel/ptrace.c              |  34
-rw-r--r--  kernel/resource.c            |  13
-rw-r--r--  kernel/sched/core.c          |  37
-rw-r--r--  kernel/sched/deadline.c      |   3
-rw-r--r--  kernel/sched/fair.c          |  36
-rw-r--r--  kernel/sched/loadavg.c       |   4
-rw-r--r--  kernel/sched/rt.c            |   3
-rw-r--r--  kernel/sched/sched.h         |  17
-rw-r--r--  kernel/signal.c              |  24
-rw-r--r--  kernel/sysctl.c              |  15
-rw-r--r--  kernel/time/alarmtimer.c     |  15
-rw-r--r--  kernel/time/timekeeping.c    |  47
-rw-r--r--  kernel/trace/ftrace.c        |  22
-rw-r--r--  kernel/trace/ring_buffer.c   |  24
-rw-r--r--  kernel/trace/trace.c         |  11
-rw-r--r--  kernel/trace/trace_kprobe.c  |  26
-rw-r--r--  kernel/workqueue.c           |  23
40 files changed, 439 insertions(+), 199 deletions(-)
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 939945a5649c..a162661c9d60 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -457,13 +457,15 @@ void audit_remove_watch_rule(struct audit_krule *krule)
457 list_del(&krule->rlist); 457 list_del(&krule->rlist);
458 458
459 if (list_empty(&watch->rules)) { 459 if (list_empty(&watch->rules)) {
460 /*
461 * audit_remove_watch() drops our reference to 'parent' which
462 * can get freed. Grab our own reference to be safe.
463 */
464 audit_get_parent(parent);
460 audit_remove_watch(watch); 465 audit_remove_watch(watch);
461 466 if (list_empty(&parent->watches))
462 if (list_empty(&parent->watches)) {
463 audit_get_parent(parent);
464 fsnotify_destroy_mark(&parent->mark, audit_watch_group); 467 fsnotify_destroy_mark(&parent->mark, audit_watch_group);
465 audit_put_parent(parent); 468 audit_put_parent(parent);
466 }
467 } 469 }
468} 470}
469 471
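The audit_watch.c hunk above takes an extra reference on 'parent' before calling audit_remove_watch(), since that call can drop the last reference and free the parent while audit_remove_watch_rule() still needs it. A minimal userspace sketch of the same pin-before-call pattern follows; the obj/obj_get/obj_put names are illustrative, not kernel APIs.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
        atomic_int refcount;
};

static void obj_get(struct obj *o)
{
        atomic_fetch_add(&o->refcount, 1);
}

static void obj_put(struct obj *o)
{
        if (atomic_fetch_sub(&o->refcount, 1) == 1) {
                printf("last reference dropped, freeing\n");
                free(o);
        }
}

/* Like audit_remove_watch(): may drop the caller's original reference. */
static void remove_watch(struct obj *parent)
{
        obj_put(parent);
}

int main(void)
{
        struct obj *parent = malloc(sizeof(*parent));

        if (!parent)
                return 1;
        atomic_init(&parent->refcount, 1);

        obj_get(parent);        /* pin: keep 'parent' alive across the call */
        remove_watch(parent);   /* may release the original reference */
        /* 'parent' is still safe to use here thanks to the pin */
        obj_put(parent);        /* drop the pin; the object is freed now */
        return 0;
}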
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2cbfba78d3db..863e24f1e62e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -754,6 +754,11 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
754 if (err) 754 if (err)
755 return err; 755 return err;
756 756
757 if (is_pointer_value(env, insn->src_reg)) {
758 verbose("R%d leaks addr into mem\n", insn->src_reg);
759 return -EACCES;
760 }
761
757 /* check whether atomic_add can read the memory */ 762 /* check whether atomic_add can read the memory */
758 err = check_mem_access(env, insn->dst_reg, insn->off, 763 err = check_mem_access(env, insn->dst_reg, insn->off,
759 BPF_SIZE(insn->code), BPF_READ, -1); 764 BPF_SIZE(insn->code), BPF_READ, -1);
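The verifier hunk rejects BPF_XADD when the source register carries a pointer, so an unprivileged program cannot store a kernel address into memory it can later read back. A toy version of that check is sketched below; the reg_state type and SCALAR_VALUE/PTR_VALUE names are simplified stand-ins for the real verifier state, not its actual types.

#include <errno.h>

enum reg_type { SCALAR_VALUE, PTR_VALUE };

struct reg_state {
        enum reg_type type;
};

/* Refuse to let a pointer value be stored by the atomic add. */
static int check_xadd_src(const struct reg_state *src)
{
        if (src->type == PTR_VALUE)
                return -EACCES;         /* would leak an address into memory */
        return 0;                       /* plain scalars are fine */
}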
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 03a1b3f754d6..16d5b7a46850 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2751,11 +2751,12 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
2751 tsk = tsk->group_leader; 2751 tsk = tsk->group_leader;
2752 2752
2753 /* 2753 /*
2754 * Workqueue threads may acquire PF_NO_SETAFFINITY and become 2754 * kthreads may acquire PF_NO_SETAFFINITY during initialization.
2755 * trapped in a cpuset, or RT worker may be born in a cgroup 2755 * If userland migrates such a kthread to a non-root cgroup, it can
2756 * with no rt_runtime allocated. Just say no. 2756 * become trapped in a cpuset, or RT kthread may be born in a
2757 * cgroup with no rt_runtime allocated. Just say no.
2757 */ 2758 */
2758 if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { 2759 if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
2759 ret = -EINVAL; 2760 ret = -EINVAL;
2760 goto out_unlock_rcu; 2761 goto out_unlock_rcu;
2761 } 2762 }
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3f9db31c5d04..f93a9f9b4b97 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -60,6 +60,7 @@
60#include <linux/cgroup.h> 60#include <linux/cgroup.h>
61#include <linux/wait.h> 61#include <linux/wait.h>
62 62
63struct static_key cpusets_pre_enable_key __read_mostly = STATIC_KEY_INIT_FALSE;
63struct static_key cpusets_enabled_key __read_mostly = STATIC_KEY_INIT_FALSE; 64struct static_key cpusets_enabled_key __read_mostly = STATIC_KEY_INIT_FALSE;
64 65
65/* See "Frequency meter" comments, below. */ 66/* See "Frequency meter" comments, below. */
@@ -174,9 +175,9 @@ typedef enum {
174} cpuset_flagbits_t; 175} cpuset_flagbits_t;
175 176
176/* convenient tests for these bits */ 177/* convenient tests for these bits */
177static inline bool is_cpuset_online(const struct cpuset *cs) 178static inline bool is_cpuset_online(struct cpuset *cs)
178{ 179{
179 return test_bit(CS_ONLINE, &cs->flags); 180 return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css);
180} 181}
181 182
182static inline int is_cpu_exclusive(const struct cpuset *cs) 183static inline int is_cpu_exclusive(const struct cpuset *cs)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f4fdaff76f6d..3421b1271970 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5823,7 +5823,7 @@ static int __perf_pmu_output_stop(void *info)
5823{ 5823{
5824 struct perf_event *event = info; 5824 struct perf_event *event = info;
5825 struct pmu *pmu = event->pmu; 5825 struct pmu *pmu = event->pmu;
5826 struct perf_cpu_context *cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); 5826 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
5827 struct remote_output ro = { 5827 struct remote_output ro = {
5828 .rb = event->rb, 5828 .rb = event->rb,
5829 }; 5829 };
@@ -6169,6 +6169,27 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
6169 char *buf = NULL; 6169 char *buf = NULL;
6170 char *name; 6170 char *name;
6171 6171
6172 if (vma->vm_flags & VM_READ)
6173 prot |= PROT_READ;
6174 if (vma->vm_flags & VM_WRITE)
6175 prot |= PROT_WRITE;
6176 if (vma->vm_flags & VM_EXEC)
6177 prot |= PROT_EXEC;
6178
6179 if (vma->vm_flags & VM_MAYSHARE)
6180 flags = MAP_SHARED;
6181 else
6182 flags = MAP_PRIVATE;
6183
6184 if (vma->vm_flags & VM_DENYWRITE)
6185 flags |= MAP_DENYWRITE;
6186 if (vma->vm_flags & VM_MAYEXEC)
6187 flags |= MAP_EXECUTABLE;
6188 if (vma->vm_flags & VM_LOCKED)
6189 flags |= MAP_LOCKED;
6190 if (vma->vm_flags & VM_HUGETLB)
6191 flags |= MAP_HUGETLB;
6192
6172 if (file) { 6193 if (file) {
6173 struct inode *inode; 6194 struct inode *inode;
6174 dev_t dev; 6195 dev_t dev;
@@ -6195,27 +6216,6 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
6195 maj = MAJOR(dev); 6216 maj = MAJOR(dev);
6196 min = MINOR(dev); 6217 min = MINOR(dev);
6197 6218
6198 if (vma->vm_flags & VM_READ)
6199 prot |= PROT_READ;
6200 if (vma->vm_flags & VM_WRITE)
6201 prot |= PROT_WRITE;
6202 if (vma->vm_flags & VM_EXEC)
6203 prot |= PROT_EXEC;
6204
6205 if (vma->vm_flags & VM_MAYSHARE)
6206 flags = MAP_SHARED;
6207 else
6208 flags = MAP_PRIVATE;
6209
6210 if (vma->vm_flags & VM_DENYWRITE)
6211 flags |= MAP_DENYWRITE;
6212 if (vma->vm_flags & VM_MAYEXEC)
6213 flags |= MAP_EXECUTABLE;
6214 if (vma->vm_flags & VM_LOCKED)
6215 flags |= MAP_LOCKED;
6216 if (vma->vm_flags & VM_HUGETLB)
6217 flags |= MAP_HUGETLB;
6218
6219 goto got_name; 6219 goto got_name;
6220 } else { 6220 } else {
6221 if (vma->vm_ops && vma->vm_ops->name) { 6221 if (vma->vm_ops && vma->vm_ops->name) {
@@ -8390,6 +8390,37 @@ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
8390 return 0; 8390 return 0;
8391} 8391}
8392 8392
8393/*
8394 * Variation on perf_event_ctx_lock_nested(), except we take two context
8395 * mutexes.
8396 */
8397static struct perf_event_context *
8398__perf_event_ctx_lock_double(struct perf_event *group_leader,
8399 struct perf_event_context *ctx)
8400{
8401 struct perf_event_context *gctx;
8402
8403again:
8404 rcu_read_lock();
8405 gctx = READ_ONCE(group_leader->ctx);
8406 if (!atomic_inc_not_zero(&gctx->refcount)) {
8407 rcu_read_unlock();
8408 goto again;
8409 }
8410 rcu_read_unlock();
8411
8412 mutex_lock_double(&gctx->mutex, &ctx->mutex);
8413
8414 if (group_leader->ctx != gctx) {
8415 mutex_unlock(&ctx->mutex);
8416 mutex_unlock(&gctx->mutex);
8417 put_ctx(gctx);
8418 goto again;
8419 }
8420
8421 return gctx;
8422}
8423
8393/** 8424/**
8394 * sys_perf_event_open - open a performance event, associate it to a task/cpu 8425 * sys_perf_event_open - open a performance event, associate it to a task/cpu
8395 * 8426 *
@@ -8630,8 +8661,26 @@ SYSCALL_DEFINE5(perf_event_open,
8630 } 8661 }
8631 8662
8632 if (move_group) { 8663 if (move_group) {
8633 gctx = group_leader->ctx; 8664 gctx = __perf_event_ctx_lock_double(group_leader, ctx);
8634 mutex_lock_double(&gctx->mutex, &ctx->mutex); 8665
8666 /*
8667 * Check if we raced against another sys_perf_event_open() call
8668 * moving the software group underneath us.
8669 */
8670 if (!(group_leader->group_flags & PERF_GROUP_SOFTWARE)) {
8671 /*
8672 * If someone moved the group out from under us, check
8673 * if this new event wound up on the same ctx, if so
8674 * its the regular !move_group case, otherwise fail.
8675 */
8676 if (gctx != ctx) {
8677 err = -EINVAL;
8678 goto err_locked;
8679 } else {
8680 perf_event_ctx_unlock(group_leader, gctx);
8681 move_group = 0;
8682 }
8683 }
8635 } else { 8684 } else {
8636 mutex_lock(&ctx->mutex); 8685 mutex_lock(&ctx->mutex);
8637 } 8686 }
@@ -8726,7 +8775,7 @@ SYSCALL_DEFINE5(perf_event_open,
8726 perf_unpin_context(ctx); 8775 perf_unpin_context(ctx);
8727 8776
8728 if (move_group) 8777 if (move_group)
8729 mutex_unlock(&gctx->mutex); 8778 perf_event_ctx_unlock(group_leader, gctx);
8730 mutex_unlock(&ctx->mutex); 8779 mutex_unlock(&ctx->mutex);
8731 8780
8732 if (task) { 8781 if (task) {
@@ -8754,7 +8803,7 @@ SYSCALL_DEFINE5(perf_event_open,
8754 8803
8755err_locked: 8804err_locked:
8756 if (move_group) 8805 if (move_group)
8757 mutex_unlock(&gctx->mutex); 8806 perf_event_ctx_unlock(group_leader, gctx);
8758 mutex_unlock(&ctx->mutex); 8807 mutex_unlock(&ctx->mutex);
8759/* err_file: */ 8808/* err_file: */
8760 fput(event_file); 8809 fput(event_file);
@@ -9374,7 +9423,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn)
9374 ret = inherit_task_group(event, parent, parent_ctx, 9423 ret = inherit_task_group(event, parent, parent_ctx,
9375 child, ctxn, &inherited_all); 9424 child, ctxn, &inherited_all);
9376 if (ret) 9425 if (ret)
9377 break; 9426 goto out_unlock;
9378 } 9427 }
9379 9428
9380 /* 9429 /*
@@ -9390,7 +9439,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn)
9390 ret = inherit_task_group(event, parent, parent_ctx, 9439 ret = inherit_task_group(event, parent, parent_ctx,
9391 child, ctxn, &inherited_all); 9440 child, ctxn, &inherited_all);
9392 if (ret) 9441 if (ret)
9393 break; 9442 goto out_unlock;
9394 } 9443 }
9395 9444
9396 raw_spin_lock_irqsave(&parent_ctx->lock, flags); 9445 raw_spin_lock_irqsave(&parent_ctx->lock, flags);
@@ -9418,6 +9467,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn)
9418 } 9467 }
9419 9468
9420 raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); 9469 raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
9470out_unlock:
9421 mutex_unlock(&parent_ctx->mutex); 9471 mutex_unlock(&parent_ctx->mutex);
9422 9472
9423 perf_unpin_context(parent_ctx); 9473 perf_unpin_context(parent_ctx);
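The new __perf_event_ctx_lock_double() helper above locks a context that is only reachable through a pointer that may change under it: pin the context, take the mutexes, then re-check that group_leader->ctx still points at what was locked, and retry otherwise. A userspace sketch of that lock-then-revalidate loop follows; the holder/ctx types are illustrative, and the sketch glosses over object reclamation, which the kernel covers with RCU and atomic_inc_not_zero().

#include <pthread.h>
#include <stdatomic.h>

struct ctx {
        pthread_mutex_t lock;
        atomic_int refcount;
};

struct holder {
        struct ctx *_Atomic ctx;        /* may be re-pointed concurrently */
};

static void ctx_get(struct ctx *c) { atomic_fetch_add(&c->refcount, 1); }
static void ctx_put(struct ctx *c) { atomic_fetch_sub(&c->refcount, 1); }

/* Lock whatever context 'h' currently points at, retrying if it moves. */
static struct ctx *lock_current_ctx(struct holder *h)
{
        struct ctx *c;

        for (;;) {
                c = atomic_load(&h->ctx);
                ctx_get(c);                     /* hold it while we sleep on the lock */
                pthread_mutex_lock(&c->lock);
                if (atomic_load(&h->ctx) == c)
                        return c;               /* still current; caller unlocks and puts */
                pthread_mutex_unlock(&c->lock); /* raced: it was swapped underneath us */
                ctx_put(c);
        }
}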
diff --git a/kernel/extable.c b/kernel/extable.c
index e820ccee9846..4f06fc34313f 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -66,7 +66,7 @@ static inline int init_kernel_text(unsigned long addr)
66 return 0; 66 return 0;
67} 67}
68 68
69int core_kernel_text(unsigned long addr) 69int notrace core_kernel_text(unsigned long addr)
70{ 70{
71 if (addr >= (unsigned long)_stext && 71 if (addr >= (unsigned long)_stext &&
72 addr < (unsigned long)_etext) 72 addr < (unsigned long)_etext)
diff --git a/kernel/fork.c b/kernel/fork.c
index c85efa77e825..968917653c2c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -331,13 +331,14 @@ void set_task_stack_end_magic(struct task_struct *tsk)
331 *stackend = STACK_END_MAGIC; /* for overflow detection */ 331 *stackend = STACK_END_MAGIC; /* for overflow detection */
332} 332}
333 333
334static struct task_struct *dup_task_struct(struct task_struct *orig) 334static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
335{ 335{
336 struct task_struct *tsk; 336 struct task_struct *tsk;
337 struct thread_info *ti; 337 struct thread_info *ti;
338 int node = tsk_fork_get_node(orig);
339 int err; 338 int err;
340 339
340 if (node == NUMA_NO_NODE)
341 node = tsk_fork_get_node(orig);
341 tsk = alloc_task_struct_node(node); 342 tsk = alloc_task_struct_node(node);
342 if (!tsk) 343 if (!tsk)
343 return NULL; 344 return NULL;
@@ -367,7 +368,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
367 set_task_stack_end_magic(tsk); 368 set_task_stack_end_magic(tsk);
368 369
369#ifdef CONFIG_CC_STACKPROTECTOR 370#ifdef CONFIG_CC_STACKPROTECTOR
370 tsk->stack_canary = get_random_int(); 371 tsk->stack_canary = get_random_long();
371#endif 372#endif
372 373
373 /* 374 /*
@@ -1271,7 +1272,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1271 int __user *child_tidptr, 1272 int __user *child_tidptr,
1272 struct pid *pid, 1273 struct pid *pid,
1273 int trace, 1274 int trace,
1274 unsigned long tls) 1275 unsigned long tls,
1276 int node)
1275{ 1277{
1276 int retval; 1278 int retval;
1277 struct task_struct *p; 1279 struct task_struct *p;
@@ -1324,7 +1326,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1324 goto fork_out; 1326 goto fork_out;
1325 1327
1326 retval = -ENOMEM; 1328 retval = -ENOMEM;
1327 p = dup_task_struct(current); 1329 p = dup_task_struct(current, node);
1328 if (!p) 1330 if (!p)
1329 goto fork_out; 1331 goto fork_out;
1330 1332
@@ -1589,11 +1591,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1589 */ 1591 */
1590 recalc_sigpending(); 1592 recalc_sigpending();
1591 if (signal_pending(current)) { 1593 if (signal_pending(current)) {
1592 spin_unlock(&current->sighand->siglock);
1593 write_unlock_irq(&tasklist_lock);
1594 retval = -ERESTARTNOINTR; 1594 retval = -ERESTARTNOINTR;
1595 goto bad_fork_cancel_cgroup; 1595 goto bad_fork_cancel_cgroup;
1596 } 1596 }
1597 if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) {
1598 retval = -ENOMEM;
1599 goto bad_fork_cancel_cgroup;
1600 }
1597 1601
1598 if (likely(p->pid)) { 1602 if (likely(p->pid)) {
1599 ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); 1603 ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
@@ -1644,6 +1648,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1644 return p; 1648 return p;
1645 1649
1646bad_fork_cancel_cgroup: 1650bad_fork_cancel_cgroup:
1651 spin_unlock(&current->sighand->siglock);
1652 write_unlock_irq(&tasklist_lock);
1647 cgroup_cancel_fork(p, cgrp_ss_priv); 1653 cgroup_cancel_fork(p, cgrp_ss_priv);
1648bad_fork_free_pid: 1654bad_fork_free_pid:
1649 threadgroup_change_end(current); 1655 threadgroup_change_end(current);
@@ -1700,7 +1706,8 @@ static inline void init_idle_pids(struct pid_link *links)
1700struct task_struct *fork_idle(int cpu) 1706struct task_struct *fork_idle(int cpu)
1701{ 1707{
1702 struct task_struct *task; 1708 struct task_struct *task;
1703 task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0); 1709 task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0,
1710 cpu_to_node(cpu));
1704 if (!IS_ERR(task)) { 1711 if (!IS_ERR(task)) {
1705 init_idle_pids(task->pids); 1712 init_idle_pids(task->pids);
1706 init_idle(task, cpu); 1713 init_idle(task, cpu);
@@ -1745,7 +1752,7 @@ long _do_fork(unsigned long clone_flags,
1745 } 1752 }
1746 1753
1747 p = copy_process(clone_flags, stack_start, stack_size, 1754 p = copy_process(clone_flags, stack_start, stack_size,
1748 child_tidptr, NULL, trace, tls); 1755 child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
1749 /* 1756 /*
1750 * Do this prior waking up the new thread - the thread pointer 1757 * Do this prior waking up the new thread - the thread pointer
1751 * might get invalid after that point, if the thread exits quickly. 1758 * might get invalid after that point, if the thread exits quickly.
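Among other things, the fork.c hunk moves the unlocking of siglock and tasklist_lock out of the individual failure branches and into the bad_fork_cancel_cgroup label, which lets the new PIDNS_HASH_ADDING check reuse the same exit path. The usual shape of such a goto ladder is sketched below with illustrative names; EINTR stands in for the kernel-internal ERESTARTNOINTR, and the two predicates are trivial stand-ins for the real checks.

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t child_list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-ins for the checks done under the lock. */
static bool signal_is_pending(void)  { return false; }
static bool ns_still_accepting(void) { return true; }

static int commit_new_child(void)
{
        int ret = 0;

        pthread_mutex_lock(&child_list_lock);

        if (signal_is_pending()) {
                ret = -EINTR;           /* stands in for -ERESTARTNOINTR */
                goto out_unlock;
        }
        if (!ns_still_accepting()) {
                ret = -ENOMEM;
                goto out_unlock;
        }
        /* ... attach the child while the lock is held ... */

out_unlock:
        pthread_mutex_unlock(&child_list_lock); /* one place releases the lock */
        return ret;
}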
diff --git a/kernel/futex.c b/kernel/futex.c
index e8af73cc51a7..af29863f3349 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2690,7 +2690,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
2690{ 2690{
2691 struct hrtimer_sleeper timeout, *to = NULL; 2691 struct hrtimer_sleeper timeout, *to = NULL;
2692 struct rt_mutex_waiter rt_waiter; 2692 struct rt_mutex_waiter rt_waiter;
2693 struct rt_mutex *pi_mutex = NULL;
2694 struct futex_hash_bucket *hb; 2693 struct futex_hash_bucket *hb;
2695 union futex_key key2 = FUTEX_KEY_INIT; 2694 union futex_key key2 = FUTEX_KEY_INIT;
2696 struct futex_q q = futex_q_init; 2695 struct futex_q q = futex_q_init;
@@ -2774,6 +2773,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
2774 if (q.pi_state && (q.pi_state->owner != current)) { 2773 if (q.pi_state && (q.pi_state->owner != current)) {
2775 spin_lock(q.lock_ptr); 2774 spin_lock(q.lock_ptr);
2776 ret = fixup_pi_state_owner(uaddr2, &q, current); 2775 ret = fixup_pi_state_owner(uaddr2, &q, current);
2776 if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current)
2777 rt_mutex_unlock(&q.pi_state->pi_mutex);
2777 /* 2778 /*
2778 * Drop the reference to the pi state which 2779 * Drop the reference to the pi state which
2779 * the requeue_pi() code acquired for us. 2780 * the requeue_pi() code acquired for us.
@@ -2782,6 +2783,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
2782 spin_unlock(q.lock_ptr); 2783 spin_unlock(q.lock_ptr);
2783 } 2784 }
2784 } else { 2785 } else {
2786 struct rt_mutex *pi_mutex;
2787
2785 /* 2788 /*
2786 * We have been woken up by futex_unlock_pi(), a timeout, or a 2789 * We have been woken up by futex_unlock_pi(), a timeout, or a
2787 * signal. futex_unlock_pi() will not destroy the lock_ptr nor 2790 * signal. futex_unlock_pi() will not destroy the lock_ptr nor
@@ -2805,18 +2808,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
2805 if (res) 2808 if (res)
2806 ret = (res < 0) ? res : 0; 2809 ret = (res < 0) ? res : 0;
2807 2810
2811 /*
2812 * If fixup_pi_state_owner() faulted and was unable to handle
2813 * the fault, unlock the rt_mutex and return the fault to
2814 * userspace.
2815 */
2816 if (ret && rt_mutex_owner(pi_mutex) == current)
2817 rt_mutex_unlock(pi_mutex);
2818
2808 /* Unqueue and drop the lock. */ 2819 /* Unqueue and drop the lock. */
2809 unqueue_me_pi(&q); 2820 unqueue_me_pi(&q);
2810 } 2821 }
2811 2822
2812 /* 2823 if (ret == -EINTR) {
2813 * If fixup_pi_state_owner() faulted and was unable to handle the
2814 * fault, unlock the rt_mutex and return the fault to userspace.
2815 */
2816 if (ret == -EFAULT) {
2817 if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
2818 rt_mutex_unlock(pi_mutex);
2819 } else if (ret == -EINTR) {
2820 /* 2824 /*
2821 * We've already been requeued, but cannot restart by calling 2825 * We've already been requeued, but cannot restart by calling
2822 * futex_lock_pi() directly. We could restart this syscall, but 2826 * futex_lock_pi() directly. We could restart this syscall, but
@@ -3199,4 +3203,4 @@ static int __init futex_init(void)
3199 3203
3200 return 0; 3204 return 0;
3201} 3205}
3202__initcall(futex_init); 3206core_initcall(futex_init);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 15206453b12a..e4453d9f788c 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -810,8 +810,8 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle,
810 if (!desc) 810 if (!desc)
811 return; 811 return;
812 812
813 __irq_do_set_handler(desc, handle, 1, NULL);
814 desc->irq_common_data.handler_data = data; 813 desc->irq_common_data.handler_data = data;
814 __irq_do_set_handler(desc, handle, 1, NULL);
815 815
816 irq_put_desc_busunlock(desc, flags); 816 irq_put_desc_busunlock(desc, flags);
817} 817}
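The irq/chip.c hunk stores the handler data before __irq_do_set_handler() installs the handler, so the chained handler can never run against stale or NULL data. A standalone sketch of that initialize-then-publish ordering follows; release/acquire atomics stand in for the descriptor locking the kernel actually relies on, and the desc/install_handler names are illustrative.

#include <stdatomic.h>
#include <stddef.h>

typedef void (*handler_fn)(void *data);

struct desc {
        void *data;
        _Atomic(handler_fn) handler;    /* NULL until fully set up */
};

static void install_handler(struct desc *d, handler_fn fn, void *data)
{
        d->data = data;                 /* 1: make the data visible first */
        atomic_store_explicit(&d->handler, fn, memory_order_release);
                                        /* 2: only then expose the handler */
}

static void dispatch(struct desc *d)
{
        handler_fn fn = atomic_load_explicit(&d->handler, memory_order_acquire);

        if (fn)
                fn(d->data);            /* data is initialized if fn is visible */
}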
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 6ead200370da..a079ed14f230 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1287,8 +1287,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
1287 ret = __irq_set_trigger(desc, 1287 ret = __irq_set_trigger(desc,
1288 new->flags & IRQF_TRIGGER_MASK); 1288 new->flags & IRQF_TRIGGER_MASK);
1289 1289
1290 if (ret) 1290 if (ret) {
1291 irq_release_resources(desc);
1291 goto out_mask; 1292 goto out_mask;
1293 }
1292 } 1294 }
1293 1295
1294 desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \ 1296 desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index d10ab6b9b5e0..695763516908 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -563,7 +563,7 @@ static void kprobe_optimizer(struct work_struct *work)
563} 563}
564 564
565/* Wait for completing optimization and unoptimization */ 565/* Wait for completing optimization and unoptimization */
566static void wait_for_kprobe_optimizer(void) 566void wait_for_kprobe_optimizer(void)
567{ 567{
568 mutex_lock(&kprobe_mutex); 568 mutex_lock(&kprobe_mutex);
569 569
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 9ff173dca1ae..850b255649a2 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -18,6 +18,7 @@
18#include <linux/freezer.h> 18#include <linux/freezer.h>
19#include <linux/ptrace.h> 19#include <linux/ptrace.h>
20#include <linux/uaccess.h> 20#include <linux/uaccess.h>
21#include <linux/cgroup.h>
21#include <trace/events/sched.h> 22#include <trace/events/sched.h>
22 23
23static DEFINE_SPINLOCK(kthread_create_lock); 24static DEFINE_SPINLOCK(kthread_create_lock);
@@ -205,6 +206,7 @@ static int kthread(void *_create)
205 ret = -EINTR; 206 ret = -EINTR;
206 207
207 if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) { 208 if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) {
209 cgroup_kthread_ready();
208 __kthread_parkme(&self); 210 __kthread_parkme(&self);
209 ret = threadfn(data); 211 ret = threadfn(data);
210 } 212 }
@@ -510,6 +512,7 @@ int kthreadd(void *unused)
510 set_mems_allowed(node_states[N_MEMORY]); 512 set_mems_allowed(node_states[N_MEMORY]);
511 513
512 current->flags |= PF_NOFREEZE; 514 current->flags |= PF_NOFREEZE;
515 cgroup_init_kthreadd();
513 516
514 for (;;) { 517 for (;;) {
515 set_current_state(TASK_INTERRUPTIBLE); 518 set_current_state(TASK_INTERRUPTIBLE);
diff --git a/kernel/membarrier.c b/kernel/membarrier.c
index 536c727a56e9..9f9284f37f8d 100644
--- a/kernel/membarrier.c
+++ b/kernel/membarrier.c
@@ -16,6 +16,7 @@
16 16
17#include <linux/syscalls.h> 17#include <linux/syscalls.h>
18#include <linux/membarrier.h> 18#include <linux/membarrier.h>
19#include <linux/tick.h>
19 20
20/* 21/*
21 * Bitmask made from a "or" of all commands within enum membarrier_cmd, 22 * Bitmask made from a "or" of all commands within enum membarrier_cmd,
@@ -51,6 +52,9 @@
51 */ 52 */
52SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) 53SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
53{ 54{
55 /* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */
56 if (tick_nohz_full_enabled())
57 return -ENOSYS;
54 if (unlikely(flags)) 58 if (unlikely(flags))
55 return -EINVAL; 59 return -EINVAL;
56 switch (cmd) { 60 switch (cmd) {
diff --git a/kernel/padata.c b/kernel/padata.c
index b38bea9c466a..ecc7b3f452c7 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -189,19 +189,20 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
189 189
190 reorder = &next_queue->reorder; 190 reorder = &next_queue->reorder;
191 191
192 spin_lock(&reorder->lock);
192 if (!list_empty(&reorder->list)) { 193 if (!list_empty(&reorder->list)) {
193 padata = list_entry(reorder->list.next, 194 padata = list_entry(reorder->list.next,
194 struct padata_priv, list); 195 struct padata_priv, list);
195 196
196 spin_lock(&reorder->lock);
197 list_del_init(&padata->list); 197 list_del_init(&padata->list);
198 atomic_dec(&pd->reorder_objects); 198 atomic_dec(&pd->reorder_objects);
199 spin_unlock(&reorder->lock);
200 199
201 pd->processed++; 200 pd->processed++;
202 201
202 spin_unlock(&reorder->lock);
203 goto out; 203 goto out;
204 } 204 }
205 spin_unlock(&reorder->lock);
205 206
206 if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) { 207 if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
207 padata = ERR_PTR(-ENODATA); 208 padata = ERR_PTR(-ENODATA);
@@ -356,7 +357,7 @@ static int padata_setup_cpumasks(struct parallel_data *pd,
356 357
357 cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask); 358 cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
358 if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) { 359 if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
359 free_cpumask_var(pd->cpumask.cbcpu); 360 free_cpumask_var(pd->cpumask.pcpu);
360 return -ENOMEM; 361 return -ENOMEM;
361 } 362 }
362 363
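The padata.c hunk takes reorder->lock before the list_empty() check instead of after it, so testing for work and dequeuing it happen in one critical section and nothing can slip in between. A minimal sketch of checking and popping under a single lock, with illustrative names:

#include <pthread.h>
#include <stddef.h>

struct item {
        struct item *next;
};

struct queue {
        pthread_mutex_t lock;
        struct item *head;
};

/* Test for work and take it as one critical section, not two. */
static struct item *queue_pop(struct queue *q)
{
        struct item *it;

        pthread_mutex_lock(&q->lock);
        it = q->head;
        if (it)
                q->head = it->next;
        pthread_mutex_unlock(&q->lock);

        return it;      /* NULL: the queue was empty while we held the lock */
}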
diff --git a/kernel/panic.c b/kernel/panic.c
index 41e2b54f36b5..1d07cf9af849 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -167,7 +167,7 @@ void panic(const char *fmt, ...)
167 * Delay timeout seconds before rebooting the machine. 167 * Delay timeout seconds before rebooting the machine.
168 * We can't use the "normal" timers since we just panicked. 168 * We can't use the "normal" timers since we just panicked.
169 */ 169 */
170 pr_emerg("Rebooting in %d seconds..", panic_timeout); 170 pr_emerg("Rebooting in %d seconds..\n", panic_timeout);
171 171
172 for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) { 172 for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) {
173 touch_nmi_watchdog(); 173 touch_nmi_watchdog();
diff --git a/kernel/pid.c b/kernel/pid.c
index 78b3d9f80d44..b17263be9082 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -526,8 +526,11 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
526 if (!ns) 526 if (!ns)
527 ns = task_active_pid_ns(current); 527 ns = task_active_pid_ns(current);
528 if (likely(pid_alive(task))) { 528 if (likely(pid_alive(task))) {
529 if (type != PIDTYPE_PID) 529 if (type != PIDTYPE_PID) {
530 if (type == __PIDTYPE_TGID)
531 type = PIDTYPE_PID;
530 task = task->group_leader; 532 task = task->group_leader;
533 }
531 nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns); 534 nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
532 } 535 }
533 rcu_read_unlock(); 536 rcu_read_unlock();
@@ -536,12 +539,6 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
536} 539}
537EXPORT_SYMBOL(__task_pid_nr_ns); 540EXPORT_SYMBOL(__task_pid_nr_ns);
538 541
539pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
540{
541 return pid_nr_ns(task_tgid(tsk), ns);
542}
543EXPORT_SYMBOL(task_tgid_nr_ns);
544
545struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) 542struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
546{ 543{
547 return ns_of_pid(task_pid(tsk)); 544 return ns_of_pid(task_pid(tsk));
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a65ba137fd15..567ecc826bc8 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -255,7 +255,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
255 * if reparented. 255 * if reparented.
256 */ 256 */
257 for (;;) { 257 for (;;) {
258 set_current_state(TASK_UNINTERRUPTIBLE); 258 set_current_state(TASK_INTERRUPTIBLE);
259 if (pid_ns->nr_hashed == init_pids) 259 if (pid_ns->nr_hashed == init_pids)
260 break; 260 break;
261 schedule(); 261 schedule();
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 3124cebaec31..797f19e2aaa9 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -647,7 +647,7 @@ static void power_down(void)
647 */ 647 */
648int hibernate(void) 648int hibernate(void)
649{ 649{
650 int error; 650 int error, nr_calls = 0;
651 651
652 if (!hibernation_available()) { 652 if (!hibernation_available()) {
653 pr_debug("PM: Hibernation not available.\n"); 653 pr_debug("PM: Hibernation not available.\n");
@@ -662,9 +662,11 @@ int hibernate(void)
662 } 662 }
663 663
664 pm_prepare_console(); 664 pm_prepare_console();
665 error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); 665 error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls);
666 if (error) 666 if (error) {
667 nr_calls--;
667 goto Exit; 668 goto Exit;
669 }
668 670
669 printk(KERN_INFO "PM: Syncing filesystems ... "); 671 printk(KERN_INFO "PM: Syncing filesystems ... ");
670 sys_sync(); 672 sys_sync();
@@ -714,7 +716,7 @@ int hibernate(void)
714 /* Don't bother checking whether freezer_test_done is true */ 716 /* Don't bother checking whether freezer_test_done is true */
715 freezer_test_done = false; 717 freezer_test_done = false;
716 Exit: 718 Exit:
717 pm_notifier_call_chain(PM_POST_HIBERNATION); 719 __pm_notifier_call_chain(PM_POST_HIBERNATION, nr_calls, NULL);
718 pm_restore_console(); 720 pm_restore_console();
719 atomic_inc(&snapshot_device_available); 721 atomic_inc(&snapshot_device_available);
720 Unlock: 722 Unlock:
@@ -740,7 +742,7 @@ int hibernate(void)
740 */ 742 */
741static int software_resume(void) 743static int software_resume(void)
742{ 744{
743 int error; 745 int error, nr_calls = 0;
744 unsigned int flags; 746 unsigned int flags;
745 747
746 /* 748 /*
@@ -827,9 +829,11 @@ static int software_resume(void)
827 } 829 }
828 830
829 pm_prepare_console(); 831 pm_prepare_console();
830 error = pm_notifier_call_chain(PM_RESTORE_PREPARE); 832 error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls);
831 if (error) 833 if (error) {
834 nr_calls--;
832 goto Close_Finish; 835 goto Close_Finish;
836 }
833 837
834 pr_debug("PM: Preparing processes for restore.\n"); 838 pr_debug("PM: Preparing processes for restore.\n");
835 error = freeze_processes(); 839 error = freeze_processes();
@@ -855,7 +859,7 @@ static int software_resume(void)
855 unlock_device_hotplug(); 859 unlock_device_hotplug();
856 thaw_processes(); 860 thaw_processes();
857 Finish: 861 Finish:
858 pm_notifier_call_chain(PM_POST_RESTORE); 862 __pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL);
859 pm_restore_console(); 863 pm_restore_console();
860 atomic_inc(&snapshot_device_available); 864 atomic_inc(&snapshot_device_available);
861 /* For success case, the suspend path will release the lock */ 865 /* For success case, the suspend path will release the lock */
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 27946975eff0..5ea50b1b7595 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -38,12 +38,19 @@ int unregister_pm_notifier(struct notifier_block *nb)
38} 38}
39EXPORT_SYMBOL_GPL(unregister_pm_notifier); 39EXPORT_SYMBOL_GPL(unregister_pm_notifier);
40 40
41int pm_notifier_call_chain(unsigned long val) 41int __pm_notifier_call_chain(unsigned long val, int nr_to_call, int *nr_calls)
42{ 42{
43 int ret = blocking_notifier_call_chain(&pm_chain_head, val, NULL); 43 int ret;
44
45 ret = __blocking_notifier_call_chain(&pm_chain_head, val, NULL,
46 nr_to_call, nr_calls);
44 47
45 return notifier_to_errno(ret); 48 return notifier_to_errno(ret);
46} 49}
50int pm_notifier_call_chain(unsigned long val)
51{
52 return __pm_notifier_call_chain(val, -1, NULL);
53}
47 54
48/* If set, devices may be suspended and resumed asynchronously. */ 55/* If set, devices may be suspended and resumed asynchronously. */
49int pm_async_enabled = 1; 56int pm_async_enabled = 1;
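__pm_notifier_call_chain() exposes the nr_to_call/nr_calls plumbing of the notifier chain so that, when a PM_*_PREPARE notification fails partway through, only the callbacks that actually saw PREPARE receive the matching POST event. The bookkeeping, sketched over a plain callback array with illustrative names:

#include <stddef.h>

typedef int (*pm_cb)(int event);

/* Call at most nr_to_call callbacks (-1 means all); count how many ran. */
static int call_chain(pm_cb *cbs, size_t n, int event,
                      int nr_to_call, int *nr_calls)
{
        int ret = 0;

        for (size_t i = 0; i < n && nr_to_call != 0; i++, nr_to_call--) {
                if (nr_calls)
                        (*nr_calls)++;
                ret = cbs[i](event);
                if (ret)
                        break;          /* stop at the first failure */
        }
        return ret;
}

/* On PREPARE failure, send POST only to the callbacks that succeeded. */
static int prepare_or_rollback(pm_cb *cbs, size_t n)
{
        int nr_calls = 0;
        int ret = call_chain(cbs, n, 1 /* PREPARE */, -1, &nr_calls);

        if (ret)
                call_chain(cbs, n, 2 /* POST */, nr_calls - 1, NULL);
        return ret;
}

The hibernate.c, suspend.c and user.c call sites above follow the same convention: decrement the count when PREPARE itself failed, then replay the POST event to the survivors only.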
diff --git a/kernel/power/power.h b/kernel/power/power.h
index efe1b3b17c88..51f02ecaf125 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -200,6 +200,8 @@ static inline void suspend_test_finish(const char *label) {}
200 200
201#ifdef CONFIG_PM_SLEEP 201#ifdef CONFIG_PM_SLEEP
202/* kernel/power/main.c */ 202/* kernel/power/main.c */
203extern int __pm_notifier_call_chain(unsigned long val, int nr_to_call,
204 int *nr_calls);
203extern int pm_notifier_call_chain(unsigned long val); 205extern int pm_notifier_call_chain(unsigned long val);
204#endif 206#endif
205 207
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 024411816ccf..58209d8bfc56 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -268,16 +268,18 @@ static int suspend_test(int level)
268 */ 268 */
269static int suspend_prepare(suspend_state_t state) 269static int suspend_prepare(suspend_state_t state)
270{ 270{
271 int error; 271 int error, nr_calls = 0;
272 272
273 if (!sleep_state_supported(state)) 273 if (!sleep_state_supported(state))
274 return -EPERM; 274 return -EPERM;
275 275
276 pm_prepare_console(); 276 pm_prepare_console();
277 277
278 error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); 278 error = __pm_notifier_call_chain(PM_SUSPEND_PREPARE, -1, &nr_calls);
279 if (error) 279 if (error) {
280 nr_calls--;
280 goto Finish; 281 goto Finish;
282 }
281 283
282 trace_suspend_resume(TPS("freeze_processes"), 0, true); 284 trace_suspend_resume(TPS("freeze_processes"), 0, true);
283 error = suspend_freeze_processes(); 285 error = suspend_freeze_processes();
@@ -288,7 +290,7 @@ static int suspend_prepare(suspend_state_t state)
288 suspend_stats.failed_freeze++; 290 suspend_stats.failed_freeze++;
289 dpm_save_failed_step(SUSPEND_FREEZE); 291 dpm_save_failed_step(SUSPEND_FREEZE);
290 Finish: 292 Finish:
291 pm_notifier_call_chain(PM_POST_SUSPEND); 293 __pm_notifier_call_chain(PM_POST_SUSPEND, nr_calls, NULL);
292 pm_restore_console(); 294 pm_restore_console();
293 return error; 295 return error;
294} 296}
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 526e8911460a..35310b627388 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -47,7 +47,7 @@ atomic_t snapshot_device_available = ATOMIC_INIT(1);
47static int snapshot_open(struct inode *inode, struct file *filp) 47static int snapshot_open(struct inode *inode, struct file *filp)
48{ 48{
49 struct snapshot_data *data; 49 struct snapshot_data *data;
50 int error; 50 int error, nr_calls = 0;
51 51
52 if (!hibernation_available()) 52 if (!hibernation_available())
53 return -EPERM; 53 return -EPERM;
@@ -74,9 +74,9 @@ static int snapshot_open(struct inode *inode, struct file *filp)
74 swap_type_of(swsusp_resume_device, 0, NULL) : -1; 74 swap_type_of(swsusp_resume_device, 0, NULL) : -1;
75 data->mode = O_RDONLY; 75 data->mode = O_RDONLY;
76 data->free_bitmaps = false; 76 data->free_bitmaps = false;
77 error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); 77 error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls);
78 if (error) 78 if (error)
79 pm_notifier_call_chain(PM_POST_HIBERNATION); 79 __pm_notifier_call_chain(PM_POST_HIBERNATION, --nr_calls, NULL);
80 } else { 80 } else {
81 /* 81 /*
82 * Resuming. We may need to wait for the image device to 82 * Resuming. We may need to wait for the image device to
@@ -86,13 +86,15 @@ static int snapshot_open(struct inode *inode, struct file *filp)
86 86
87 data->swap = -1; 87 data->swap = -1;
88 data->mode = O_WRONLY; 88 data->mode = O_WRONLY;
89 error = pm_notifier_call_chain(PM_RESTORE_PREPARE); 89 error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls);
90 if (!error) { 90 if (!error) {
91 error = create_basic_memory_bitmaps(); 91 error = create_basic_memory_bitmaps();
92 data->free_bitmaps = !error; 92 data->free_bitmaps = !error;
93 } 93 } else
94 nr_calls--;
95
94 if (error) 96 if (error)
95 pm_notifier_call_chain(PM_POST_RESTORE); 97 __pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL);
96 } 98 }
97 if (error) 99 if (error)
98 atomic_inc(&snapshot_device_available); 100 atomic_inc(&snapshot_device_available);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index e7e586bb2022..1a698158face 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1440,7 +1440,7 @@ static void call_console_drivers(int level,
1440{ 1440{
1441 struct console *con; 1441 struct console *con;
1442 1442
1443 trace_console(text, len); 1443 trace_console_rcuidle(text, len);
1444 1444
1445 if (level >= console_loglevel && !ignore_loglevel) 1445 if (level >= console_loglevel && !ignore_loglevel)
1446 return; 1446 return;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index a46c40bfb5f6..5e2cd1030702 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -28,19 +28,25 @@
28#include <linux/compat.h> 28#include <linux/compat.h>
29 29
30 30
31void __ptrace_link(struct task_struct *child, struct task_struct *new_parent,
32 const struct cred *ptracer_cred)
33{
34 BUG_ON(!list_empty(&child->ptrace_entry));
35 list_add(&child->ptrace_entry, &new_parent->ptraced);
36 child->parent = new_parent;
37 child->ptracer_cred = get_cred(ptracer_cred);
38}
39
31/* 40/*
32 * ptrace a task: make the debugger its new parent and 41 * ptrace a task: make the debugger its new parent and
33 * move it to the ptrace list. 42 * move it to the ptrace list.
34 * 43 *
35 * Must be called with the tasklist lock write-held. 44 * Must be called with the tasklist lock write-held.
36 */ 45 */
37void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) 46static void ptrace_link(struct task_struct *child, struct task_struct *new_parent)
38{ 47{
39 BUG_ON(!list_empty(&child->ptrace_entry));
40 list_add(&child->ptrace_entry, &new_parent->ptraced);
41 child->parent = new_parent;
42 rcu_read_lock(); 48 rcu_read_lock();
43 child->ptracer_cred = get_cred(__task_cred(new_parent)); 49 __ptrace_link(child, new_parent, __task_cred(new_parent));
44 rcu_read_unlock(); 50 rcu_read_unlock();
45} 51}
46 52
@@ -151,11 +157,17 @@ static void ptrace_unfreeze_traced(struct task_struct *task)
151 157
152 WARN_ON(!task->ptrace || task->parent != current); 158 WARN_ON(!task->ptrace || task->parent != current);
153 159
160 /*
161 * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely.
162 * Recheck state under the lock to close this race.
163 */
154 spin_lock_irq(&task->sighand->siglock); 164 spin_lock_irq(&task->sighand->siglock);
155 if (__fatal_signal_pending(task)) 165 if (task->state == __TASK_TRACED) {
156 wake_up_state(task, __TASK_TRACED); 166 if (__fatal_signal_pending(task))
157 else 167 wake_up_state(task, __TASK_TRACED);
158 task->state = TASK_TRACED; 168 else
169 task->state = TASK_TRACED;
170 }
159 spin_unlock_irq(&task->sighand->siglock); 171 spin_unlock_irq(&task->sighand->siglock);
160} 172}
161 173
@@ -347,7 +359,7 @@ static int ptrace_attach(struct task_struct *task, long request,
347 flags |= PT_SEIZED; 359 flags |= PT_SEIZED;
348 task->ptrace = flags; 360 task->ptrace = flags;
349 361
350 __ptrace_link(task, current); 362 ptrace_link(task, current);
351 363
352 /* SEIZE doesn't trap tracee on attach */ 364 /* SEIZE doesn't trap tracee on attach */
353 if (!seize) 365 if (!seize)
@@ -414,7 +426,7 @@ static int ptrace_traceme(void)
414 */ 426 */
415 if (!ret && !(current->real_parent->flags & PF_EXITING)) { 427 if (!ret && !(current->real_parent->flags & PF_EXITING)) {
416 current->ptrace = PT_PTRACED; 428 current->ptrace = PT_PTRACED;
417 __ptrace_link(current, current->real_parent); 429 ptrace_link(current, current->real_parent);
418 } 430 }
419 } 431 }
420 write_unlock_irq(&tasklist_lock); 432 write_unlock_irq(&tasklist_lock);
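ptrace_unfreeze_traced() now re-reads task->state under siglock and only flips __TASK_TRACED back to TASK_TRACED if the task is still frozen, because a PTRACE_LISTEN wakeup can change the state in the meantime. The recheck-before-overwrite shape, sketched with illustrative names:

#include <pthread.h>

enum task_state { RUNNING, FROZEN, TRACED };

struct task {
        pthread_mutex_t lock;
        enum task_state state;
};

/* Undo a temporary freeze, but only if nobody changed the state meanwhile. */
static void unfreeze(struct task *t)
{
        pthread_mutex_lock(&t->lock);
        if (t->state == FROZEN)         /* recheck under the lock */
                t->state = TRACED;
        /* else: a concurrent wakeup already moved the task on; leave it alone */
        pthread_mutex_unlock(&t->lock);
}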
diff --git a/kernel/resource.c b/kernel/resource.c
index 249b1eb1e6e1..a4a94e700fb9 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -105,16 +105,25 @@ static int r_show(struct seq_file *m, void *v)
105{ 105{
106 struct resource *root = m->private; 106 struct resource *root = m->private;
107 struct resource *r = v, *p; 107 struct resource *r = v, *p;
108 unsigned long long start, end;
108 int width = root->end < 0x10000 ? 4 : 8; 109 int width = root->end < 0x10000 ? 4 : 8;
109 int depth; 110 int depth;
110 111
111 for (depth = 0, p = r; depth < MAX_IORES_LEVEL; depth++, p = p->parent) 112 for (depth = 0, p = r; depth < MAX_IORES_LEVEL; depth++, p = p->parent)
112 if (p->parent == root) 113 if (p->parent == root)
113 break; 114 break;
115
116 if (file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN)) {
117 start = r->start;
118 end = r->end;
119 } else {
120 start = end = 0;
121 }
122
114 seq_printf(m, "%*s%0*llx-%0*llx : %s\n", 123 seq_printf(m, "%*s%0*llx-%0*llx : %s\n",
115 depth * 2, "", 124 depth * 2, "",
116 width, (unsigned long long) r->start, 125 width, start,
117 width, (unsigned long long) r->end, 126 width, end,
118 r->name ? r->name : "<BAD>"); 127 r->name ? r->name : "<BAD>");
119 return 0; 128 return 0;
120} 129}
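The resource.c hunk prints real start/end addresses in /proc/iomem and /proc/ioports only to readers with CAP_SYS_ADMIN in the initial user namespace, and shows zeroes to everyone else so the layout cannot be used against KASLR. A sketch of that capability-gated formatting; the 'privileged' flag stands in for the file_ns_capable() check:

#include <stdbool.h>
#include <stdio.h>

struct range {
        unsigned long long start, end;
        const char *name;
};

/* Show real addresses only to privileged readers; zeroes otherwise. */
static void show_range(const struct range *r, bool privileged)
{
        unsigned long long start = privileged ? r->start : 0;
        unsigned long long end = privileged ? r->end : 0;

        printf("%08llx-%08llx : %s\n", start, end, r->name ? r->name : "<BAD>");
}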
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1df6da0094f0..c2ef34d5d9f1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5786,7 +5786,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5786 walt_set_window_start(rq); 5786 walt_set_window_start(rq);
5787 raw_spin_unlock_irqrestore(&rq->lock, flags); 5787 raw_spin_unlock_irqrestore(&rq->lock, flags);
5788 rq->calc_load_update = calc_load_update; 5788 rq->calc_load_update = calc_load_update;
5789 account_reset_rq(rq);
5790 break; 5789 break;
5791 5790
5792 case CPU_ONLINE: 5791 case CPU_ONLINE:
@@ -6374,6 +6373,9 @@ enum s_alloc {
6374 * Build an iteration mask that can exclude certain CPUs from the upwards 6373 * Build an iteration mask that can exclude certain CPUs from the upwards
6375 * domain traversal. 6374 * domain traversal.
6376 * 6375 *
6376 * Only CPUs that can arrive at this group should be considered to continue
6377 * balancing.
6378 *
6377 * Asymmetric node setups can result in situations where the domain tree is of 6379 * Asymmetric node setups can result in situations where the domain tree is of
6378 * unequal depth, make sure to skip domains that already cover the entire 6380 * unequal depth, make sure to skip domains that already cover the entire
6379 * range. 6381 * range.
@@ -6385,18 +6387,31 @@ enum s_alloc {
6385 */ 6387 */
6386static void build_group_mask(struct sched_domain *sd, struct sched_group *sg) 6388static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
6387{ 6389{
6388 const struct cpumask *span = sched_domain_span(sd); 6390 const struct cpumask *sg_span = sched_group_cpus(sg);
6389 struct sd_data *sdd = sd->private; 6391 struct sd_data *sdd = sd->private;
6390 struct sched_domain *sibling; 6392 struct sched_domain *sibling;
6391 int i; 6393 int i;
6392 6394
6393 for_each_cpu(i, span) { 6395 for_each_cpu(i, sg_span) {
6394 sibling = *per_cpu_ptr(sdd->sd, i); 6396 sibling = *per_cpu_ptr(sdd->sd, i);
6395 if (!cpumask_test_cpu(i, sched_domain_span(sibling))) 6397
6398 /*
6399 * Can happen in the asymmetric case, where these siblings are
6400 * unused. The mask will not be empty because those CPUs that
6401 * do have the top domain _should_ span the domain.
6402 */
6403 if (!sibling->child)
6404 continue;
6405
6406 /* If we would not end up here, we can't continue from here */
6407 if (!cpumask_equal(sg_span, sched_domain_span(sibling->child)))
6396 continue; 6408 continue;
6397 6409
6398 cpumask_set_cpu(i, sched_group_mask(sg)); 6410 cpumask_set_cpu(i, sched_group_mask(sg));
6399 } 6411 }
6412
6413 /* We must not have empty masks here */
6414 WARN_ON_ONCE(cpumask_empty(sched_group_mask(sg)));
6400} 6415}
6401 6416
6402/* 6417/*
@@ -8572,11 +8587,20 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
8572 if (IS_ERR(tg)) 8587 if (IS_ERR(tg))
8573 return ERR_PTR(-ENOMEM); 8588 return ERR_PTR(-ENOMEM);
8574 8589
8575 sched_online_group(tg, parent);
8576
8577 return &tg->css; 8590 return &tg->css;
8578} 8591}
8579 8592
8593/* Expose task group only after completing cgroup initialization */
8594static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
8595{
8596 struct task_group *tg = css_tg(css);
8597 struct task_group *parent = css_tg(css->parent);
8598
8599 if (parent)
8600 sched_online_group(tg, parent);
8601 return 0;
8602}
8603
8580static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) 8604static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
8581{ 8605{
8582 struct task_group *tg = css_tg(css); 8606 struct task_group *tg = css_tg(css);
@@ -8951,6 +8975,7 @@ static struct cftype cpu_files[] = {
8951 8975
8952struct cgroup_subsys cpu_cgrp_subsys = { 8976struct cgroup_subsys cpu_cgrp_subsys = {
8953 .css_alloc = cpu_cgroup_css_alloc, 8977 .css_alloc = cpu_cgroup_css_alloc,
8978 .css_online = cpu_cgroup_css_online,
8954 .css_released = cpu_cgroup_css_released, 8979 .css_released = cpu_cgroup_css_released,
8955 .css_free = cpu_cgroup_css_free, 8980 .css_free = cpu_cgroup_css_free,
8956 .fork = cpu_cgroup_fork, 8981 .fork = cpu_cgroup_fork,
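The cpu cgroup hunk defers sched_online_group() from css_alloc() to a new css_online() callback, so the task group becomes visible to the rest of the scheduler only after cgroup core has finished setting the css up. The underlying construct-fully-then-publish pattern, sketched standalone with illustrative names:

#include <pthread.h>
#include <stdlib.h>

struct group {
        struct group *next;
        int weight;
};

static pthread_mutex_t groups_lock = PTHREAD_MUTEX_INITIALIZER;
static struct group *groups;            /* the list other threads walk */

/* Phase 1 (css_alloc analogue): build the object privately. */
static struct group *group_alloc(void)
{
        struct group *g = calloc(1, sizeof(*g));

        if (g)
                g->weight = 1024;
        return g;
}

/* Phase 2 (css_online analogue): only now make it reachable by others. */
static void group_online(struct group *g)
{
        pthread_mutex_lock(&groups_lock);
        g->next = groups;
        groups = g;
        pthread_mutex_unlock(&groups_lock);
}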
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 9d9eb50d4059..f10b1cb255b2 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1800,12 +1800,11 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
1800#ifdef CONFIG_SMP 1800#ifdef CONFIG_SMP
1801 if (p->nr_cpus_allowed > 1 && rq->dl.overloaded) 1801 if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
1802 queue_push_tasks(rq); 1802 queue_push_tasks(rq);
1803#else 1803#endif
1804 if (dl_task(rq->curr)) 1804 if (dl_task(rq->curr))
1805 check_preempt_curr_dl(rq, p, 0); 1805 check_preempt_curr_dl(rq, p, 0);
1806 else 1806 else
1807 resched_curr(rq); 1807 resched_curr(rq);
1808#endif
1809 } 1808 }
1810} 1809}
1811 1810
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 30d76a18ae1a..6f353de3f390 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2757,6 +2757,10 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
2757 cfs_rq->load_last_update_time_copy = sa->last_update_time; 2757 cfs_rq->load_last_update_time_copy = sa->last_update_time;
2758#endif 2758#endif
2759 2759
2760 /* Trace CPU load, unless cfs_rq belongs to a non-root task_group */
2761 if (cfs_rq == &rq_of(cfs_rq)->cfs)
2762 trace_sched_load_avg_cpu(cpu_of(rq_of(cfs_rq)), cfs_rq);
2763
2760 return decayed || removed; 2764 return decayed || removed;
2761} 2765}
2762 2766
@@ -2780,7 +2784,6 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
2780 2784
2781 if (entity_is_task(se)) 2785 if (entity_is_task(se))
2782 trace_sched_load_avg_task(task_of(se), &se->avg); 2786 trace_sched_load_avg_task(task_of(se), &se->avg);
2783 trace_sched_load_avg_cpu(cpu, cfs_rq);
2784} 2787}
2785 2788
2786static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) 2789static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -3958,6 +3961,26 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
3958 if (!cfs_bandwidth_used()) 3961 if (!cfs_bandwidth_used())
3959 return; 3962 return;
3960 3963
3964 /* Synchronize hierarchical throttle counter: */
3965 if (unlikely(!cfs_rq->throttle_uptodate)) {
3966 struct rq *rq = rq_of(cfs_rq);
3967 struct cfs_rq *pcfs_rq;
3968 struct task_group *tg;
3969
3970 cfs_rq->throttle_uptodate = 1;
3971
3972 /* Get closest up-to-date node, because leaves go first: */
3973 for (tg = cfs_rq->tg->parent; tg; tg = tg->parent) {
3974 pcfs_rq = tg->cfs_rq[cpu_of(rq)];
3975 if (pcfs_rq->throttle_uptodate)
3976 break;
3977 }
3978 if (tg) {
3979 cfs_rq->throttle_count = pcfs_rq->throttle_count;
3980 cfs_rq->throttled_clock_task = rq_clock_task(rq);
3981 }
3982 }
3983
3961 /* an active group must be handled by the update_curr()->put() path */ 3984 /* an active group must be handled by the update_curr()->put() path */
3962 if (!cfs_rq->runtime_enabled || cfs_rq->curr) 3985 if (!cfs_rq->runtime_enabled || cfs_rq->curr)
3963 return; 3986 return;
@@ -4343,15 +4366,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
4343 4366
4344 /* Don't dequeue parent if it has other entities besides us */ 4367 /* Don't dequeue parent if it has other entities besides us */
4345 if (cfs_rq->load.weight) { 4368 if (cfs_rq->load.weight) {
4369 /* Avoid re-evaluating load for this entity: */
4370 se = parent_entity(se);
4346 /* 4371 /*
4347 * Bias pick_next to pick a task from this cfs_rq, as 4372 * Bias pick_next to pick a task from this cfs_rq, as
4348 * p is sleeping when it is within its sched_slice. 4373 * p is sleeping when it is within its sched_slice.
4349 */ 4374 */
4350 if (task_sleep && parent_entity(se)) 4375 if (task_sleep && se && !throttled_hierarchy(cfs_rq))
4351 set_next_buddy(parent_entity(se)); 4376 set_next_buddy(se);
4352
4353 /* avoid re-evaluating load for this entity */
4354 se = parent_entity(se);
4355 break; 4377 break;
4356 } 4378 }
4357 flags |= DEQUEUE_SLEEP; 4379 flags |= DEQUEUE_SLEEP;
@@ -4916,7 +4938,7 @@ long group_norm_util(struct energy_env *eenv, struct sched_group *sg)
4916} 4938}
4917 4939
4918static int find_new_capacity(struct energy_env *eenv, 4940static int find_new_capacity(struct energy_env *eenv,
4919 const struct sched_group_energy const *sge) 4941 const struct sched_group_energy * const sge)
4920{ 4942{
4921 int idx; 4943 int idx;
4922 unsigned long util = group_max_util(eenv); 4944 unsigned long util = group_max_util(eenv);
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index b0b93fd33af9..f8e8d68ed3fd 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -201,8 +201,9 @@ void calc_load_exit_idle(void)
201 struct rq *this_rq = this_rq(); 201 struct rq *this_rq = this_rq();
202 202
203 /* 203 /*
204 * If we're still before the sample window, we're done. 204 * If we're still before the pending sample window, we're done.
205 */ 205 */
206 this_rq->calc_load_update = calc_load_update;
206 if (time_before(jiffies, this_rq->calc_load_update)) 207 if (time_before(jiffies, this_rq->calc_load_update))
207 return; 208 return;
208 209
@@ -211,7 +212,6 @@ void calc_load_exit_idle(void)
211 * accounted through the nohz accounting, so skip the entire deal and 212 * accounted through the nohz accounting, so skip the entire deal and
212 * sync up for the next window. 213 * sync up for the next window.
213 */ 214 */
214 this_rq->calc_load_update = calc_load_update;
215 if (time_before(jiffies, this_rq->calc_load_update + 10)) 215 if (time_before(jiffies, this_rq->calc_load_update + 10))
216 this_rq->calc_load_update += LOAD_FREQ; 216 this_rq->calc_load_update += LOAD_FREQ;
217} 217}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 8a16cba968c4..541b8494450e 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2235,10 +2235,9 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
2235#ifdef CONFIG_SMP 2235#ifdef CONFIG_SMP
2236 if (p->nr_cpus_allowed > 1 && rq->rt.overloaded) 2236 if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
2237 queue_push_tasks(rq); 2237 queue_push_tasks(rq);
2238#else 2238#endif /* CONFIG_SMP */
2239 if (p->prio < rq->curr->prio) 2239 if (p->prio < rq->curr->prio)
2240 resched_curr(rq); 2240 resched_curr(rq);
2241#endif /* CONFIG_SMP */
2242 } 2241 }
2243} 2242}
2244 2243
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 2f2b959ad244..0f18bcc49025 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -421,7 +421,7 @@ struct cfs_rq {
421 421
422 u64 throttled_clock, throttled_clock_task; 422 u64 throttled_clock, throttled_clock_task;
423 u64 throttled_clock_task_time; 423 u64 throttled_clock_task_time;
424 int throttled, throttle_count; 424 int throttled, throttle_count, throttle_uptodate;
425 struct list_head throttled_list; 425 struct list_head throttled_list;
426#endif /* CONFIG_CFS_BANDWIDTH */ 426#endif /* CONFIG_CFS_BANDWIDTH */
427#endif /* CONFIG_FAIR_GROUP_SCHED */ 427#endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -915,7 +915,7 @@ struct sched_group {
915 915
916 unsigned int group_weight; 916 unsigned int group_weight;
917 struct sched_group_capacity *sgc; 917 struct sched_group_capacity *sgc;
918 const struct sched_group_energy const *sge; 918 const struct sched_group_energy *sge;
919 919
920 /* 920 /*
921 * The CPUs this group covers. 921 * The CPUs this group covers.
@@ -2025,16 +2025,3 @@ static inline u64 irq_time_read(int cpu)
2025} 2025}
2026#endif /* CONFIG_64BIT */ 2026#endif /* CONFIG_64BIT */
2027#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ 2027#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
2028
2029static inline void account_reset_rq(struct rq *rq)
2030{
2031#ifdef CONFIG_IRQ_TIME_ACCOUNTING
2032 rq->prev_irq_time = 0;
2033#endif
2034#ifdef CONFIG_PARAVIRT
2035 rq->prev_steal_time = 0;
2036#endif
2037#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
2038 rq->prev_steal_time_rq = 0;
2039#endif
2040}
diff --git a/kernel/signal.c b/kernel/signal.c
index f3f1f7a972fd..5d50ea899b6d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -346,7 +346,7 @@ static bool task_participate_group_stop(struct task_struct *task)
346 * fresh group stop. Read comment in do_signal_stop() for details. 346 * fresh group stop. Read comment in do_signal_stop() for details.
347 */ 347 */
348 if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) { 348 if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) {
349 sig->flags = SIGNAL_STOP_STOPPED; 349 signal_set_stop_flags(sig, SIGNAL_STOP_STOPPED);
350 return true; 350 return true;
351 } 351 }
352 return false; 352 return false;
@@ -503,7 +503,8 @@ int unhandled_signal(struct task_struct *tsk, int sig)
503 return !tsk->ptrace; 503 return !tsk->ptrace;
504} 504}
505 505
506static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) 506static void collect_signal(int sig, struct sigpending *list, siginfo_t *info,
507 bool *resched_timer)
507{ 508{
508 struct sigqueue *q, *first = NULL; 509 struct sigqueue *q, *first = NULL;
509 510
@@ -525,6 +526,12 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
525still_pending: 526still_pending:
526 list_del_init(&first->list); 527 list_del_init(&first->list);
527 copy_siginfo(info, &first->info); 528 copy_siginfo(info, &first->info);
529
530 *resched_timer =
531 (first->flags & SIGQUEUE_PREALLOC) &&
532 (info->si_code == SI_TIMER) &&
533 (info->si_sys_private);
534
528 __sigqueue_free(first); 535 __sigqueue_free(first);
529 } else { 536 } else {
530 /* 537 /*
@@ -541,12 +548,12 @@ still_pending:
541} 548}
542 549
543static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, 550static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
544 siginfo_t *info) 551 siginfo_t *info, bool *resched_timer)
545{ 552{
546 int sig = next_signal(pending, mask); 553 int sig = next_signal(pending, mask);
547 554
548 if (sig) 555 if (sig)
549 collect_signal(sig, pending, info); 556 collect_signal(sig, pending, info, resched_timer);
550 return sig; 557 return sig;
551} 558}
552 559
@@ -558,15 +565,16 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
558 */ 565 */
559int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) 566int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
560{ 567{
568 bool resched_timer = false;
561 int signr; 569 int signr;
562 570
563 /* We only dequeue private signals from ourselves, we don't let 571 /* We only dequeue private signals from ourselves, we don't let
564 * signalfd steal them 572 * signalfd steal them
565 */ 573 */
566 signr = __dequeue_signal(&tsk->pending, mask, info); 574 signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer);
567 if (!signr) { 575 if (!signr) {
568 signr = __dequeue_signal(&tsk->signal->shared_pending, 576 signr = __dequeue_signal(&tsk->signal->shared_pending,
569 mask, info); 577 mask, info, &resched_timer);
570 /* 578 /*
571 * itimer signal ? 579 * itimer signal ?
572 * 580 *
@@ -611,7 +619,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
611 */ 619 */
612 current->jobctl |= JOBCTL_STOP_DEQUEUED; 620 current->jobctl |= JOBCTL_STOP_DEQUEUED;
613 } 621 }
614 if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) { 622 if (resched_timer) {
615 /* 623 /*
616 * Release the siglock to ensure proper locking order 624 * Release the siglock to ensure proper locking order
617 * of timer locks outside of siglocks. Note, we leave 625 * of timer locks outside of siglocks. Note, we leave
@@ -837,7 +845,7 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
837 * will take ->siglock, notice SIGNAL_CLD_MASK, and 845 * will take ->siglock, notice SIGNAL_CLD_MASK, and
838 * notify its parent. See get_signal_to_deliver(). 846 * notify its parent. See get_signal_to_deliver().
839 */ 847 */
840 signal->flags = why | SIGNAL_STOP_CONTINUED; 848 signal_set_stop_flags(signal, why | SIGNAL_STOP_CONTINUED);
841 signal->group_stop_count = 0; 849 signal->group_stop_count = 0;
842 signal->group_exit_code = 0; 850 signal->group_exit_code = 0;
843 } 851 }
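
The signal.c hunks above compute the timer-reschedule decision inside collect_signal(), while the sigqueue element is still owned, and hand it back through a bool out-parameter instead of re-inspecting si_code/si_sys_private after the entry may already have been freed. A minimal userspace sketch of that out-parameter shape, using purely hypothetical names (entry, collect) rather than the kernel's sigqueue API:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
    int code;
    int sys_private;
    bool prealloc;
    struct entry *next;
};

/* Pop the head of the list; decide whether a timer reschedule is needed
 * *before* the entry is freed, mirroring how collect_signal() now fills
 * in *resched_timer while it still owns the sigqueue element. */
static int collect(struct entry **head, bool *resched_timer)
{
    struct entry *e = *head;

    if (!e)
        return 0;
    *head = e->next;

    *resched_timer = e->prealloc &&
                     e->code == 1 /* SI_TIMER-like marker */ &&
                     e->sys_private;
    free(e);
    return 1;
}

int main(void)
{
    struct entry *head = malloc(sizeof(*head));
    bool resched = false;

    head->code = 1;
    head->sys_private = 1;
    head->prealloc = true;
    head->next = NULL;

    if (collect(&head, &resched) && resched)
        printf("reschedule the interval timer\n");  /* acted on afterwards */
    return 0;
}

The caller acts on the flag only after the pop has completed, much as dequeue_signal() drops siglock before touching the timer.
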
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index abb795e8a6f1..a098831020f4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -65,6 +65,7 @@
65#include <linux/sched/sysctl.h> 65#include <linux/sched/sysctl.h>
66#include <linux/kexec.h> 66#include <linux/kexec.h>
67#include <linux/bpf.h> 67#include <linux/bpf.h>
68#include <linux/mount.h>
68 69
69#include <asm/uaccess.h> 70#include <asm/uaccess.h>
70#include <asm/processor.h> 71#include <asm/processor.h>
@@ -174,7 +175,7 @@ extern int no_unaligned_warning;
174#define SYSCTL_WRITES_WARN 0 175#define SYSCTL_WRITES_WARN 0
175#define SYSCTL_WRITES_STRICT 1 176#define SYSCTL_WRITES_STRICT 1
176 177
177static int sysctl_writes_strict = SYSCTL_WRITES_WARN; 178static int sysctl_writes_strict = SYSCTL_WRITES_STRICT;
178 179
179static int proc_do_cad_pid(struct ctl_table *table, int write, 180static int proc_do_cad_pid(struct ctl_table *table, int write,
180 void __user *buffer, size_t *lenp, loff_t *ppos); 181 void __user *buffer, size_t *lenp, loff_t *ppos);
@@ -1853,6 +1854,14 @@ static struct ctl_table fs_table[] = {
1853 .mode = 0644, 1854 .mode = 0644,
1854 .proc_handler = proc_doulongvec_minmax, 1855 .proc_handler = proc_doulongvec_minmax,
1855 }, 1856 },
1857 {
1858 .procname = "mount-max",
1859 .data = &sysctl_mount_max,
1860 .maxlen = sizeof(unsigned int),
1861 .mode = 0644,
1862 .proc_handler = proc_dointvec_minmax,
1863 .extra1 = &one,
1864 },
1856 { } 1865 { }
1857}; 1866};
1858 1867
@@ -2162,9 +2171,12 @@ static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
2162 if (write) { 2171 if (write) {
2163 if (*negp) 2172 if (*negp)
2164 return -EINVAL; 2173 return -EINVAL;
2174 if (*lvalp > UINT_MAX)
2175 return -EINVAL;
2165 *valp = *lvalp; 2176 *valp = *lvalp;
2166 } else { 2177 } else {
2167 unsigned int val = *valp; 2178 unsigned int val = *valp;
2179 *negp = false;
2168 *lvalp = (unsigned long)val; 2180 *lvalp = (unsigned long)val;
2169 } 2181 }
2170 return 0; 2182 return 0;
@@ -2518,6 +2530,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
2518 break; 2530 break;
2519 if (neg) 2531 if (neg)
2520 continue; 2532 continue;
2533 val = convmul * val / convdiv;
2521 if ((min && val < *min) || (max && val > *max)) 2534 if ((min && val < *min) || (max && val > *max))
2522 continue; 2535 continue;
2523 *i = val; 2536 *i = val;
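
The do_proc_douintvec_conv() change above rejects written values that exceed UINT_MAX instead of silently truncating them on 64-bit, and clears *negp on reads. A small standalone C sketch of the same range check, built around a hypothetical parse_uint() helper rather than the sysctl proc handlers:

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* Parse a decimal string into an unsigned int, failing with -EINVAL when
 * the value is negative or does not fit, rather than truncating. */
static int parse_uint(const char *s, unsigned int *out)
{
    char *end;
    unsigned long val;

    if (*s == '-')
        return -EINVAL;            /* negative input rejected */
    errno = 0;
    val = strtoul(s, &end, 10);
    if (errno || end == s || *end != '\0')
        return -EINVAL;
    if (val > UINT_MAX)
        return -EINVAL;            /* would truncate on 64-bit */
    *out = (unsigned int)val;
    return 0;
}

int main(void)
{
    unsigned int v;

    printf("%d\n", parse_uint("4294967295", &v));   /* accepted: fits exactly */
    printf("%d\n", parse_uint("4294967296", &v));   /* rejected: out of range */
    return 0;
}
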
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 7fbba635a549..6fcc367ad531 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -339,7 +339,7 @@ void alarm_start_relative(struct alarm *alarm, ktime_t start)
339{ 339{
340 struct alarm_base *base = &alarm_bases[alarm->type]; 340 struct alarm_base *base = &alarm_bases[alarm->type];
341 341
342 start = ktime_add(start, base->gettime()); 342 start = ktime_add_safe(start, base->gettime());
343 alarm_start(alarm, start); 343 alarm_start(alarm, start);
344} 344}
345EXPORT_SYMBOL_GPL(alarm_start_relative); 345EXPORT_SYMBOL_GPL(alarm_start_relative);
@@ -425,7 +425,7 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
425 overrun++; 425 overrun++;
426 } 426 }
427 427
428 alarm->node.expires = ktime_add(alarm->node.expires, interval); 428 alarm->node.expires = ktime_add_safe(alarm->node.expires, interval);
429 return overrun; 429 return overrun;
430} 430}
431EXPORT_SYMBOL_GPL(alarm_forward); 431EXPORT_SYMBOL_GPL(alarm_forward);
@@ -611,13 +611,22 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
611 611
612 /* start the timer */ 612 /* start the timer */
613 timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval); 613 timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval);
614
615 /*
616 * Rate limit to the tick as a hot fix to prevent DOS. Will be
617 * mopped up later.
618 */
619 if (timr->it.alarm.interval.tv64 &&
620 ktime_to_ns(timr->it.alarm.interval) < TICK_NSEC)
621 timr->it.alarm.interval = ktime_set(0, TICK_NSEC);
622
614 exp = timespec_to_ktime(new_setting->it_value); 623 exp = timespec_to_ktime(new_setting->it_value);
615 /* Convert (if necessary) to absolute time */ 624 /* Convert (if necessary) to absolute time */
616 if (flags != TIMER_ABSTIME) { 625 if (flags != TIMER_ABSTIME) {
617 ktime_t now; 626 ktime_t now;
618 627
619 now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime(); 628 now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime();
620 exp = ktime_add(now, exp); 629 exp = ktime_add_safe(now, exp);
621 } 630 }
622 631
623 alarm_start(&timr->it.alarm.alarmtimer, exp); 632 alarm_start(&timr->it.alarm.alarmtimer, exp);
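
The alarmtimer hunks replace ktime_add() with ktime_add_safe() at every user-reachable addition, so an absurd relative expiry saturates at KTIME_MAX instead of wrapping into the past, and periodic intervals are clamped to at least one tick. A hedged userspace approximation of a saturating 64-bit add (ktime_add_safe_sketch is only a stand-in, not the helper from include/linux/ktime.h):

#include <stdint.h>
#include <stdio.h>

#define KTIME_MAX INT64_MAX

/* Add two signed 64-bit nanosecond values, clamping to KTIME_MAX on
 * overflow instead of wrapping around to a negative expiry. */
static int64_t ktime_add_safe_sketch(int64_t lhs, int64_t rhs)
{
    int64_t res;

    if (__builtin_add_overflow(lhs, rhs, &res))
        return KTIME_MAX;
    return res;
}

int main(void)
{
    int64_t now = 1000000000;              /* 1s of uptime */
    int64_t huge = INT64_MAX - 10;         /* absurd relative timeout */

    printf("%lld\n", (long long)ktime_add_safe_sketch(now, huge));
    return 0;
}
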
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 5fa544f3f560..738f3467d169 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -116,6 +116,26 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
116 tk->offs_boot = ktime_add(tk->offs_boot, delta); 116 tk->offs_boot = ktime_add(tk->offs_boot, delta);
117} 117}
118 118
119/*
120 * tk_clock_read - atomic clocksource read() helper
121 *
122 * This helper is necessary to use in the read paths because, while the
123 * seqlock ensures we don't return a bad value while structures are updated,
124 * it doesn't protect from potential crashes. There is the possibility that
125 * the tkr's clocksource may change between the read reference, and the
126 * clock reference passed to the read function. This can cause crashes if
127 * the wrong clocksource is passed to the wrong read function.
128 * This isn't necessary to use when holding the timekeeper_lock or doing
129 * a read of the fast-timekeeper tkrs (which is protected by its own locking
130 * and update logic).
131 */
132static inline u64 tk_clock_read(struct tk_read_base *tkr)
133{
134 struct clocksource *clock = READ_ONCE(tkr->clock);
135
136 return clock->read(clock);
137}
138
119#ifdef CONFIG_DEBUG_TIMEKEEPING 139#ifdef CONFIG_DEBUG_TIMEKEEPING
120#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */ 140#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
121 141
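
tk_clock_read() above takes one snapshot of tkr->clock and calls that same object's read() method, so the clocksource passed as the argument can never diverge from the one whose callback is invoked mid-update. A simplified userspace illustration of the pattern, with hypothetical types and C11 atomics standing in for the kernel's READ_ONCE():

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct clocksource_like {
    uint64_t (*read)(struct clocksource_like *cs);
    uint64_t value;
};

static uint64_t read_fixed(struct clocksource_like *cs)
{
    return cs->value;
}

static struct clocksource_like clock_a = { .read = read_fixed, .value = 42 };

/* An updater may swap this pointer at any time. */
static _Atomic(struct clocksource_like *) current_clock;

/* Load the shared pointer exactly once and use that object both as the
 * argument and as the provider of read() -- never dereference the shared
 * pointer twice, which is the inconsistency tk_clock_read() closes. */
static uint64_t tk_clock_read_sketch(void)
{
    struct clocksource_like *clock =
        atomic_load_explicit(&current_clock, memory_order_relaxed);

    return clock->read(clock);
}

int main(void)
{
    atomic_store(&current_clock, &clock_a);
    printf("%llu\n", (unsigned long long)tk_clock_read_sketch());
    return 0;
}

The design choice is to pair the data and its accessor in a single object reference, rather than carrying a separate function pointer that can go stale.
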
@@ -173,7 +193,7 @@ static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
173 */ 193 */
174 do { 194 do {
175 seq = read_seqcount_begin(&tk_core.seq); 195 seq = read_seqcount_begin(&tk_core.seq);
176 now = tkr->read(tkr->clock); 196 now = tk_clock_read(tkr);
177 last = tkr->cycle_last; 197 last = tkr->cycle_last;
178 mask = tkr->mask; 198 mask = tkr->mask;
179 max = tkr->clock->max_cycles; 199 max = tkr->clock->max_cycles;
@@ -207,7 +227,7 @@ static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
207 cycle_t cycle_now, delta; 227 cycle_t cycle_now, delta;
208 228
209 /* read clocksource */ 229 /* read clocksource */
210 cycle_now = tkr->read(tkr->clock); 230 cycle_now = tk_clock_read(tkr);
211 231
212 /* calculate the delta since the last update_wall_time */ 232 /* calculate the delta since the last update_wall_time */
213 delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); 233 delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
@@ -235,12 +255,10 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
235 255
236 old_clock = tk->tkr_mono.clock; 256 old_clock = tk->tkr_mono.clock;
237 tk->tkr_mono.clock = clock; 257 tk->tkr_mono.clock = clock;
238 tk->tkr_mono.read = clock->read;
239 tk->tkr_mono.mask = clock->mask; 258 tk->tkr_mono.mask = clock->mask;
240 tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock); 259 tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
241 260
242 tk->tkr_raw.clock = clock; 261 tk->tkr_raw.clock = clock;
243 tk->tkr_raw.read = clock->read;
244 tk->tkr_raw.mask = clock->mask; 262 tk->tkr_raw.mask = clock->mask;
245 tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last; 263 tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
246 264
@@ -404,7 +422,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
404 422
405 now += timekeeping_delta_to_ns(tkr, 423 now += timekeeping_delta_to_ns(tkr,
406 clocksource_delta( 424 clocksource_delta(
407 tkr->read(tkr->clock), 425 tk_clock_read(tkr),
408 tkr->cycle_last, 426 tkr->cycle_last,
409 tkr->mask)); 427 tkr->mask));
410 } while (read_seqcount_retry(&tkf->seq, seq)); 428 } while (read_seqcount_retry(&tkf->seq, seq));
@@ -461,6 +479,10 @@ static cycle_t dummy_clock_read(struct clocksource *cs)
461 return cycles_at_suspend; 479 return cycles_at_suspend;
462} 480}
463 481
482static struct clocksource dummy_clock = {
483 .read = dummy_clock_read,
484};
485
464/** 486/**
465 * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource. 487 * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
466 * @tk: Timekeeper to snapshot. 488 * @tk: Timekeeper to snapshot.
@@ -477,13 +499,13 @@ static void halt_fast_timekeeper(struct timekeeper *tk)
477 struct tk_read_base *tkr = &tk->tkr_mono; 499 struct tk_read_base *tkr = &tk->tkr_mono;
478 500
479 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); 501 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
480 cycles_at_suspend = tkr->read(tkr->clock); 502 cycles_at_suspend = tk_clock_read(tkr);
481 tkr_dummy.read = dummy_clock_read; 503 tkr_dummy.clock = &dummy_clock;
482 update_fast_timekeeper(&tkr_dummy, &tk_fast_mono); 504 update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
483 505
484 tkr = &tk->tkr_raw; 506 tkr = &tk->tkr_raw;
485 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); 507 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
486 tkr_dummy.read = dummy_clock_read; 508 tkr_dummy.clock = &dummy_clock;
487 update_fast_timekeeper(&tkr_dummy, &tk_fast_raw); 509 update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
488} 510}
489 511
@@ -647,11 +669,10 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
647 */ 669 */
648static void timekeeping_forward_now(struct timekeeper *tk) 670static void timekeeping_forward_now(struct timekeeper *tk)
649{ 671{
650 struct clocksource *clock = tk->tkr_mono.clock;
651 cycle_t cycle_now, delta; 672 cycle_t cycle_now, delta;
652 s64 nsec; 673 s64 nsec;
653 674
654 cycle_now = tk->tkr_mono.read(clock); 675 cycle_now = tk_clock_read(&tk->tkr_mono);
655 delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); 676 delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
656 tk->tkr_mono.cycle_last = cycle_now; 677 tk->tkr_mono.cycle_last = cycle_now;
657 tk->tkr_raw.cycle_last = cycle_now; 678 tk->tkr_raw.cycle_last = cycle_now;
@@ -1434,7 +1455,7 @@ void timekeeping_resume(void)
1434 * The less preferred source will only be tried if there is no better 1455 * The less preferred source will only be tried if there is no better
1435 * usable source. The rtc part is handled separately in rtc core code. 1456 * usable source. The rtc part is handled separately in rtc core code.
1436 */ 1457 */
1437 cycle_now = tk->tkr_mono.read(clock); 1458 cycle_now = tk_clock_read(&tk->tkr_mono);
1438 if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && 1459 if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
1439 cycle_now > tk->tkr_mono.cycle_last) { 1460 cycle_now > tk->tkr_mono.cycle_last) {
1440 u64 num, max = ULLONG_MAX; 1461 u64 num, max = ULLONG_MAX;
@@ -1829,7 +1850,7 @@ void update_wall_time(void)
1829#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET 1850#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
1830 offset = real_tk->cycle_interval; 1851 offset = real_tk->cycle_interval;
1831#else 1852#else
1832 offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock), 1853 offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
1833 tk->tkr_mono.cycle_last, tk->tkr_mono.mask); 1854 tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
1834#endif 1855#endif
1835 1856
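
With the ->read member gone from tk_read_base, halt_fast_timekeeper() above freezes the fast timekeeper by substituting an entire dummy clocksource whose read() replays the cycle count captured at suspend, rather than patching a lone function pointer. A compact sketch of that substitution under hypothetical names (clocksource_like, tk_read_base_like):

#include <stdint.h>
#include <stdio.h>

struct clocksource_like {
    uint64_t (*read)(struct clocksource_like *cs);
};

static uint64_t hw_counter = 123456;
static uint64_t cycles_at_suspend;

static uint64_t hw_read(struct clocksource_like *cs)
{
    (void)cs;
    return hw_counter;                 /* pretend hardware counter */
}

static uint64_t dummy_read(struct clocksource_like *cs)
{
    (void)cs;
    return cycles_at_suspend;          /* replay the frozen reading */
}

static struct clocksource_like hw_clock    = { .read = hw_read };
static struct clocksource_like dummy_clock = { .read = dummy_read };

struct tk_read_base_like {
    struct clocksource_like *clock;
};

/* Capture the last reading, then repoint the reader at a whole dummy
 * clocksource object -- not just a function pointer -- so the clock and
 * its read() can never disagree. */
static void halt_sketch(struct tk_read_base_like *tkr)
{
    cycles_at_suspend = tkr->clock->read(tkr->clock);
    tkr->clock = &dummy_clock;
}

int main(void)
{
    struct tk_read_base_like tkr = { .clock = &hw_clock };

    halt_sketch(&tkr);
    hw_counter = 999999;               /* hardware moves on or suspends */
    printf("%llu\n", (unsigned long long)tkr.clock->read(tkr.clock));
    return 0;
}
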
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 3f743b147247..eba904bae48c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3535,7 +3535,7 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod)
3535 int exclude_mod = 0; 3535 int exclude_mod = 0;
3536 int found = 0; 3536 int found = 0;
3537 int ret; 3537 int ret;
3538 int clear_filter; 3538 int clear_filter = 0;
3539 3539
3540 if (func) { 3540 if (func) {
3541 func_g.type = filter_parse_regex(func, len, &func_g.search, 3541 func_g.type = filter_parse_regex(func, len, &func_g.search,
@@ -3677,23 +3677,24 @@ static void __enable_ftrace_function_probe(struct ftrace_ops_hash *old_hash)
3677 ftrace_probe_registered = 1; 3677 ftrace_probe_registered = 1;
3678} 3678}
3679 3679
3680static void __disable_ftrace_function_probe(void) 3680static bool __disable_ftrace_function_probe(void)
3681{ 3681{
3682 int i; 3682 int i;
3683 3683
3684 if (!ftrace_probe_registered) 3684 if (!ftrace_probe_registered)
3685 return; 3685 return false;
3686 3686
3687 for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { 3687 for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
3688 struct hlist_head *hhd = &ftrace_func_hash[i]; 3688 struct hlist_head *hhd = &ftrace_func_hash[i];
3689 if (hhd->first) 3689 if (hhd->first)
3690 return; 3690 return false;
3691 } 3691 }
3692 3692
3693 /* no more funcs left */ 3693 /* no more funcs left */
3694 ftrace_shutdown(&trace_probe_ops, 0); 3694 ftrace_shutdown(&trace_probe_ops, 0);
3695 3695
3696 ftrace_probe_registered = 0; 3696 ftrace_probe_registered = 0;
3697 return true;
3697} 3698}
3698 3699
3699 3700
@@ -3820,6 +3821,7 @@ static void
3820__unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, 3821__unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
3821 void *data, int flags) 3822 void *data, int flags)
3822{ 3823{
3824 struct ftrace_ops_hash old_hash_ops;
3823 struct ftrace_func_entry *rec_entry; 3825 struct ftrace_func_entry *rec_entry;
3824 struct ftrace_func_probe *entry; 3826 struct ftrace_func_probe *entry;
3825 struct ftrace_func_probe *p; 3827 struct ftrace_func_probe *p;
@@ -3831,6 +3833,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
3831 struct hlist_node *tmp; 3833 struct hlist_node *tmp;
3832 char str[KSYM_SYMBOL_LEN]; 3834 char str[KSYM_SYMBOL_LEN];
3833 int i, ret; 3835 int i, ret;
3836 bool disabled;
3834 3837
3835 if (glob && (strcmp(glob, "*") == 0 || !strlen(glob))) 3838 if (glob && (strcmp(glob, "*") == 0 || !strlen(glob)))
3836 func_g.search = NULL; 3839 func_g.search = NULL;
@@ -3849,6 +3852,10 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
3849 3852
3850 mutex_lock(&trace_probe_ops.func_hash->regex_lock); 3853 mutex_lock(&trace_probe_ops.func_hash->regex_lock);
3851 3854
3855 old_hash_ops.filter_hash = old_hash;
3856 /* Probes only have filters */
3857 old_hash_ops.notrace_hash = NULL;
3858
3852 hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); 3859 hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
3853 if (!hash) 3860 if (!hash)
3854 /* Hmm, should report this somehow */ 3861 /* Hmm, should report this somehow */
@@ -3886,12 +3893,17 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
3886 } 3893 }
3887 } 3894 }
3888 mutex_lock(&ftrace_lock); 3895 mutex_lock(&ftrace_lock);
3889 __disable_ftrace_function_probe(); 3896 disabled = __disable_ftrace_function_probe();
3890 /* 3897 /*
3891 * Remove after the disable is called. Otherwise, if the last 3898 * Remove after the disable is called. Otherwise, if the last
3892 * probe is removed, a null hash means *all enabled*. 3899 * probe is removed, a null hash means *all enabled*.
3893 */ 3900 */
3894 ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash); 3901 ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
3902
3903 /* still need to update the function call sites */
3904 if (ftrace_enabled && !disabled)
3905 ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS,
3906 &old_hash_ops);
3895 synchronize_sched(); 3907 synchronize_sched();
3896 if (!ret) 3908 if (!ret)
3897 free_ftrace_hash_rcu(old_hash); 3909 free_ftrace_hash_rcu(old_hash);
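
In the ftrace.c hunks, __disable_ftrace_function_probe() now reports whether it actually shut the probe ops down, and the unregister path falls back to an explicit FTRACE_UPDATE_CALLS run with the saved old_hash_ops when it did not (clear_filter is also initialized now). A loose sketch of the "teardown tells the caller whether it ran" shape, with invented names and none of the real ftrace locking:

#include <stdbool.h>
#include <stdio.h>

static bool registered = true;
static int active_probes = 1;

/* Shut down only when nothing is left; tell the caller whether we did. */
static bool disable_probes(void)
{
    if (!registered)
        return false;
    if (active_probes > 0)
        return false;          /* other probes still need the machinery */
    registered = false;
    printf("full shutdown performed\n");
    return true;
}

int main(void)
{
    active_probes = 0;         /* the last probe is being removed */

    bool disabled = disable_probes();

    /* If the full shutdown did not happen, the caller still has to
     * refresh the call sites itself -- the step the fix adds. */
    if (!disabled)
        printf("update call sites with the old hash\n");
    return 0;
}
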
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index acbb0e73d3a2..1275175b0946 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3440,11 +3440,23 @@ EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
3440int ring_buffer_iter_empty(struct ring_buffer_iter *iter) 3440int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
3441{ 3441{
3442 struct ring_buffer_per_cpu *cpu_buffer; 3442 struct ring_buffer_per_cpu *cpu_buffer;
3443 struct buffer_page *reader;
3444 struct buffer_page *head_page;
3445 struct buffer_page *commit_page;
3446 unsigned commit;
3443 3447
3444 cpu_buffer = iter->cpu_buffer; 3448 cpu_buffer = iter->cpu_buffer;
3445 3449
3446 return iter->head_page == cpu_buffer->commit_page && 3450 /* Remember, trace recording is off when iterator is in use */
3447 iter->head == rb_commit_index(cpu_buffer); 3451 reader = cpu_buffer->reader_page;
3452 head_page = cpu_buffer->head_page;
3453 commit_page = cpu_buffer->commit_page;
3454 commit = rb_page_commit(commit_page);
3455
3456 return ((iter->head_page == commit_page && iter->head == commit) ||
3457 (iter->head_page == reader && commit_page == head_page &&
3458 head_page->read == commit &&
3459 iter->head == rb_page_commit(cpu_buffer->reader_page)));
3448} 3460}
3449EXPORT_SYMBOL_GPL(ring_buffer_iter_empty); 3461EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
3450 3462
@@ -4875,9 +4887,9 @@ static __init int test_ringbuffer(void)
4875 rb_data[cpu].cnt = cpu; 4887 rb_data[cpu].cnt = cpu;
4876 rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu], 4888 rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
4877 "rbtester/%d", cpu); 4889 "rbtester/%d", cpu);
4878 if (WARN_ON(!rb_threads[cpu])) { 4890 if (WARN_ON(IS_ERR(rb_threads[cpu]))) {
4879 pr_cont("FAILED\n"); 4891 pr_cont("FAILED\n");
4880 ret = -1; 4892 ret = PTR_ERR(rb_threads[cpu]);
4881 goto out_free; 4893 goto out_free;
4882 } 4894 }
4883 4895
@@ -4887,9 +4899,9 @@ static __init int test_ringbuffer(void)
4887 4899
4888 /* Now create the rb hammer! */ 4900 /* Now create the rb hammer! */
4889 rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer"); 4901 rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer");
4890 if (WARN_ON(!rb_hammer)) { 4902 if (WARN_ON(IS_ERR(rb_hammer))) {
4891 pr_cont("FAILED\n"); 4903 pr_cont("FAILED\n");
4892 ret = -1; 4904 ret = PTR_ERR(rb_hammer);
4893 goto out_free; 4905 goto out_free;
4894 } 4906 }
4895 4907
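
kthread_create() and kthread_run() return ERR_PTR-encoded errors, never NULL, so the ring-buffer self-test above switches to IS_ERR()/PTR_ERR() and propagates the real error code. A hedged userspace imitation of that error-pointer convention (err_ptr/is_err/ptr_err are simplified stand-ins for the helpers in include/linux/err.h, and the encoding relies on common practice rather than a C standard guarantee):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

/* Encode a small negative errno at the top of the address space, the way
 * the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() helpers do (simplified). */
static inline void *err_ptr(long err)      { return (void *)err; }
static inline int   is_err(const void *p)  { return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO; }
static inline long  ptr_err(const void *p) { return (long)(intptr_t)p; }

static void *create_worker(int fail)
{
    static int dummy;

    return fail ? err_ptr(-ENOMEM) : &dummy;
}

int main(void)
{
    void *t = create_worker(1);

    if (is_err(t)) {                   /* a NULL check would miss this */
        printf("create failed: %ld\n", ptr_err(t));
        return (int)-ptr_err(t);
    }
    return 0;
}
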
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 293af3346c8c..70f519e8489e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1681,7 +1681,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1681 TRACE_FLAG_IRQS_NOSUPPORT | 1681 TRACE_FLAG_IRQS_NOSUPPORT |
1682#endif 1682#endif
1683 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | 1683 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1684 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | 1684 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
1685 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | 1685 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1686 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); 1686 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1687} 1687}
@@ -6150,11 +6150,13 @@ ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6150 return ret; 6150 return ret;
6151 6151
6152 out_reg: 6152 out_reg:
6153 ret = register_ftrace_function_probe(glob, ops, count); 6153 ret = alloc_snapshot(&global_trace);
6154 if (ret < 0)
6155 goto out;
6154 6156
6155 if (ret >= 0) 6157 ret = register_ftrace_function_probe(glob, ops, count);
6156 alloc_snapshot(&global_trace);
6157 6158
6159 out:
6158 return ret < 0 ? ret : 0; 6160 return ret < 0 ? ret : 0;
6159} 6161}
6160 6162
@@ -6825,6 +6827,7 @@ static int instance_rmdir(const char *name)
6825 } 6827 }
6826 kfree(tr->topts); 6828 kfree(tr->topts);
6827 6829
6830 free_cpumask_var(tr->tracing_cpumask);
6828 kfree(tr->name); 6831 kfree(tr->name);
6829 kfree(tr); 6832 kfree(tr);
6830 6833
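
The snapshot-trigger hunk above reorders out_reg: so the snapshot buffer is allocated first and the ftrace probe is registered only if that allocation succeeded; instance_rmdir() additionally frees tr->tracing_cpumask to plug a leak. A generic sketch of the allocate-before-register ordering, with hypothetical setup_snapshot_trigger()/register_probe_sketch() helpers:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static void *snapshot_buf;

static int alloc_snapshot_sketch(size_t size)
{
    snapshot_buf = malloc(size);
    return snapshot_buf ? 0 : -ENOMEM;
}

static int register_probe_sketch(void)
{
    return 0;                  /* pretend registration always succeeds */
}

/* Allocate the resource the callback depends on first; only register
 * once it exists, mirroring the reordered out_reg: path. */
static int setup_snapshot_trigger(size_t size)
{
    int ret = alloc_snapshot_sketch(size);

    if (ret < 0)
        return ret;
    return register_probe_sketch();
}

int main(void)
{
    int ret = setup_snapshot_trigger(4096);

    printf("%d\n", ret);
    free(snapshot_buf);
    return ret ? 1 : 0;
}
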
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c9956440d0e6..e9092a0247bf 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -659,30 +659,25 @@ static int create_trace_kprobe(int argc, char **argv)
659 pr_info("Probe point is not specified.\n"); 659 pr_info("Probe point is not specified.\n");
660 return -EINVAL; 660 return -EINVAL;
661 } 661 }
662 if (isdigit(argv[1][0])) { 662
663 if (is_return) { 663 /* try to parse an address. if that fails, try to read the
664 pr_info("Return probe point must be a symbol.\n"); 664 * input as a symbol. */
665 return -EINVAL; 665 if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) {
666 }
667 /* an address specified */
668 ret = kstrtoul(&argv[1][0], 0, (unsigned long *)&addr);
669 if (ret) {
670 pr_info("Failed to parse address.\n");
671 return ret;
672 }
673 } else {
674 /* a symbol specified */ 666 /* a symbol specified */
675 symbol = argv[1]; 667 symbol = argv[1];
676 /* TODO: support .init module functions */ 668 /* TODO: support .init module functions */
677 ret = traceprobe_split_symbol_offset(symbol, &offset); 669 ret = traceprobe_split_symbol_offset(symbol, &offset);
678 if (ret) { 670 if (ret) {
679 pr_info("Failed to parse symbol.\n"); 671 pr_info("Failed to parse either an address or a symbol.\n");
680 return ret; 672 return ret;
681 } 673 }
682 if (offset && is_return) { 674 if (offset && is_return) {
683 pr_info("Return probe must be used without offset.\n"); 675 pr_info("Return probe must be used without offset.\n");
684 return -EINVAL; 676 return -EINVAL;
685 } 677 }
678 } else if (is_return) {
679 pr_info("Return probe point must be a symbol.\n");
680 return -EINVAL;
686 } 681 }
687 argc -= 2; argv += 2; 682 argc -= 2; argv += 2;
688 683
@@ -1471,6 +1466,11 @@ static __init int kprobe_trace_self_tests_init(void)
1471 1466
1472end: 1467end:
1473 release_all_trace_kprobes(); 1468 release_all_trace_kprobes();
1469 /*
1470 * Wait for the optimizer work to finish. Otherwise it might fiddle
1471 * with probes in already freed __init text.
1472 */
1473 wait_for_kprobe_optimizer();
1474 if (warn) 1474 if (warn)
1475 pr_cont("NG: Some tests are failed. Please check them.\n"); 1475 pr_cont("NG: Some tests are failed. Please check them.\n");
1476 else 1476 else
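
create_trace_kprobe() above stops keying on "starts with a digit": it first tries to parse the whole argument as an address with kstrtoul() and only falls back to symbol[+offset] handling when that full parse fails, so symbols that happen to begin with digits are accepted. A userspace sketch of the same fallback order built on strtoul() (parse_probe_point is a hypothetical parser, not the tracefs grammar):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Try a full numeric parse first; only if that fails, treat the input as
 * a symbol name with an optional +offset suffix. */
static int parse_probe_point(const char *arg, unsigned long *addr,
                             char *sym, size_t symlen, long *offset)
{
    char *end;

    *addr = strtoul(arg, &end, 0);
    if (end != arg && *end == '\0')
        return 0;                      /* plain address, e.g. "0xffff..." */

    *addr = 0;
    *offset = 0;
    snprintf(sym, symlen, "%s", arg);
    end = strchr(sym, '+');
    if (end) {
        *offset = strtol(end + 1, NULL, 0);
        *end = '\0';
    }
    return 1;                          /* symbol, possibly digit-leading */
}

int main(void)
{
    unsigned long addr;
    char sym[64];
    long off;

    printf("%d\n", parse_probe_point("0xdeadbeef", &addr, sym, sizeof(sym), &off));
    printf("%d %s+%ld\n",
           parse_probe_point("8250_probe+16", &addr, sym, sizeof(sym), &off),
           sym, off);
    return 0;
}

Note that "8250_probe+16" fails the full numeric parse (only "8250" is consumed), so it correctly falls through to the symbol path instead of being rejected up front.
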
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2c2f971f3e75..23231237f2e2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3647,8 +3647,12 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
3647 return -EINVAL; 3647 return -EINVAL;
3648 3648
3649 /* creating multiple pwqs breaks ordering guarantee */ 3649 /* creating multiple pwqs breaks ordering guarantee */
3650 if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) 3650 if (!list_empty(&wq->pwqs)) {
3651 return -EINVAL; 3651 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
3652 return -EINVAL;
3653
3654 wq->flags &= ~__WQ_ORDERED;
3655 }
3652 3656
3653 ctx = apply_wqattrs_prepare(wq, attrs); 3657 ctx = apply_wqattrs_prepare(wq, attrs);
3654 3658
@@ -3834,6 +3838,16 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
3834 struct workqueue_struct *wq; 3838 struct workqueue_struct *wq;
3835 struct pool_workqueue *pwq; 3839 struct pool_workqueue *pwq;
3836 3840
3841 /*
3842 * Unbound && max_active == 1 used to imply ordered, which is no
3843 * longer the case on NUMA machines due to per-node pools. While
3844 * alloc_ordered_workqueue() is the right way to create an ordered
3845 * workqueue, keep the previous behavior to avoid subtle breakages
3846 * on NUMA.
3847 */
3848 if ((flags & WQ_UNBOUND) && max_active == 1)
3849 flags |= __WQ_ORDERED;
3850
3837 /* see the comment above the definition of WQ_POWER_EFFICIENT */ 3851 /* see the comment above the definition of WQ_POWER_EFFICIENT */
3838 if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient) 3852 if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
3839 flags |= WQ_UNBOUND; 3853 flags |= WQ_UNBOUND;
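
The workqueue hunks distinguish implicitly ordered queues (unbound with max_active == 1, promoted to __WQ_ORDERED here for compatibility) from explicitly ordered ones marked __WQ_ORDERED_EXPLICIT, so later attribute or max_active changes may relax the former but still refuse the latter. A tiny flag-handling sketch of that split, with made-up flag values rather than the real workqueue internals:

#include <stdbool.h>
#include <stdio.h>

#define WQ_UNBOUND            (1u << 0)
#define WQ_ORDERED            (1u << 1)   /* may be implied */
#define WQ_ORDERED_EXPLICIT   (1u << 2)   /* caller asked for ordering */

static unsigned int alloc_flags(unsigned int flags, int max_active)
{
    /* Keep the old "unbound + max_active == 1 means ordered" behaviour. */
    if ((flags & WQ_UNBOUND) && max_active == 1)
        flags |= WQ_ORDERED;
    return flags;
}

static bool set_max_active(unsigned int *flags, int max_active)
{
    if (*flags & WQ_ORDERED_EXPLICIT)
        return false;                  /* explicitly ordered: refuse */
    *flags &= ~WQ_ORDERED;             /* implicit ordering is dropped */
    (void)max_active;
    return true;
}

int main(void)
{
    unsigned int implicit_wq = alloc_flags(WQ_UNBOUND, 1);
    unsigned int explicit_wq = WQ_UNBOUND | WQ_ORDERED | WQ_ORDERED_EXPLICIT;

    printf("implicit: %s\n", set_max_active(&implicit_wq, 4) ? "ok" : "refused");
    printf("explicit: %s\n", set_max_active(&explicit_wq, 4) ? "ok" : "refused");
    return 0;
}

Keeping the two flags separate preserves the historical implicit-ordering behaviour on NUMA while still protecting queues created with alloc_ordered_workqueue().
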
@@ -4022,13 +4036,14 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
4022 struct pool_workqueue *pwq; 4036 struct pool_workqueue *pwq;
4023 4037
4024 /* disallow meddling with max_active for ordered workqueues */ 4038 /* disallow meddling with max_active for ordered workqueues */
4025 if (WARN_ON(wq->flags & __WQ_ORDERED)) 4039 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
4026 return; 4040 return;
4027 4041
4028 max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); 4042 max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
4029 4043
4030 mutex_lock(&wq->mutex); 4044 mutex_lock(&wq->mutex);
4031 4045
4046 wq->flags &= ~__WQ_ORDERED;
4032 wq->saved_max_active = max_active; 4047 wq->saved_max_active = max_active;
4033 4048
4034 for_each_pwq(pwq, wq) 4049 for_each_pwq(pwq, wq)
@@ -5154,7 +5169,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
5154 * attributes breaks ordering guarantee. Disallow exposing ordered 5169 * attributes breaks ordering guarantee. Disallow exposing ordered
5155 * workqueues. 5170 * workqueues.
5156 */ 5171 */
5157 if (WARN_ON(wq->flags & __WQ_ORDERED)) 5172 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
5158 return -EINVAL; 5173 return -EINVAL;
5159 5174
5160 wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL); 5175 wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);