Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - android-sdk/kernel-video.git/blob - mm/mprotect.c
mm/rmap, migration: Make rmap_walk_anon() and try_to_unmap_anon() more scalable
[android-sdk/kernel-video.git] / mm / mprotect.c
1 /*
2  *  mm/mprotect.c
3  *
4  *  (C) Copyright 1994 Linus Torvalds
5  *  (C) Copyright 2002 Christoph Hellwig
6  *
7  *  Address space accounting code       <alan@lxorguk.ukuu.org.uk>
8  *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
9  */
11 #include <linux/mm.h>
12 #include <linux/hugetlb.h>
13 #include <linux/shm.h>
14 #include <linux/mman.h>
15 #include <linux/fs.h>
16 #include <linux/highmem.h>
17 #include <linux/security.h>
18 #include <linux/mempolicy.h>
19 #include <linux/personality.h>
20 #include <linux/syscalls.h>
21 #include <linux/swap.h>
22 #include <linux/swapops.h>
23 #include <linux/mmu_notifier.h>
24 #include <linux/migrate.h>
25 #include <linux/perf_event.h>
26 #include <asm/uaccess.h>
27 #include <asm/pgtable.h>
28 #include <asm/cacheflush.h>
29 #include <asm/tlbflush.h>
31 #ifndef pgprot_modify
/*
 * Fallback pgprot_modify(), compiled only when no pgprot_modify macro is
 * already defined at this point. It ignores @oldprot and returns @newprot
 * unchanged.
 */
32 static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
33 {
34         return newprot;
35 }
36 #endif
/*
 * change_pte_range() - update the PTEs mapped by @pmd for [addr, end).
 *
 * The walk runs with the PTE lock obtained from pte_offset_map_lock() held
 * and between arch_enter_lazy_mmu_mode() and arch_leave_lazy_mmu_mode().
 *
 * Present entries:
 *   - !prot_numa: the entry is rewritten with pte_modify(ptent, newprot).
 *   - prot_numa:  the entry is marked with pte_mknuma() only when
 *     vm_normal_page() returns a page, the old entry is not already
 *     pte_numa() and page_mapcount() == 1. The node id of each page found
 *     is compared with the first one seen to compute the same-node result.
 *   - dirty_accountable: an entry that is pte_dirty() is also made writable
 *     with pte_mkwrite(), in either mode.
 * Non-present entries (only when CONFIG_MIGRATION is enabled and the entry
 * is not pte_file()): write migration entries are turned into read entries.
 *
 * *ret_all_same_node is always written. It stays true when no page was
 * examined (last_nid never left -1), which includes every !prot_numa call.
 *
 * Returns the number of entries counted in 'pages'.
 */
38 static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
39                 unsigned long addr, unsigned long end, pgprot_t newprot,
40                 int dirty_accountable, int prot_numa, bool *ret_all_same_node)
41 {
42         struct mm_struct *mm = vma->vm_mm;
43         pte_t *pte, oldpte;
44         spinlock_t *ptl;
45         unsigned long pages = 0;
46         bool all_same_node = true;
           /* -1 means "no page seen yet" for the same-node tracking below. */
47         int last_nid = -1;
49         pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
50         arch_enter_lazy_mmu_mode();
51         do {
52                 oldpte = *pte;
53                 if (pte_present(oldpte)) {
54                         pte_t ptent;
55                         bool updated = false;
                           /*
                            * ptent is edited locally and handed back through
                            * ptep_modify_prot_commit() below, whether or not
                            * 'updated' ended up true.
                            */
57                         ptent = ptep_modify_prot_start(mm, addr, pte);
58                         if (!prot_numa) {
59                                 ptent = pte_modify(ptent, newprot);
60                                 updated = true;
61                         } else {
62                                 struct page *page;
64                                 page = vm_normal_page(vma, addr, oldpte);
65                                 if (page) {
66                                         int this_nid = page_to_nid(page);
                                           /* First page seen sets the reference node. */
67                                         if (last_nid == -1)
68                                                 last_nid = this_nid;
69                                         if (last_nid != this_nid)
70                                                 all_same_node = false;
72                                         /* only check non-shared pages */
73                                         if (!pte_numa(oldpte) &&
74                                             page_mapcount(page) == 1) {
75                                                 ptent = pte_mknuma(ptent);
76                                                 updated = true;
77                                         }
78                                 }
79                         }
81                         /*
82                          * Avoid taking write faults for pages we know to be
83                          * dirty.
84                          */
85                         if (dirty_accountable && pte_dirty(ptent)) {
86                                 ptent = pte_mkwrite(ptent);
87                                 updated = true;
88                         }
90                         if (updated)
91                                 pages++;
92                         ptep_modify_prot_commit(mm, addr, pte, ptent);
93                 } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
94                         swp_entry_t entry = pte_to_swp_entry(oldpte);
96                         if (is_write_migration_entry(entry)) {
97                                 /*
98                                  * A protection check is difficult so
99                                  * just be safe and disable write
100                                  */
101                                 make_migration_entry_read(&entry);
102                                 set_pte_at(mm, addr, pte,
103                                         swp_entry_to_pte(entry));
104                         }
                            /*
                             * NOTE(review): this increment sits outside the
                             * is_write_migration_entry() check, so every entry
                             * reaching this branch is counted even when nothing
                             * was rewritten. Confirm whether that is intended,
                             * since the count is what this function returns.
                             */
105                         pages++;
106                 }
107         } while (pte++, addr += PAGE_SIZE, addr != end);
108         arch_leave_lazy_mmu_mode();
            /* pte was advanced once past the last entry by the loop; back up. */
109         pte_unmap_unlock(pte - 1, ptl);
111         *ret_all_same_node = all_same_node;
112         return pages;
/*
 * change_pmd_protnuma() - mark a pmd entry with pmd_mknuma().
 *
 * With CONFIG_NUMA_BALANCING the entry at (addr & PMD_MASK) is replaced by
 * pmd_mknuma(*pmd) while mm->page_table_lock is held. Without it the
 * function is a BUG() stub.
 */
115 #ifdef CONFIG_NUMA_BALANCING
116 static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
117                 pmd_t *pmd)
119         spin_lock(&mm->page_table_lock);
120         set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd));
121         spin_unlock(&mm->page_table_lock);
123 #else
124 static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
125                 pmd_t *pmd)
            /* Callers only reach this when prot_numa is set; see the stub above. */
127         BUG();
129 #endif /* CONFIG_NUMA_BALANCING */
/*
 * change_pmd_range() - apply a protection change to the pmds under @pud that
 * cover [addr, end).
 *
 * For a transparent huge pmd: if the sub-range does not span exactly
 * HPAGE_PMD_SIZE the huge page is split; otherwise change_huge_pmd() is
 * tried and, when it returns non-zero, HPAGE_PMD_NR pages are counted and
 * the pte walk is skipped for that pmd. In every other case the code falls
 * through to the regular path: empty/bad pmds are skipped and
 * change_pte_range() handles the ptes.
 *
 * Returns the accumulated page count.
 */
131 static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud,
132                 unsigned long addr, unsigned long end, pgprot_t newprot,
133                 int dirty_accountable, int prot_numa)
135         pmd_t *pmd;
136         unsigned long next;
137         unsigned long pages = 0;
            /* Written by change_pte_range() on every call before it is read. */
138         bool all_same_node;
140         pmd = pmd_offset(pud, addr);
141         do {
142                 next = pmd_addr_end(addr, end);
143                 if (pmd_trans_huge(*pmd)) {
144                         if (next - addr != HPAGE_PMD_SIZE)
145                                 split_huge_page_pmd(vma->vm_mm, pmd);
146                         else if (change_huge_pmd(vma, pmd, addr, newprot, prot_numa)) {
147                                 pages += HPAGE_PMD_NR;
148                                 continue;
149                         }
150                         /* fall through */
151                 }
152                 if (pmd_none_or_clear_bad(pmd))
153                         continue;
154                 pages += change_pte_range(vma, pmd, addr, next, newprot,
155                                  dirty_accountable, prot_numa, &all_same_node);
157                 /*
158                  * If we are changing protections for NUMA hinting faults then
159                  * set pmd_numa if the examined pages were all on the same
160                  * node. This allows a regular PMD to be handled as one fault
161                  * and effectively batches the taking of the PTL
162                  */
163                 if (prot_numa && all_same_node)
164                         change_pmd_protnuma(vma->vm_mm, addr, pmd);
165         } while (pmd++, addr = next, addr != end);
167         return pages;
/*
 * change_pud_range() - walk the puds under @pgd that cover [addr, end) and
 * pass each populated one to change_pmd_range().
 *
 * Entries for which pud_none_or_clear_bad() is true are skipped.
 * Returns the sum of the counts returned by change_pmd_range().
 */
170 static inline unsigned long change_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
171                 unsigned long addr, unsigned long end, pgprot_t newprot,
172                 int dirty_accountable, int prot_numa)
174         pud_t *pud;
175         unsigned long next;
176         unsigned long pages = 0;
178         pud = pud_offset(pgd, addr);
179         do {
                    /* next is the end of this pud's share of the range. */
180                 next = pud_addr_end(addr, end);
181                 if (pud_none_or_clear_bad(pud))
182                         continue;
183                 pages += change_pmd_range(vma, pud, addr, next, newprot,
184                                  dirty_accountable, prot_numa);
185         } while (pud++, addr = next, addr != end);
187         return pages;
/*
 * change_protection_range() - top-level page-table walk for [addr, end) in
 * @vma's mm.
 *
 * Requires addr < end (enforced with BUG_ON). The cache is flushed for the
 * range before the walk; the TLB is flushed for the whole original range
 * afterwards, but only when the walk reported a non-zero page count.
 *
 * Returns the sum of the counts returned by change_pud_range().
 */
190 static unsigned long change_protection_range(struct vm_area_struct *vma,
191                 unsigned long addr, unsigned long end, pgprot_t newprot,
192                 int dirty_accountable, int prot_numa)
194         struct mm_struct *mm = vma->vm_mm;
195         pgd_t *pgd;
196         unsigned long next;
            /* addr is advanced by the loop; keep the start for the TLB flush. */
197         unsigned long start = addr;
198         unsigned long pages = 0;
200         BUG_ON(addr >= end);
201         pgd = pgd_offset(mm, addr);
202         flush_cache_range(vma, addr, end);
203         do {
204                 next = pgd_addr_end(addr, end);
205                 if (pgd_none_or_clear_bad(pgd))
206                         continue;
207                 pages += change_pud_range(vma, pgd, addr, next, newprot,
208                                  dirty_accountable, prot_numa);
209         } while (pgd++, addr = next, addr != end);
211         /* Only flush the TLB if we actually modified any entries: */
212         if (pages)
213                 flush_tlb_range(vma, start, end);
215         return pages;
/*
 * change_protection() - change the protection of [start, end) within @vma.
 *
 * The work is bracketed by mmu_notifier_invalidate_range_start() and
 * mmu_notifier_invalidate_range_end(). hugetlb vmas are handled by
 * hugetlb_change_protection(), which is not passed @dirty_accountable or
 * @prot_numa; all other vmas go through change_protection_range().
 *
 * Returns the page count reported by whichever helper ran.
 */
218 unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
219                        unsigned long end, pgprot_t newprot,
220                        int dirty_accountable, int prot_numa)
222         struct mm_struct *mm = vma->vm_mm;
223         unsigned long pages;
225         mmu_notifier_invalidate_range_start(mm, start, end);
226         if (is_vm_hugetlb_page(vma))
227                 pages = hugetlb_change_protection(vma, start, end, newprot);
228         else
229                 pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa);
230         mmu_notifier_invalidate_range_end(mm, start, end);
232         return pages;
/*
 * mprotect_fixup() - give [start, end) of @vma the flags @newflags.
 *
 * @pprev: in: previous vma, passed to vma_merge(); out: the vma that now
 *         covers the range (the merged vma if vma_merge() succeeded,
 *         otherwise @vma).
 *
 * Steps:
 *   1. Nothing to do if the flags are unchanged.
 *   2. When the new flags include VM_WRITE and the old flags have none of
 *      VM_ACCOUNT, VM_WRITE, VM_HUGETLB, VM_SHARED or VM_NORESERVE, the
 *      range's page count is charged and VM_ACCOUNT is added to @newflags.
 *   3. Try to merge with neighbours; failing that, split @vma at @start
 *      and/or @end so that it covers exactly the range.
 *   4. Install the new flags and page protection and update the page tables
 *      with change_protection().
 *
 * Returns 0 on success, -ENOMEM if the charge is refused, or the error from
 * split_vma(). On a split failure the charge from step 2 is undone.
 */
235 int
236 mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
237         unsigned long start, unsigned long end, unsigned long newflags)
239         struct mm_struct *mm = vma->vm_mm;
240         unsigned long oldflags = vma->vm_flags;
241         long nrpages = (end - start) >> PAGE_SHIFT;
            /* Stays 0 unless the accounting branch below charges the range. */
242         unsigned long charged = 0;
243         pgoff_t pgoff;
244         int error;
245         int dirty_accountable = 0;
247         if (newflags == oldflags) {
248                 *pprev = vma;
249                 return 0;
250         }
252         /*
253          * If we make a private mapping writable we increase our commit;
254          * but (without finer accounting) cannot reduce our commit if we
255          * make it unwritable again. hugetlb mapping were accounted for
256          * even if read-only so there is no need to account for them here
257          */
258         if (newflags & VM_WRITE) {
259                 if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
260                                                 VM_SHARED|VM_NORESERVE))) {
261                         charged = nrpages;
262                         if (security_vm_enough_memory_mm(mm, charged))
263                                 return -ENOMEM;
264                         newflags |= VM_ACCOUNT;
265                 }
266         }
268         /*
269          * First try to merge with previous and/or next vma.
270          */
            /* File offset (in pages) that corresponds to 'start' within vma. */
271         pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
272         *pprev = vma_merge(mm, *pprev, start, end, newflags,
273                         vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
274         if (*pprev) {
275                 vma = *pprev;
276                 goto success;
277         }
279         *pprev = vma;
281         if (start != vma->vm_start) {
282                 error = split_vma(mm, vma, start, 1);
283                 if (error)
284                         goto fail;
285         }
287         if (end != vma->vm_end) {
288                 error = split_vma(mm, vma, end, 0);
289                 if (error)
290                         goto fail;
291         }
293 success:
294         /*
295          * vm_flags and vm_page_prot are protected by the mmap_sem
296          * held in write mode.
297          */
298         vma->vm_flags = newflags;
299         vma->vm_page_prot = pgprot_modify(vma->vm_page_prot,
300                                           vm_get_page_prot(newflags));
            /*
             * When the vma wants write notification, the page protection is
             * recomputed from the flags with VM_SHARED masked out, and
             * change_protection() is told it may re-enable write on dirty ptes.
             */
302         if (vma_wants_writenotify(vma)) {
303                 vma->vm_page_prot = vm_get_page_prot(newflags & ~VM_SHARED);
304                 dirty_accountable = 1;
305         }
307         change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable, 0);
            /* Move the range's pages from the old flag class to the new one. */
309         vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
310         vm_stat_account(mm, newflags, vma->vm_file, nrpages);
311         perf_event_mmap(vma);
312         return 0;
314 fail:
315         vm_unacct_memory(charged);
316         return error;
/*
 * mprotect() system call: apply protection @prot to [start, start + len).
 *
 * Early returns, before any lock is taken:
 *   -EINVAL  both PROT_GROWSDOWN and PROT_GROWSUP given, @start not page
 *            aligned, or arch_validate_prot() rejects @prot
 *   0        @len is zero
 *   -ENOMEM  the page-aligned end does not lie above @start
 *
 * With current->mm->mmap_sem held for writing, the vmas covering the range
 * are visited in address order and each piece is handed to mprotect_fixup().
 * Errors from that phase:
 *   -ENOMEM  no vma found, the range starts before the first vma, or there
 *            is a gap between consecutive vmas
 *   -EINVAL  a PROT_GROWS* flag was given but the vma lacks the matching
 *            VM_GROWS* flag
 *   -EACCES  a requested permission bit has no matching VM_MAY* bit
 *   other    whatever security_file_mprotect() or mprotect_fixup() returned
 *
 * Returns 0 once the whole range has been processed.
 */
319 SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
320                 unsigned long, prot)
322         unsigned long vm_flags, nstart, end, tmp, reqprot;
323         struct vm_area_struct *vma, *prev;
324         int error = -EINVAL;
            /* Split the PROT_GROWS* modifiers off from the protection bits. */
325         const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
326         prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
327         if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
328                 return -EINVAL;
330         if (start & ~PAGE_MASK)
331                 return -EINVAL;
332         if (!len)
333                 return 0;
334         len = PAGE_ALIGN(len);
335         end = start + len;
336         if (end <= start)
337                 return -ENOMEM;
338         if (!arch_validate_prot(prot))
339                 return -EINVAL;
            /* Keep the caller's original request for the security hook. */
341         reqprot = prot;
342         /*
343          * Does the application expect PROT_READ to imply PROT_EXEC:
344          */
345         if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
346                 prot |= PROT_EXEC;
348         vm_flags = calc_vm_prot_bits(prot);
350         down_write(&current->mm->mmap_sem);
352         vma = find_vma(current->mm, start);
353         error = -ENOMEM;
354         if (!vma)
355                 goto out;
356         prev = vma->vm_prev;
357         if (unlikely(grows & PROT_GROWSDOWN)) {
358                 if (vma->vm_start >= end)
359                         goto out;
                    /* The range is extended down to the start of the vma. */
360                 start = vma->vm_start;
361                 error = -EINVAL;
362                 if (!(vma->vm_flags & VM_GROWSDOWN))
363                         goto out;
364         }
365         else {
366                 if (vma->vm_start > start)
367                         goto out;
368                 if (unlikely(grows & PROT_GROWSUP)) {
                            /* The range is extended up to the end of the vma. */
369                         end = vma->vm_end;
370                         error = -EINVAL;
371                         if (!(vma->vm_flags & VM_GROWSUP))
372                                 goto out;
373                 }
374         }
            /* The range begins inside vma, so vma itself is the "previous" one. */
375         if (start > vma->vm_start)
376                 prev = vma;
378         for (nstart = start ; ; ) {
379                 unsigned long newflags;
381                 /* Here we know that  vma->vm_start <= nstart < vma->vm_end. */
                    /* Requested rwx bits replace the vma's; all other flags are kept. */
383                 newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
385                 /* newflags >> 4 shift VM_MAY% in place of VM_% */
386                 if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
387                         error = -EACCES;
388                         goto out;
389                 }
391                 error = security_file_mprotect(vma, reqprot, prot);
392                 if (error)
393                         goto out;
                    /* This step covers [nstart, tmp), clamped to the requested end. */
395                 tmp = vma->vm_end;
396                 if (tmp > end)
397                         tmp = end;
398                 error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
399                 if (error)
400                         goto out;
401                 nstart = tmp;
                    /*
                     * prev was updated by mprotect_fixup(); if it now ends
                     * beyond tmp (presumably after a merge), resume from there.
                     */
403                 if (nstart < prev->vm_end)
404                         nstart = prev->vm_end;
                    /* Done: error is 0 here from the successful mprotect_fixup(). */
405                 if (nstart >= end)
406                         goto out;
408                 vma = prev->vm_next;
409                 if (!vma || vma->vm_start != nstart) {
410                         error = -ENOMEM;
411                         goto out;
412                 }
413         }
414 out:
415         up_write(&current->mm->mmap_sem);
416         return error;