// SPDX-License-Identifier: GPL-2.0
/*
 * Helper functions for KVM guest address space mapping code
 *
 * Copyright IBM Corp. 2007, 2025
 */

#include <linux/export.h>
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagewalk.h>
#include <linux/ksm.h>
#include <asm/gmap_helpers.h>
#include <asm/pgtable.h>

/**
 * ptep_zap_swap_entry() - discard a swap entry.
 * @mm: the mm
 * @entry: the swap entry that needs to be zapped
 *
 * Discards the given swap entry. If the swap entry was an actual swap
 * entry (and not a migration entry, for example), the actual swapped
 * page is also discarded from swap.
 */
static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry))
		dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
	free_swap_and_cache(entry);
}

/**
 * gmap_helper_zap_one_page() - discard a page if it was swapped.
 * @mm: the mm
 * @vmaddr: the userspace virtual address that needs to be discarded
 *
 * If the given address maps to a swap entry, discard it.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
{
	struct vm_area_struct *vma;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;

	mmap_assert_locked(mm);

	/* Find the vm address for the guest address */
	vma = vma_lookup(mm, vmaddr);
	if (!vma || is_vm_hugetlb_page(vma))
		return;

	/* Get pointer to the page table entry */
	ptep = get_locked_pte(mm, vmaddr, &ptl);
	if (unlikely(!ptep))
		return;
	if (pte_swap(*ptep)) {
		preempt_disable();
		pgste = pgste_get_lock(ptep);

		ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
		pte_clear(mm, vmaddr, ptep);

		pgste_set_unlock(ptep, pgste);
		preempt_enable();
	}
	pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);

/**
 * gmap_helper_discard() - discard user pages in the given range
 * @mm: the mm
 * @vmaddr: starting userspace address
 * @end: end address (first address outside the range)
 *
 * All userspace pages in the range [@vmaddr, @end) are discarded and unmapped.
 *
 * Context: needs to be called while holding the mmap lock.
 */
void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
{
	struct vm_area_struct *vma;

	mmap_assert_locked(mm);

	while (vmaddr < end) {
		vma = find_vma_intersection(mm, vmaddr, end);
		if (!vma)
			return;
		if (!is_vm_hugetlb_page(vma))
			zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
		vmaddr = vma->vm_end;
	}
}
EXPORT_SYMBOL_GPL(gmap_helper_discard);

static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
				   unsigned long end, struct mm_walk *walk)
{
	unsigned long *found_addr = walk->private;

	/* Return 1 if the page is a zeropage. */
	if (is_zero_pfn(pte_pfn(*pte))) {
		/*
		 * Shared zeropage in, e.g., an FS DAX mapping? We cannot do
		 * the right thing and likely don't care: FAULT_FLAG_UNSHARE
		 * currently only works in COW mappings, which is also where
		 * mm_forbids_zeropage() is checked.
		 */
		if (!is_cow_mapping(walk->vma->vm_flags))
			return -EFAULT;

		*found_addr = addr;
		return 1;
	}
	return 0;
}

static const struct mm_walk_ops find_zeropage_ops = {
	.pte_entry	= find_zeropage_pte_entry,
	.walk_lock	= PGWALK_WRLOCK,
};

/**
 * __gmap_helper_unshare_zeropages() - unshare all shared zeropages
 * @mm: the mm whose zero pages are to be unshared
 *
 * Unshare all shared zeropages, replacing them with anonymous pages. Note
 * that we cannot simply zap all shared zeropages, because this could later
 * trigger unexpected userfaultfd missing events.
 *
 * This must be called after mm->context.allow_cow_sharing has been set to 0,
 * to avoid future mappings of shared zeropages.
 *
 * mm contracts with s390 that, even if mm were to remove a page table
 * (so that a racing walk_page_range_vma() calling pte_offset_map_lock()
 * would fail), it will never insert a page table containing empty zero
 * pages once mm_forbids_zeropage(mm), i.e. mm->context.allow_cow_sharing,
 * is set to 0.
 */
static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);
	unsigned long addr;
	vm_fault_t fault;
	int rc;

	for_each_vma(vmi, vma) {
		/*
		 * We could only look at COW mappings, but it's more
		 * future-proof to catch unexpected zeropages in other
		 * mappings and fail.
		 */
		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
			continue;
		addr = vma->vm_start;

retry:
		rc = walk_page_range_vma(vma, addr, vma->vm_end,
					 &find_zeropage_ops, &addr);
		if (rc < 0)
			return rc;
		else if (!rc)
			continue;

		/* addr was updated by find_zeropage_pte_entry() */
		fault = handle_mm_fault(vma, addr,
					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
					NULL);
		if (fault & VM_FAULT_OOM)
			return -ENOMEM;
		/*
		 * See break_ksm(): even after handle_mm_fault() returned 0,
		 * we must start the lookup from the current address, because
		 * handle_mm_fault() may back out if there's any difficulty.
		 *
		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected, but
		 * maybe they could trigger in the future on concurrent
		 * truncation. In that case, the shared zeropage would be gone
		 * and we can simply retry and make progress.
		 */
		cond_resched();
		goto retry;
	}

	return 0;
}

/**
 * gmap_helper_disable_cow_sharing() - disable all COW sharing
 *
 * Disable most COW-sharing of memory pages for the whole process:
 * (1) Disable KSM and unmerge/unshare any KSM pages.
 * (2) Disallow shared zeropages and unshare any zeropages that are mapped.
 *
 * Note that we currently don't bother with COW-shared pages that are shared
 * with parent/child processes due to fork().
 *
 * Context: needs to be called while holding the mmap lock in write mode.
 *
 * Return: 0 on success, a negative error code on failure.
 */
int gmap_helper_disable_cow_sharing(void)
{
	struct mm_struct *mm = current->mm;
	int rc;

	mmap_assert_write_locked(mm);

	if (!mm->context.allow_cow_sharing)
		return 0;

	mm->context.allow_cow_sharing = 0;

	/* Replace all shared zeropages with anonymous pages. */
	rc = __gmap_helper_unshare_zeropages(mm);
	/*
	 * Make sure to disable KSM (if enabled for the whole process or
	 * individual VMAs). Note that nothing currently hinders user space
	 * from re-enabling it.
	 */
	if (!rc)
		rc = ksm_disable(mm);
	if (rc)
		mm->context.allow_cow_sharing = 1;
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
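
/*
 * Example usage (editor's sketch, not part of the original file; the calling
 * code below is hypothetical and only illustrates the locking context
 * documented above). The exported helpers are meant to be called with the
 * mmap lock held, e.g.:
 *
 *	// Discard the userspace backing of a range of guest memory;
 *	// gmap_helper_discard() only asserts that the mmap lock is held.
 *	mmap_read_lock(mm);
 *	gmap_helper_discard(mm, vmaddr, end);
 *	mmap_read_unlock(mm);
 *
 *	// Disable COW sharing for the whole process; this helper asserts
 *	// that the mmap lock is held in write mode.
 *	mmap_write_lock(current->mm);
 *	rc = gmap_helper_disable_cow_sharing();
 *	mmap_write_unlock(current->mm);
 */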