mirror of
https://github.com/torvalds/linux.git
synced 2026-04-18 06:44:00 -04:00
Merge tag 'mm-stable-2026-04-13-21-45' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: - "maple_tree: Replace big node with maple copy" (Liam Howlett) Mainly preparatory work for ongoing development but it does reduce stack usage and is an improvement. - "mm, swap: swap table phase III: remove swap_map" (Kairui Song) Offers memory savings by removing the static swap_map. It also yields some CPU savings and implements several cleanups. - "mm: memfd_luo: preserve file seals" (Pratyush Yadav) File seal preservation to LUO's memfd code - "mm: zswap: add per-memcg stat for incompressible pages" (Jiayuan Chen) Additional userspace stats reporting to zswap - "arch, mm: consolidate empty_zero_page" (Mike Rapoport) Some cleanups for our handling of ZERO_PAGE() and zero_pfn - "mm/kmemleak: Improve scan_should_stop() implementation" (Zhongqiu Han) A robustness improvement and some cleanups in the kmemleak code - "Improve khugepaged scan logic" (Vernon Yang) Improve khugepaged scan logic and reduce CPU consumption by prioritizing scanning tasks that access memory frequently - "Make KHO Stateless" (Jason Miu) Simplify Kexec Handover by transitioning KHO from an xarray-based metadata tracking system with serialization to a radix tree data structure that can be passed directly to the next kernel - "mm: vmscan: add PID and cgroup ID to vmscan tracepoints" (Thomas Ballasi and Steven Rostedt) Enhance vmscan's tracepointing - "mm: arch/shstk: Common shadow stack mapping helper and VM_NOHUGEPAGE" (Catalin Marinas) Cleanup for the shadow stack code: remove per-arch code in favour of a generic implementation - "Fix KASAN support for KHO restored vmalloc regions" (Pasha Tatashin) Fix a WARN() which can be emitted when KHO restores a vmalloc area - "mm: Remove stray references to pagevec" (Tal Zussman) Several cleanups, mainly updating references to "struct pagevec", which became folio_batch three years ago - "mm: Eliminate fake head pages from vmemmap optimization" (Kiryl Shutsemau) Simplify the HugeTLB vmemmap optimization (HVO) by 
changing how tail pages encode their relationship to the head page - "mm/damon/core: improve DAMOS quota efficiency for core layer filters" (SeongJae Park) Improve two problematic behaviors of DAMOS that make it less efficient when core layer filters are used - "mm/damon: strictly respect min_nr_regions" (SeongJae Park) Improve DAMON usability by extending the treatment of the min_nr_regions user-settable parameter - "mm/page_alloc: pcp locking cleanup" (Vlastimil Babka) The proper fix for a previously hotfixed SMP=n issue. Code simplifications and cleanups ensued - "mm: cleanups around unmapping / zapping" (David Hildenbrand) A bunch of cleanups around unmapping and zapping. Mostly simplifications, code movements, documentation and renaming of zapping functions - "support batched checking of the young flag for MGLRU" (Baolin Wang) Batched checking of the young flag for MGLRU. It's part cleanups; one benchmark shows large performance benefits for arm64 - "memcg: obj stock and slab stat caching cleanups" (Johannes Weiner) memcg cleanup and robustness improvements - "Allow order zero pages in page reporting" (Yuvraj Sakshith) Enhance free page reporting - it presently and undesirably omits order-0 pages when reporting free memory. 
- "mm: vma flag tweaks" (Lorenzo Stoakes) Cleanup work following from the recent conversion of the VMA flags to a bitmap - "mm/damon: add optional debugging-purpose sanity checks" (SeongJae Park) Add some more developer-facing debug checks into DAMON core - "mm/damon: test and document power-of-2 min_region_sz requirement" (SeongJae Park) An additional DAMON kunit test and makes some adjustments to the addr_unit parameter handling - "mm/damon/core: make passed_sample_intervals comparisons overflow-safe" (SeongJae Park) Fix a hard-to-hit time overflow issue in DAMON core - "mm/damon: improve/fixup/update ratio calculation, test and documentation" (SeongJae Park) A batch of misc/minor improvements and fixups for DAMON - "mm: move vma_(kernel|mmu)_pagesize() out of hugetlb.c" (David Hildenbrand) Fix a possible issue with dax-device when CONFIG_HUGETLB=n. Some code movement was required. - "zram: recompression cleanups and tweaks" (Sergey Senozhatsky) A somewhat random mix of fixups, recompression cleanups and improvements in the zram code - "mm/damon: support multiple goal-based quota tuning algorithms" (SeongJae Park) Extend DAMOS quotas goal auto-tuning to support multiple tuning algorithms that users can select - "mm: thp: reduce unnecessary start_stop_khugepaged()" (Breno Leitao) Fix the khugepaged sysfs handling so we no longer spam the logs with reams of junk when starting/stopping khugepaged - "mm: improve map count checks" (Lorenzo Stoakes) Provide some cleanups and slight fixes in the mremap, mmap and vma code - "mm/damon: support addr_unit on default monitoring targets for modules" (SeongJae Park) Extend the use of DAMON core's addr_unit tunable - "mm: khugepaged cleanups and mTHP prerequisites" (Nico Pache) Cleanups to khugepaged that serve as a base for Nico's planned khugepaged mTHP support - "mm: memory hot(un)plug and SPARSEMEM cleanups" (David Hildenbrand) Code movement and cleanups in the memhotplug and sparsemem code - "mm: remove 
CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE and cleanup CONFIG_MIGRATION" (David Hildenbrand) Rationalize some memhotplug Kconfig support - "change young flag check functions to return bool" (Baolin Wang) Cleanups to change all young flag check functions to return bool - "mm/damon/sysfs: fix memory leak and NULL dereference issues" (Josh Law and SeongJae Park) Fix a few potential DAMON bugs - "mm/vma: convert vm_flags_t to vma_flags_t in vma code" (Lorenzo Stoakes) Convert a lot of the existing use of the legacy vm_flags_t data type to the new vma_flags_t type which replaces it. Mainly in the vma code. - "mm: expand mmap_prepare functionality and usage" (Lorenzo Stoakes) Expand the mmap_prepare functionality, which is intended to replace the deprecated f_op->mmap hook which has been the source of bugs and security issues for some time. Cleanups, documentation, extension of mmap_prepare into filesystem drivers - "mm/huge_memory: refactor zap_huge_pmd()" (Lorenzo Stoakes) Simplify and clean up zap_huge_pmd(). Additional cleanups around vm_normal_folio_pmd() and the softleaf functionality are performed. 
* tag 'mm-stable-2026-04-13-21-45' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (369 commits) mm: fix deferred split queue races during migration mm/khugepaged: fix issue with tracking lock mm/huge_memory: add and use has_deposited_pgtable() mm/huge_memory: add and use normal_or_softleaf_folio_pmd() mm: add softleaf_is_valid_pmd_entry(), pmd_to_softleaf_folio() mm/huge_memory: separate out the folio part of zap_huge_pmd() mm/huge_memory: use mm instead of tlb->mm mm/huge_memory: remove unnecessary sanity checks mm/huge_memory: deduplicate zap deposited table call mm/huge_memory: remove unnecessary VM_BUG_ON_PAGE() mm/huge_memory: add a common exit path to zap_huge_pmd() mm/huge_memory: handle buggy PMD entry in zap_huge_pmd() mm/huge_memory: have zap_huge_pmd return a boolean, add kdoc mm/huge: avoid big else branch in zap_huge_pmd() mm/huge_memory: simplify vma_is_specal_huge() mm: on remap assert that input range within the proposed VMA mm: add mmap_action_map_kernel_pages[_full]() uio: replace deprecated mmap hook with mmap_prepare in uio_info drivers: hv: vmbus: replace deprecated mmap hook with mmap_prepare mm: allow handling of stacked mmap_prepare hooks in more drivers ...
This commit is contained in:
@@ -316,6 +316,12 @@ Contact: SeongJae Park <sj@kernel.org>
|
|||||||
Description: Writing to and reading from this file sets and gets the path
|
Description: Writing to and reading from this file sets and gets the path
|
||||||
parameter of the goal.
|
parameter of the goal.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/goal_tuner
|
||||||
|
Date: Mar 2026
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the
|
||||||
|
goal-based effective quota auto-tuning algorithm to use.
|
||||||
|
|
||||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/sz_permil
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/sz_permil
|
||||||
Date: Mar 2022
|
Date: Mar 2022
|
||||||
Contact: SeongJae Park <sj@kernel.org>
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
|||||||
@@ -462,7 +462,7 @@ know it via /sys/block/zram0/bd_stat's 3rd column.
|
|||||||
recompression
|
recompression
|
||||||
-------------
|
-------------
|
||||||
|
|
||||||
With CONFIG_ZRAM_MULTI_COMP, zram can recompress pages using alternative
|
With `CONFIG_ZRAM_MULTI_COMP`, zram can recompress pages using alternative
|
||||||
(secondary) compression algorithms. The basic idea is that alternative
|
(secondary) compression algorithms. The basic idea is that alternative
|
||||||
compression algorithm can provide better compression ratio at a price of
|
compression algorithm can provide better compression ratio at a price of
|
||||||
(potentially) slower compression/decompression speeds. Alternative compression
|
(potentially) slower compression/decompression speeds. Alternative compression
|
||||||
@@ -471,7 +471,7 @@ that default algorithm failed to compress). Another application is idle pages
|
|||||||
recompression - pages that are cold and sit in the memory can be recompressed
|
recompression - pages that are cold and sit in the memory can be recompressed
|
||||||
using more effective algorithm and, hence, reduce zsmalloc memory usage.
|
using more effective algorithm and, hence, reduce zsmalloc memory usage.
|
||||||
|
|
||||||
With CONFIG_ZRAM_MULTI_COMP, zram supports up to 4 compression algorithms:
|
With `CONFIG_ZRAM_MULTI_COMP`, zram supports up to 4 compression algorithms:
|
||||||
one primary and up to 3 secondary ones. Primary zram compressor is explained
|
one primary and up to 3 secondary ones. Primary zram compressor is explained
|
||||||
in "3) Select compression algorithm", secondary algorithms are configured
|
in "3) Select compression algorithm", secondary algorithms are configured
|
||||||
using recomp_algorithm device attribute.
|
using recomp_algorithm device attribute.
|
||||||
@@ -495,56 +495,43 @@ configuration:::
|
|||||||
#select deflate recompression algorithm, priority 2
|
#select deflate recompression algorithm, priority 2
|
||||||
echo "algo=deflate priority=2" > /sys/block/zramX/recomp_algorithm
|
echo "algo=deflate priority=2" > /sys/block/zramX/recomp_algorithm
|
||||||
|
|
||||||
Another device attribute that CONFIG_ZRAM_MULTI_COMP enables is recompress,
|
Another device attribute that `CONFIG_ZRAM_MULTI_COMP` enables is `recompress`,
|
||||||
which controls recompression.
|
which controls recompression.
|
||||||
|
|
||||||
Examples:::
|
Examples:::
|
||||||
|
|
||||||
#IDLE pages recompression is activated by `idle` mode
|
#IDLE pages recompression is activated by `idle` mode
|
||||||
echo "type=idle" > /sys/block/zramX/recompress
|
echo "type=idle priority=1" > /sys/block/zramX/recompress
|
||||||
|
|
||||||
#HUGE pages recompression is activated by `huge` mode
|
#HUGE pages recompression is activated by `huge` mode
|
||||||
echo "type=huge" > /sys/block/zram0/recompress
|
echo "type=huge priority=2" > /sys/block/zram0/recompress
|
||||||
|
|
||||||
#HUGE_IDLE pages recompression is activated by `huge_idle` mode
|
#HUGE_IDLE pages recompression is activated by `huge_idle` mode
|
||||||
echo "type=huge_idle" > /sys/block/zramX/recompress
|
echo "type=huge_idle priority=1" > /sys/block/zramX/recompress
|
||||||
|
|
||||||
The number of idle pages can be significant, so user-space can pass a size
|
The number of idle pages can be significant, so user-space can pass a size
|
||||||
threshold (in bytes) to the recompress knob: zram will recompress only pages
|
threshold (in bytes) to the recompress knob: zram will recompress only pages
|
||||||
of equal or greater size:::
|
of equal or greater size:::
|
||||||
|
|
||||||
#recompress all pages larger than 3000 bytes
|
#recompress all pages larger than 3000 bytes
|
||||||
echo "threshold=3000" > /sys/block/zramX/recompress
|
echo "threshold=3000 priority=1" > /sys/block/zramX/recompress
|
||||||
|
|
||||||
#recompress idle pages larger than 2000 bytes
|
#recompress idle pages larger than 2000 bytes
|
||||||
echo "type=idle threshold=2000" > /sys/block/zramX/recompress
|
echo "type=idle threshold=2000 priority=1" > \
|
||||||
|
/sys/block/zramX/recompress
|
||||||
|
|
||||||
It is also possible to limit the number of pages zram re-compression will
|
It is also possible to limit the number of pages zram re-compression will
|
||||||
attempt to recompress:::
|
attempt to recompress:::
|
||||||
|
|
||||||
echo "type=huge_idle max_pages=42" > /sys/block/zramX/recompress
|
echo "type=huge_idle priority=1 max_pages=42" > \
|
||||||
|
/sys/block/zramX/recompress
|
||||||
|
|
||||||
During re-compression for every page, that matches re-compression criteria,
|
It is advised to always specify `priority` parameter. While it is also
|
||||||
ZRAM iterates the list of registered alternative compression algorithms in
|
possible to specify `algo` parameter, so that `zram` will use algorithm's
|
||||||
order of their priorities. ZRAM stops either when re-compression was
|
name to determine the priority, it is not recommended, since it can lead to
|
||||||
successful (re-compressed object is smaller in size than the original one)
|
unexpected results when the same algorithm is configured with different
|
||||||
and matches re-compression criteria (e.g. size threshold) or when there are
|
priorities (e.g. different parameters). `priority` is the only way to
|
||||||
no secondary algorithms left to try. If none of the secondary algorithms can
|
guarantee that the expected algorithm will be used.
|
||||||
successfully re-compressed the page such a page is marked as incompressible,
|
|
||||||
so ZRAM will not attempt to re-compress it in the future.
|
|
||||||
|
|
||||||
This re-compression behaviour, when it iterates through the list of
|
|
||||||
registered compression algorithms, increases our chances of finding the
|
|
||||||
algorithm that successfully compresses a particular page. Sometimes, however,
|
|
||||||
it is convenient (and sometimes even necessary) to limit recompression to
|
|
||||||
only one particular algorithm so that it will not try any other algorithms.
|
|
||||||
This can be achieved by providing a `algo` or `priority` parameter:::
|
|
||||||
|
|
||||||
#use zstd algorithm only (if registered)
|
|
||||||
echo "type=huge algo=zstd" > /sys/block/zramX/recompress
|
|
||||||
|
|
||||||
#use zstd algorithm only (if zstd was registered under priority 1)
|
|
||||||
echo "type=huge priority=1" > /sys/block/zramX/recompress
|
|
||||||
|
|
||||||
memory tracking
|
memory tracking
|
||||||
===============
|
===============
|
||||||
|
|||||||
@@ -1734,6 +1734,11 @@ The following nested keys are defined.
|
|||||||
zswpwb
|
zswpwb
|
||||||
Number of pages written from zswap to swap.
|
Number of pages written from zswap to swap.
|
||||||
|
|
||||||
|
zswap_incomp
|
||||||
|
Number of incompressible pages currently stored in zswap
|
||||||
|
without compression. These pages could not be compressed to
|
||||||
|
a size smaller than PAGE_SIZE, so they are stored as-is.
|
||||||
|
|
||||||
thp_fault_alloc (npn)
|
thp_fault_alloc (npn)
|
||||||
Number of transparent hugepages which were allocated to satisfy
|
Number of transparent hugepages which were allocated to satisfy
|
||||||
a page fault. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE
|
a page fault. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE
|
||||||
|
|||||||
@@ -141,7 +141,7 @@ nodemask_t
|
|||||||
The size of a nodemask_t type. Used to compute the number of online
|
The size of a nodemask_t type. Used to compute the number of online
|
||||||
nodes.
|
nodes.
|
||||||
|
|
||||||
(page, flags|_refcount|mapping|lru|_mapcount|private|compound_order|compound_head)
|
(page, flags|_refcount|mapping|lru|_mapcount|private|compound_order|compound_info)
|
||||||
----------------------------------------------------------------------------------
|
----------------------------------------------------------------------------------
|
||||||
|
|
||||||
User-space tools compute their values based on the offset of these
|
User-space tools compute their values based on the offset of these
|
||||||
|
|||||||
@@ -2970,6 +2970,12 @@ Kernel parameters
|
|||||||
Format: <bool>
|
Format: <bool>
|
||||||
Default: CONFIG_KFENCE_DEFERRABLE
|
Default: CONFIG_KFENCE_DEFERRABLE
|
||||||
|
|
||||||
|
kfence.fault= [MM,KFENCE] Controls the behavior when a KFENCE
|
||||||
|
error is detected.
|
||||||
|
report - print the error report and continue (default).
|
||||||
|
oops - print the error report and oops.
|
||||||
|
panic - print the error report and panic.
|
||||||
|
|
||||||
kfence.sample_interval=
|
kfence.sample_interval=
|
||||||
[MM,KFENCE] KFENCE's sample interval in milliseconds.
|
[MM,KFENCE] KFENCE's sample interval in milliseconds.
|
||||||
Format: <unsigned integer>
|
Format: <unsigned integer>
|
||||||
|
|||||||
@@ -91,8 +91,8 @@ increases and decreases the effective level of the quota aiming the LRU
|
|||||||
|
|
||||||
Disabled by default.
|
Disabled by default.
|
||||||
|
|
||||||
Auto-tune monitoring intervals
|
autotune_monitoring_intervals
|
||||||
------------------------------
|
-----------------------------
|
||||||
|
|
||||||
If this parameter is set as ``Y``, DAMON_LRU_SORT automatically tunes DAMON's
|
If this parameter is set as ``Y``, DAMON_LRU_SORT automatically tunes DAMON's
|
||||||
sampling and aggregation intervals. The auto-tuning aims to capture meaningful
|
sampling and aggregation intervals. The auto-tuning aims to capture meaningful
|
||||||
@@ -221,6 +221,10 @@ But, setting this too high could result in increased monitoring overhead.
|
|||||||
Please refer to the DAMON documentation (:doc:`usage`) for more detail. 10 by
|
Please refer to the DAMON documentation (:doc:`usage`) for more detail. 10 by
|
||||||
default.
|
default.
|
||||||
|
|
||||||
|
Note that this must be 3 or higher. Please refer to the :ref:`Monitoring
|
||||||
|
<damon_design_monitoring>` section of the design document for the rationale
|
||||||
|
behind this lower bound.
|
||||||
|
|
||||||
max_nr_regions
|
max_nr_regions
|
||||||
--------------
|
--------------
|
||||||
|
|
||||||
@@ -351,3 +355,8 @@ the LRU-list based page granularity reclamation. ::
|
|||||||
# echo 400 > wmarks_mid
|
# echo 400 > wmarks_mid
|
||||||
# echo 200 > wmarks_low
|
# echo 200 > wmarks_low
|
||||||
# echo Y > enabled
|
# echo Y > enabled
|
||||||
|
|
||||||
|
Note that this module (damon_lru_sort) cannot run simultaneously with other
|
||||||
|
DAMON-based special-purpose modules. Refer to :ref:`DAMON design special
|
||||||
|
purpose modules exclusivity <damon_design_special_purpose_modules_exclusivity>`
|
||||||
|
for more details.
|
||||||
|
|||||||
@@ -204,6 +204,10 @@ monitoring. This can be used to set lower-bound of the monitoring quality.
|
|||||||
But, setting this too high could result in increased monitoring overhead.
|
But, setting this too high could result in increased monitoring overhead.
|
||||||
Please refer to the DAMON documentation (:doc:`usage`) for more detail.
|
Please refer to the DAMON documentation (:doc:`usage`) for more detail.
|
||||||
|
|
||||||
|
Note that this must be 3 or higher. Please refer to the :ref:`Monitoring
|
||||||
|
<damon_design_monitoring>` section of the design document for the rationale
|
||||||
|
behind this lower bound.
|
||||||
|
|
||||||
max_nr_regions
|
max_nr_regions
|
||||||
--------------
|
--------------
|
||||||
|
|
||||||
@@ -318,6 +322,11 @@ granularity reclamation. ::
|
|||||||
# echo 200 > wmarks_low
|
# echo 200 > wmarks_low
|
||||||
# echo Y > enabled
|
# echo Y > enabled
|
||||||
|
|
||||||
|
Note that this module (damon_reclaim) cannot run simultaneously with other
|
||||||
|
DAMON-based special-purpose modules. Refer to :ref:`DAMON design special
|
||||||
|
purpose modules exclusivity <damon_design_special_purpose_modules_exclusivity>`
|
||||||
|
for more details.
|
||||||
|
|
||||||
.. [1] https://research.google/pubs/pub48551/
|
.. [1] https://research.google/pubs/pub48551/
|
||||||
.. [2] https://lwn.net/Articles/787611/
|
.. [2] https://lwn.net/Articles/787611/
|
||||||
.. [3] https://www.kernel.org/doc/html/latest/mm/free_page_reporting.html
|
.. [3] https://www.kernel.org/doc/html/latest/mm/free_page_reporting.html
|
||||||
|
|||||||
@@ -45,6 +45,11 @@ You can enable DAMON_STAT by setting the value of this parameter as ``Y``.
|
|||||||
Setting it as ``N`` disables DAMON_STAT. The default value is set by
|
Setting it as ``N`` disables DAMON_STAT. The default value is set by
|
||||||
``CONFIG_DAMON_STAT_ENABLED_DEFAULT`` build config option.
|
``CONFIG_DAMON_STAT_ENABLED_DEFAULT`` build config option.
|
||||||
|
|
||||||
|
Note that this module (damon_stat) cannot run simultaneously with other
|
||||||
|
DAMON-based special-purpose modules. Refer to :ref:`DAMON design special
|
||||||
|
purpose modules exclusivity <damon_design_special_purpose_modules_exclusivity>`
|
||||||
|
for more details.
|
||||||
|
|
||||||
.. _damon_stat_aggr_interval_us:
|
.. _damon_stat_aggr_interval_us:
|
||||||
|
|
||||||
aggr_interval_us
|
aggr_interval_us
|
||||||
|
|||||||
@@ -83,7 +83,7 @@ comma (",").
|
|||||||
│ │ │ │ │ │ │ │ sz/min,max
|
│ │ │ │ │ │ │ │ sz/min,max
|
||||||
│ │ │ │ │ │ │ │ nr_accesses/min,max
|
│ │ │ │ │ │ │ │ nr_accesses/min,max
|
||||||
│ │ │ │ │ │ │ │ age/min,max
|
│ │ │ │ │ │ │ │ age/min,max
|
||||||
│ │ │ │ │ │ │ :ref:`quotas <sysfs_quotas>`/ms,bytes,reset_interval_ms,effective_bytes
|
│ │ │ │ │ │ │ :ref:`quotas <sysfs_quotas>`/ms,bytes,reset_interval_ms,effective_bytes,goal_tuner
|
||||||
│ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil
|
│ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil
|
||||||
│ │ │ │ │ │ │ │ :ref:`goals <sysfs_schemes_quota_goals>`/nr_goals
|
│ │ │ │ │ │ │ │ :ref:`goals <sysfs_schemes_quota_goals>`/nr_goals
|
||||||
│ │ │ │ │ │ │ │ │ 0/target_metric,target_value,current_value,nid,path
|
│ │ │ │ │ │ │ │ │ 0/target_metric,target_value,current_value,nid,path
|
||||||
@@ -377,9 +377,9 @@ schemes/<N>/quotas/
|
|||||||
The directory for the :ref:`quotas <damon_design_damos_quotas>` of the given
|
The directory for the :ref:`quotas <damon_design_damos_quotas>` of the given
|
||||||
DAMON-based operation scheme.
|
DAMON-based operation scheme.
|
||||||
|
|
||||||
Under ``quotas`` directory, four files (``ms``, ``bytes``,
|
Under ``quotas`` directory, five files (``ms``, ``bytes``,
|
||||||
``reset_interval_ms``, ``effective_bytes``) and two directories (``weights`` and
|
``reset_interval_ms``, ``effective_bytes`` and ``goal_tuner``) and two
|
||||||
``goals``) exist.
|
directories (``weights`` and ``goals``) exist.
|
||||||
|
|
||||||
You can set the ``time quota`` in milliseconds, ``size quota`` in bytes, and
|
You can set the ``time quota`` in milliseconds, ``size quota`` in bytes, and
|
||||||
``reset interval`` in milliseconds by writing the values to the three files,
|
``reset interval`` in milliseconds by writing the values to the three files,
|
||||||
@@ -390,6 +390,14 @@ apply the action to only up to ``bytes`` bytes of memory regions within the
|
|||||||
quota limits unless at least one :ref:`goal <sysfs_schemes_quota_goals>` is
|
quota limits unless at least one :ref:`goal <sysfs_schemes_quota_goals>` is
|
||||||
set.
|
set.
|
||||||
|
|
||||||
|
You can set the goal-based effective quota auto-tuning algorithm to use, by
|
||||||
|
writing the algorithm name to ``goal_tuner`` file. Reading the file returns
|
||||||
|
the currently selected tuner algorithm. Refer to the design documentation of
|
||||||
|
:ref:`automatic quota tuning goals <damon_design_damos_quotas_auto_tuning>` for
|
||||||
|
the background design of the feature and the name of the selectable algorithms.
|
||||||
|
Refer to :ref:`goals directory <sysfs_schemes_quota_goals>` for the goals
|
||||||
|
setup.
|
||||||
|
|
||||||
The time quota is internally transformed to a size quota. Between the
|
The time quota is internally transformed to a size quota. Between the
|
||||||
transformed size quota and user-specified size quota, smaller one is applied.
|
transformed size quota and user-specified size quota, smaller one is applied.
|
||||||
Based on the user-specified :ref:`goal <sysfs_schemes_quota_goals>`, the
|
Based on the user-specified :ref:`goal <sysfs_schemes_quota_goals>`, the
|
||||||
|
|||||||
@@ -28,20 +28,10 @@ per NUMA node scratch regions on boot.
|
|||||||
Perform a KHO kexec
|
Perform a KHO kexec
|
||||||
===================
|
===================
|
||||||
|
|
||||||
First, before you perform a KHO kexec, you need to move the system into
|
To perform a KHO kexec, load the target payload and kexec into it. It
|
||||||
the :ref:`KHO finalization phase <kho-finalization-phase>` ::
|
is important that you use the ``-s`` parameter to use the in-kernel
|
||||||
|
kexec file loader, as user space kexec tooling currently has no
|
||||||
$ echo 1 > /sys/kernel/debug/kho/out/finalize
|
support for KHO with the user space based file loader ::
|
||||||
|
|
||||||
After this command, the KHO FDT is available in
|
|
||||||
``/sys/kernel/debug/kho/out/fdt``. Other subsystems may also register
|
|
||||||
their own preserved sub FDTs under
|
|
||||||
``/sys/kernel/debug/kho/out/sub_fdts/``.
|
|
||||||
|
|
||||||
Next, load the target payload and kexec into it. It is important that you
|
|
||||||
use the ``-s`` parameter to use the in-kernel kexec file loader, as user
|
|
||||||
space kexec tooling currently has no support for KHO with the user space
|
|
||||||
based file loader ::
|
|
||||||
|
|
||||||
# kexec -l /path/to/bzImage --initrd /path/to/initrd -s
|
# kexec -l /path/to/bzImage --initrd /path/to/initrd -s
|
||||||
# kexec -e
|
# kexec -e
|
||||||
@@ -52,40 +42,19 @@ For example, if you used ``reserve_mem`` command line parameter to create
|
|||||||
an early memory reservation, the new kernel will have that memory at the
|
an early memory reservation, the new kernel will have that memory at the
|
||||||
same physical address as the old kernel.
|
same physical address as the old kernel.
|
||||||
|
|
||||||
Abort a KHO exec
|
|
||||||
================
|
|
||||||
|
|
||||||
You can move the system out of KHO finalization phase again by calling ::
|
|
||||||
|
|
||||||
$ echo 0 > /sys/kernel/debug/kho/out/active
|
|
||||||
|
|
||||||
After this command, the KHO FDT is no longer available in
|
|
||||||
``/sys/kernel/debug/kho/out/fdt``.
|
|
||||||
|
|
||||||
debugfs Interfaces
|
debugfs Interfaces
|
||||||
==================
|
==================
|
||||||
|
|
||||||
|
These debugfs interfaces are available when the kernel is compiled with
|
||||||
|
``CONFIG_KEXEC_HANDOVER_DEBUGFS`` enabled.
|
||||||
|
|
||||||
Currently KHO creates the following debugfs interfaces. Notice that these
|
Currently KHO creates the following debugfs interfaces. Notice that these
|
||||||
interfaces may change in the future. They will be moved to sysfs once KHO is
|
interfaces may change in the future. They will be moved to sysfs once KHO is
|
||||||
stabilized.
|
stabilized.
|
||||||
|
|
||||||
``/sys/kernel/debug/kho/out/finalize``
|
|
||||||
Kexec HandOver (KHO) allows Linux to transition the state of
|
|
||||||
compatible drivers into the next kexec'ed kernel. To do so,
|
|
||||||
device drivers will instruct KHO to preserve memory regions,
|
|
||||||
which could contain serialized kernel state.
|
|
||||||
While the state is serialized, they are unable to perform
|
|
||||||
any modifications to state that was serialized, such as
|
|
||||||
handed over memory allocations.
|
|
||||||
|
|
||||||
When this file contains "1", the system is in the transition
|
|
||||||
state. When contains "0", it is not. To switch between the
|
|
||||||
two states, echo the respective number into this file.
|
|
||||||
|
|
||||||
``/sys/kernel/debug/kho/out/fdt``
|
``/sys/kernel/debug/kho/out/fdt``
|
||||||
When KHO state tree is finalized, the kernel exposes the
|
The kernel exposes the flattened device tree blob that carries its
|
||||||
flattened device tree blob that carries its current KHO
|
current KHO state in this file. Kexec user space tooling can use this
|
||||||
state in this file. Kexec user space tooling can use this
|
|
||||||
as input file for the KHO payload image.
|
as input file for the KHO payload image.
|
||||||
|
|
||||||
``/sys/kernel/debug/kho/out/scratch_len``
|
``/sys/kernel/debug/kho/out/scratch_len``
|
||||||
@@ -100,8 +69,8 @@ stabilized.
|
|||||||
it should place its payload images.
|
it should place its payload images.
|
||||||
|
|
||||||
``/sys/kernel/debug/kho/out/sub_fdts/``
|
``/sys/kernel/debug/kho/out/sub_fdts/``
|
||||||
In the KHO finalization phase, KHO producers register their own
|
KHO producers can register their own FDT or another binary blob under
|
||||||
FDT blob under this directory.
|
this directory.
|
||||||
|
|
||||||
``/sys/kernel/debug/kho/in/fdt``
|
``/sys/kernel/debug/kho/in/fdt``
|
||||||
When the kernel was booted with Kexec HandOver (KHO),
|
When the kernel was booted with Kexec HandOver (KHO),
|
||||||
|
|||||||
@@ -217,7 +217,7 @@ MPOL_PREFERRED
|
|||||||
the MPOL_F_STATIC_NODES or MPOL_F_RELATIVE_NODES flags
|
the MPOL_F_STATIC_NODES or MPOL_F_RELATIVE_NODES flags
|
||||||
described below.
|
described below.
|
||||||
|
|
||||||
MPOL_INTERLEAVED
|
MPOL_INTERLEAVE
|
||||||
This mode specifies that page allocations be interleaved, on a
|
This mode specifies that page allocations be interleaved, on a
|
||||||
page granularity, across the nodes specified in the policy.
|
page granularity, across the nodes specified in the policy.
|
||||||
This mode also behaves slightly differently, based on the
|
This mode also behaves slightly differently, based on the
|
||||||
|
|||||||
@@ -22,6 +22,12 @@ memblock preservation ABI
|
|||||||
.. kernel-doc:: include/linux/kho/abi/memblock.h
|
.. kernel-doc:: include/linux/kho/abi/memblock.h
|
||||||
:doc: memblock kexec handover ABI
|
:doc: memblock kexec handover ABI
|
||||||
|
|
||||||
|
KHO persistent memory tracker ABI
|
||||||
|
=================================
|
||||||
|
|
||||||
|
.. kernel-doc:: include/linux/kho/abi/kexec_handover.h
|
||||||
|
:doc: KHO persistent memory tracker
|
||||||
|
|
||||||
See Also
|
See Also
|
||||||
========
|
========
|
||||||
|
|
||||||
|
|||||||
@@ -71,17 +71,17 @@ for boot memory allocations and as target memory for kexec blobs, some parts
|
|||||||
of that memory region may be reserved. These reservations are irrelevant for
|
of that memory region may be reserved. These reservations are irrelevant for
|
||||||
the next KHO, because kexec can overwrite even the original kernel.
|
the next KHO, because kexec can overwrite even the original kernel.
|
||||||
|
|
||||||
.. _kho-finalization-phase:
|
Kexec Handover Radix Tree
|
||||||
|
=========================
|
||||||
|
|
||||||
KHO finalization phase
|
.. kernel-doc:: include/linux/kho_radix_tree.h
|
||||||
======================
|
:doc: Kexec Handover Radix Tree
|
||||||
|
|
||||||
To enable user space based kexec file loader, the kernel needs to be able to
|
Public API
|
||||||
provide the FDT that describes the current kernel's state before
|
==========
|
||||||
performing the actual kexec. The process of generating that FDT is
|
|
||||||
called serialization. When the FDT is generated, some properties
|
.. kernel-doc:: kernel/liveupdate/kexec_handover.c
|
||||||
of the system may become immutable because they are already written down
|
:export:
|
||||||
in the FDT. That state is called the KHO finalization phase.
|
|
||||||
|
|
||||||
See Also
|
See Also
|
||||||
========
|
========
|
||||||
|
|||||||
@@ -75,9 +75,6 @@ Software Tag-Based KASAN supports slab, page_alloc, vmalloc, and stack memory.
|
|||||||
Hardware Tag-Based KASAN supports slab, page_alloc, and non-executable vmalloc
|
Hardware Tag-Based KASAN supports slab, page_alloc, and non-executable vmalloc
|
||||||
memory.
|
memory.
|
||||||
|
|
||||||
For slab, both software KASAN modes support SLUB and SLAB allocators, while
|
|
||||||
Hardware Tag-Based KASAN only supports SLUB.
|
|
||||||
|
|
||||||
Usage
|
Usage
|
||||||
-----
|
-----
|
||||||
|
|
||||||
|
|||||||
@@ -81,6 +81,13 @@ tables being allocated.
|
|||||||
Error reports
|
Error reports
|
||||||
~~~~~~~~~~~~~
|
~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
The boot parameter ``kfence.fault`` can be used to control the behavior when a
|
||||||
|
KFENCE error is detected:
|
||||||
|
|
||||||
|
- ``kfence.fault=report``: Print the error report and continue (default).
|
||||||
|
- ``kfence.fault=oops``: Print the error report and oops.
|
||||||
|
- ``kfence.fault=panic``: Print the error report and panic.
|
||||||
|
|
||||||
A typical out-of-bounds access looks like this::
|
A typical out-of-bounds access looks like this::
|
||||||
|
|
||||||
==================================================================
|
==================================================================
|
||||||
|
|||||||
@@ -107,7 +107,7 @@ The function :c:func:`vme_master_read` can be used to read from and
|
|||||||
|
|
||||||
In addition to simple reads and writes, :c:func:`vme_master_rmw` is provided to
|
In addition to simple reads and writes, :c:func:`vme_master_rmw` is provided to
|
||||||
do a read-modify-write transaction. Parts of a VME window can also be mapped
|
do a read-modify-write transaction. Parts of a VME window can also be mapped
|
||||||
into user space memory using :c:func:`vme_master_mmap`.
|
into user space memory using :c:func:`vme_master_mmap_prepare`.
|
||||||
|
|
||||||
|
|
||||||
Slave windows
|
Slave windows
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ algorithms work.
|
|||||||
fiemap
|
fiemap
|
||||||
files
|
files
|
||||||
locks
|
locks
|
||||||
|
mmap_prepare
|
||||||
multigrain-ts
|
multigrain-ts
|
||||||
mount_api
|
mount_api
|
||||||
quota
|
quota
|
||||||
|
|||||||
168
Documentation/filesystems/mmap_prepare.rst
Normal file
168
Documentation/filesystems/mmap_prepare.rst
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
===========================
|
||||||
|
mmap_prepare callback HOWTO
|
||||||
|
===========================
|
||||||
|
|
||||||
|
Introduction
|
||||||
|
============
|
||||||
|
|
||||||
|
The ``struct file->f_op->mmap()`` callback has been deprecated as it is both a
|
||||||
|
stability and security risk, and doesn't always permit the merging of adjacent
|
||||||
|
mappings resulting in unnecessary memory fragmentation.
|
||||||
|
|
||||||
|
It has been replaced with the ``file->f_op->mmap_prepare()`` callback which
|
||||||
|
solves these problems.
|
||||||
|
|
||||||
|
This hook is called right at the beginning of setting up the mapping, and
|
||||||
|
importantly it is invoked *before* any merging of adjacent mappings has taken
|
||||||
|
place.
|
||||||
|
|
||||||
|
If an error arises upon mapping, it might arise after this callback has been
|
||||||
|
invoked, therefore it should be treated as effectively stateless.
|
||||||
|
|
||||||
|
That is - no resources should be allocated nor state updated to reflect that a
|
||||||
|
mapping has been established, as the mapping may either be merged, or fail to be
|
||||||
|
mapped after the callback is complete.
|
||||||
|
|
||||||
|
Mapped callback
|
||||||
|
---------------
|
||||||
|
|
||||||
|
If resources need to be allocated per-mapping, or state such as a reference
|
||||||
|
count needs to be manipulated, this should be done using the ``vm_ops->mapped``
|
||||||
|
hook, which itself should be set by the >mmap_prepare hook.
|
||||||
|
|
||||||
|
This callback is only invoked if a new mapping has been established and was not
|
||||||
|
merged with any other, and is invoked at a point where no error may occur before
|
||||||
|
the mapping is established.
|
||||||
|
|
||||||
|
You may return an error to the callback itself, which will cause the mapping to
|
||||||
|
become unmapped and an error returned to the mmap() caller. This is useful if
|
||||||
|
resources need to be allocated, and that allocation might fail.
|
||||||
|
|
||||||
|
How To Use
|
||||||
|
==========
|
||||||
|
|
||||||
|
In your driver's struct file_operations struct, specify an ``mmap_prepare``
|
||||||
|
callback rather than an ``mmap`` one, e.g. for ext4:
|
||||||
|
|
||||||
|
.. code-block:: C
|
||||||
|
|
||||||
|
const struct file_operations ext4_file_operations = {
|
||||||
|
...
|
||||||
|
.mmap_prepare = ext4_file_mmap_prepare,
|
||||||
|
};
|
||||||
|
|
||||||
|
This has a signature of ``int (*mmap_prepare)(struct vm_area_desc *)``.
|
||||||
|
|
||||||
|
Examining the struct vm_area_desc type:
|
||||||
|
|
||||||
|
.. code-block:: C
|
||||||
|
|
||||||
|
struct vm_area_desc {
|
||||||
|
/* Immutable state. */
|
||||||
|
const struct mm_struct *const mm;
|
||||||
|
struct file *const file; /* May vary from vm_file in stacked callers. */
|
||||||
|
unsigned long start;
|
||||||
|
unsigned long end;
|
||||||
|
|
||||||
|
/* Mutable fields. Populated with initial state. */
|
||||||
|
pgoff_t pgoff;
|
||||||
|
struct file *vm_file;
|
||||||
|
vma_flags_t vma_flags;
|
||||||
|
pgprot_t page_prot;
|
||||||
|
|
||||||
|
/* Write-only fields. */
|
||||||
|
const struct vm_operations_struct *vm_ops;
|
||||||
|
void *private_data;
|
||||||
|
|
||||||
|
/* Take further action? */
|
||||||
|
struct mmap_action action;
|
||||||
|
};
|
||||||
|
|
||||||
|
This is straightforward - you have all the fields you need to set up the
|
||||||
|
mapping, and you can update the mutable and writable fields, for instance:
|
||||||
|
|
||||||
|
.. code-block:: C
|
||||||
|
|
||||||
|
static int ext4_file_mmap_prepare(struct vm_area_desc *desc)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct file *file = desc->file;
|
||||||
|
struct inode *inode = file->f_mapping->host;
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
file_accessed(file);
|
||||||
|
if (IS_DAX(file_inode(file))) {
|
||||||
|
desc->vm_ops = &ext4_dax_vm_ops;
|
||||||
|
vma_desc_set_flags(desc, VMA_HUGEPAGE_BIT);
|
||||||
|
} else {
|
||||||
|
desc->vm_ops = &ext4_file_vm_ops;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
Importantly, you no longer have to dance around with reference counts or locks
|
||||||
|
when updating these fields - **you can simply go ahead and change them**.
|
||||||
|
|
||||||
|
Everything is taken care of by the mapping code.
|
||||||
|
|
||||||
|
VMA Flags
|
||||||
|
---------
|
||||||
|
|
||||||
|
Along with ``mmap_prepare``, VMA flags have undergone an overhaul. Where before
|
||||||
|
you would invoke one of vm_flags_init(), vm_flags_reset(), vm_flags_set(),
|
||||||
|
vm_flags_clear(), and vm_flags_mod() to modify flags (and to have the
|
||||||
|
locking done correctly for you, this is no longer necessary.
|
||||||
|
|
||||||
|
Also, the legacy approach of specifying VMA flags via ``VM_READ``, ``VM_WRITE``,
|
||||||
|
etc. - i.e. using a ``-VM_xxx``- macro has changed too.
|
||||||
|
|
||||||
|
When implementing mmap_prepare(), reference flags by their bit number, defined
|
||||||
|
as a ``VMA_xxx_BIT`` macro, e.g. ``VMA_READ_BIT``, ``VMA_WRITE_BIT`` etc.,
|
||||||
|
and use one of (where ``desc`` is a pointer to struct vm_area_desc):
|
||||||
|
|
||||||
|
* ``vma_desc_test_any(desc, ...)`` - Specify a comma-separated list of flags
|
||||||
|
you wish to test for (whether _any_ are set), e.g. - ``vma_desc_test_any(
|
||||||
|
desc, VMA_WRITE_BIT, VMA_MAYWRITE_BIT)`` - returns ``true`` if either are set,
|
||||||
|
otherwise ``false``.
|
||||||
|
* ``vma_desc_set_flags(desc, ...)`` - Update the VMA descriptor flags to set
|
||||||
|
additional flags specified by a comma-separated list,
|
||||||
|
e.g. - ``vma_desc_set_flags(desc, VMA_PFNMAP_BIT, VMA_IO_BIT)``.
|
||||||
|
* ``vma_desc_clear_flags(desc, ...)`` - Update the VMA descriptor flags to clear
|
||||||
|
flags specified by a comma-separated list, e.g. - ``vma_desc_clear_flags(
|
||||||
|
desc, VMA_WRITE_BIT, VMA_MAYWRITE_BIT)``.
|
||||||
|
|
||||||
|
Actions
|
||||||
|
=======
|
||||||
|
|
||||||
|
You can now very easily have actions be performed upon a mapping once set up by
|
||||||
|
utilising simple helper functions invoked upon the struct vm_area_desc
|
||||||
|
pointer. These are:
|
||||||
|
|
||||||
|
* mmap_action_remap() - Remaps a range consisting only of PFNs for a specific
|
||||||
|
range starting a virtual address and PFN number of a set size.
|
||||||
|
|
||||||
|
* mmap_action_remap_full() - Same as mmap_action_remap(), only remaps the
|
||||||
|
entire mapping from ``start_pfn`` onward.
|
||||||
|
|
||||||
|
* mmap_action_ioremap() - Same as mmap_action_remap(), only performs an I/O
|
||||||
|
remap.
|
||||||
|
|
||||||
|
* mmap_action_ioremap_full() - Same as mmap_action_ioremap(), only remaps
|
||||||
|
the entire mapping from ``start_pfn`` onward.
|
||||||
|
|
||||||
|
* mmap_action_simple_ioremap() - Sets up an I/O remap from a specified
|
||||||
|
physical address and over a specified length.
|
||||||
|
|
||||||
|
* mmap_action_map_kernel_pages() - Maps a specified array of `struct page`
|
||||||
|
pointers in the VMA from a specific offset.
|
||||||
|
|
||||||
|
* mmap_action_map_kernel_pages_full() - Maps a specified array of `struct
|
||||||
|
page` pointers over the entire VMA. The caller must ensure there are
|
||||||
|
sufficient entries in the page array to cover the entire range of the
|
||||||
|
described VMA.
|
||||||
|
|
||||||
|
**NOTE:** The ``action`` field should never normally be manipulated directly,
|
||||||
|
rather you ought to use one of these helpers.
|
||||||
@@ -150,6 +150,8 @@ address on the given address space. Support of ``address unit`` parameter is
|
|||||||
up to each operations set implementation. ``paddr`` is the only operations set
|
up to each operations set implementation. ``paddr`` is the only operations set
|
||||||
implementation that supports the parameter.
|
implementation that supports the parameter.
|
||||||
|
|
||||||
|
If the value is smaller than ``PAGE_SIZE``, only a power of two should be used.
|
||||||
|
|
||||||
.. _damon_core_logic:
|
.. _damon_core_logic:
|
||||||
|
|
||||||
Core Logics
|
Core Logics
|
||||||
@@ -165,6 +167,13 @@ monitoring attributes, ``sampling interval``, ``aggregation interval``,
|
|||||||
``update interval``, ``minimum number of regions``, and ``maximum number of
|
``update interval``, ``minimum number of regions``, and ``maximum number of
|
||||||
regions``.
|
regions``.
|
||||||
|
|
||||||
|
Note that ``minimum number of regions`` must be 3 or higher. This is because the
|
||||||
|
virtual address space monitoring is designed to handle at least three regions to
|
||||||
|
accommodate two large unmapped areas commonly found in normal virtual address
|
||||||
|
spaces. While this restriction might not be strictly necessary for other
|
||||||
|
operation sets like ``paddr``, it is currently enforced across all DAMON
|
||||||
|
operations for consistency.
|
||||||
|
|
||||||
To know how user-space can set the attributes via :ref:`DAMON sysfs interface
|
To know how user-space can set the attributes via :ref:`DAMON sysfs interface
|
||||||
<sysfs_interface>`, refer to :ref:`monitoring_attrs <sysfs_monitoring_attrs>`
|
<sysfs_interface>`, refer to :ref:`monitoring_attrs <sysfs_monitoring_attrs>`
|
||||||
part of the documentation.
|
part of the documentation.
|
||||||
@@ -458,9 +467,13 @@ that supports each action are as below.
|
|||||||
- ``pageout``: Reclaim the region.
|
- ``pageout``: Reclaim the region.
|
||||||
Supported by ``vaddr``, ``fvaddr`` and ``paddr`` operations set.
|
Supported by ``vaddr``, ``fvaddr`` and ``paddr`` operations set.
|
||||||
- ``hugepage``: Call ``madvise()`` for the region with ``MADV_HUGEPAGE``.
|
- ``hugepage``: Call ``madvise()`` for the region with ``MADV_HUGEPAGE``.
|
||||||
Supported by ``vaddr`` and ``fvaddr`` operations set.
|
Supported by ``vaddr`` and ``fvaddr`` operations set. When
|
||||||
|
TRANSPARENT_HUGEPAGE is disabled, the application of the action will just
|
||||||
|
fail.
|
||||||
- ``nohugepage``: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``.
|
- ``nohugepage``: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``.
|
||||||
Supported by ``vaddr`` and ``fvaddr`` operations set.
|
Supported by ``vaddr`` and ``fvaddr`` operations set. When
|
||||||
|
TRANSPARENT_HUGEPAGE is disabled, the application of the action will just
|
||||||
|
fail.
|
||||||
- ``lru_prio``: Prioritize the region on its LRU lists.
|
- ``lru_prio``: Prioritize the region on its LRU lists.
|
||||||
Supported by ``paddr`` operations set.
|
Supported by ``paddr`` operations set.
|
||||||
- ``lru_deprio``: Deprioritize the region on its LRU lists.
|
- ``lru_deprio``: Deprioritize the region on its LRU lists.
|
||||||
@@ -564,6 +577,18 @@ aggressiveness (the quota) of the corresponding scheme. For example, if DAMOS
|
|||||||
is under achieving the goal, DAMOS automatically increases the quota. If DAMOS
|
is under achieving the goal, DAMOS automatically increases the quota. If DAMOS
|
||||||
is over achieving the goal, it decreases the quota.
|
is over achieving the goal, it decreases the quota.
|
||||||
|
|
||||||
|
There are two such tuning algorithms that users can select as they need.
|
||||||
|
|
||||||
|
- ``consist``: A proportional feedback loop based algorithm. Tries to find an
|
||||||
|
optimum quota that should be consistently kept, to keep achieving the goal.
|
||||||
|
Useful for kernel-only operation on dynamic and long-running environments.
|
||||||
|
This is the default selection. If unsure, use this.
|
||||||
|
- ``temporal``: More straightforward algorithm. Tries to achieve the goal as
|
||||||
|
fast as possible, using maximum allowed quota, but only for a temporal short
|
||||||
|
time. When the quota is under-achieved, this algorithm keeps tuning quota to
|
||||||
|
a maximum allowed one. Once the quota is [over]-achieved, this sets the
|
||||||
|
quota zero. Useful for deterministic control required environments.
|
||||||
|
|
||||||
The goal can be specified with five parameters, namely ``target_metric``,
|
The goal can be specified with five parameters, namely ``target_metric``,
|
||||||
``target_value``, ``current_value``, ``nid`` and ``path``. The auto-tuning
|
``target_value``, ``current_value``, ``nid`` and ``path``. The auto-tuning
|
||||||
mechanism tries to make ``current_value`` of ``target_metric`` be same to
|
mechanism tries to make ``current_value`` of ``target_metric`` be same to
|
||||||
@@ -839,6 +864,10 @@ more detail, please read the usage documents for those
|
|||||||
(:doc:`/admin-guide/mm/damon/stat`, :doc:`/admin-guide/mm/damon/reclaim` and
|
(:doc:`/admin-guide/mm/damon/stat`, :doc:`/admin-guide/mm/damon/reclaim` and
|
||||||
:doc:`/admin-guide/mm/damon/lru_sort`).
|
:doc:`/admin-guide/mm/damon/lru_sort`).
|
||||||
|
|
||||||
|
.. _damon_design_special_purpose_modules_exclusivity:
|
||||||
|
|
||||||
|
Note that these modules currently run in an exclusive manner. If one of those
|
||||||
|
is already running, others will return ``-EBUSY`` upon start requests.
|
||||||
|
|
||||||
Sample DAMON Modules
|
Sample DAMON Modules
|
||||||
--------------------
|
--------------------
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ DAMON is a Linux kernel subsystem for efficient :ref:`data access monitoring
|
|||||||
- *light-weight* (for production online usages),
|
- *light-weight* (for production online usages),
|
||||||
- *scalable* (in terms of memory size),
|
- *scalable* (in terms of memory size),
|
||||||
- *tunable* (for flexible usages), and
|
- *tunable* (for flexible usages), and
|
||||||
- *autoamted* (for production operation without manual tunings).
|
- *automated* (for production operation without manual tunings).
|
||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
|
|||||||
@@ -63,10 +63,10 @@ management subsystem maintainer.
|
|||||||
Review cadence
|
Review cadence
|
||||||
--------------
|
--------------
|
||||||
|
|
||||||
The DAMON maintainer does the work on the usual work hour (09:00 to 17:00,
|
The DAMON maintainer usually work in a flexible way, except early morning in PT
|
||||||
Mon-Fri) in PT (Pacific Time). The response to patches will occasionally be
|
(Pacific Time). The response to patches will occasionally be slow. Do not
|
||||||
slow. Do not hesitate to send a ping if you have not heard back within a week
|
hesitate to send a ping if you have not heard back within a week of sending a
|
||||||
of sending a patch.
|
patch.
|
||||||
|
|
||||||
Mailing tool
|
Mailing tool
|
||||||
------------
|
------------
|
||||||
|
|||||||
@@ -155,7 +155,7 @@ are enough free huge pages to accommodate the reservation. If there are,
|
|||||||
the global reservation count resv_huge_pages is adjusted something like the
|
the global reservation count resv_huge_pages is adjusted something like the
|
||||||
following::
|
following::
|
||||||
|
|
||||||
if (resv_needed <= (resv_huge_pages - free_huge_pages))
|
if (resv_needed <= (free_huge_pages - resv_huge_pages)
|
||||||
resv_huge_pages += resv_needed;
|
resv_huge_pages += resv_needed;
|
||||||
|
|
||||||
Note that the global lock hugetlb_lock is held when checking and adjusting
|
Note that the global lock hugetlb_lock is held when checking and adjusting
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ For each base page, there is a corresponding ``struct page``.
|
|||||||
Within the HugeTLB subsystem, only the first 4 ``struct page`` are used to
|
Within the HugeTLB subsystem, only the first 4 ``struct page`` are used to
|
||||||
contain unique information about a HugeTLB page. ``__NR_USED_SUBPAGE`` provides
|
contain unique information about a HugeTLB page. ``__NR_USED_SUBPAGE`` provides
|
||||||
this upper limit. The only 'useful' information in the remaining ``struct page``
|
this upper limit. The only 'useful' information in the remaining ``struct page``
|
||||||
is the compound_head field, and this field is the same for all tail pages.
|
is the compound_info field, and this field is the same for all tail pages.
|
||||||
|
|
||||||
By removing redundant ``struct page`` for HugeTLB pages, memory can be returned
|
By removing redundant ``struct page`` for HugeTLB pages, memory can be returned
|
||||||
to the buddy allocator for other uses.
|
to the buddy allocator for other uses.
|
||||||
@@ -124,33 +124,35 @@ Here is how things look before optimization::
|
|||||||
| |
|
| |
|
||||||
+-----------+
|
+-----------+
|
||||||
|
|
||||||
The value of page->compound_head is the same for all tail pages. The first
|
The first page of ``struct page`` (page 0) associated with the HugeTLB page
|
||||||
page of ``struct page`` (page 0) associated with the HugeTLB page contains the 4
|
contains the 4 ``struct page`` necessary to describe the HugeTLB. The remaining
|
||||||
``struct page`` necessary to describe the HugeTLB. The only use of the remaining
|
pages of ``struct page`` (page 1 to page 7) are tail pages.
|
||||||
pages of ``struct page`` (page 1 to page 7) is to point to page->compound_head.
|
|
||||||
Therefore, we can remap pages 1 to 7 to page 0. Only 1 page of ``struct page``
|
The optimization is only applied when the size of the struct page is a power
|
||||||
will be used for each HugeTLB page. This will allow us to free the remaining
|
of 2. In this case, all tail pages of the same order are identical. See
|
||||||
7 pages to the buddy allocator.
|
compound_head(). This allows us to remap the tail pages of the vmemmap to a
|
||||||
|
shared, read-only page. The head page is also remapped to a new page. This
|
||||||
|
allows the original vmemmap pages to be freed.
|
||||||
|
|
||||||
Here is how things look after remapping::
|
Here is how things look after remapping::
|
||||||
|
|
||||||
HugeTLB struct pages(8 pages) page frame(8 pages)
|
HugeTLB struct pages(8 pages) page frame (new)
|
||||||
+-----------+ ---virt_to_page---> +-----------+ mapping to +-----------+
|
+-----------+ ---virt_to_page---> +-----------+ mapping to +----------------+
|
||||||
| | | 0 | -------------> | 0 |
|
| | | 0 | -------------> | 0 |
|
||||||
| | +-----------+ +-----------+
|
| | +-----------+ +----------------+
|
||||||
| | | 1 | ---------------^ ^ ^ ^ ^ ^ ^
|
| | | 1 | ------┐
|
||||||
| | +-----------+ | | | | | |
|
| | +-----------+ |
|
||||||
| | | 2 | -----------------+ | | | | |
|
| | | 2 | ------┼ +----------------------------+
|
||||||
| | +-----------+ | | | | |
|
| | +-----------+ | | A single, per-zone page |
|
||||||
| | | 3 | -------------------+ | | | |
|
| | | 3 | ------┼------> | frame shared among all |
|
||||||
| | +-----------+ | | | |
|
| | +-----------+ | | hugepages of the same size |
|
||||||
| | | 4 | ---------------------+ | | |
|
| | | 4 | ------┼ +----------------------------+
|
||||||
| PMD | +-----------+ | | |
|
| | +-----------+ |
|
||||||
| level | | 5 | -----------------------+ | |
|
| | | 5 | ------┼
|
||||||
| mapping | +-----------+ | |
|
| PMD | +-----------+ |
|
||||||
| | | 6 | -------------------------+ |
|
| level | | 6 | ------┼
|
||||||
| | +-----------+ |
|
| mapping | +-----------+ |
|
||||||
| | | 7 | ---------------------------+
|
| | | 7 | ------┘
|
||||||
| | +-----------+
|
| | +-----------+
|
||||||
| |
|
| |
|
||||||
| |
|
| |
|
||||||
@@ -172,16 +174,6 @@ The contiguous bit is used to increase the mapping size at the pmd and pte
|
|||||||
(last) level. So this type of HugeTLB page can be optimized only when its
|
(last) level. So this type of HugeTLB page can be optimized only when its
|
||||||
size of the ``struct page`` structs is greater than **1** page.
|
size of the ``struct page`` structs is greater than **1** page.
|
||||||
|
|
||||||
Notice: The head vmemmap page is not freed to the buddy allocator and all
|
|
||||||
tail vmemmap pages are mapped to the head vmemmap page frame. So we can see
|
|
||||||
more than one ``struct page`` struct with ``PG_head`` (e.g. 8 per 2 MB HugeTLB
|
|
||||||
page) associated with each HugeTLB page. The ``compound_head()`` can handle
|
|
||||||
this correctly. There is only **one** head ``struct page``, the tail
|
|
||||||
``struct page`` with ``PG_head`` are fake head ``struct page``. We need an
|
|
||||||
approach to distinguish between those two different types of ``struct page`` so
|
|
||||||
that ``compound_head()`` can return the real head ``struct page`` when the
|
|
||||||
parameter is the tail ``struct page`` but with ``PG_head``.
|
|
||||||
|
|
||||||
Device DAX
|
Device DAX
|
||||||
==========
|
==========
|
||||||
|
|
||||||
|
|||||||
@@ -16738,6 +16738,7 @@ L: linux-mm@kvack.org
|
|||||||
S: Maintained
|
S: Maintained
|
||||||
W: http://www.linux-mm.org
|
W: http://www.linux-mm.org
|
||||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
|
T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
|
||||||
|
F: include/linux/folio_batch.h
|
||||||
F: include/linux/gfp.h
|
F: include/linux/gfp.h
|
||||||
F: include/linux/gfp_types.h
|
F: include/linux/gfp_types.h
|
||||||
F: include/linux/highmem.h
|
F: include/linux/highmem.h
|
||||||
@@ -16754,6 +16755,7 @@ F: include/linux/pgtable.h
|
|||||||
F: include/linux/ptdump.h
|
F: include/linux/ptdump.h
|
||||||
F: include/linux/vmpressure.h
|
F: include/linux/vmpressure.h
|
||||||
F: include/linux/vmstat.h
|
F: include/linux/vmstat.h
|
||||||
|
F: fs/proc/meminfo.c
|
||||||
F: kernel/fork.c
|
F: kernel/fork.c
|
||||||
F: mm/Kconfig
|
F: mm/Kconfig
|
||||||
F: mm/debug.c
|
F: mm/debug.c
|
||||||
@@ -16987,6 +16989,7 @@ R: Kemeng Shi <shikemeng@huaweicloud.com>
|
|||||||
R: Nhat Pham <nphamcs@gmail.com>
|
R: Nhat Pham <nphamcs@gmail.com>
|
||||||
R: Baoquan He <bhe@redhat.com>
|
R: Baoquan He <bhe@redhat.com>
|
||||||
R: Barry Song <baohua@kernel.org>
|
R: Barry Song <baohua@kernel.org>
|
||||||
|
R: Youngjun Park <youngjun.park@lge.com>
|
||||||
L: linux-mm@kvack.org
|
L: linux-mm@kvack.org
|
||||||
S: Maintained
|
S: Maintained
|
||||||
F: Documentation/mm/swap-table.rst
|
F: Documentation/mm/swap-table.rst
|
||||||
@@ -17068,6 +17071,8 @@ S: Maintained
|
|||||||
W: http://www.linux-mm.org
|
W: http://www.linux-mm.org
|
||||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
|
T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
|
||||||
F: include/trace/events/mmap.h
|
F: include/trace/events/mmap.h
|
||||||
|
F: fs/proc/task_mmu.c
|
||||||
|
F: fs/proc/task_nommu.c
|
||||||
F: mm/interval_tree.c
|
F: mm/interval_tree.c
|
||||||
F: mm/mincore.c
|
F: mm/mincore.c
|
||||||
F: mm/mlock.c
|
F: mm/mlock.c
|
||||||
|
|||||||
@@ -126,12 +126,6 @@ struct vm_area_struct;
|
|||||||
*/
|
*/
|
||||||
#define pgprot_noncached(prot) (prot)
|
#define pgprot_noncached(prot) (prot)
|
||||||
|
|
||||||
/*
|
|
||||||
* ZERO_PAGE is a global shared page that is always zero: used
|
|
||||||
* for zero-mapped memory areas etc..
|
|
||||||
*/
|
|
||||||
#define ZERO_PAGE(vaddr) (virt_to_page(ZERO_PGE))
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* On certain platforms whose physical address space can overlap KSEG,
|
* On certain platforms whose physical address space can overlap KSEG,
|
||||||
* namely EV6 and above, we must re-twiddle the physaddr to restore the
|
* namely EV6 and above, we must re-twiddle the physaddr to restore the
|
||||||
|
|||||||
@@ -131,7 +131,7 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
|
|||||||
#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
|
#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
|
||||||
|
|
||||||
/* Default Permissions for stack/heaps pages (Non Executable) */
|
/* Default Permissions for stack/heaps pages (Non Executable) */
|
||||||
#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC
|
#define VMA_DATA_DEFAULT_FLAGS VMA_DATA_FLAGS_NON_EXEC
|
||||||
|
|
||||||
#define WANT_PAGE_VIRTUAL 1
|
#define WANT_PAGE_VIRTUAL 1
|
||||||
|
|
||||||
|
|||||||
@@ -21,9 +21,6 @@
|
|||||||
|
|
||||||
#ifndef __ASSEMBLER__
|
#ifndef __ASSEMBLER__
|
||||||
|
|
||||||
extern char empty_zero_page[PAGE_SIZE];
|
|
||||||
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
|
||||||
|
|
||||||
extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
|
extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
|
||||||
|
|
||||||
/* to cope with aliasing VIPT cache */
|
/* to cope with aliasing VIPT cache */
|
||||||
|
|||||||
@@ -19,8 +19,6 @@
|
|||||||
#include <asm/arcregs.h>
|
#include <asm/arcregs.h>
|
||||||
|
|
||||||
pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE);
|
pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE);
|
||||||
char empty_zero_page[PAGE_SIZE] __aligned(PAGE_SIZE);
|
|
||||||
EXPORT_SYMBOL(empty_zero_page);
|
|
||||||
|
|
||||||
static const unsigned long low_mem_start = CONFIG_LINUX_RAM_BASE;
|
static const unsigned long low_mem_start = CONFIG_LINUX_RAM_BASE;
|
||||||
static unsigned long low_mem_sz;
|
static unsigned long low_mem_sz;
|
||||||
|
|||||||
@@ -184,7 +184,7 @@ extern int pfn_valid(unsigned long);
|
|||||||
|
|
||||||
#include <asm/memory.h>
|
#include <asm/memory.h>
|
||||||
|
|
||||||
#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC
|
#define VMA_DATA_DEFAULT_FLAGS VMA_DATA_FLAGS_TSK_EXEC
|
||||||
|
|
||||||
#include <asm-generic/getorder.h>
|
#include <asm-generic/getorder.h>
|
||||||
#include <asm-generic/memory_model.h>
|
#include <asm-generic/memory_model.h>
|
||||||
|
|||||||
@@ -10,15 +10,6 @@
|
|||||||
#include <linux/const.h>
|
#include <linux/const.h>
|
||||||
#include <asm/proc-fns.h>
|
#include <asm/proc-fns.h>
|
||||||
|
|
||||||
#ifndef __ASSEMBLY__
|
|
||||||
/*
|
|
||||||
* ZERO_PAGE is a global shared page that is always zero: used
|
|
||||||
* for zero-mapped memory areas etc..
|
|
||||||
*/
|
|
||||||
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
|
|
||||||
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <asm-generic/pgtable-nopud.h>
|
#include <asm-generic/pgtable-nopud.h>
|
||||||
|
|
||||||
#ifndef CONFIG_MMU
|
#ifndef CONFIG_MMU
|
||||||
|
|||||||
@@ -41,13 +41,6 @@
|
|||||||
|
|
||||||
extern unsigned long __atags_pointer;
|
extern unsigned long __atags_pointer;
|
||||||
|
|
||||||
/*
|
|
||||||
* empty_zero_page is a special page that is used for
|
|
||||||
* zero-initialized data and COW.
|
|
||||||
*/
|
|
||||||
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
|
|
||||||
EXPORT_SYMBOL(empty_zero_page);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The pmd table for the upper-most set of pages.
|
* The pmd table for the upper-most set of pages.
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -27,13 +27,6 @@
|
|||||||
|
|
||||||
unsigned long vectors_base;
|
unsigned long vectors_base;
|
||||||
|
|
||||||
/*
|
|
||||||
* empty_zero_page is a special page that is used for
|
|
||||||
* zero-initialized data and COW.
|
|
||||||
*/
|
|
||||||
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
|
|
||||||
EXPORT_SYMBOL(empty_zero_page);
|
|
||||||
|
|
||||||
#ifdef CONFIG_ARM_MPU
|
#ifdef CONFIG_ARM_MPU
|
||||||
struct mpu_rgn_info mpu_rgn_info;
|
struct mpu_rgn_info mpu_rgn_info;
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -16,7 +16,6 @@ config ARM64
|
|||||||
select ARCH_BINFMT_ELF_STATE
|
select ARCH_BINFMT_ELF_STATE
|
||||||
select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
|
select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
|
||||||
select ARCH_ENABLE_MEMORY_HOTPLUG
|
select ARCH_ENABLE_MEMORY_HOTPLUG
|
||||||
select ARCH_ENABLE_MEMORY_HOTREMOVE
|
|
||||||
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
|
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
|
||||||
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
|
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
|
||||||
select ARCH_HAS_CACHE_LINE_SIZE
|
select ARCH_HAS_CACHE_LINE_SIZE
|
||||||
|
|||||||
@@ -46,7 +46,12 @@ int pfn_is_map_memory(unsigned long pfn);
|
|||||||
|
|
||||||
#endif /* !__ASSEMBLER__ */
|
#endif /* !__ASSEMBLER__ */
|
||||||
|
|
||||||
#define VM_DATA_DEFAULT_FLAGS (VM_DATA_FLAGS_TSK_EXEC | VM_MTE_ALLOWED)
|
#ifdef CONFIG_ARM64_MTE
|
||||||
|
#define VMA_DATA_DEFAULT_FLAGS append_vma_flags(VMA_DATA_FLAGS_TSK_EXEC, \
|
||||||
|
VMA_MTE_ALLOWED_BIT)
|
||||||
|
#else
|
||||||
|
#define VMA_DATA_DEFAULT_FLAGS VMA_DATA_FLAGS_TSK_EXEC
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <asm-generic/getorder.h>
|
#include <asm-generic/getorder.h>
|
||||||
|
|
||||||
|
|||||||
@@ -107,13 +107,6 @@ static inline void arch_leave_lazy_mmu_mode(void)
|
|||||||
__flush_tlb_range(vma, address, address + PMD_SIZE, PMD_SIZE, 2, \
|
__flush_tlb_range(vma, address, address + PMD_SIZE, PMD_SIZE, 2, \
|
||||||
TLBF_NOBROADCAST | TLBF_NONOTIFY | TLBF_NOWALKCACHE)
|
TLBF_NOBROADCAST | TLBF_NONOTIFY | TLBF_NOWALKCACHE)
|
||||||
|
|
||||||
/*
|
|
||||||
* ZERO_PAGE is a global shared page that is always zero: used
|
|
||||||
* for zero-mapped memory areas etc..
|
|
||||||
*/
|
|
||||||
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
|
|
||||||
#define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page))
|
|
||||||
|
|
||||||
#define pte_ERROR(e) \
|
#define pte_ERROR(e) \
|
||||||
pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e))
|
pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e))
|
||||||
|
|
||||||
@@ -1309,9 +1302,8 @@ static inline void __pte_clear(struct mm_struct *mm,
|
|||||||
__set_pte(ptep, __pte(0));
|
__set_pte(ptep, __pte(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma,
|
static inline bool __ptep_test_and_clear_young(struct vm_area_struct *vma,
|
||||||
unsigned long address,
|
unsigned long address, pte_t *ptep)
|
||||||
pte_t *ptep)
|
|
||||||
{
|
{
|
||||||
pte_t old_pte, pte;
|
pte_t old_pte, pte;
|
||||||
|
|
||||||
@@ -1326,10 +1318,10 @@ static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma,
|
|||||||
return pte_young(pte);
|
return pte_young(pte);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int __ptep_clear_flush_young(struct vm_area_struct *vma,
|
static inline bool __ptep_clear_flush_young(struct vm_area_struct *vma,
|
||||||
unsigned long address, pte_t *ptep)
|
unsigned long address, pte_t *ptep)
|
||||||
{
|
{
|
||||||
int young = __ptep_test_and_clear_young(vma, address, ptep);
|
bool young = __ptep_test_and_clear_young(vma, address, ptep);
|
||||||
|
|
||||||
if (young) {
|
if (young) {
|
||||||
/*
|
/*
|
||||||
@@ -1348,9 +1340,8 @@ static inline int __ptep_clear_flush_young(struct vm_area_struct *vma,
|
|||||||
|
|
||||||
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
|
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
|
||||||
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
|
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
|
||||||
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
static inline bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
||||||
unsigned long address,
|
unsigned long address, pmd_t *pmdp)
|
||||||
pmd_t *pmdp)
|
|
||||||
{
|
{
|
||||||
/* Operation applies to PMD table entry only if FEAT_HAFT is enabled */
|
/* Operation applies to PMD table entry only if FEAT_HAFT is enabled */
|
||||||
VM_WARN_ON(pmd_table(READ_ONCE(*pmdp)) && !system_supports_haft());
|
VM_WARN_ON(pmd_table(READ_ONCE(*pmdp)) && !system_supports_haft());
|
||||||
@@ -1673,9 +1664,9 @@ extern void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
|
|||||||
extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
|
extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
|
||||||
unsigned long addr, pte_t *ptep,
|
unsigned long addr, pte_t *ptep,
|
||||||
unsigned int nr, int full);
|
unsigned int nr, int full);
|
||||||
int contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
|
bool contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
|
||||||
unsigned long addr, pte_t *ptep, unsigned int nr);
|
unsigned long addr, pte_t *ptep, unsigned int nr);
|
||||||
int contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
|
bool contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
|
||||||
unsigned long addr, pte_t *ptep, unsigned int nr);
|
unsigned long addr, pte_t *ptep, unsigned int nr);
|
||||||
extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
|
extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
|
||||||
pte_t *ptep, unsigned int nr);
|
pte_t *ptep, unsigned int nr);
|
||||||
@@ -1839,21 +1830,26 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
|
|||||||
return __ptep_get_and_clear(mm, addr, ptep);
|
return __ptep_get_and_clear(mm, addr, ptep);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
|
#define test_and_clear_young_ptes test_and_clear_young_ptes
|
||||||
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
|
static inline bool test_and_clear_young_ptes(struct vm_area_struct *vma,
|
||||||
unsigned long addr, pte_t *ptep)
|
unsigned long addr, pte_t *ptep, unsigned int nr)
|
||||||
{
|
{
|
||||||
pte_t orig_pte = __ptep_get(ptep);
|
if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
|
||||||
|
|
||||||
if (likely(!pte_valid_cont(orig_pte)))
|
|
||||||
return __ptep_test_and_clear_young(vma, addr, ptep);
|
return __ptep_test_and_clear_young(vma, addr, ptep);
|
||||||
|
|
||||||
return contpte_test_and_clear_young_ptes(vma, addr, ptep, 1);
|
return contpte_test_and_clear_young_ptes(vma, addr, ptep, nr);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
|
||||||
|
static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
|
||||||
|
unsigned long addr, pte_t *ptep)
|
||||||
|
{
|
||||||
|
return test_and_clear_young_ptes(vma, addr, ptep, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
|
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
|
||||||
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
|
static inline bool ptep_clear_flush_young(struct vm_area_struct *vma,
|
||||||
unsigned long addr, pte_t *ptep)
|
unsigned long addr, pte_t *ptep)
|
||||||
{
|
{
|
||||||
pte_t orig_pte = __ptep_get(ptep);
|
pte_t orig_pte = __ptep_get(ptep);
|
||||||
|
|
||||||
@@ -1864,9 +1860,8 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define clear_flush_young_ptes clear_flush_young_ptes
|
#define clear_flush_young_ptes clear_flush_young_ptes
|
||||||
static inline int clear_flush_young_ptes(struct vm_area_struct *vma,
|
static inline bool clear_flush_young_ptes(struct vm_area_struct *vma,
|
||||||
unsigned long addr, pte_t *ptep,
|
unsigned long addr, pte_t *ptep, unsigned int nr)
|
||||||
unsigned int nr)
|
|
||||||
{
|
{
|
||||||
if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
|
if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
|
||||||
return __ptep_clear_flush_young(vma, addr, ptep);
|
return __ptep_clear_flush_young(vma, addr, ptep);
|
||||||
|
|||||||
@@ -12,8 +12,7 @@
|
|||||||
|
|
||||||
#define arch_max_pkey() 8
|
#define arch_max_pkey() 8
|
||||||
|
|
||||||
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
|
int arch_set_user_pkey_access(int pkey, unsigned long init_val);
|
||||||
unsigned long init_val);
|
|
||||||
|
|
||||||
static inline bool arch_pkeys_enabled(void)
|
static inline bool arch_pkeys_enabled(void)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -509,9 +509,8 @@ pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(contpte_get_and_clear_full_ptes);
|
EXPORT_SYMBOL_GPL(contpte_get_and_clear_full_ptes);
|
||||||
|
|
||||||
int contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
|
bool contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
|
||||||
unsigned long addr, pte_t *ptep,
|
unsigned long addr, pte_t *ptep, unsigned int nr)
|
||||||
unsigned int nr)
|
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* ptep_clear_flush_young() technically requires us to clear the access
|
* ptep_clear_flush_young() technically requires us to clear the access
|
||||||
@@ -526,7 +525,7 @@ int contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
unsigned long end = addr + nr * PAGE_SIZE;
|
unsigned long end = addr + nr * PAGE_SIZE;
|
||||||
int young = 0;
|
bool young = false;
|
||||||
|
|
||||||
ptep = contpte_align_addr_ptep(&addr, &end, ptep, nr);
|
ptep = contpte_align_addr_ptep(&addr, &end, ptep, nr);
|
||||||
for (; addr != end; ptep++, addr += PAGE_SIZE)
|
for (; addr != end; ptep++, addr += PAGE_SIZE)
|
||||||
@@ -536,11 +535,10 @@ int contpte_test_and_clear_young_ptes(struct vm_area_struct *vma,
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(contpte_test_and_clear_young_ptes);
|
EXPORT_SYMBOL_GPL(contpte_test_and_clear_young_ptes);
|
||||||
|
|
||||||
int contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
|
bool contpte_clear_flush_young_ptes(struct vm_area_struct *vma,
|
||||||
unsigned long addr, pte_t *ptep,
|
unsigned long addr, pte_t *ptep, unsigned int nr)
|
||||||
unsigned int nr)
|
|
||||||
{
|
{
|
||||||
int young;
|
bool young;
|
||||||
|
|
||||||
young = contpte_test_and_clear_young_ptes(vma, addr, ptep, nr);
|
young = contpte_test_and_clear_young_ptes(vma, addr, ptep, nr);
|
||||||
|
|
||||||
|
|||||||
@@ -12,19 +12,7 @@
|
|||||||
|
|
||||||
static unsigned long alloc_gcs(unsigned long addr, unsigned long size)
|
static unsigned long alloc_gcs(unsigned long addr, unsigned long size)
|
||||||
{
|
{
|
||||||
int flags = MAP_ANONYMOUS | MAP_PRIVATE;
|
return vm_mmap_shadow_stack(addr, size, 0);
|
||||||
struct mm_struct *mm = current->mm;
|
|
||||||
unsigned long mapped_addr, unused;
|
|
||||||
|
|
||||||
if (addr)
|
|
||||||
flags |= MAP_FIXED_NOREPLACE;
|
|
||||||
|
|
||||||
mmap_write_lock(mm);
|
|
||||||
mapped_addr = do_mmap(NULL, addr, size, PROT_READ, flags,
|
|
||||||
VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL);
|
|
||||||
mmap_write_unlock(mm);
|
|
||||||
|
|
||||||
return mapped_addr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned long gcs_size(unsigned long size)
|
static unsigned long gcs_size(unsigned long size)
|
||||||
|
|||||||
@@ -328,6 +328,11 @@ void __init bootmem_init(void)
|
|||||||
memblock_dump_all();
|
memblock_dump_all();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void __init arch_setup_zero_pages(void)
|
||||||
|
{
|
||||||
|
__zero_page = phys_to_page(__pa_symbol(empty_zero_page));
|
||||||
|
}
|
||||||
|
|
||||||
void __init arch_mm_preinit(void)
|
void __init arch_mm_preinit(void)
|
||||||
{
|
{
|
||||||
unsigned int flags = SWIOTLB_VERBOSE;
|
unsigned int flags = SWIOTLB_VERBOSE;
|
||||||
|
|||||||
@@ -64,13 +64,6 @@ static bool rodata_is_rw __ro_after_init = true;
|
|||||||
*/
|
*/
|
||||||
long __section(".mmuoff.data.write") __early_cpu_boot_status;
|
long __section(".mmuoff.data.write") __early_cpu_boot_status;
|
||||||
|
|
||||||
/*
|
|
||||||
* Empty_zero_page is a special page that is used for zero-initialized data
|
|
||||||
* and COW.
|
|
||||||
*/
|
|
||||||
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
|
|
||||||
EXPORT_SYMBOL(empty_zero_page);
|
|
||||||
|
|
||||||
static DEFINE_SPINLOCK(swapper_pgdir_lock);
|
static DEFINE_SPINLOCK(swapper_pgdir_lock);
|
||||||
static DEFINE_MUTEX(fixmap_lock);
|
static DEFINE_MUTEX(fixmap_lock);
|
||||||
|
|
||||||
@@ -2344,7 +2337,7 @@ void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_ARCH_HAS_PKEYS
|
#ifdef CONFIG_ARCH_HAS_PKEYS
|
||||||
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val)
|
int arch_set_user_pkey_access(int pkey, unsigned long init_val)
|
||||||
{
|
{
|
||||||
u64 new_por;
|
u64 new_por;
|
||||||
u64 old_por;
|
u64 old_por;
|
||||||
|
|||||||
@@ -76,9 +76,6 @@
|
|||||||
#define MAX_SWAPFILES_CHECK() \
|
#define MAX_SWAPFILES_CHECK() \
|
||||||
BUILD_BUG_ON(MAX_SWAPFILES_SHIFT != 5)
|
BUILD_BUG_ON(MAX_SWAPFILES_SHIFT != 5)
|
||||||
|
|
||||||
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
|
|
||||||
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
|
||||||
|
|
||||||
extern void load_pgd(unsigned long pg_dir);
|
extern void load_pgd(unsigned long pg_dir);
|
||||||
extern pte_t invalid_pte_table[PTRS_PER_PTE];
|
extern pte_t invalid_pte_table[PTRS_PER_PTE];
|
||||||
|
|
||||||
|
|||||||
@@ -38,9 +38,6 @@ pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss;
|
|||||||
pte_t kernel_pte_tables[PTRS_KERN_TABLE] __page_aligned_bss;
|
pte_t kernel_pte_tables[PTRS_KERN_TABLE] __page_aligned_bss;
|
||||||
|
|
||||||
EXPORT_SYMBOL(invalid_pte_table);
|
EXPORT_SYMBOL(invalid_pte_table);
|
||||||
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
|
|
||||||
__page_aligned_bss;
|
|
||||||
EXPORT_SYMBOL(empty_zero_page);
|
|
||||||
|
|
||||||
void free_initmem(void)
|
void free_initmem(void)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -90,7 +90,7 @@ struct page;
|
|||||||
#define virt_to_page(kaddr) pfn_to_page(PFN_DOWN(__pa(kaddr)))
|
#define virt_to_page(kaddr) pfn_to_page(PFN_DOWN(__pa(kaddr)))
|
||||||
|
|
||||||
/* Default vm area behavior is non-executable. */
|
/* Default vm area behavior is non-executable. */
|
||||||
#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC
|
#define VMA_DATA_DEFAULT_FLAGS VMA_DATA_FLAGS_NON_EXEC
|
||||||
|
|
||||||
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
|
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
|
||||||
|
|
||||||
|
|||||||
@@ -14,9 +14,6 @@
|
|||||||
#include <asm/page.h>
|
#include <asm/page.h>
|
||||||
#include <asm-generic/pgtable-nopmd.h>
|
#include <asm-generic/pgtable-nopmd.h>
|
||||||
|
|
||||||
/* A handy thing to have if one has the RAM. Declared in head.S */
|
|
||||||
extern unsigned long empty_zero_page;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The PTE model described here is that of the Hexagon Virtual Machine,
|
* The PTE model described here is that of the Hexagon Virtual Machine,
|
||||||
* which autonomously walks 2-level page tables. At a lower level, we
|
* which autonomously walks 2-level page tables. At a lower level, we
|
||||||
@@ -348,9 +345,6 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
|
|||||||
return (unsigned long)__va(pmd_val(pmd) & PAGE_MASK);
|
return (unsigned long)__va(pmd_val(pmd) & PAGE_MASK);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ZERO_PAGE - returns the globally shared zero page */
|
|
||||||
#define ZERO_PAGE(vaddr) (virt_to_page(&empty_zero_page))
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
|
* Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
|
||||||
* are !pte_none() && !pte_present().
|
* are !pte_none() && !pte_present().
|
||||||
|
|||||||
@@ -216,8 +216,3 @@ __head_s_vaddr_target:
|
|||||||
.p2align PAGE_SHIFT
|
.p2align PAGE_SHIFT
|
||||||
ENTRY(external_cmdline_buffer)
|
ENTRY(external_cmdline_buffer)
|
||||||
.fill _PAGE_SIZE,1,0
|
.fill _PAGE_SIZE,1,0
|
||||||
|
|
||||||
.data
|
|
||||||
.p2align PAGE_SHIFT
|
|
||||||
ENTRY(empty_zero_page)
|
|
||||||
.fill _PAGE_SIZE,1,0
|
|
||||||
|
|||||||
@@ -17,7 +17,6 @@ EXPORT_SYMBOL(raw_copy_to_user);
|
|||||||
EXPORT_SYMBOL(__vmgetie);
|
EXPORT_SYMBOL(__vmgetie);
|
||||||
EXPORT_SYMBOL(__vmsetie);
|
EXPORT_SYMBOL(__vmsetie);
|
||||||
EXPORT_SYMBOL(__vmyield);
|
EXPORT_SYMBOL(__vmyield);
|
||||||
EXPORT_SYMBOL(empty_zero_page);
|
|
||||||
EXPORT_SYMBOL(memcpy);
|
EXPORT_SYMBOL(memcpy);
|
||||||
EXPORT_SYMBOL(memset);
|
EXPORT_SYMBOL(memset);
|
||||||
|
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ config LOONGARCH
|
|||||||
select ARCH_NEEDS_DEFER_KASAN
|
select ARCH_NEEDS_DEFER_KASAN
|
||||||
select ARCH_DISABLE_KASAN_INLINE
|
select ARCH_DISABLE_KASAN_INLINE
|
||||||
select ARCH_ENABLE_MEMORY_HOTPLUG
|
select ARCH_ENABLE_MEMORY_HOTPLUG
|
||||||
select ARCH_ENABLE_MEMORY_HOTREMOVE
|
|
||||||
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
|
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
|
||||||
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
|
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
|
||||||
select ARCH_HAS_CPU_FINALIZE_INIT
|
select ARCH_HAS_CPU_FINALIZE_INIT
|
||||||
|
|||||||
@@ -104,7 +104,7 @@ struct page *tlb_virt_to_page(unsigned long kaddr);
|
|||||||
extern int __virt_addr_valid(volatile void *kaddr);
|
extern int __virt_addr_valid(volatile void *kaddr);
|
||||||
#define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
|
#define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
|
||||||
|
|
||||||
#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC
|
#define VMA_DATA_DEFAULT_FLAGS VMA_DATA_FLAGS_TSK_EXEC
|
||||||
|
|
||||||
#include <asm-generic/memory_model.h>
|
#include <asm-generic/memory_model.h>
|
||||||
#include <asm-generic/getorder.h>
|
#include <asm-generic/getorder.h>
|
||||||
|
|||||||
@@ -74,15 +74,6 @@
|
|||||||
struct mm_struct;
|
struct mm_struct;
|
||||||
struct vm_area_struct;
|
struct vm_area_struct;
|
||||||
|
|
||||||
/*
|
|
||||||
* ZERO_PAGE is a global shared page that is always zero; used
|
|
||||||
* for zero-mapped memory areas etc..
|
|
||||||
*/
|
|
||||||
|
|
||||||
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
|
|
||||||
|
|
||||||
#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page)
|
|
||||||
|
|
||||||
#ifdef CONFIG_32BIT
|
#ifdef CONFIG_32BIT
|
||||||
|
|
||||||
#define VMALLOC_START (vm_map_base + PCI_IOSIZE + (2 * PAGE_SIZE))
|
#define VMALLOC_START (vm_map_base + PCI_IOSIZE + (2 * PAGE_SIZE))
|
||||||
@@ -113,7 +104,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
|
|||||||
min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits) / 2) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
|
min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits) / 2) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define vmemmap ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
|
#define VMEMMAP_ALIGN max(PMD_SIZE, MAX_FOLIO_VMEMMAP_ALIGN)
|
||||||
|
#define vmemmap ((struct page *)(ALIGN(VMALLOC_END, VMEMMAP_ALIGN)))
|
||||||
#define VMEMMAP_END ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
|
#define VMEMMAP_END ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
|
||||||
|
|
||||||
#define KFENCE_AREA_START (VMEMMAP_END + 1)
|
#define KFENCE_AREA_START (VMEMMAP_END + 1)
|
||||||
|
|||||||
@@ -36,9 +36,6 @@
|
|||||||
#include <asm/pgalloc.h>
|
#include <asm/pgalloc.h>
|
||||||
#include <asm/tlb.h>
|
#include <asm/tlb.h>
|
||||||
|
|
||||||
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
|
|
||||||
EXPORT_SYMBOL(empty_zero_page);
|
|
||||||
|
|
||||||
void copy_user_highpage(struct page *to, struct page *from,
|
void copy_user_highpage(struct page *to, struct page *from,
|
||||||
unsigned long vaddr, struct vm_area_struct *vma)
|
unsigned long vaddr, struct vm_area_struct *vma)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -110,15 +110,6 @@ extern unsigned long m68k_vmalloc_end;
|
|||||||
#define VMALLOC_END KMAP_START
|
#define VMALLOC_END KMAP_START
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* zero page used for uninitialized stuff */
|
|
||||||
extern void *empty_zero_page;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* ZERO_PAGE is a global shared page that is always zero: used
|
|
||||||
* for zero-mapped memory areas etc..
|
|
||||||
*/
|
|
||||||
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
|
||||||
|
|
||||||
extern void kernel_set_cachemode(void *addr, unsigned long size, int cmode);
|
extern void kernel_set_cachemode(void *addr, unsigned long size, int cmode);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|||||||
@@ -30,13 +30,6 @@
|
|||||||
|
|
||||||
#define swapper_pg_dir ((pgd_t *) 0)
|
#define swapper_pg_dir ((pgd_t *) 0)
|
||||||
|
|
||||||
/*
|
|
||||||
* ZERO_PAGE is a global shared page that is always zero: used
|
|
||||||
* for zero-mapped memory areas etc..
|
|
||||||
*/
|
|
||||||
extern void *empty_zero_page;
|
|
||||||
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* All 32bit addresses are effectively valid for vmalloc...
|
* All 32bit addresses are effectively valid for vmalloc...
|
||||||
* Sort of meaningless for non-VM targets.
|
* Sort of meaningless for non-VM targets.
|
||||||
|
|||||||
@@ -33,13 +33,6 @@
|
|||||||
#include <asm/sections.h>
|
#include <asm/sections.h>
|
||||||
#include <asm/tlb.h>
|
#include <asm/tlb.h>
|
||||||
|
|
||||||
/*
|
|
||||||
* ZERO_PAGE is a special page that is used for zero-initialized
|
|
||||||
* data and COW.
|
|
||||||
*/
|
|
||||||
void *empty_zero_page;
|
|
||||||
EXPORT_SYMBOL(empty_zero_page);
|
|
||||||
|
|
||||||
void __init arch_zone_limits_init(unsigned long *max_zone_pfns)
|
void __init arch_zone_limits_init(unsigned long *max_zone_pfns)
|
||||||
{
|
{
|
||||||
max_zone_pfns[ZONE_DMA] = PFN_DOWN(memblock_end_of_DRAM());
|
max_zone_pfns[ZONE_DMA] = PFN_DOWN(memblock_end_of_DRAM());
|
||||||
@@ -71,8 +64,6 @@ void __init paging_init(void)
|
|||||||
unsigned long end_mem = memory_end & PAGE_MASK;
|
unsigned long end_mem = memory_end & PAGE_MASK;
|
||||||
|
|
||||||
high_memory = (void *) end_mem;
|
high_memory = (void *) end_mem;
|
||||||
|
|
||||||
empty_zero_page = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* CONFIG_MMU */
|
#endif /* CONFIG_MMU */
|
||||||
|
|||||||
@@ -41,8 +41,6 @@ void __init paging_init(void)
|
|||||||
unsigned long next_pgtable;
|
unsigned long next_pgtable;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
empty_zero_page = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
|
|
||||||
|
|
||||||
pg_dir = swapper_pg_dir;
|
pg_dir = swapper_pg_dir;
|
||||||
memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));
|
memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));
|
||||||
|
|
||||||
|
|||||||
@@ -498,12 +498,6 @@ void __init paging_init(void)
|
|||||||
|
|
||||||
early_memtest(min_addr, max_addr);
|
early_memtest(min_addr, max_addr);
|
||||||
|
|
||||||
/*
|
|
||||||
* initialize the bad page table and bad page to point
|
|
||||||
* to a couple of allocated pages
|
|
||||||
*/
|
|
||||||
empty_zero_page = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set up SFC/DFC registers
|
* Set up SFC/DFC registers
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -43,8 +43,6 @@ void __init paging_init(void)
|
|||||||
unsigned long bootmem_end;
|
unsigned long bootmem_end;
|
||||||
unsigned long size;
|
unsigned long size;
|
||||||
|
|
||||||
empty_zero_page = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
|
|
||||||
|
|
||||||
address = PAGE_OFFSET;
|
address = PAGE_OFFSET;
|
||||||
pg_dir = swapper_pg_dir;
|
pg_dir = swapper_pg_dir;
|
||||||
memset (swapper_pg_dir, 0, sizeof (swapper_pg_dir));
|
memset (swapper_pg_dir, 0, sizeof (swapper_pg_dir));
|
||||||
|
|||||||
@@ -207,16 +207,6 @@ extern pte_t *va_to_pte(unsigned long address);
|
|||||||
* Also, write permissions imply read permissions.
|
* Also, write permissions imply read permissions.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef __ASSEMBLER__
|
|
||||||
/*
|
|
||||||
* ZERO_PAGE is a global shared page that is always zero: used
|
|
||||||
* for zero-mapped memory areas etc..
|
|
||||||
*/
|
|
||||||
extern unsigned long empty_zero_page[1024];
|
|
||||||
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
|
||||||
|
|
||||||
#endif /* __ASSEMBLER__ */
|
|
||||||
|
|
||||||
#define pte_none(pte) ((pte_val(pte) & ~_PTE_NONE_MASK) == 0)
|
#define pte_none(pte) ((pte_val(pte) & ~_PTE_NONE_MASK) == 0)
|
||||||
#define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT)
|
#define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT)
|
||||||
#define pte_clear(mm, addr, ptep) \
|
#define pte_clear(mm, addr, ptep) \
|
||||||
@@ -328,7 +318,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
|
|||||||
|
|
||||||
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
|
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
|
||||||
struct vm_area_struct;
|
struct vm_area_struct;
|
||||||
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
|
static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
|
||||||
unsigned long address, pte_t *ptep)
|
unsigned long address, pte_t *ptep)
|
||||||
{
|
{
|
||||||
return (pte_update(ptep, _PAGE_ACCESSED, 0) & _PAGE_ACCESSED) != 0;
|
return (pte_update(ptep, _PAGE_ACCESSED, 0) & _PAGE_ACCESSED) != 0;
|
||||||
|
|||||||
@@ -39,10 +39,6 @@
|
|||||||
#include <asm/processor.h>
|
#include <asm/processor.h>
|
||||||
|
|
||||||
.section .data
|
.section .data
|
||||||
.global empty_zero_page
|
|
||||||
.align 12
|
|
||||||
empty_zero_page:
|
|
||||||
.space PAGE_SIZE
|
|
||||||
.global swapper_pg_dir
|
.global swapper_pg_dir
|
||||||
swapper_pg_dir:
|
swapper_pg_dir:
|
||||||
.space PAGE_SIZE
|
.space PAGE_SIZE
|
||||||
|
|||||||
@@ -33,8 +33,6 @@ EXPORT_SYMBOL(memcpy);
|
|||||||
EXPORT_SYMBOL(memmove);
|
EXPORT_SYMBOL(memmove);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
EXPORT_SYMBOL(empty_zero_page);
|
|
||||||
|
|
||||||
EXPORT_SYMBOL(mbc);
|
EXPORT_SYMBOL(mbc);
|
||||||
|
|
||||||
extern void __divsi3(void);
|
extern void __divsi3(void);
|
||||||
|
|||||||
@@ -213,7 +213,7 @@ extern bool __virt_addr_valid(const volatile void *kaddr);
|
|||||||
#define virt_addr_valid(kaddr) \
|
#define virt_addr_valid(kaddr) \
|
||||||
__virt_addr_valid((const volatile void *) (kaddr))
|
__virt_addr_valid((const volatile void *) (kaddr))
|
||||||
|
|
||||||
#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC
|
#define VMA_DATA_DEFAULT_FLAGS VMA_DATA_FLAGS_TSK_EXEC
|
||||||
|
|
||||||
extern unsigned long __kaslr_offset;
|
extern unsigned long __kaslr_offset;
|
||||||
static inline unsigned long kaslr_offset(void)
|
static inline unsigned long kaslr_offset(void)
|
||||||
|
|||||||
@@ -56,10 +56,7 @@ unsigned long empty_zero_page, zero_page_mask;
|
|||||||
EXPORT_SYMBOL_GPL(empty_zero_page);
|
EXPORT_SYMBOL_GPL(empty_zero_page);
|
||||||
EXPORT_SYMBOL(zero_page_mask);
|
EXPORT_SYMBOL(zero_page_mask);
|
||||||
|
|
||||||
/*
|
void __init arch_setup_zero_pages(void)
|
||||||
* Not static inline because used by IP27 special magic initialization code
|
|
||||||
*/
|
|
||||||
static void __init setup_zero_pages(void)
|
|
||||||
{
|
{
|
||||||
unsigned int order;
|
unsigned int order;
|
||||||
|
|
||||||
@@ -450,7 +447,6 @@ void __init arch_mm_preinit(void)
|
|||||||
BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (PFN_PTE_SHIFT > PAGE_SHIFT));
|
BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (PFN_PTE_SHIFT > PAGE_SHIFT));
|
||||||
|
|
||||||
maar_init();
|
maar_init();
|
||||||
setup_zero_pages(); /* Setup zeroed pages. */
|
|
||||||
highmem_init();
|
highmem_init();
|
||||||
|
|
||||||
#ifdef CONFIG_64BIT
|
#ifdef CONFIG_64BIT
|
||||||
@@ -461,11 +457,6 @@ void __init arch_mm_preinit(void)
|
|||||||
0x80000000 - 4, KCORE_TEXT);
|
0x80000000 - 4, KCORE_TEXT);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#else /* CONFIG_NUMA */
|
|
||||||
void __init arch_mm_preinit(void)
|
|
||||||
{
|
|
||||||
setup_zero_pages(); /* This comes from node 0 */
|
|
||||||
}
|
|
||||||
#endif /* !CONFIG_NUMA */
|
#endif /* !CONFIG_NUMA */
|
||||||
|
|
||||||
void free_init_pages(const char *what, unsigned long begin, unsigned long end)
|
void free_init_pages(const char *what, unsigned long begin, unsigned long end)
|
||||||
|
|||||||
@@ -85,7 +85,7 @@ extern struct page *mem_map;
|
|||||||
# define virt_to_page(vaddr) pfn_to_page(PFN_DOWN(virt_to_phys(vaddr)))
|
# define virt_to_page(vaddr) pfn_to_page(PFN_DOWN(virt_to_phys(vaddr)))
|
||||||
# define virt_addr_valid(vaddr) pfn_valid(PFN_DOWN(virt_to_phys(vaddr)))
|
# define virt_addr_valid(vaddr) pfn_valid(PFN_DOWN(virt_to_phys(vaddr)))
|
||||||
|
|
||||||
# define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC
|
# define VMA_DATA_DEFAULT_FLAGS VMA_DATA_FLAGS_NON_EXEC
|
||||||
|
|
||||||
#include <asm-generic/memory_model.h>
|
#include <asm-generic/memory_model.h>
|
||||||
|
|
||||||
|
|||||||
@@ -65,13 +65,6 @@ struct mm_struct;
|
|||||||
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
|
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
|
||||||
#define PGDIR_MASK (~(PGDIR_SIZE-1))
|
#define PGDIR_MASK (~(PGDIR_SIZE-1))
|
||||||
|
|
||||||
/*
|
|
||||||
* ZERO_PAGE is a global shared page that is always zero: used
|
|
||||||
* for zero-mapped memory areas etc..
|
|
||||||
*/
|
|
||||||
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
|
|
||||||
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
|
||||||
|
|
||||||
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
|
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
|
||||||
extern pte_t invalid_pte_table[PAGE_SIZE/sizeof(pte_t)];
|
extern pte_t invalid_pte_table[PAGE_SIZE/sizeof(pte_t)];
|
||||||
|
|
||||||
|
|||||||
@@ -23,16 +23,6 @@
|
|||||||
#include <asm/asm-offsets.h>
|
#include <asm/asm-offsets.h>
|
||||||
#include <asm/asm-macros.h>
|
#include <asm/asm-macros.h>
|
||||||
|
|
||||||
/*
|
|
||||||
* ZERO_PAGE is a special page that is used for zero-initialized
|
|
||||||
* data and COW.
|
|
||||||
*/
|
|
||||||
.data
|
|
||||||
.global empty_zero_page
|
|
||||||
.align 12
|
|
||||||
empty_zero_page:
|
|
||||||
.space PAGE_SIZE
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This global variable is used as an extension to the nios'
|
* This global variable is used as an extension to the nios'
|
||||||
* STATUS register to emulate a user/supervisor mode.
|
* STATUS register to emulate a user/supervisor mode.
|
||||||
|
|||||||
@@ -20,7 +20,6 @@ EXPORT_SYMBOL(memmove);
|
|||||||
|
|
||||||
/* memory management */
|
/* memory management */
|
||||||
|
|
||||||
EXPORT_SYMBOL(empty_zero_page);
|
|
||||||
EXPORT_SYMBOL(flush_icache_range);
|
EXPORT_SYMBOL(flush_icache_range);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|||||||
@@ -179,10 +179,6 @@ extern void paging_init(void);
|
|||||||
__pgprot(_PAGE_ALL | _PAGE_SRE | _PAGE_SWE \
|
__pgprot(_PAGE_ALL | _PAGE_SRE | _PAGE_SWE \
|
||||||
| _PAGE_SHARED | _PAGE_DIRTY | _PAGE_EXEC | _PAGE_CI)
|
| _PAGE_SHARED | _PAGE_DIRTY | _PAGE_EXEC | _PAGE_CI)
|
||||||
|
|
||||||
/* zero page used for uninitialized stuff */
|
|
||||||
extern unsigned long empty_zero_page[2048];
|
|
||||||
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
|
||||||
|
|
||||||
#define pte_none(x) (!pte_val(x))
|
#define pte_none(x) (!pte_val(x))
|
||||||
#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
|
#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
|
||||||
#define pte_clear(mm, addr, xp) do { pte_val(*(xp)) = 0; } while (0)
|
#define pte_clear(mm, addr, xp) do { pte_val(*(xp)) = 0; } while (0)
|
||||||
|
|||||||
@@ -1563,9 +1563,6 @@ _string_nl:
|
|||||||
*/
|
*/
|
||||||
.section .data,"aw"
|
.section .data,"aw"
|
||||||
.align 8192
|
.align 8192
|
||||||
.global empty_zero_page
|
|
||||||
empty_zero_page:
|
|
||||||
.space 8192
|
|
||||||
|
|
||||||
.global swapper_pg_dir
|
.global swapper_pg_dir
|
||||||
swapper_pg_dir:
|
swapper_pg_dir:
|
||||||
|
|||||||
@@ -40,7 +40,6 @@ DECLARE_EXPORT(__ashldi3);
|
|||||||
DECLARE_EXPORT(__lshrdi3);
|
DECLARE_EXPORT(__lshrdi3);
|
||||||
DECLARE_EXPORT(__ucmpdi2);
|
DECLARE_EXPORT(__ucmpdi2);
|
||||||
|
|
||||||
EXPORT_SYMBOL(empty_zero_page);
|
|
||||||
EXPORT_SYMBOL(__copy_tofrom_user);
|
EXPORT_SYMBOL(__copy_tofrom_user);
|
||||||
EXPORT_SYMBOL(__clear_user);
|
EXPORT_SYMBOL(__clear_user);
|
||||||
EXPORT_SYMBOL(memset);
|
EXPORT_SYMBOL(memset);
|
||||||
|
|||||||
@@ -188,9 +188,6 @@ void __init mem_init(void)
|
|||||||
{
|
{
|
||||||
BUG_ON(!mem_map);
|
BUG_ON(!mem_map);
|
||||||
|
|
||||||
/* clear the zero-page */
|
|
||||||
memset((void *)empty_zero_page, 0, PAGE_SIZE);
|
|
||||||
|
|
||||||
printk("mem_init_done ...........................................\n");
|
printk("mem_init_done ...........................................\n");
|
||||||
mem_init_done = 1;
|
mem_init_done = 1;
|
||||||
return;
|
return;
|
||||||
|
|||||||
@@ -262,17 +262,6 @@ extern pgd_t swapper_pg_dir[]; /* declared in init_task.c */
|
|||||||
|
|
||||||
extern pte_t pg0[];
|
extern pte_t pg0[];
|
||||||
|
|
||||||
/* zero page used for uninitialized stuff */
|
|
||||||
|
|
||||||
extern unsigned long *empty_zero_page;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* ZERO_PAGE is a global shared page that is always zero: used
|
|
||||||
* for zero-mapped memory areas etc..
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
|
||||||
|
|
||||||
#define pte_none(x) (pte_val(x) == 0)
|
#define pte_none(x) (pte_val(x) == 0)
|
||||||
#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
|
#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
|
||||||
#define pte_user(x) (pte_val(x) & _PAGE_USER)
|
#define pte_user(x) (pte_val(x) & _PAGE_USER)
|
||||||
@@ -449,19 +438,20 @@ static inline pte_t ptep_get(pte_t *ptep)
|
|||||||
}
|
}
|
||||||
#define ptep_get ptep_get
|
#define ptep_get ptep_get
|
||||||
|
|
||||||
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
|
static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
|
||||||
|
unsigned long addr, pte_t *ptep)
|
||||||
{
|
{
|
||||||
pte_t pte;
|
pte_t pte;
|
||||||
|
|
||||||
pte = ptep_get(ptep);
|
pte = ptep_get(ptep);
|
||||||
if (!pte_young(pte)) {
|
if (!pte_young(pte)) {
|
||||||
return 0;
|
return false;
|
||||||
}
|
}
|
||||||
set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte));
|
set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte));
|
||||||
return 1;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep);
|
bool ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep);
|
||||||
pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep);
|
pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep);
|
||||||
|
|
||||||
struct mm_struct;
|
struct mm_struct;
|
||||||
|
|||||||
@@ -781,18 +781,18 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned lon
|
|||||||
__flush_cache_page(vma, vmaddr, PFN_PHYS(page_to_pfn(page)));
|
__flush_cache_page(vma, vmaddr, PFN_PHYS(page_to_pfn(page)));
|
||||||
}
|
}
|
||||||
|
|
||||||
int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr,
|
bool ptep_clear_flush_young(struct vm_area_struct *vma,
|
||||||
pte_t *ptep)
|
unsigned long addr, pte_t *ptep)
|
||||||
{
|
{
|
||||||
pte_t pte = ptep_get(ptep);
|
pte_t pte = ptep_get(ptep);
|
||||||
|
|
||||||
if (!pte_young(pte))
|
if (!pte_young(pte))
|
||||||
return 0;
|
return false;
|
||||||
set_pte(ptep, pte_mkold(pte));
|
set_pte(ptep, pte_mkold(pte));
|
||||||
#if CONFIG_FLUSH_PAGE_ACCESSED
|
#if CONFIG_FLUSH_PAGE_ACCESSED
|
||||||
__flush_cache_page(vma, addr, PFN_PHYS(pte_pfn(pte)));
|
__flush_cache_page(vma, addr, PFN_PHYS(pte_pfn(pte)));
|
||||||
#endif
|
#endif
|
||||||
return 1;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|||||||
@@ -604,9 +604,6 @@ void __init mem_init(void)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned long *empty_zero_page __ro_after_init;
|
|
||||||
EXPORT_SYMBOL(empty_zero_page);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* pagetable_init() sets up the page tables
|
* pagetable_init() sets up the page tables
|
||||||
*
|
*
|
||||||
@@ -639,9 +636,6 @@ static void __init pagetable_init(void)
|
|||||||
initrd_end - initrd_start, PAGE_KERNEL, 0);
|
initrd_end - initrd_start, PAGE_KERNEL, 0);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
empty_zero_page = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init gateway_init(void)
|
static void __init gateway_init(void)
|
||||||
|
|||||||
@@ -126,7 +126,6 @@ config PPC
|
|||||||
select ARCH_DISABLE_KASAN_INLINE if PPC_RADIX_MMU
|
select ARCH_DISABLE_KASAN_INLINE if PPC_RADIX_MMU
|
||||||
select ARCH_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE
|
select ARCH_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE
|
||||||
select ARCH_ENABLE_MEMORY_HOTPLUG
|
select ARCH_ENABLE_MEMORY_HOTPLUG
|
||||||
select ARCH_ENABLE_MEMORY_HOTREMOVE
|
|
||||||
select ARCH_HAS_COPY_MC if PPC64
|
select ARCH_HAS_COPY_MC if PPC64
|
||||||
select ARCH_HAS_CURRENT_STACK_POINTER
|
select ARCH_HAS_CURRENT_STACK_POINTER
|
||||||
select ARCH_HAS_DEBUG_VIRTUAL
|
select ARCH_HAS_DEBUG_VIRTUAL
|
||||||
|
|||||||
@@ -295,8 +295,8 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p
|
|||||||
* for our hash-based implementation, we fix that up here.
|
* for our hash-based implementation, we fix that up here.
|
||||||
*/
|
*/
|
||||||
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
|
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
|
||||||
static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
|
static inline bool __ptep_test_and_clear_young(struct mm_struct *mm,
|
||||||
unsigned long addr, pte_t *ptep)
|
unsigned long addr, pte_t *ptep)
|
||||||
{
|
{
|
||||||
unsigned long old;
|
unsigned long old;
|
||||||
old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
|
old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
|
||||||
|
|||||||
@@ -349,13 +349,13 @@ static inline unsigned long pte_update(struct mm_struct *mm, unsigned long addr,
|
|||||||
* For radix: H_PAGE_HASHPTE should be zero. Hence we can use the same
|
* For radix: H_PAGE_HASHPTE should be zero. Hence we can use the same
|
||||||
* function for both hash and radix.
|
* function for both hash and radix.
|
||||||
*/
|
*/
|
||||||
static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
|
static inline bool __ptep_test_and_clear_young(struct mm_struct *mm,
|
||||||
unsigned long addr, pte_t *ptep)
|
unsigned long addr, pte_t *ptep)
|
||||||
{
|
{
|
||||||
unsigned long old;
|
unsigned long old;
|
||||||
|
|
||||||
if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
|
if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
|
||||||
return 0;
|
return false;
|
||||||
old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
|
old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
|
||||||
return (old & _PAGE_ACCESSED) != 0;
|
return (old & _PAGE_ACCESSED) != 0;
|
||||||
}
|
}
|
||||||
@@ -1161,24 +1161,24 @@ pud_hugepage_update(struct mm_struct *mm, unsigned long addr, pud_t *pudp,
|
|||||||
* For radix we should always find H_PAGE_HASHPTE zero. Hence
|
* For radix we should always find H_PAGE_HASHPTE zero. Hence
|
||||||
* the below will work for radix too
|
* the below will work for radix too
|
||||||
*/
|
*/
|
||||||
static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
|
static inline bool __pmdp_test_and_clear_young(struct mm_struct *mm,
|
||||||
unsigned long addr, pmd_t *pmdp)
|
unsigned long addr, pmd_t *pmdp)
|
||||||
{
|
{
|
||||||
unsigned long old;
|
unsigned long old;
|
||||||
|
|
||||||
if ((pmd_raw(*pmdp) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
|
if ((pmd_raw(*pmdp) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
|
||||||
return 0;
|
return false;
|
||||||
old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
|
old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
|
||||||
return ((old & _PAGE_ACCESSED) != 0);
|
return ((old & _PAGE_ACCESSED) != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int __pudp_test_and_clear_young(struct mm_struct *mm,
|
static inline bool __pudp_test_and_clear_young(struct mm_struct *mm,
|
||||||
unsigned long addr, pud_t *pudp)
|
unsigned long addr, pud_t *pudp)
|
||||||
{
|
{
|
||||||
unsigned long old;
|
unsigned long old;
|
||||||
|
|
||||||
if ((pud_raw(*pudp) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
|
if ((pud_raw(*pudp) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
|
||||||
return 0;
|
return false;
|
||||||
old = pud_hugepage_update(mm, addr, pudp, _PAGE_ACCESSED, 0);
|
old = pud_hugepage_update(mm, addr, pudp, _PAGE_ACCESSED, 0);
|
||||||
return ((old & _PAGE_ACCESSED) != 0);
|
return ((old & _PAGE_ACCESSED) != 0);
|
||||||
}
|
}
|
||||||
@@ -1323,11 +1323,11 @@ extern int pudp_set_access_flags(struct vm_area_struct *vma,
|
|||||||
pud_t entry, int dirty);
|
pud_t entry, int dirty);
|
||||||
|
|
||||||
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
|
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
|
||||||
extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
||||||
unsigned long address, pmd_t *pmdp);
|
unsigned long address, pmd_t *pmdp);
|
||||||
#define __HAVE_ARCH_PUDP_TEST_AND_CLEAR_YOUNG
|
#define __HAVE_ARCH_PUDP_TEST_AND_CLEAR_YOUNG
|
||||||
extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
|
bool pudp_test_and_clear_young(struct vm_area_struct *vma,
|
||||||
unsigned long address, pud_t *pudp);
|
unsigned long address, pud_t *pudp);
|
||||||
|
|
||||||
|
|
||||||
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
|
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
|
||||||
|
|||||||
@@ -155,7 +155,7 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
|
|||||||
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
|
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
|
||||||
#define ptep_clear_flush_young(__vma, __address, __ptep) \
|
#define ptep_clear_flush_young(__vma, __address, __ptep) \
|
||||||
({ \
|
({ \
|
||||||
int __young = ptep_test_and_clear_young(__vma, __address, __ptep);\
|
bool __young = ptep_test_and_clear_young(__vma, __address, __ptep);\
|
||||||
__young; \
|
__young; \
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|||||||
@@ -101,8 +101,8 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
|
static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
|
||||||
unsigned long addr, pte_t *ptep)
|
unsigned long addr, pte_t *ptep)
|
||||||
{
|
{
|
||||||
unsigned long old;
|
unsigned long old;
|
||||||
|
|
||||||
|
|||||||
@@ -240,8 +240,8 @@ static inline const void *pfn_to_kaddr(unsigned long pfn)
|
|||||||
* and needs to be executable. This means the whole heap ends
|
* and needs to be executable. This means the whole heap ends
|
||||||
* up being executable.
|
* up being executable.
|
||||||
*/
|
*/
|
||||||
#define VM_DATA_DEFAULT_FLAGS32 VM_DATA_FLAGS_TSK_EXEC
|
#define VMA_DATA_DEFAULT_FLAGS32 VMA_DATA_FLAGS_TSK_EXEC
|
||||||
#define VM_DATA_DEFAULT_FLAGS64 VM_DATA_FLAGS_NON_EXEC
|
#define VMA_DATA_DEFAULT_FLAGS64 VMA_DATA_FLAGS_NON_EXEC
|
||||||
|
|
||||||
#ifdef __powerpc64__
|
#ifdef __powerpc64__
|
||||||
#include <asm/page_64.h>
|
#include <asm/page_64.h>
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define VM_DATA_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS32
|
#define VMA_DATA_DEFAULT_FLAGS VMA_DATA_DEFAULT_FLAGS32
|
||||||
|
|
||||||
#if defined(CONFIG_PPC_256K_PAGES) || \
|
#if defined(CONFIG_PPC_256K_PAGES) || \
|
||||||
(defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES))
|
(defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES))
|
||||||
|
|||||||
@@ -84,9 +84,9 @@ extern u64 ppc64_pft_size;
|
|||||||
|
|
||||||
#endif /* __ASSEMBLER__ */
|
#endif /* __ASSEMBLER__ */
|
||||||
|
|
||||||
#define VM_DATA_DEFAULT_FLAGS \
|
#define VMA_DATA_DEFAULT_FLAGS \
|
||||||
(is_32bit_task() ? \
|
(is_32bit_task() ? \
|
||||||
VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
|
VMA_DATA_DEFAULT_FLAGS32 : VMA_DATA_DEFAULT_FLAGS64)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is the default if a program doesn't have a PT_GNU_STACK
|
* This is the default if a program doesn't have a PT_GNU_STACK
|
||||||
@@ -94,12 +94,12 @@ extern u64 ppc64_pft_size;
|
|||||||
* stack by default, so in the absence of a PT_GNU_STACK program header
|
* stack by default, so in the absence of a PT_GNU_STACK program header
|
||||||
* we turn execute permission off.
|
* we turn execute permission off.
|
||||||
*/
|
*/
|
||||||
#define VM_STACK_DEFAULT_FLAGS32 VM_DATA_FLAGS_EXEC
|
#define VMA_STACK_DEFAULT_FLAGS32 VMA_DATA_FLAGS_EXEC
|
||||||
#define VM_STACK_DEFAULT_FLAGS64 VM_DATA_FLAGS_NON_EXEC
|
#define VMA_STACK_DEFAULT_FLAGS64 VMA_DATA_FLAGS_NON_EXEC
|
||||||
|
|
||||||
#define VM_STACK_DEFAULT_FLAGS \
|
#define VMA_STACK_DEFAULT_FLAGS \
|
||||||
(is_32bit_task() ? \
|
(is_32bit_task() ? \
|
||||||
VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64)
|
VMA_STACK_DEFAULT_FLAGS32 : VMA_STACK_DEFAULT_FLAGS64)
|
||||||
|
|
||||||
#include <asm-generic/getorder.h>
|
#include <asm-generic/getorder.h>
|
||||||
|
|
||||||
|
|||||||
@@ -90,12 +90,6 @@ static inline const void *pmd_page_vaddr(pmd_t pmd)
|
|||||||
}
|
}
|
||||||
#define pmd_page_vaddr pmd_page_vaddr
|
#define pmd_page_vaddr pmd_page_vaddr
|
||||||
#endif
|
#endif
|
||||||
/*
|
|
||||||
* ZERO_PAGE is a global shared page that is always zero: used
|
|
||||||
* for zero-mapped memory areas etc..
|
|
||||||
*/
|
|
||||||
extern unsigned long empty_zero_page[];
|
|
||||||
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
|
||||||
|
|
||||||
extern pgd_t swapper_pg_dir[];
|
extern pgd_t swapper_pg_dir[];
|
||||||
|
|
||||||
|
|||||||
@@ -143,10 +143,8 @@ static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
|
|||||||
return __arch_override_mprotect_pkey(vma, prot, pkey);
|
return __arch_override_mprotect_pkey(vma, prot, pkey);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
|
extern int __arch_set_user_pkey_access(int pkey, unsigned long init_val);
|
||||||
unsigned long init_val);
|
static inline int arch_set_user_pkey_access(int pkey, unsigned long init_val)
|
||||||
static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
|
|
||||||
unsigned long init_val)
|
|
||||||
{
|
{
|
||||||
if (!mmu_has_feature(MMU_FTR_PKEY))
|
if (!mmu_has_feature(MMU_FTR_PKEY))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
@@ -160,7 +158,7 @@ static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
|
|||||||
if (pkey == 0)
|
if (pkey == 0)
|
||||||
return init_val ? -EINVAL : 0;
|
return init_val ? -EINVAL : 0;
|
||||||
|
|
||||||
return __arch_set_user_pkey_access(tsk, pkey, init_val);
|
return __arch_set_user_pkey_access(pkey, init_val);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool arch_pkeys_enabled(void)
|
static inline bool arch_pkeys_enabled(void)
|
||||||
|
|||||||
@@ -36,7 +36,6 @@
|
|||||||
#include <linux/gfp.h>
|
#include <linux/gfp.h>
|
||||||
#include <linux/vmalloc.h>
|
#include <linux/vmalloc.h>
|
||||||
#include <linux/highmem.h>
|
#include <linux/highmem.h>
|
||||||
#include <linux/hugetlb.h>
|
|
||||||
#include <linux/kvm_irqfd.h>
|
#include <linux/kvm_irqfd.h>
|
||||||
#include <linux/irqbypass.h>
|
#include <linux/irqbypass.h>
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
|
|||||||
@@ -96,14 +96,14 @@ int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
||||||
unsigned long address, pmd_t *pmdp)
|
unsigned long address, pmd_t *pmdp)
|
||||||
{
|
{
|
||||||
return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
|
return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
|
||||||
}
|
}
|
||||||
|
|
||||||
int pudp_test_and_clear_young(struct vm_area_struct *vma,
|
bool pudp_test_and_clear_young(struct vm_area_struct *vma,
|
||||||
unsigned long address, pud_t *pudp)
|
unsigned long address, pud_t *pudp)
|
||||||
{
|
{
|
||||||
return __pudp_test_and_clear_young(vma->vm_mm, address, pudp);
|
return __pudp_test_and_clear_young(vma->vm_mm, address, pudp);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -335,8 +335,7 @@ static inline void init_iamr(int pkey, u8 init_bits)
|
|||||||
* Set the access rights in AMR IAMR and UAMOR registers for @pkey to that
|
* Set the access rights in AMR IAMR and UAMOR registers for @pkey to that
|
||||||
* specified in @init_val.
|
* specified in @init_val.
|
||||||
*/
|
*/
|
||||||
int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
|
int __arch_set_user_pkey_access(int pkey, unsigned long init_val)
|
||||||
unsigned long init_val)
|
|
||||||
{
|
{
|
||||||
u64 new_amr_bits = 0x0ul;
|
u64 new_amr_bits = 0x0ul;
|
||||||
u64 new_iamr_bits = 0x0ul;
|
u64 new_iamr_bits = 0x0ul;
|
||||||
|
|||||||
@@ -38,9 +38,6 @@
|
|||||||
|
|
||||||
unsigned long long memory_limit __initdata;
|
unsigned long long memory_limit __initdata;
|
||||||
|
|
||||||
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
|
|
||||||
EXPORT_SYMBOL(empty_zero_page);
|
|
||||||
|
|
||||||
pgprot_t __phys_mem_access_prot(unsigned long pfn, unsigned long size,
|
pgprot_t __phys_mem_access_prot(unsigned long pfn, unsigned long size,
|
||||||
pgprot_t vma_prot)
|
pgprot_t vma_prot)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -414,7 +414,7 @@ static vm_fault_t vas_mmap_fault(struct vm_fault *vmf)
|
|||||||
/*
|
/*
|
||||||
* When the LPAR lost credits due to core removal or during
|
* When the LPAR lost credits due to core removal or during
|
||||||
* migration, invalidate the existing mapping for the current
|
* migration, invalidate the existing mapping for the current
|
||||||
* paste addresses and set windows in-active (zap_vma_pages in
|
* paste addresses and set windows in-active (zap_vma() in
|
||||||
* reconfig_close_windows()).
|
* reconfig_close_windows()).
|
||||||
* New mapping will be done later after migration or new credits
|
* New mapping will be done later after migration or new credits
|
||||||
* available. So continue to receive faults if the user space
|
* available. So continue to receive faults if the user space
|
||||||
|
|||||||
@@ -807,7 +807,7 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
|
|||||||
* is done before the original mmap() and after the ioctl.
|
* is done before the original mmap() and after the ioctl.
|
||||||
*/
|
*/
|
||||||
if (vma)
|
if (vma)
|
||||||
zap_vma_pages(vma);
|
zap_vma(vma);
|
||||||
|
|
||||||
mutex_unlock(&task_ref->mmap_mutex);
|
mutex_unlock(&task_ref->mmap_mutex);
|
||||||
mmap_write_unlock(task_ref->mm);
|
mmap_write_unlock(task_ref->mm);
|
||||||
|
|||||||
@@ -21,7 +21,6 @@ config RISCV
|
|||||||
select ARCH_DMA_DEFAULT_COHERENT
|
select ARCH_DMA_DEFAULT_COHERENT
|
||||||
select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
|
select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
|
||||||
select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM_VMEMMAP
|
select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM_VMEMMAP
|
||||||
select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG
|
|
||||||
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
|
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
|
||||||
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
|
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
|
||||||
select ARCH_HAS_BINFMT_FLAT
|
select ARCH_HAS_BINFMT_FLAT
|
||||||
|
|||||||
@@ -204,7 +204,7 @@ static __always_inline void *pfn_to_kaddr(unsigned long pfn)
|
|||||||
(unsigned long)(_addr) >= PAGE_OFFSET && pfn_valid(virt_to_pfn(_addr)); \
|
(unsigned long)(_addr) >= PAGE_OFFSET && pfn_valid(virt_to_pfn(_addr)); \
|
||||||
})
|
})
|
||||||
|
|
||||||
#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC
|
#define VMA_DATA_DEFAULT_FLAGS VMA_DATA_FLAGS_NON_EXEC
|
||||||
|
|
||||||
#include <asm-generic/memory_model.h>
|
#include <asm-generic/memory_model.h>
|
||||||
#include <asm-generic/getorder.h>
|
#include <asm-generic/getorder.h>
|
||||||
|
|||||||
@@ -659,8 +659,8 @@ static inline void pte_clear(struct mm_struct *mm,
|
|||||||
extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
|
extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
|
||||||
pte_t *ptep, pte_t entry, int dirty);
|
pte_t *ptep, pte_t entry, int dirty);
|
||||||
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG /* defined in mm/pgtable.c */
|
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG /* defined in mm/pgtable.c */
|
||||||
extern int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long address,
|
bool ptep_test_and_clear_young(struct vm_area_struct *vma,
|
||||||
pte_t *ptep);
|
unsigned long address, pte_t *ptep);
|
||||||
|
|
||||||
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
|
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
|
||||||
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
|
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
|
||||||
@@ -695,8 +695,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
|
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
|
||||||
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
|
static inline bool ptep_clear_flush_young(struct vm_area_struct *vma,
|
||||||
unsigned long address, pte_t *ptep)
|
unsigned long address, pte_t *ptep)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* This comment is borrowed from x86, but applies equally to RISC-V:
|
* This comment is borrowed from x86, but applies equally to RISC-V:
|
||||||
@@ -1015,8 +1015,8 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
|
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
|
||||||
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
static inline bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
||||||
unsigned long address, pmd_t *pmdp)
|
unsigned long address, pmd_t *pmdp)
|
||||||
{
|
{
|
||||||
return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
|
return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
|
||||||
}
|
}
|
||||||
@@ -1109,8 +1109,8 @@ static inline int pudp_set_access_flags(struct vm_area_struct *vma,
|
|||||||
return ptep_set_access_flags(vma, address, (pte_t *)pudp, pud_pte(entry), dirty);
|
return ptep_set_access_flags(vma, address, (pte_t *)pudp, pud_pte(entry), dirty);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int pudp_test_and_clear_young(struct vm_area_struct *vma,
|
static inline bool pudp_test_and_clear_young(struct vm_area_struct *vma,
|
||||||
unsigned long address, pud_t *pudp)
|
unsigned long address, pud_t *pudp)
|
||||||
{
|
{
|
||||||
return ptep_test_and_clear_young(vma, address, (pte_t *)pudp);
|
return ptep_test_and_clear_young(vma, address, (pte_t *)pudp);
|
||||||
}
|
}
|
||||||
@@ -1284,13 +1284,6 @@ extern u64 satp_mode;
|
|||||||
void paging_init(void);
|
void paging_init(void);
|
||||||
void misc_mem_init(void);
|
void misc_mem_init(void);
|
||||||
|
|
||||||
/*
|
|
||||||
* ZERO_PAGE is a global shared page that is always zero,
|
|
||||||
* used for zero-mapped memory areas, etc.
|
|
||||||
*/
|
|
||||||
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
|
|
||||||
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Use set_p*_safe(), and elide TLB flushing, when confident that *no*
|
* Use set_p*_safe(), and elide TLB flushing, when confident that *no*
|
||||||
* TLB flush will be required as a result of the "set". For example, use
|
* TLB flush will be required as a result of the "set". For example, use
|
||||||
|
|||||||
@@ -230,17 +230,7 @@ int restore_user_shstk(struct task_struct *tsk, unsigned long shstk_ptr)
|
|||||||
static unsigned long allocate_shadow_stack(unsigned long addr, unsigned long size,
|
static unsigned long allocate_shadow_stack(unsigned long addr, unsigned long size,
|
||||||
unsigned long token_offset, bool set_tok)
|
unsigned long token_offset, bool set_tok)
|
||||||
{
|
{
|
||||||
int flags = MAP_ANONYMOUS | MAP_PRIVATE;
|
addr = vm_mmap_shadow_stack(addr, size, 0);
|
||||||
struct mm_struct *mm = current->mm;
|
|
||||||
unsigned long populate;
|
|
||||||
|
|
||||||
if (addr)
|
|
||||||
flags |= MAP_FIXED_NOREPLACE;
|
|
||||||
|
|
||||||
mmap_write_lock(mm);
|
|
||||||
addr = do_mmap(NULL, addr, size, PROT_READ, flags,
|
|
||||||
VM_SHADOW_STACK | VM_WRITE, 0, &populate, NULL);
|
|
||||||
mmap_write_unlock(mm);
|
|
||||||
|
|
||||||
if (!set_tok || IS_ERR_VALUE(addr))
|
if (!set_tok || IS_ERR_VALUE(addr))
|
||||||
goto out;
|
goto out;
|
||||||
|
|||||||
@@ -63,16 +63,13 @@ phys_addr_t phys_ram_base __ro_after_init;
|
|||||||
EXPORT_SYMBOL(phys_ram_base);
|
EXPORT_SYMBOL(phys_ram_base);
|
||||||
|
|
||||||
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
||||||
#define VMEMMAP_ADDR_ALIGN (1ULL << SECTION_SIZE_BITS)
|
#define VMEMMAP_ADDR_ALIGN max(1ULL << SECTION_SIZE_BITS, \
|
||||||
|
MAX_FOLIO_VMEMMAP_ALIGN)
|
||||||
|
|
||||||
unsigned long vmemmap_start_pfn __ro_after_init;
|
unsigned long vmemmap_start_pfn __ro_after_init;
|
||||||
EXPORT_SYMBOL(vmemmap_start_pfn);
|
EXPORT_SYMBOL(vmemmap_start_pfn);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
|
|
||||||
__page_aligned_bss;
|
|
||||||
EXPORT_SYMBOL(empty_zero_page);
|
|
||||||
|
|
||||||
extern char _start[];
|
extern char _start[];
|
||||||
void *_dtb_early_va __initdata;
|
void *_dtb_early_va __initdata;
|
||||||
uintptr_t _dtb_early_pa __initdata;
|
uintptr_t _dtb_early_pa __initdata;
|
||||||
|
|||||||
@@ -29,12 +29,11 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ptep_test_and_clear_young(struct vm_area_struct *vma,
|
bool ptep_test_and_clear_young(struct vm_area_struct *vma,
|
||||||
unsigned long address,
|
unsigned long address, pte_t *ptep)
|
||||||
pte_t *ptep)
|
|
||||||
{
|
{
|
||||||
if (!pte_young(ptep_get(ptep)))
|
if (!pte_young(ptep_get(ptep)))
|
||||||
return 0;
|
return false;
|
||||||
return test_and_clear_bit(_PAGE_ACCESSED_OFFSET, &pte_val(*ptep));
|
return test_and_clear_bit(_PAGE_ACCESSED_OFFSET, &pte_val(*ptep));
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(ptep_test_and_clear_young);
|
EXPORT_SYMBOL_GPL(ptep_test_and_clear_young);
|
||||||
|
|||||||
@@ -85,7 +85,6 @@ config S390
|
|||||||
select ARCH_32BIT_USTAT_F_TINODE
|
select ARCH_32BIT_USTAT_F_TINODE
|
||||||
select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
|
select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
|
||||||
select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM
|
select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM
|
||||||
select ARCH_ENABLE_MEMORY_HOTREMOVE
|
|
||||||
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
|
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
|
||||||
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
|
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
|
||||||
select ARCH_HAS_CC_CAN_LINK
|
select ARCH_HAS_CC_CAN_LINK
|
||||||
|
|||||||
@@ -277,7 +277,7 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
|
|||||||
|
|
||||||
#define virt_addr_valid(kaddr) pfn_valid(phys_to_pfn(__pa_nodebug((unsigned long)(kaddr))))
|
#define virt_addr_valid(kaddr) pfn_valid(phys_to_pfn(__pa_nodebug((unsigned long)(kaddr))))
|
||||||
|
|
||||||
#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC
|
#define VMA_DATA_DEFAULT_FLAGS VMA_DATA_FLAGS_NON_EXEC
|
||||||
|
|
||||||
#endif /* !__ASSEMBLER__ */
|
#endif /* !__ASSEMBLER__ */
|
||||||
|
|
||||||
|
|||||||
@@ -1164,8 +1164,8 @@ pte_t ptep_xchg_direct(struct mm_struct *, unsigned long, pte_t *, pte_t);
|
|||||||
pte_t ptep_xchg_lazy(struct mm_struct *, unsigned long, pte_t *, pte_t);
|
pte_t ptep_xchg_lazy(struct mm_struct *, unsigned long, pte_t *, pte_t);
|
||||||
|
|
||||||
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
|
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
|
||||||
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
|
static inline bool ptep_test_and_clear_young(struct vm_area_struct *vma,
|
||||||
unsigned long addr, pte_t *ptep)
|
unsigned long addr, pte_t *ptep)
|
||||||
{
|
{
|
||||||
pte_t pte = *ptep;
|
pte_t pte = *ptep;
|
||||||
|
|
||||||
@@ -1174,8 +1174,8 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
|
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
|
||||||
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
|
static inline bool ptep_clear_flush_young(struct vm_area_struct *vma,
|
||||||
unsigned long address, pte_t *ptep)
|
unsigned long address, pte_t *ptep)
|
||||||
{
|
{
|
||||||
return ptep_test_and_clear_young(vma, address, ptep);
|
return ptep_test_and_clear_young(vma, address, ptep);
|
||||||
}
|
}
|
||||||
@@ -1683,8 +1683,8 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
|
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
|
||||||
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
static inline bool pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
||||||
unsigned long addr, pmd_t *pmdp)
|
unsigned long addr, pmd_t *pmdp)
|
||||||
{
|
{
|
||||||
pmd_t pmd = *pmdp;
|
pmd_t pmd = *pmdp;
|
||||||
|
|
||||||
@@ -1693,8 +1693,8 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
|
#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
|
||||||
static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
|
static inline bool pmdp_clear_flush_young(struct vm_area_struct *vma,
|
||||||
unsigned long addr, pmd_t *pmdp)
|
unsigned long addr, pmd_t *pmdp)
|
||||||
{
|
{
|
||||||
VM_BUG_ON(addr & ~HPAGE_MASK);
|
VM_BUG_ON(addr & ~HPAGE_MASK);
|
||||||
return pmdp_test_and_clear_young(vma, addr, pmdp);
|
return pmdp_test_and_clear_young(vma, addr, pmdp);
|
||||||
|
|||||||
@@ -89,7 +89,7 @@ void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned lo
|
|||||||
if (!vma)
|
if (!vma)
|
||||||
return;
|
return;
|
||||||
if (!is_vm_hugetlb_page(vma))
|
if (!is_vm_hugetlb_page(vma))
|
||||||
zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
|
zap_vma_range(vma, vmaddr, min(end, vma->vm_end) - vmaddr);
|
||||||
vmaddr = vma->vm_end;
|
vmaddr = vma->vm_end;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ unsigned long empty_zero_page, zero_page_mask;
|
|||||||
EXPORT_SYMBOL(empty_zero_page);
|
EXPORT_SYMBOL(empty_zero_page);
|
||||||
EXPORT_SYMBOL(zero_page_mask);
|
EXPORT_SYMBOL(zero_page_mask);
|
||||||
|
|
||||||
static void __init setup_zero_pages(void)
|
void __init arch_setup_zero_pages(void)
|
||||||
{
|
{
|
||||||
unsigned long total_pages = memblock_estimated_nr_free_pages();
|
unsigned long total_pages = memblock_estimated_nr_free_pages();
|
||||||
unsigned int order;
|
unsigned int order;
|
||||||
@@ -159,8 +159,6 @@ void __init arch_mm_preinit(void)
|
|||||||
cpumask_set_cpu(0, mm_cpumask(&init_mm));
|
cpumask_set_cpu(0, mm_cpumask(&init_mm));
|
||||||
|
|
||||||
pv_init();
|
pv_init();
|
||||||
|
|
||||||
setup_zero_pages(); /* Setup zeroed pages. */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned long memory_block_size_bytes(void)
|
unsigned long memory_block_size_bytes(void)
|
||||||
|
|||||||
@@ -20,14 +20,6 @@
|
|||||||
#ifndef __ASSEMBLER__
|
#ifndef __ASSEMBLER__
|
||||||
#include <asm/addrspace.h>
|
#include <asm/addrspace.h>
|
||||||
#include <asm/fixmap.h>
|
#include <asm/fixmap.h>
|
||||||
|
|
||||||
/*
|
|
||||||
* ZERO_PAGE is a global shared page that is always zero: used
|
|
||||||
* for zero-mapped memory areas etc..
|
|
||||||
*/
|
|
||||||
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
|
|
||||||
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
|
||||||
|
|
||||||
#endif /* !__ASSEMBLER__ */
|
#endif /* !__ASSEMBLER__ */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user