kernel: 6.1: Synchronize MGLRU patches with upstream

Replace the refreshed 5.15 backports with backports for 6.1.

This fixes FMODE_CAN_ODIRECT having the same value as
FMODE_NOREUSE.

Signed-off-by: Kazuki Hashimoto <kazukih0205@gmail.com>
Author: Kazuki Hashimoto <kazukih0205@gmail.com>, 2023-05-24 23:56:36 +09:00; committed by Christian Marangi
parent 0063e71d66
commit b28b8ed1f4
18 changed files with 1505 additions and 165 deletions
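The flag collision called out in the commit message is easy to reproduce outside the kernel: if a backport reuses a bit that the target kernel already assigns to FMODE_CAN_ODIRECT, every FMODE_NOREUSE test also fires for O_DIRECT-capable files. A minimal user-space sketch of that failure mode follows; the bit positions and the *_BAD/*_OK names are illustrative only, not the values used in include/linux/fs.h.

#include <stdio.h>

/* Illustrative stand-ins for fmode_t bits; the real definitions live in
 * include/linux/fs.h and are not reproduced here. */
#define FMODE_CAN_ODIRECT (1u << 22)
#define FMODE_NOREUSE_BAD (1u << 22)  /* broken backport: same bit as FMODE_CAN_ODIRECT */
#define FMODE_NOREUSE_OK  (1u << 23)  /* refreshed backport: a free bit */

int main(void)
{
	unsigned int f_mode = FMODE_CAN_ODIRECT; /* a file that merely supports O_DIRECT */

	/* With the colliding bit, a NOREUSE check misfires on every such file: */
	printf("bogus NOREUSE hit: %d\n", !!(f_mode & FMODE_NOREUSE_BAD)); /* prints 1 */
	/* With a distinct bit, the check behaves as intended: */
	printf("real NOREUSE hit:  %d\n", !!(f_mode & FMODE_NOREUSE_OK));  /* prints 0 */
	return 0;
}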


@@ -1,7 +1,7 @@
From 8c20e2eb5f2a0175b774134685e4d7bd93e85ff8 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Wed, 21 Dec 2022 21:18:59 -0700
Subject: [PATCH 01/19] UPSTREAM: mm: multi-gen LRU: rename lru_gen_struct to
 lru_gen_folio

Patch series "mm: multi-gen LRU: memcg LRU", v3.
@@ -115,6 +115,10 @@ Cc: Mike Rapoport <rppt@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Bug: 274865848
(cherry picked from commit 391655fe08d1f942359a11148aa9aaf3f99d6d6f)
Change-Id: I7df67e0e2435ba28f10eaa57d28d98b61a9210a6
Signed-off-by: T.J. Mercier <tjmercier@google.com>
---
 include/linux/mm_inline.h | 4 ++--
 include/linux/mmzone.h | 6 +++---
@@ -122,9 +126,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
 mm/workingset.c | 4 ++--
 4 files changed, 24 insertions(+), 24 deletions(-)
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index e8ed225d8f7ca..f63968bd7de59 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -178,7 +178,7 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *foli
int zone = folio_zonenum(folio);
int delta = folio_nr_pages(folio);
enum lru_list lru = type * LRU_INACTIVE_FILE;
@@ -133,7 +139,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS);
VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS);
@@ -224,7 +224,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,
int gen = folio_lru_gen(folio);
int type = folio_is_file_lru(folio);
int zone = folio_zonenum(folio);
@@ -142,6 +148,8 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 5f74891556f33..bd3e4689f72dc 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -404,7 +404,7 @@ enum {
@@ -171,9 +179,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
/* to concurrently iterate lru_gen_mm_list */
struct lru_gen_mm_state mm_state;
#endif
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d18296109aa7e..27142caf284c1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3190,7 +3190,7 @@ static int get_nr_gens(struct lruvec *lruvec, int type)
static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
{
@@ -191,7 +201,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
int hist = lru_hist_from_seq(lrugen->min_seq[type]);
pos->refaulted = lrugen->avg_refaulted[type][tier] +
@@ -3611,7 +3611,7 @@ static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain,
static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover)
{
int hist, tier;
@@ -200,7 +210,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1;
unsigned long seq = carryover ? lrugen->min_seq[type] : lrugen->max_seq + 1;
@@ -3688,7 +3688,7 @@ static int folio_update_gen(struct folio *folio, int gen)
static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
{
int type = folio_is_file_lru(folio);
@@ -209,7 +219,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
unsigned long new_flags, old_flags = READ_ONCE(folio->flags);
@@ -3733,7 +3733,7 @@ static void update_batch_size(struct lru_gen_mm_walk *walk, struct folio *folio,
static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk)
{
int gen, type, zone;
@@ -218,7 +228,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
walk->batched = 0;
@@ -4250,7 +4250,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
{
int zone;
int remaining = MAX_LRU_BATCH;
@@ -227,7 +237,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
if (type == LRU_GEN_ANON && !can_swap)
@@ -4286,7 +4286,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
{
int gen, type, zone;
bool success = false;
@@ -236,7 +246,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
DEFINE_MIN_SEQ(lruvec);
VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
@@ -4307,7 +4307,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
;
}
@@ -245,7 +255,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
if (can_swap) {
min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]);
min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]);
@@ -4329,7 +4329,7 @@ static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan)
{
int prev, next;
int type, zone;
@@ -254,7 +264,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
spin_lock_irq(&lruvec->lru_lock);
@@ -4387,7 +4387,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
bool success;
struct lru_gen_mm_walk *walk;
struct mm_struct *mm = NULL;
@@ -263,7 +273,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
VM_WARN_ON_ONCE(max_seq > READ_ONCE(lrugen->max_seq));
@@ -4452,7 +4452,7 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
unsigned long old = 0;
unsigned long young = 0;
unsigned long total = 0;
@@ -272,7 +282,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
for (type = !can_swap; type < ANON_AND_FILE; type++) {
@@ -4737,7 +4737,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx)
int delta = folio_nr_pages(folio);
int refs = folio_lru_refs(folio);
int tier = lru_tier_from_refs(refs);
@@ -281,7 +291,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
VM_WARN_ON_ONCE_FOLIO(gen >= MAX_NR_GENS, folio);
@@ -4837,7 +4837,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
int scanned = 0;
int isolated = 0;
int remaining = MAX_LRU_BATCH;
@@ -290,7 +300,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
VM_WARN_ON_ONCE(!list_empty(list));
@@ -5237,7 +5237,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
static bool __maybe_unused state_is_valid(struct lruvec *lruvec)
{
@@ -299,7 +309,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
if (lrugen->enabled) {
enum lru_list lru;
@@ -5519,7 +5519,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
int i;
int type, tier;
int hist = lru_hist_from_seq(seq);
@@ -308,7 +318,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
for (tier = 0; tier < MAX_NR_TIERS; tier++) {
seq_printf(m, " %10d", tier);
@@ -5569,7 +5569,7 @@ static int lru_gen_seq_show(struct seq_file *m, void *v)
unsigned long seq;
bool full = !debugfs_real_fops(m->file)->write;
struct lruvec *lruvec = v;
@@ -317,7 +327,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
int nid = lruvec_pgdat(lruvec)->node_id;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MAX_SEQ(lruvec);
@@ -5823,7 +5823,7 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
{
int i;
int gen, type, zone;
@@ -326,9 +336,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
lrugen->max_seq = MIN_NR_GENS + 1;
lrugen->enabled = lru_gen_enabled();
diff --git a/mm/workingset.c b/mm/workingset.c
index ae7e984b23c6b..688aaa73f64e8 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -223,7 +223,7 @@ static void *lru_gen_eviction(struct folio *folio)
unsigned long token;
unsigned long min_seq;
struct lruvec *lruvec;
@@ -337,7 +349,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
int type = folio_is_file_lru(folio);
int delta = folio_nr_pages(folio);
int refs = folio_lru_refs(folio);
@@ -252,7 +252,7 @@ static void lru_gen_refault(struct folio *folio, void *shadow)
unsigned long token;
unsigned long min_seq;
struct lruvec *lruvec;
@@ -346,3 +358,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
struct mem_cgroup *memcg;
struct pglist_data *pgdat;
int type = folio_is_file_lru(folio);
--
2.40.1


@@ -1,7 +1,7 @@
From 656287d55d9cfc72a4bcd4d9bd098570f12ce409 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Wed, 21 Dec 2022 21:19:00 -0700
Subject: [PATCH 02/19] UPSTREAM: mm: multi-gen LRU: rename lrugen->lists[] to
 lrugen->folios[]

lru_gen_folio will be chained into per-node lists by the coming
@@ -17,15 +17,54 @@ Cc: Mike Rapoport <rppt@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Bug: 274865848
(cherry picked from commit 6df1b2212950aae2b2188c6645ea18e2a9e3fdd5)
Change-Id: I09f53e0fb2cd6b8b3adbb8a80b15dc5efbeae857
Signed-off-by: T.J. Mercier <tjmercier@google.com>
---
 Documentation/mm/multigen_lru.rst | 8 ++++----
 include/linux/mm_inline.h | 4 ++--
 include/linux/mmzone.h | 8 ++++----
 mm/vmscan.c | 20 ++++++++++----------
 4 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/Documentation/mm/multigen_lru.rst b/Documentation/mm/multigen_lru.rst
index d7062c6a89464..d8f721f98868a 100644
--- a/Documentation/mm/multigen_lru.rst
+++ b/Documentation/mm/multigen_lru.rst
@@ -89,15 +89,15 @@ variables are monotonically increasing.
Generation numbers are truncated into ``order_base_2(MAX_NR_GENS+1)``
bits in order to fit into the gen counter in ``folio->flags``. Each
-truncated generation number is an index to ``lrugen->lists[]``. The
+truncated generation number is an index to ``lrugen->folios[]``. The
sliding window technique is used to track at least ``MIN_NR_GENS`` and
at most ``MAX_NR_GENS`` generations. The gen counter stores a value
within ``[1, MAX_NR_GENS]`` while a page is on one of
-``lrugen->lists[]``; otherwise it stores zero.
+``lrugen->folios[]``; otherwise it stores zero.
Each generation is divided into multiple tiers. A page accessed ``N``
times through file descriptors is in tier ``order_base_2(N)``. Unlike
-generations, tiers do not have dedicated ``lrugen->lists[]``. In
+generations, tiers do not have dedicated ``lrugen->folios[]``. In
contrast to moving across generations, which requires the LRU lock,
moving across tiers only involves atomic operations on
``folio->flags`` and therefore has a negligible cost. A feedback loop
@@ -127,7 +127,7 @@ page mapped by this PTE to ``(max_seq%MAX_NR_GENS)+1``.
Eviction
--------
The eviction consumes old generations. Given an ``lruvec``, it
-increments ``min_seq`` when ``lrugen->lists[]`` indexed by
+increments ``min_seq`` when ``lrugen->folios[]`` indexed by
``min_seq%MAX_NR_GENS`` becomes empty. To select a type and a tier to
evict from, it first compares ``min_seq[]`` to select the older type.
If both types are equally old, it selects the one whose first tier has
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index f63968bd7de59..da38e3d962e2f 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -256,9 +256,9 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,
lru_gen_update_size(lruvec, folio, -1, gen);
/* for folio_rotate_reclaimable() */
if (reclaiming)
@@ -37,6 +76,8 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
return true;
}
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index bd3e4689f72dc..02e4323744715 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -312,7 +312,7 @@ enum lruvec_flags {
@@ -68,9 +109,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
/* the multi-gen LRU sizes, eventually consistent */
long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
/* the exponential moving average of refaulted */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 27142caf284c1..b02fed912f742 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4258,7 +4258,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
/* prevent cold/hot inversion if force_scan is true */
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
@@ -79,7 +122,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
while (!list_empty(head)) {
struct folio *folio = lru_to_folio(head);
@@ -4269,7 +4269,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
new_gen = folio_inc_gen(lruvec, folio, false);
@@ -88,7 +131,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
if (!--remaining)
return false;
@@ -4297,7 +4297,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
gen = lru_gen_from_seq(min_seq[type]);
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
@@ -97,7 +140,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
goto next;
}
@@ -4762,7 +4762,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx)
/* promoted */
if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
@@ -106,7 +149,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
return true;
}
@@ -4771,7 +4771,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx)
int hist = lru_hist_from_seq(lrugen->min_seq[type]);
gen = folio_inc_gen(lruvec, folio, false);
@@ -115,7 +158,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
lrugen->protected[hist][type][tier - 1] + delta);
@@ -4783,7 +4783,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx)
if (folio_test_locked(folio) || folio_test_writeback(folio) ||
(type == LRU_GEN_FILE && folio_test_dirty(folio))) {
gen = folio_inc_gen(lruvec, folio, true);
@@ -124,7 +167,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
return true;
}
@@ -4850,7 +4850,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
for (zone = sc->reclaim_idx; zone >= 0; zone--) {
LIST_HEAD(moved);
int skipped = 0;
@@ -133,7 +176,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
while (!list_empty(head)) {
struct folio *folio = lru_to_folio(head);
@@ -5250,7 +5250,7 @@ static bool __maybe_unused state_is_valid(struct lruvec *lruvec)
int gen, type, zone;
for_each_gen_type_zone(gen, type, zone) {
@@ -142,7 +185,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
return false;
}
}
@@ -5295,7 +5295,7 @@ static bool drain_evictable(struct lruvec *lruvec)
int remaining = MAX_LRU_BATCH;
for_each_gen_type_zone(gen, type, zone) {
@@ -151,7 +194,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
while (!list_empty(head)) {
bool success;
@@ -5832,7 +5832,7 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
lrugen->timestamps[i] = jiffies;
for_each_gen_type_zone(gen, type, zone)
@@ -160,3 +203,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
lruvec->mm_state.seq = MIN_NR_GENS;
init_waitqueue_head(&lruvec->mm_state.wait);
--
2.40.1
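The documentation hunk above spells out how the renamed array is addressed: a monotonically increasing sequence number is truncated into a small generation index, and that index selects one of the lrugen->folios[] lists. A rough user-space sketch of the arithmetic follows; the constants mirror the kernel defaults, but the surrounding structures are omitted for illustration.

#include <stdio.h>

#define MAX_NR_GENS 4	/* at most four generations are tracked */
#define MIN_NR_GENS 2	/* at least two are kept at all times */

/* Truncate a generation sequence number into an array index, as
 * lru_gen_from_seq() does in include/linux/mm_inline.h. */
static int lru_gen_from_seq(unsigned long seq)
{
	return seq % MAX_NR_GENS;
}

int main(void)
{
	unsigned long max_seq = 21;	/* youngest generation */
	unsigned long min_seq = 19;	/* oldest generation */

	/* Both map onto slots of lrugen->folios[]; the window between them
	 * never exceeds MAX_NR_GENS, so the truncated indices stay distinct. */
	printf("youngest -> folios[%d]\n", lru_gen_from_seq(max_seq));
	printf("oldest   -> folios[%d]\n", lru_gen_from_seq(min_seq));
	return 0;
}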


@@ -1,7 +1,8 @@
From 14f9a7a15f3d1af351f30e0438fd747b7ac253b0 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Wed, 21 Dec 2022 21:19:01 -0700
Subject: [PATCH 03/19] UPSTREAM: mm: multi-gen LRU: remove eviction fairness
 safeguard

Recall that the eviction consumes the oldest generation: first it
bucket-sorts folios whose gen counters were updated by the aging and
@@ -31,13 +32,19 @@ Cc: Mike Rapoport <rppt@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Bug: 274865848
(cherry picked from commit a579086c99ed70cc4bfc104348dbe3dd8f2787e6)
Change-Id: I08ac1b3c90e29cafd0566785aaa4bcdb5db7d22c
Signed-off-by: T.J. Mercier <tjmercier@google.com>
---
 mm/vmscan.c | 81 +++++++++++++++--------------------------------------
 1 file changed, 23 insertions(+), 58 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b02fed912f742..991961180b320 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -448,6 +448,11 @@ static bool cgroup_reclaim(struct scan_control *sc)
return sc->target_mem_cgroup;
}
@@ -49,7 +56,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
/**
 * writeback_throttling_sane - is the usual dirty throttling mechanism available?
 * @sc: scan_control in question
@@ -498,6 +503,11 @@ static bool cgroup_reclaim(struct scan_control *sc)
return false;
}
@@ -61,7 +68,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
static bool writeback_throttling_sane(struct scan_control *sc)
{
return true;
@@ -4993,8 +5003,7 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw
return scanned;
}
@@ -71,7 +78,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
{
int type;
int scanned;
@@ -5083,9 +5092,6 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
goto retry;
}
@@ -81,7 +88,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
return scanned;
}
@@ -5124,67 +5130,26 @@ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *
return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
}
@@ -158,7 +165,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
lru_add_drain();
@@ -5208,7 +5173,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
if (!nr_to_scan)
goto done;
@@ -167,7 +174,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
if (!delta)
goto done;
@@ -5216,7 +5181,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
if (scanned >= nr_to_scan)
break;
@@ -176,7 +183,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
break;
cond_resched();
@@ -5666,7 +5631,7 @@ static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_co
if (sc->nr_reclaimed >= nr_to_reclaim)
return 0;
@@ -185,3 +192,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
return 0;
cond_resched();
--
2.40.1


@@ -1,7 +1,8 @@
From f3c93d2e37a3c56593d7ccf4f4bcf1b58426fdd8 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Wed, 21 Dec 2022 21:19:02 -0700
Subject: [PATCH 04/19] BACKPORT: mm: multi-gen LRU: remove aging fairness
 safeguard

Recall that the aging produces the youngest generation: first it scans
for accessed folios and updates their gen counters; then it increments
@@ -31,10 +32,18 @@ Cc: Mike Rapoport <rppt@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Bug: 274865848
(cherry picked from commit 7348cc91821b0cb24dfb00e578047f68299a50ab)
[TJ: Resolved conflicts with older function signatures for
min_cgroup_below_min / min_cgroup_below_low]
Change-Id: I6e36ecfbaaefbc0a56d9a9d5d7cbe404ed7f57a5
Signed-off-by: T.J. Mercier <tjmercier@google.com>
---
 mm/vmscan.c | 126 ++++++++++++++++++++++++----------------------------
 1 file changed, 59 insertions(+), 67 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 991961180b320..5a2e83e673232 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -136,7 +136,6 @@ struct scan_control {
@@ -45,7 +54,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
unsigned long last_reclaimed;
#endif
@@ -4455,7 +4454,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
return true;
}
@@ -54,7 +63,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan)
{
int gen, type, zone;
@@ -4464,6 +4463,13 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
unsigned long total = 0;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
@@ -68,7 +77,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
for (type = !can_swap; type < ANON_AND_FILE; type++) {
unsigned long seq;
@@ -4492,8 +4498,6 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
 * stalls when the number of generations reaches MIN_NR_GENS. Hence, the
 * ideal number of generations is MIN_NR_GENS+1.
 */
@@ -77,7 +86,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
if (min_seq[!can_swap] + MIN_NR_GENS < max_seq)
return false;
@@ -4512,40 +4516,54 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
return false;
}
@@ -153,7 +162,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
}
/* to protect the working set of the last N jiffies */
@@ -4554,46 +4572,32 @@ static unsigned long lru_gen_min_ttl __read_mostly;
static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
{
struct mem_cgroup *memcg;
@@ -207,7 +216,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
 */
if (mutex_trylock(&oom_lock)) {
struct oom_control oc = {
@@ -5101,33 +5105,27 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
 * reclaim.
 */
static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
@@ -247,7 +256,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
}
static unsigned long get_nr_to_reclaim(struct scan_control *sc)
@@ -5146,9 +5144,7 @@ static unsigned long get_nr_to_reclaim(struct scan_control *sc)
static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
{
struct blk_plug plug;
@@ -257,7 +266,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
lru_add_drain();
@@ -5169,13 +5165,13 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
else
swappiness = 0;
@@ -274,7 +283,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
scanned += delta;
if (scanned >= nr_to_scan)
@@ -5187,10 +5183,6 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
cond_resched();
}
@@ -285,3 +294,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
clear_mm_walk();
blk_finish_plug(&plug);
--
2.40.1


@@ -1,12 +1,11 @@
From eca3858631e0cbad2ca6e40f788892749428e4cb Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Wed, 21 Dec 2022 21:19:03 -0700
Subject: [PATCH 05/19] UPSTREAM: mm: multi-gen LRU: shuffle should_run_aging()

Move should_run_aging() next to its only caller left.

Link: https://lkml.kernel.org/r/20221222041905.2431096-6-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michael Larabel <Michael@MichaelLarabel.com>
@@ -15,13 +14,21 @@ Cc: Mike Rapoport <rppt@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Bug: 274865848
(cherry picked from commit 77d4459a4a1a472b7309e475f962dda87d950abd)
Signed-off-by: T.J. Mercier <tjmercier@google.com>
Change-Id: I3b0383fe16b93a783b4d8c0b3a0b325160392576
Signed-off-by: Yu Zhao <yuzhao@google.com>
Signed-off-by: T.J. Mercier <tjmercier@google.com>
---
 mm/vmscan.c | 124 ++++++++++++++++++++++++++--------------------------
 1 file changed, 62 insertions(+), 62 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5a2e83e673232..0c47952714b26 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4454,68 +4454,6 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
return true;
}
@@ -90,7 +97,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
{
int gen, type, zone;
@@ -5099,6 +5037,68 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
return scanned;
}
@@ -159,3 +166,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
/*
 * For future optimizations:
 * 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg
--
2.40.1


@@ -1,7 +1,8 @@
From 8ee8571e47aa75221e5fbd4c9c7802fc4244c346 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Wed, 21 Dec 2022 21:19:04 -0700
Subject: [PATCH 06/19] BACKPORT: mm: multi-gen LRU: per-node lru_gen_folio
 lists

For each node, memcgs are divided into two generations: the old and
the young. For each generation, memcgs are randomly sharded into
@@ -58,18 +59,26 @@ Cc: Mike Rapoport <rppt@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Bug: 274865848
(cherry picked from commit e4dde56cd208674ce899b47589f263499e5b8cdc)
[TJ: Resolved conflicts with older function signatures for
min_cgroup_below_min / min_cgroup_below_low and includes]
Change-Id: Idc8a0f635e035d72dd911f807d1224cb47cbd655
Signed-off-by: T.J. Mercier <tjmercier@google.com>
---
 include/linux/memcontrol.h | 10 +
 include/linux/mm_inline.h | 17 ++
 include/linux/mmzone.h | 117 +++++++++++-
 mm/memcontrol.c | 16 ++
 mm/page_alloc.c | 1 +
 mm/vmscan.c | 374 +++++++++++++++++++++++++++++++++----
 6 files changed, 500 insertions(+), 35 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e039763029563..82d28b052a9e5 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -790,6 +790,11 @@ static inline void obj_cgroup_put(struct obj_cgroup *objcg)
percpu_ref_put(&objcg->refcnt);
}
@@ -81,7 +90,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
if (memcg)
@@ -1290,6 +1295,11 @@ static inline void obj_cgroup_put(struct obj_cgroup *objcg)
{
}
@@ -93,9 +102,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
}
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index da38e3d962e2f..c1fd3922dc5dd 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -122,6 +122,18 @@ static inline bool lru_gen_in_fault(void)
return current->in_lru_fault;
}
@@ -114,7 +125,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
static inline int lru_gen_from_seq(unsigned long seq)
{
return seq % MAX_NR_GENS;
@@ -297,6 +309,11 @@ static inline bool lru_gen_in_fault(void)
return false;
}
@@ -126,6 +137,8 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
{
return false;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 02e4323744715..66e067a635682 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -7,6 +7,7 @@
@@ -167,7 +180,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
};
enum {
@@ -479,12 +497,87 @@ void lru_gen_init_lruvec(struct lruvec *lruvec);
void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
#ifdef CONFIG_MEMCG
@@ -256,7 +269,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
{
}
@@ -494,6 +587,7 @@ static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
}
#ifdef CONFIG_MEMCG
@@ -264,7 +277,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
{
}
@@ -501,7 +595,24 @@ static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg)
{
}
@@ -299,9 +312,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
#endif
CACHELINE_PADDING(_pad2_);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3e8f1ad0fe9db..7815d556e38cc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -477,6 +477,16 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid)
struct mem_cgroup_per_node *mz;
struct mem_cgroup_tree_per_node *mctz;
@@ -318,7 +333,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mctz = soft_limit_tree.rb_tree_per_node[nid];
if (!mctz)
return;
@@ -3522,6 +3532,9 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
struct mem_cgroup_tree_per_node *mctz;
unsigned long excess;
@@ -328,7 +343,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
if (order > 0)
return 0;
@@ -5382,6 +5395,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
if (unlikely(mem_cgroup_is_root(memcg)))
queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
2UL*HZ);
@@ -336,7 +351,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
return 0;
offline_kmem:
memcg_offline_kmem(memcg);
@@ -5413,6 +5427,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
memcg_offline_kmem(memcg);
reparent_shrinker_deferred(memcg);
wb_memcg_offline(memcg);
@@ -344,7 +359,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
drain_all_stock(memcg);
@@ -5424,6 +5439,7 @@ static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
invalidate_reclaim_iterators(memcg);
@@ -352,9 +367,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
}
static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 69668817fed37..473057b81a9df 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7957,6 +7957,7 @@ static void __init free_area_init_node(int nid)
pgdat_set_deferred_range(pgdat);
free_area_init_core(pgdat);
@@ -362,6 +379,8 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
}
static void __init free_area_init_memoryless_node(int nid)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0c47952714b26..65eb28448f216 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -54,6 +54,8 @@
@@ -385,7 +404,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
/* Allocation order */
s8 order;
@@ -3160,6 +3157,9 @@ DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS);
for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \
for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
@@ -395,7 +414,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid)
{
struct pglist_data *pgdat = NODE_DATA(nid);
@@ -4440,8 +4440,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
if (sc->priority <= DEF_PRIORITY - 2)
wait_event_killable(lruvec->mm_state.wait,
max_seq < READ_ONCE(lrugen->max_seq));
@@ -405,7 +424,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
}
VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq));
@@ -4514,8 +4513,6 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
VM_WARN_ON_ONCE(!current_is_kswapd());
@@ -414,7 +433,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
/* check the order to exclude compaction-induced reclaim */
if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY)
return;
@@ -5104,8 +5101,7 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
 * 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg
 * reclaim.
 */
@@ -424,7 +443,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
{
unsigned long nr_to_scan;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
@@ -5122,10 +5118,8 @@ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *
if (sc->priority == DEF_PRIORITY)
return nr_to_scan;
@@ -436,7 +455,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
}
static unsigned long get_nr_to_reclaim(struct scan_control *sc)
@@ -5134,29 +5128,18 @@ static unsigned long get_nr_to_reclaim(struct scan_control *sc)
if (!global_reclaim(sc))
return -1;
@@ -468,7 +487,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
if (sc->may_swap)
swappiness = get_swappiness(lruvec, sc);
@@ -5166,7 +5149,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
swappiness = 0;
nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
@@ -477,7 +496,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
break;
delta = evict_folios(lruvec, sc, swappiness);
@@ -5183,11 +5166,252 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
cond_resched();
}
@@ -515,8 +534,9 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+
+ shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->priority);
+
+ if (!sc->proactive)
+ vmpressure(sc->gfp_mask, memcg, false, sc->nr_scanned - scanned,
+ sc->nr_reclaimed - reclaimed);
+
+ sc->nr_reclaimed += current->reclaim_state->reclaimed_slab;
+ current->reclaim_state->reclaimed_slab = 0;
@@ -538,7 +558,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+ struct mem_cgroup *memcg = NULL;
+ unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
+
+ bin = first_bin = get_random_u32_below(MEMCG_NR_BINS);
+restart:
+ gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq));
+
@@ -601,11 +621,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+ if (try_to_shrink_lruvec(lruvec, sc))
+ lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG);
+
clear_mm_walk();

blk_finish_plug(&plug);
}
+
+#else /* !CONFIG_MEMCG */
+
+static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
@@ -671,9 +691,9 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+ if (current_is_kswapd())
+ sc->nr_reclaimed += reclaimed;
+
+ clear_mm_walk();
+
+ blk_finish_plug(&plug);
+
+ /* kswapd should never fail */
+ pgdat->kswapd_failures = 0;
@@ -684,7 +704,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+{
+ int seg;
+ int old, new;
+ int bin = get_random_u32_below(MEMCG_NR_BINS);
+ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+
+ spin_lock(&pgdat->memcg_lru.lock);
@@ -723,12 +743,13 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+ WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
+
+ spin_unlock(&pgdat->memcg_lru.lock);
+}
+#endif
+
/******************************************************************************
* state change
******************************************************************************/
@@ -5644,11 +5868,11 @@ static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq,
if (!mem_cgroup_disabled()) {
rcu_read_lock();
@@ -743,7 +764,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
rcu_read_unlock();
if (!memcg)
@@ -5796,6 +6020,19 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
}
#ifdef CONFIG_MEMCG
@@ -763,7 +784,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
void lru_gen_init_memcg(struct mem_cgroup *memcg)
{
INIT_LIST_HEAD(&memcg->mm_list.fifo);
@@ -5819,7 +6056,69 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg)
}
}
}
@@ -773,7 +794,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+{
+ int gen;
+ int nid;
+ int bin = get_random_u32_below(MEMCG_NR_BINS);
+
+ for_each_node(nid) {
+ struct pglist_data *pgdat = NODE_DATA(nid);
@@ -834,7 +855,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
static int __init init_lru_gen(void)
{
@@ -5846,6 +6145,10 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
{
}
@@ -845,7 +866,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
#endif /* CONFIG_LRU_GEN */
static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
@@ -5859,7 +6162,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
bool proportional_reclaim;
struct blk_plug plug;
@@ -854,7 +875,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
lru_gen_shrink_lruvec(lruvec, sc);
return;
}
@@ -6102,6 +6405,11 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
struct lruvec *target_lruvec;
bool reclaimable = false;
@@ -866,3 +887,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
again:
--
2.40.1
View File

@ -1,7 +1,7 @@
From 93147736b5b3a21bea24313bfc7a696829932009 Mon Sep 17 00:00:00 2001 From 11b14ee8cbbbebd8204609076a9327a1171cd253 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com> From: Yu Zhao <yuzhao@google.com>
Date: Wed, 21 Dec 2022 21:19:05 -0700 Date: Wed, 21 Dec 2022 21:19:05 -0700
Subject: [PATCH 27/29] mm: multi-gen LRU: clarify scan_control flags Subject: [PATCH 07/19] BACKPORT: mm: multi-gen LRU: clarify scan_control flags
Among the flags in scan_control: Among the flags in scan_control:
1. sc->may_swap, which indicates swap constraint due to memsw.max, is 1. sc->may_swap, which indicates swap constraint due to memsw.max, is
@ -12,7 +12,7 @@ Among the flags in scan_control:
3. !(sc->gfp_mask & __GFP_IO), which indicates IO constraint, lowers 3. !(sc->gfp_mask & __GFP_IO), which indicates IO constraint, lowers
swappiness to prioritize file LRU, since clean file folios are more swappiness to prioritize file LRU, since clean file folios are more
likely to exist. likely to exist.
4. sc->may_writefolio and sc->may_unmap, which indicates opportunistic 4. sc->may_writepage and sc->may_unmap, which indicates opportunistic
reclaim, are rejected, since unmapped clean folios are already reclaim, are rejected, since unmapped clean folios are already
prioritized. Scanning for more of them is likely futile and can prioritized. Scanning for more of them is likely futile and can
cause high reclaim latency when there is a large number of memcgs. cause high reclaim latency when there is a large number of memcgs.
@ -29,13 +29,21 @@ Cc: Mike Rapoport <rppt@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com> Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Bug: 274865848
(cherry picked from commit e9d4e1ee788097484606c32122f146d802a9c5fb)
[TJ: Resolved conflict with older function signature for min_cgroup_below_min, and over
cdded861182142ac4488a4d64c571107aeb77f53 ("ANDROID: MGLRU: Don't skip anon reclaim if swap low")]
Change-Id: Ic2e779eaf4e91a3921831b4e2fa10c740dc59d50
Signed-off-by: T.J. Mercier <tjmercier@google.com>
--- ---
mm/vmscan.c | 55 +++++++++++++++++++++++++++-------------------------- mm/vmscan.c | 55 +++++++++++++++++++++++++++--------------------------
1 file changed, 28 insertions(+), 27 deletions(-) 1 file changed, 28 insertions(+), 27 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 65eb28448f216..0a0e1250ffc87 100644
--- a/mm/vmscan.c --- a/mm/vmscan.c
+++ b/mm/vmscan.c +++ b/mm/vmscan.c
@@ -3185,6 +3185,9 @@ static int get_swappiness(struct lruvec @@ -3185,6 +3185,9 @@ static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc)
struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct pglist_data *pgdat = lruvec_pgdat(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec);
@ -45,7 +53,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
if (!can_demote(pgdat->node_id, sc) && if (!can_demote(pgdat->node_id, sc) &&
mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH) mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH)
return 0; return 0;
@@ -4226,7 +4229,7 @@ static void walk_mm(struct lruvec *lruve @@ -4223,7 +4226,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
} while (err == -EAGAIN); } while (err == -EAGAIN);
} }
@ -54,7 +62,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
{ {
struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk; struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk;
@@ -4234,7 +4237,7 @@ static struct lru_gen_mm_walk *set_mm_wa @@ -4231,7 +4234,7 @@ static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat)
VM_WARN_ON_ONCE(walk); VM_WARN_ON_ONCE(walk);
walk = &pgdat->mm_walk; walk = &pgdat->mm_walk;
@ -63,7 +71,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
VM_WARN_ON_ONCE(current_is_kswapd()); VM_WARN_ON_ONCE(current_is_kswapd());
walk = kzalloc(sizeof(*walk), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN); walk = kzalloc(sizeof(*walk), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
@@ -4420,7 +4423,7 @@ static bool try_to_inc_max_seq(struct lr @@ -4417,7 +4420,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
goto done; goto done;
} }
@ -72,7 +80,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
if (!walk) { if (!walk) {
success = iterate_mm_list_nowalk(lruvec, max_seq); success = iterate_mm_list_nowalk(lruvec, max_seq);
goto done; goto done;
@@ -4489,8 +4492,6 @@ static bool lruvec_is_reclaimable(struct @@ -4486,8 +4489,6 @@ static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc
struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MIN_SEQ(lruvec); DEFINE_MIN_SEQ(lruvec);
@ -81,7 +89,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
/* see the comment on lru_gen_folio */ /* see the comment on lru_gen_folio */
gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]); gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
@@ -4746,12 +4747,8 @@ static bool isolate_folio(struct lruvec @@ -4743,12 +4744,8 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca
{ {
bool success; bool success;
@ -95,19 +103,19 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
(folio_test_dirty(folio) || (folio_test_dirty(folio) ||
(folio_test_anon(folio) && !folio_test_swapcache(folio)))) (folio_test_anon(folio) && !folio_test_swapcache(folio))))
return false; return false;
@@ -4848,9 +4845,8 @@ static int scan_folios(struct lruvec *lr @@ -4845,9 +4842,8 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
__count_vm_events(PGSCAN_ANON + type, isolated); __count_vm_events(PGSCAN_ANON + type, isolated);
/* /*
- * There might not be eligible pages due to reclaim_idx, may_unmap and - * There might not be eligible pages due to reclaim_idx, may_unmap and
- * may_writepage. Check the remaining to prevent livelock if it's not - * may_writepage. Check the remaining to prevent livelock if it's not
- * making progress. - * making progress.
+ * There might not be eligible pages due to reclaim_idx. Check the + * There might not be eligible folios due to reclaim_idx. Check the
+ * remaining to prevent livelock if it's not making progress. + * remaining to prevent livelock if it's not making progress.
*/ */
return isolated || !remaining ? scanned : 0; return isolated || !remaining ? scanned : 0;
} }
@@ -5110,8 +5106,7 @@ static long get_nr_to_scan(struct lruvec @@ -5107,8 +5103,7 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool
struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct mem_cgroup *memcg = lruvec_memcg(lruvec);
DEFINE_MAX_SEQ(lruvec); DEFINE_MAX_SEQ(lruvec);
@ -117,7 +125,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
return 0; return 0;
if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan)) if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
@@ -5139,17 +5134,14 @@ static bool try_to_shrink_lruvec(struct @@ -5136,17 +5131,14 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
long nr_to_scan; long nr_to_scan;
unsigned long scanned = 0; unsigned long scanned = 0;
unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
@ -140,7 +148,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness); nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
if (nr_to_scan <= 0) if (nr_to_scan <= 0)
@@ -5279,12 +5271,13 @@ static void lru_gen_shrink_lruvec(struct @@ -5277,12 +5269,13 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
struct blk_plug plug; struct blk_plug plug;
VM_WARN_ON_ONCE(global_reclaim(sc)); VM_WARN_ON_ONCE(global_reclaim(sc));
@ -155,7 +163,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
if (try_to_shrink_lruvec(lruvec, sc)) if (try_to_shrink_lruvec(lruvec, sc))
lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG); lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG);
@@ -5340,11 +5333,19 @@ static void lru_gen_shrink_node(struct p @@ -5338,11 +5331,19 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *
VM_WARN_ON_ONCE(!global_reclaim(sc)); VM_WARN_ON_ONCE(!global_reclaim(sc));
@ -172,11 +180,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
blk_start_plug(&plug); blk_start_plug(&plug);
- set_mm_walk(pgdat); - set_mm_walk(pgdat);
+ set_mm_walk(NULL, sc->proactive); + set_mm_walk(pgdat, sc->proactive);
set_initial_priority(pgdat, sc); set_initial_priority(pgdat, sc);
@@ -5362,7 +5363,7 @@ static void lru_gen_shrink_node(struct p @@ -5360,7 +5361,7 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *
clear_mm_walk(); clear_mm_walk();
blk_finish_plug(&plug); blk_finish_plug(&plug);
@ -185,7 +193,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
/* kswapd should never fail */ /* kswapd should never fail */
pgdat->kswapd_failures = 0; pgdat->kswapd_failures = 0;
} }
@@ -5934,7 +5935,7 @@ static ssize_t lru_gen_seq_write(struct @@ -5932,7 +5933,7 @@ static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
set_task_reclaim_state(current, &sc.reclaim_state); set_task_reclaim_state(current, &sc.reclaim_state);
flags = memalloc_noreclaim_save(); flags = memalloc_noreclaim_save();
blk_start_plug(&plug); blk_start_plug(&plug);
@ -194,3 +202,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
err = -ENOMEM; err = -ENOMEM;
goto done; goto done;
} }
--
2.40.1
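
As a rough illustration of the flag handling described in this log (points 1, 3 and 4 above), the user-space mock below encodes the same policy order: a swap constraint switches the anon LRU off entirely, an IO constraint lowers swappiness so that clean file folios go first, and may_writepage/may_unmap are simply not consulted. All names are stand-ins for illustration; this is not the kernel's get_swappiness().

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for scan_control; only the fields discussed in the log above. */
struct scan_control_mock {
	bool may_swap;	/* false: swap constrained (e.g. memsw.max), anon LRU is off-limits */
	bool gfp_io;	/* false: IO constrained, prefer clean file folios */
};

/* Sketch of the described policy, not the kernel function. */
static int effective_swappiness(const struct scan_control_mock *sc, int swappiness)
{
	if (!sc->may_swap)
		return 0;		/* point 1: skip the anon LRU entirely */
	if (!sc->gfp_io)
		return 1;		/* point 3: lowest non-zero value favours the file LRU */
	return swappiness;		/* otherwise use the configured value (200 = anon only) */
}

int main(void)
{
	struct scan_control_mock unconstrained    = { .may_swap = true,  .gfp_io = true  };
	struct scan_control_mock io_constrained   = { .may_swap = true,  .gfp_io = false };
	struct scan_control_mock swap_constrained = { .may_swap = false, .gfp_io = true  };

	printf("%d %d %d\n",
	       effective_swappiness(&unconstrained, 60),	/* 60 */
	       effective_swappiness(&io_constrained, 60),	/* 1  */
	       effective_swappiness(&swap_constrained, 60));	/* 0  */
	return 0;
}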
View File

@ -1,8 +1,8 @@
From cf3297e4c7a928da8b2b2f0baff2f9c69ea57952 Mon Sep 17 00:00:00 2001 From 25887d48dff860751a06caa4188bfaf6bfb6e4b2 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com> From: Yu Zhao <yuzhao@google.com>
Date: Wed, 21 Dec 2022 21:19:06 -0700 Date: Wed, 21 Dec 2022 21:19:06 -0700
Subject: [PATCH 28/29] mm: multi-gen LRU: simplify arch_has_hw_pte_young() Subject: [PATCH 08/19] UPSTREAM: mm: multi-gen LRU: simplify
check arch_has_hw_pte_young() check
Scanning page tables when hardware does not set the accessed bit has Scanning page tables when hardware does not set the accessed bit has
no real use cases. no real use cases.
@ -17,13 +17,19 @@ Cc: Mike Rapoport <rppt@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev> Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com> Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Bug: 274865848
(cherry picked from commit f386e9314025ea99dae639ed2032560a92081430)
Change-Id: I84d97ab665b4e3bb862a9bc7d72f50dea7191a6b
Signed-off-by: T.J. Mercier <tjmercier@google.com>
--- ---
mm/vmscan.c | 2 +- mm/vmscan.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-) 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0a0e1250ffc87..aa9746f2bc80b 100644
--- a/mm/vmscan.c --- a/mm/vmscan.c
+++ b/mm/vmscan.c +++ b/mm/vmscan.c
@@ -4418,7 +4418,7 @@ static bool try_to_inc_max_seq(struct lr @@ -4415,7 +4415,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
* handful of PTEs. Spreading the work out over a period of time usually * handful of PTEs. Spreading the work out over a period of time usually
* is less efficient, but it avoids bursty page faults. * is less efficient, but it avoids bursty page faults.
*/ */
@ -32,3 +38,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
success = iterate_mm_list_nowalk(lruvec, max_seq); success = iterate_mm_list_nowalk(lruvec, max_seq);
goto done; goto done;
} }
--
2.40.1
View File

@ -1,7 +1,7 @@
From cc67f962cc53f6e1dfa92eb85b7b26fe83a3c66f Mon Sep 17 00:00:00 2001 From 620b0ee94455e48d124414cd06d8a53f69fb6453 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com> From: Yu Zhao <yuzhao@google.com>
Date: Mon, 13 Feb 2023 00:53:22 -0700 Date: Mon, 13 Feb 2023 00:53:22 -0700
Subject: [PATCH 29/29] mm: multi-gen LRU: avoid futile retries Subject: [PATCH 09/19] UPSTREAM: mm: multi-gen LRU: avoid futile retries
Recall that the per-node memcg LRU has two generations and they alternate Recall that the per-node memcg LRU has two generations and they alternate
when the last memcg (of a given node) is moved from one to the other. when the last memcg (of a given node) is moved from one to the other.
@ -19,13 +19,19 @@ Fixes: e4dde56cd208 ("mm: multi-gen LRU: per-node lru_gen_folio lists")
Signed-off-by: Yu Zhao <yuzhao@google.com> Signed-off-by: Yu Zhao <yuzhao@google.com>
Reported-by: T.J. Mercier <tjmercier@google.com> Reported-by: T.J. Mercier <tjmercier@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Bug: 274865848
(cherry picked from commit 9f550d78b40da21b4da515db4c37d8d7b12aa1a6)
Change-Id: Ie92535676b005ec9e7987632b742fdde8d54436f
Signed-off-by: T.J. Mercier <tjmercier@google.com>
--- ---
mm/vmscan.c | 25 +++++++++++++++---------- mm/vmscan.c | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-) 1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index aa9746f2bc80b..49da02f841c81 100644
--- a/mm/vmscan.c --- a/mm/vmscan.c
+++ b/mm/vmscan.c +++ b/mm/vmscan.c
@@ -5208,18 +5208,20 @@ static int shrink_one(struct lruvec *lru @@ -5206,18 +5206,20 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)
static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc) static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
{ {
@ -41,14 +47,14 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
- struct mem_cgroup *memcg = NULL; - struct mem_cgroup *memcg = NULL;
unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
bin = first_bin = prandom_u32_max(MEMCG_NR_BINS); bin = first_bin = get_random_u32_below(MEMCG_NR_BINS);
restart: restart:
+ op = 0; + op = 0;
+ memcg = NULL; + memcg = NULL;
gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq)); gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq));
rcu_read_lock(); rcu_read_lock();
@@ -5243,14 +5245,22 @@ restart: @@ -5241,14 +5243,22 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
op = shrink_one(lruvec, sc); op = shrink_one(lruvec, sc);
@ -74,7 +80,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
/* restart if raced with lru_gen_rotate_memcg() */ /* restart if raced with lru_gen_rotate_memcg() */
if (gen != get_nulls_value(pos)) if (gen != get_nulls_value(pos))
goto restart; goto restart;
@@ -5259,11 +5269,6 @@ restart: @@ -5257,11 +5267,6 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
bin = get_memcg_bin(bin + 1); bin = get_memcg_bin(bin + 1);
if (bin != first_bin) if (bin != first_bin)
goto restart; goto restart;
@ -86,3 +92,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
} }
static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
--
2.40.1
View File

@ -1,7 +1,7 @@
From 6c7f552a48b49a8612786a28a2239fbc24fac289 Mon Sep 17 00:00:00 2001 From 70d216c71ff5c5b17dd1da6294f97b91fb6aba7a Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com> From: Yu Zhao <yuzhao@google.com>
Date: Fri, 30 Dec 2022 14:52:51 -0700 Date: Fri, 30 Dec 2022 14:52:51 -0700
Subject: [PATCH 19/29] mm: add vma_has_recency() Subject: [PATCH 10/19] UPSTREAM: mm: add vma_has_recency()
Add vma_has_recency() to indicate whether a VMA may exhibit temporal Add vma_has_recency() to indicate whether a VMA may exhibit temporal
locality that the LRU algorithm relies on. locality that the LRU algorithm relies on.
@ -43,22 +43,28 @@ results are available in that thread.
[1] https://lore.kernel.org/r/Y31s%2FK8T85jh05wH@google.com/ [1] https://lore.kernel.org/r/Y31s%2FK8T85jh05wH@google.com/
Link: https://lkml.kernel.org/r/20221230215252.2628425-1-yuzhao@google.com Link: https://lkml.kernel.org/r/20221230215252.2628425-1-yuzhao@google.com
Change-Id: I291dcb795197659e40e46539cd32b857677c34ad
Signed-off-by: Yu Zhao <yuzhao@google.com> Signed-off-by: Yu Zhao <yuzhao@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrea Righi <andrea.righi@canonical.com> Cc: Andrea Righi <andrea.righi@canonical.com>
Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michael Larabel <Michael@MichaelLarabel.com> Cc: Michael Larabel <Michael@MichaelLarabel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
(cherry picked from commit 8788f6781486769d9598dcaedc3fe0eb12fc3e59)
Bug: 274865848
Signed-off-by: T.J. Mercier <tjmercier@google.com>
--- ---
include/linux/mm_inline.h | 9 +++++++++ include/linux/mm_inline.h | 8 ++++++++
mm/memory.c | 8 ++++---- mm/memory.c | 7 +++----
mm/rmap.c | 42 +++++++++++++++++---------------------- mm/rmap.c | 42 +++++++++++++++++----------------------
mm/vmscan.c | 5 ++++- mm/vmscan.c | 5 ++++-
4 files changed, 35 insertions(+), 29 deletions(-) 4 files changed, 33 insertions(+), 29 deletions(-)
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index c1fd3922dc5dd..7bb2e5f94734c 100644
--- a/include/linux/mm_inline.h --- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h
@@ -578,4 +578,12 @@ pte_install_uffd_wp_if_needed(struct vm_ @@ -595,4 +595,12 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr,
#endif #endif
} }
@ -71,9 +77,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+} +}
+ +
#endif #endif
diff --git a/mm/memory.c b/mm/memory.c
index 747b7ea30f890..c2f48f8003c2e 100644
--- a/mm/memory.c --- a/mm/memory.c
+++ b/mm/memory.c +++ b/mm/memory.c
@@ -1435,8 +1435,7 @@ again: @@ -1435,8 +1435,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
force_flush = 1; force_flush = 1;
set_page_dirty(page); set_page_dirty(page);
} }
@ -83,7 +91,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mark_page_accessed(page); mark_page_accessed(page);
} }
rss[mm_counter(page)]--; rss[mm_counter(page)]--;
@@ -5170,8 +5169,8 @@ static inline void mm_account_fault(stru @@ -5170,8 +5169,8 @@ static inline void mm_account_fault(struct pt_regs *regs,
#ifdef CONFIG_LRU_GEN #ifdef CONFIG_LRU_GEN
static void lru_gen_enter_fault(struct vm_area_struct *vma) static void lru_gen_enter_fault(struct vm_area_struct *vma)
{ {
@ -94,9 +102,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
} }
static void lru_gen_exit_fault(void) static void lru_gen_exit_fault(void)
diff --git a/mm/rmap.c b/mm/rmap.c
index 7da2d8d097d9b..825dac3caa1e5 100644
--- a/mm/rmap.c --- a/mm/rmap.c
+++ b/mm/rmap.c +++ b/mm/rmap.c
@@ -823,25 +823,14 @@ static bool folio_referenced_one(struct @@ -823,25 +823,14 @@ static bool folio_referenced_one(struct folio *folio,
} }
if (pvmw.pte) { if (pvmw.pte) {
@ -125,16 +135,16 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
if (pmdp_clear_flush_young_notify(vma, address, if (pmdp_clear_flush_young_notify(vma, address,
pvmw.pmd)) pvmw.pmd))
@@ -875,7 +864,20 @@ static bool invalid_folio_referenced_vma @@ -875,7 +864,20 @@ static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg)
struct folio_referenced_arg *pra = arg; struct folio_referenced_arg *pra = arg;
struct mem_cgroup *memcg = pra->memcg; struct mem_cgroup *memcg = pra->memcg;
- if (!mm_match_cgroup(vma->vm_mm, memcg)) - if (!mm_match_cgroup(vma->vm_mm, memcg))
+ /* + /*
+ * Ignore references from this mapping if it has no recency. If the + * Ignore references from this mapping if it has no recency. If the
+ * page has been used in another mapping, we will catch it; if this + * folio has been used in another mapping, we will catch it; if this
+ * other mapping is already gone, the unmap path will have set the + * other mapping is already gone, the unmap path will have set the
+ * referenced flag or activated the page in zap_pte_range(). + * referenced flag or activated the folio in zap_pte_range().
+ */ + */
+ if (!vma_has_recency(vma)) + if (!vma_has_recency(vma))
+ return true; + return true;
@ -147,7 +157,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
return true; return true;
return false; return false;
@@ -906,6 +908,7 @@ int folio_referenced(struct folio *folio @@ -906,6 +908,7 @@ int folio_referenced(struct folio *folio, int is_locked,
.arg = (void *)&pra, .arg = (void *)&pra,
.anon_lock = folio_lock_anon_vma_read, .anon_lock = folio_lock_anon_vma_read,
.try_lock = true, .try_lock = true,
@ -155,7 +165,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
}; };
*vm_flags = 0; *vm_flags = 0;
@@ -921,15 +924,6 @@ int folio_referenced(struct folio *folio @@ -921,15 +924,6 @@ int folio_referenced(struct folio *folio, int is_locked,
return 1; return 1;
} }
@ -171,9 +181,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
rmap_walk(folio, &rwc); rmap_walk(folio, &rwc);
*vm_flags = pra.vm_flags; *vm_flags = pra.vm_flags;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 49da02f841c81..596fed6ae0439 100644
--- a/mm/vmscan.c --- a/mm/vmscan.c
+++ b/mm/vmscan.c +++ b/mm/vmscan.c
@@ -3766,7 +3766,10 @@ static int should_skip_vma(unsigned long @@ -3778,7 +3778,10 @@ static int should_skip_vma(unsigned long start, unsigned long end, struct mm_wal
if (is_vm_hugetlb_page(vma)) if (is_vm_hugetlb_page(vma))
return true; return true;
@ -185,3 +197,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
return true; return true;
if (vma == get_gate_vma(vma->vm_mm)) if (vma == get_gate_vma(vma->vm_mm))
--
2.40.1
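
Only fragments of the new helper are visible in the hunks above; per this log, vma_has_recency() reports no temporal locality for VMAs marked VM_SEQ_READ or VM_RAND_READ (i.e. madvise(MADV_SEQUENTIAL) or MADV_RANDOM), so the rmap walk and mark_page_accessed() can skip them. A user-space toy of that check, with stand-in flag values rather than the kernel definitions:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in flag bits for illustration; not the values from the kernel headers. */
#define VM_SEQ_READ_STUB  0x1UL
#define VM_RAND_READ_STUB 0x2UL

struct vma_stub { unsigned long vm_flags; };

/* Mirrors the described decision: sequential/random-read VMAs have no recency. */
static bool vma_has_recency_stub(const struct vma_stub *vma)
{
	return !(vma->vm_flags & (VM_SEQ_READ_STUB | VM_RAND_READ_STUB));
}

int main(void)
{
	struct vma_stub plain = { .vm_flags = 0 };
	struct vma_stub seq   = { .vm_flags = VM_SEQ_READ_STUB };

	printf("%d %d\n", vma_has_recency_stub(&plain), vma_has_recency_stub(&seq)); /* 1 0 */
	return 0;
}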
View File

@ -1,7 +1,7 @@
From 686c3d4f71de9e0e7a27f03a5617a712385f90cd Mon Sep 17 00:00:00 2001 From 9ca4e437a24dfc4ec6c362f319eb9850b9eca497 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com> From: Yu Zhao <yuzhao@google.com>
Date: Fri, 30 Dec 2022 14:52:52 -0700 Date: Fri, 30 Dec 2022 14:52:52 -0700
Subject: [PATCH 20/29] mm: support POSIX_FADV_NOREUSE Subject: [PATCH 11/19] UPSTREAM: mm: support POSIX_FADV_NOREUSE
This patch adds POSIX_FADV_NOREUSE to vma_has_recency() so that the LRU This patch adds POSIX_FADV_NOREUSE to vma_has_recency() so that the LRU
algorithm can ignore access to mapped files marked by this flag. algorithm can ignore access to mapped files marked by this flag.
@ -22,7 +22,7 @@ Its limitations are:
by two users and one of them having set POSIX_FADV_NOREUSE on the by two users and one of them having set POSIX_FADV_NOREUSE on the
file, this page will be activated upon the second user accessing file, this page will be activated upon the second user accessing
it. This corner case can be covered by checking POSIX_FADV_NOREUSE it. This corner case can be covered by checking POSIX_FADV_NOREUSE
before calling mark_page_accessed() on the read path. But it is before calling folio_mark_accessed() on the read path. But it is
considered not worth the effort. considered not worth the effort.
There have been a few attempts to support POSIX_FADV_NOREUSE, e.g., [1]. There have been a few attempts to support POSIX_FADV_NOREUSE, e.g., [1].
@ -67,32 +67,40 @@ which makes it on par with the active/inactive LRU.
[2] https://openbenchmarking.org/result/2209259-PTS-MGLRU8GB57 [2] https://openbenchmarking.org/result/2209259-PTS-MGLRU8GB57
Link: https://lkml.kernel.org/r/20221230215252.2628425-2-yuzhao@google.com Link: https://lkml.kernel.org/r/20221230215252.2628425-2-yuzhao@google.com
Change-Id: I0b7f5f971d78014ea1ba44cee6a8ec902a4330d0
Signed-off-by: Yu Zhao <yuzhao@google.com> Signed-off-by: Yu Zhao <yuzhao@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrea Righi <andrea.righi@canonical.com> Cc: Andrea Righi <andrea.righi@canonical.com>
Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michael Larabel <Michael@MichaelLarabel.com> Cc: Michael Larabel <Michael@MichaelLarabel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
(cherry picked from commit 17e810229cb3068b692fa078bd9b3a6527e0866a)
Bug: 274865848
Signed-off-by: T.J. Mercier <tjmercier@google.com>
--- ---
include/linux/fs.h | 2 ++ include/linux/fs.h | 2 ++
include/linux/mm_inline.h | 3 +++ include/linux/mm_inline.h | 3 +++
mm/fadvise.c | 5 ++++- mm/fadvise.c | 5 ++++-
3 files changed, 9 insertions(+), 1 deletion(-) 3 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f14ecbeab2a9d..97f9c41c1a43a 100644
--- a/include/linux/fs.h --- a/include/linux/fs.h
+++ b/include/linux/fs.h +++ b/include/linux/fs.h
@@ -166,6 +166,8 @@ typedef int (dio_iodone_t)(struct kiocb @@ -166,6 +166,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* File supports DIRECT IO */ /* File supports DIRECT IO */
#define FMODE_CAN_ODIRECT ((__force fmode_t)0x400000) #define FMODE_CAN_ODIRECT ((__force fmode_t)0x400000)
+#define FMODE_NOREUSE ((__force fmode_t)0x400000) +#define FMODE_NOREUSE ((__force fmode_t)0x800000)
+ +
/* File was opened by fanotify and shouldn't generate fanotify events */ /* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x4000000) #define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 7bb2e5f94734c..9a8e2049333c0 100644
--- a/include/linux/mm_inline.h --- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h
@@ -583,6 +583,9 @@ static inline bool vma_has_recency(struc @@ -600,6 +600,9 @@ static inline bool vma_has_recency(struct vm_area_struct *vma)
if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ)) if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
return false; return false;
@ -102,9 +110,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
return true; return true;
} }
diff --git a/mm/fadvise.c b/mm/fadvise.c
index c76ee665355a4..2ba24d865bf5f 100644
--- a/mm/fadvise.c --- a/mm/fadvise.c
+++ b/mm/fadvise.c +++ b/mm/fadvise.c
@@ -80,7 +80,7 @@ int generic_fadvise(struct file *file, l @@ -80,7 +80,7 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
case POSIX_FADV_NORMAL: case POSIX_FADV_NORMAL:
file->f_ra.ra_pages = bdi->ra_pages; file->f_ra.ra_pages = bdi->ra_pages;
spin_lock(&file->f_lock); spin_lock(&file->f_lock);
@ -113,7 +123,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
spin_unlock(&file->f_lock); spin_unlock(&file->f_lock);
break; break;
case POSIX_FADV_RANDOM: case POSIX_FADV_RANDOM:
@@ -107,6 +107,9 @@ int generic_fadvise(struct file *file, l @@ -107,6 +107,9 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
force_page_cache_readahead(mapping, file, start_index, nrpages); force_page_cache_readahead(mapping, file, start_index, nrpages);
break; break;
case POSIX_FADV_NOREUSE: case POSIX_FADV_NOREUSE:
@ -123,3 +133,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
break; break;
case POSIX_FADV_DONTNEED: case POSIX_FADV_DONTNEED:
__filemap_fdatawrite_range(mapping, offset, endbyte, __filemap_fdatawrite_range(mapping, offset, endbyte,
--
2.40.1
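
From user space the new advice is requested with posix_fadvise(); the fadvise.c hunk above then sets FMODE_NOREUSE on the struct file, which vma_has_recency() consults for mappings of that file. A minimal usage sketch (a plausible example, not taken from the patch):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/etc/hostname";
	int fd = open(path, O_RDONLY);
	int err;

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* offset 0, len 0 covers the whole file; on a kernel with this patch the
	 * LRU ignores accesses made through mappings of this file descriptor. */
	err = posix_fadvise(fd, 0, 0, POSIX_FADV_NOREUSE);
	if (err)	/* posix_fadvise() returns the error number directly */
		fprintf(stderr, "posix_fadvise: %s\n", strerror(err));

	close(fd);
	return 0;
}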
View File

@ -0,0 +1,74 @@
From 1b5e4c317d80f4826eceb3781702d18d06b14394 Mon Sep 17 00:00:00 2001
From: "T.J. Alumbaugh" <talumbau@google.com>
Date: Wed, 18 Jan 2023 00:18:21 +0000
Subject: [PATCH 12/19] UPSTREAM: mm: multi-gen LRU: section for working set
protection
Patch series "mm: multi-gen LRU: improve".
This patch series improves a few MGLRU functions, collects related
functions, and adds additional documentation.
This patch (of 7):
Add a section for working set protection in the code and the design doc.
The admin doc already contains its usage.
Link: https://lkml.kernel.org/r/20230118001827.1040870-1-talumbau@google.com
Link: https://lkml.kernel.org/r/20230118001827.1040870-2-talumbau@google.com
Change-Id: I65599075fd42951db7739a2ab7cee78516e157b3
Signed-off-by: T.J. Alumbaugh <talumbau@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
(cherry picked from commit 7b8144e63d84716f16a1b929e0c7e03ae5c4d5c1)
Bug: 274865848
Signed-off-by: T.J. Mercier <tjmercier@google.com>
---
Documentation/mm/multigen_lru.rst | 15 +++++++++++++++
mm/vmscan.c | 4 ++++
2 files changed, 19 insertions(+)
diff --git a/Documentation/mm/multigen_lru.rst b/Documentation/mm/multigen_lru.rst
index d8f721f98868a..6e1483e70fdca 100644
--- a/Documentation/mm/multigen_lru.rst
+++ b/Documentation/mm/multigen_lru.rst
@@ -141,6 +141,21 @@ loop has detected outlying refaults from the tier this page is in. To
this end, the feedback loop uses the first tier as the baseline, for
the reason stated earlier.
+Working set protection
+----------------------
+Each generation is timestamped at birth. If ``lru_gen_min_ttl`` is
+set, an ``lruvec`` is protected from the eviction when its oldest
+generation was born within ``lru_gen_min_ttl`` milliseconds. In other
+words, it prevents the working set of ``lru_gen_min_ttl`` milliseconds
+from getting evicted. The OOM killer is triggered if this working set
+cannot be kept in memory.
+
+This time-based approach has the following advantages:
+
+1. It is easier to configure because it is agnostic to applications
+ and memory sizes.
+2. It is more reliable because it is directly wired to the OOM killer.
+
Summary
-------
The multi-gen LRU can be disassembled into the following parts:
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 596fed6ae0439..ab0b8d3b9d88f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4459,6 +4459,10 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
return true;
}
+/******************************************************************************
+ * working set protection
+ ******************************************************************************/
+
static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
{
int gen, type, zone;
--
2.40.1
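
The knob behind this section is lru_gen_min_ttl, which the MGLRU admin guide exposes as /sys/kernel/mm/lru_gen/min_ttl_ms (path assumed from that guide; verify on the target kernel). A minimal sketch of setting it from C:

#include <stdio.h>

int main(void)
{
	/* Path taken from the MGLRU admin guide; adjust if your kernel differs. */
	FILE *f = fopen("/sys/kernel/mm/lru_gen/min_ttl_ms", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* Protect the working set of the last 1000 ms; 0 disables the protection
	 * (and with it the OOM-killer fallback described above). */
	fprintf(f, "1000\n");
	fclose(f);
	return 0;
}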
View File

@ -0,0 +1,64 @@
From 5ddf9d53d375e42af49b744bd7c2f8247c6bce15 Mon Sep 17 00:00:00 2001
From: "T.J. Alumbaugh" <talumbau@google.com>
Date: Wed, 18 Jan 2023 00:18:22 +0000
Subject: [PATCH 13/19] UPSTREAM: mm: multi-gen LRU: section for rmap/PT walk
feedback
Add a section for lru_gen_look_around() in the code and the design doc.
Link: https://lkml.kernel.org/r/20230118001827.1040870-3-talumbau@google.com
Change-Id: I5097af63f61b3b69ec2abee6cdbdc33c296df213
Signed-off-by: T.J. Alumbaugh <talumbau@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
(cherry picked from commit db19a43d9b3a8876552f00f656008206ef9a5efa)
Bug: 274865848
Signed-off-by: T.J. Mercier <tjmercier@google.com>
---
Documentation/mm/multigen_lru.rst | 14 ++++++++++++++
mm/vmscan.c | 4 ++++
2 files changed, 18 insertions(+)
diff --git a/Documentation/mm/multigen_lru.rst b/Documentation/mm/multigen_lru.rst
index 6e1483e70fdca..bd988a142bc2f 100644
--- a/Documentation/mm/multigen_lru.rst
+++ b/Documentation/mm/multigen_lru.rst
@@ -156,6 +156,20 @@ This time-based approach has the following advantages:
and memory sizes.
2. It is more reliable because it is directly wired to the OOM killer.
+Rmap/PT walk feedback
+---------------------
+Searching the rmap for PTEs mapping each page on an LRU list (to test
+and clear the accessed bit) can be expensive because pages from
+different VMAs (PA space) are not cache friendly to the rmap (VA
+space). For workloads mostly using mapped pages, searching the rmap
+can incur the highest CPU cost in the reclaim path.
+
+``lru_gen_look_around()`` exploits spatial locality to reduce the
+trips into the rmap. It scans the adjacent PTEs of a young PTE and
+promotes hot pages. If the scan was done cacheline efficiently, it
+adds the PMD entry pointing to the PTE table to the Bloom filter. This
+forms a feedback loop between the eviction and the aging.
+
Summary
-------
The multi-gen LRU can be disassembled into the following parts:
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ab0b8d3b9d88f..8fa82630240d6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4553,6 +4553,10 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
}
}
+/******************************************************************************
+ * rmap/PT walk feedback
+ ******************************************************************************/
+
/*
* This function exploits spatial locality when shrink_folio_list() walks the
* rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages. If
--
2.40.1
View File

@ -0,0 +1,250 @@
From 397624e12244ec038f51cb1f178ccb7a2ec562e5 Mon Sep 17 00:00:00 2001
From: "T.J. Alumbaugh" <talumbau@google.com>
Date: Wed, 18 Jan 2023 00:18:23 +0000
Subject: [PATCH 14/19] UPSTREAM: mm: multi-gen LRU: section for Bloom filters
Move Bloom filters code into a dedicated section. Improve the design doc
to explain Bloom filter usage and connection between aging and eviction in
their use.
Link: https://lkml.kernel.org/r/20230118001827.1040870-4-talumbau@google.com
Change-Id: I73e866f687c1ed9f5c8538086aa39408b79897db
Signed-off-by: T.J. Alumbaugh <talumbau@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
(cherry picked from commit ccbbbb85945d8f0255aa9dbc1b617017e2294f2c)
Bug: 274865848
Signed-off-by: T.J. Mercier <tjmercier@google.com>
---
Documentation/mm/multigen_lru.rst | 16 +++
mm/vmscan.c | 180 +++++++++++++++---------------
2 files changed, 108 insertions(+), 88 deletions(-)
diff --git a/Documentation/mm/multigen_lru.rst b/Documentation/mm/multigen_lru.rst
index bd988a142bc2f..770b5d539856c 100644
--- a/Documentation/mm/multigen_lru.rst
+++ b/Documentation/mm/multigen_lru.rst
@@ -170,6 +170,22 @@ promotes hot pages. If the scan was done cacheline efficiently, it
adds the PMD entry pointing to the PTE table to the Bloom filter. This
forms a feedback loop between the eviction and the aging.
+Bloom Filters
+-------------
+Bloom filters are a space and memory efficient data structure for set
+membership test, i.e., test if an element is not in the set or may be
+in the set.
+
+In the eviction path, specifically, in ``lru_gen_look_around()``, if a
+PMD has a sufficient number of hot pages, its address is placed in the
+filter. In the aging path, set membership means that the PTE range
+will be scanned for young pages.
+
+Note that Bloom filters are probabilistic on set membership. If a test
+is false positive, the cost is an additional scan of a range of PTEs,
+which may yield hot pages anyway. Parameters of the filter itself can
+control the false positive rate in the limit.
+
Summary
-------
The multi-gen LRU can be disassembled into the following parts:
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8fa82630240d6..74b4f9d660b56 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3208,6 +3208,98 @@ static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS;
}
+/******************************************************************************
+ * Bloom filters
+ ******************************************************************************/
+
+/*
+ * Bloom filters with m=1<<15, k=2 and the false positive rates of ~1/5 when
+ * n=10,000 and ~1/2 when n=20,000, where, conventionally, m is the number of
+ * bits in a bitmap, k is the number of hash functions and n is the number of
+ * inserted items.
+ *
+ * Page table walkers use one of the two filters to reduce their search space.
+ * To get rid of non-leaf entries that no longer have enough leaf entries, the
+ * aging uses the double-buffering technique to flip to the other filter each
+ * time it produces a new generation. For non-leaf entries that have enough
+ * leaf entries, the aging carries them over to the next generation in
+ * walk_pmd_range(); the eviction also report them when walking the rmap
+ * in lru_gen_look_around().
+ *
+ * For future optimizations:
+ * 1. It's not necessary to keep both filters all the time. The spare one can be
+ * freed after the RCU grace period and reallocated if needed again.
+ * 2. And when reallocating, it's worth scaling its size according to the number
+ * of inserted entries in the other filter, to reduce the memory overhead on
+ * small systems and false positives on large systems.
+ * 3. Jenkins' hash function is an alternative to Knuth's.
+ */
+#define BLOOM_FILTER_SHIFT 15
+
+static inline int filter_gen_from_seq(unsigned long seq)
+{
+ return seq % NR_BLOOM_FILTERS;
+}
+
+static void get_item_key(void *item, int *key)
+{
+ u32 hash = hash_ptr(item, BLOOM_FILTER_SHIFT * 2);
+
+ BUILD_BUG_ON(BLOOM_FILTER_SHIFT * 2 > BITS_PER_TYPE(u32));
+
+ key[0] = hash & (BIT(BLOOM_FILTER_SHIFT) - 1);
+ key[1] = hash >> BLOOM_FILTER_SHIFT;
+}
+
+static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
+{
+ int key[2];
+ unsigned long *filter;
+ int gen = filter_gen_from_seq(seq);
+
+ filter = READ_ONCE(lruvec->mm_state.filters[gen]);
+ if (!filter)
+ return true;
+
+ get_item_key(item, key);
+
+ return test_bit(key[0], filter) && test_bit(key[1], filter);
+}
+
+static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
+{
+ int key[2];
+ unsigned long *filter;
+ int gen = filter_gen_from_seq(seq);
+
+ filter = READ_ONCE(lruvec->mm_state.filters[gen]);
+ if (!filter)
+ return;
+
+ get_item_key(item, key);
+
+ if (!test_bit(key[0], filter))
+ set_bit(key[0], filter);
+ if (!test_bit(key[1], filter))
+ set_bit(key[1], filter);
+}
+
+static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq)
+{
+ unsigned long *filter;
+ int gen = filter_gen_from_seq(seq);
+
+ filter = lruvec->mm_state.filters[gen];
+ if (filter) {
+ bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT));
+ return;
+ }
+
+ filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT),
+ __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
+ WRITE_ONCE(lruvec->mm_state.filters[gen], filter);
+}
+
/******************************************************************************
* mm_struct list
******************************************************************************/
@@ -3333,94 +3425,6 @@ void lru_gen_migrate_mm(struct mm_struct *mm)
}
#endif
-/*
- * Bloom filters with m=1<<15, k=2 and the false positive rates of ~1/5 when
- * n=10,000 and ~1/2 when n=20,000, where, conventionally, m is the number of
- * bits in a bitmap, k is the number of hash functions and n is the number of
- * inserted items.
- *
- * Page table walkers use one of the two filters to reduce their search space.
- * To get rid of non-leaf entries that no longer have enough leaf entries, the
- * aging uses the double-buffering technique to flip to the other filter each
- * time it produces a new generation. For non-leaf entries that have enough
- * leaf entries, the aging carries them over to the next generation in
- * walk_pmd_range(); the eviction also report them when walking the rmap
- * in lru_gen_look_around().
- *
- * For future optimizations:
- * 1. It's not necessary to keep both filters all the time. The spare one can be
- * freed after the RCU grace period and reallocated if needed again.
- * 2. And when reallocating, it's worth scaling its size according to the number
- * of inserted entries in the other filter, to reduce the memory overhead on
- * small systems and false positives on large systems.
- * 3. Jenkins' hash function is an alternative to Knuth's.
- */
-#define BLOOM_FILTER_SHIFT 15
-
-static inline int filter_gen_from_seq(unsigned long seq)
-{
- return seq % NR_BLOOM_FILTERS;
-}
-
-static void get_item_key(void *item, int *key)
-{
- u32 hash = hash_ptr(item, BLOOM_FILTER_SHIFT * 2);
-
- BUILD_BUG_ON(BLOOM_FILTER_SHIFT * 2 > BITS_PER_TYPE(u32));
-
- key[0] = hash & (BIT(BLOOM_FILTER_SHIFT) - 1);
- key[1] = hash >> BLOOM_FILTER_SHIFT;
-}
-
-static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq)
-{
- unsigned long *filter;
- int gen = filter_gen_from_seq(seq);
-
- filter = lruvec->mm_state.filters[gen];
- if (filter) {
- bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT));
- return;
- }
-
- filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT),
- __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
- WRITE_ONCE(lruvec->mm_state.filters[gen], filter);
-}
-
-static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
-{
- int key[2];
- unsigned long *filter;
- int gen = filter_gen_from_seq(seq);
-
- filter = READ_ONCE(lruvec->mm_state.filters[gen]);
- if (!filter)
- return;
-
- get_item_key(item, key);
-
- if (!test_bit(key[0], filter))
- set_bit(key[0], filter);
- if (!test_bit(key[1], filter))
- set_bit(key[1], filter);
-}
-
-static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
-{
- int key[2];
- unsigned long *filter;
- int gen = filter_gen_from_seq(seq);
-
- filter = READ_ONCE(lruvec->mm_state.filters[gen]);
- if (!filter)
- return true;
-
- get_item_key(item, key);
-
- return test_bit(key[0], filter) && test_bit(key[1], filter);
-}
-
static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, bool last)
{
int i;
--
2.40.1
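
The false-positive rates quoted in the comment moved above (~1/5 at n=10,000 and ~1/2 at n=20,000, with m=1<<15 bits and k=2 hash functions) follow from the standard Bloom filter estimate (1 - e^(-k*n/m))^k. A quick user-space check of that arithmetic (link with -lm):

#include <math.h>
#include <stdio.h>

int main(void)
{
	const double m = 1 << 15;		/* bits in the bitmap */
	const double k = 2;			/* hash functions */
	const double n[] = { 10000, 20000 };	/* inserted items */

	for (int i = 0; i < 2; i++) {
		double p = pow(1.0 - exp(-k * n[i] / m), k);
		printf("n=%.0f -> false-positive rate ~%.2f\n", n[i], p);
	}
	return 0;	/* prints ~0.21 and ~0.50, i.e. roughly 1/5 and 1/2 */
}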
View File

@ -0,0 +1,440 @@
From 48c916b812652f9453be5bd45a703728926d41ca Mon Sep 17 00:00:00 2001
From: "T.J. Alumbaugh" <talumbau@google.com>
Date: Wed, 18 Jan 2023 00:18:24 +0000
Subject: [PATCH 15/19] UPSTREAM: mm: multi-gen LRU: section for memcg LRU
Move memcg LRU code into a dedicated section. Improve the design doc to
outline its architecture.
Link: https://lkml.kernel.org/r/20230118001827.1040870-5-talumbau@google.com
Change-Id: Id252e420cff7a858acb098cf2b3642da5c40f602
Signed-off-by: T.J. Alumbaugh <talumbau@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
(cherry picked from commit 36c7b4db7c942ae9e1b111f0c6b468c8b2e33842)
Bug: 274865848
Signed-off-by: T.J. Mercier <tjmercier@google.com>
---
Documentation/mm/multigen_lru.rst | 33 +++-
include/linux/mm_inline.h | 17 --
include/linux/mmzone.h | 13 +-
mm/memcontrol.c | 8 +-
mm/vmscan.c | 250 +++++++++++++++++-------------
5 files changed, 178 insertions(+), 143 deletions(-)
diff --git a/Documentation/mm/multigen_lru.rst b/Documentation/mm/multigen_lru.rst
index 770b5d539856c..5f1f6ecbb79b9 100644
--- a/Documentation/mm/multigen_lru.rst
+++ b/Documentation/mm/multigen_lru.rst
@@ -186,9 +186,40 @@ is false positive, the cost is an additional scan of a range of PTEs,
which may yield hot pages anyway. Parameters of the filter itself can
control the false positive rate in the limit.
+Memcg LRU
+---------
+An memcg LRU is a per-node LRU of memcgs. It is also an LRU of LRUs,
+since each node and memcg combination has an LRU of folios (see
+``mem_cgroup_lruvec()``). Its goal is to improve the scalability of
+global reclaim, which is critical to system-wide memory overcommit in
+data centers. Note that memcg LRU only applies to global reclaim.
+
+The basic structure of an memcg LRU can be understood by an analogy to
+the active/inactive LRU (of folios):
+
+1. It has the young and the old (generations), i.e., the counterparts
+ to the active and the inactive;
+2. The increment of ``max_seq`` triggers promotion, i.e., the
+ counterpart to activation;
+3. Other events trigger similar operations, e.g., offlining an memcg
+ triggers demotion, i.e., the counterpart to deactivation.
+
+In terms of global reclaim, it has two distinct features:
+
+1. Sharding, which allows each thread to start at a random memcg (in
+ the old generation) and improves parallelism;
+2. Eventual fairness, which allows direct reclaim to bail out at will
+ and reduces latency without affecting fairness over some time.
+
+In terms of traversing memcgs during global reclaim, it improves the
+best-case complexity from O(n) to O(1) and does not affect the
+worst-case complexity O(n). Therefore, on average, it has a sublinear
+complexity.
+
Summary
-------
-The multi-gen LRU can be disassembled into the following parts:
+The multi-gen LRU (of folios) can be disassembled into the following
+parts:
* Generations
* Rmap walks
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 9a8e2049333c0..5567f4850243b 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -122,18 +122,6 @@ static inline bool lru_gen_in_fault(void)
return current->in_lru_fault;
}
-#ifdef CONFIG_MEMCG
-static inline int lru_gen_memcg_seg(struct lruvec *lruvec)
-{
- return READ_ONCE(lruvec->lrugen.seg);
-}
-#else
-static inline int lru_gen_memcg_seg(struct lruvec *lruvec)
-{
- return 0;
-}
-#endif
-
static inline int lru_gen_from_seq(unsigned long seq)
{
return seq % MAX_NR_GENS;
@@ -309,11 +297,6 @@ static inline bool lru_gen_in_fault(void)
return false;
}
-static inline int lru_gen_memcg_seg(struct lruvec *lruvec)
-{
- return 0;
-}
-
static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
{
return false;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 66e067a635682..403c7461e7a70 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -368,15 +368,6 @@ struct page_vma_mapped_walk;
#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
-/* see the comment on MEMCG_NR_GENS */
-enum {
- MEMCG_LRU_NOP,
- MEMCG_LRU_HEAD,
- MEMCG_LRU_TAIL,
- MEMCG_LRU_OLD,
- MEMCG_LRU_YOUNG,
-};
-
#ifdef CONFIG_LRU_GEN
enum {
@@ -557,7 +548,7 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg);
void lru_gen_online_memcg(struct mem_cgroup *memcg);
void lru_gen_offline_memcg(struct mem_cgroup *memcg);
void lru_gen_release_memcg(struct mem_cgroup *memcg);
-void lru_gen_rotate_memcg(struct lruvec *lruvec, int op);
+void lru_gen_soft_reclaim(struct lruvec *lruvec);
#else /* !CONFIG_MEMCG */
@@ -608,7 +599,7 @@ static inline void lru_gen_release_memcg(struct mem_cgroup *memcg)
{
}
-static inline void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
+static inline void lru_gen_soft_reclaim(struct lruvec *lruvec)
{
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7815d556e38cc..5397aeb43986d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -478,12 +478,8 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid)
struct mem_cgroup_tree_per_node *mctz;
if (lru_gen_enabled()) {
- struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec;
-
- /* see the comment on MEMCG_NR_GENS */
- if (soft_limit_excess(memcg) && lru_gen_memcg_seg(lruvec) != MEMCG_LRU_HEAD)
- lru_gen_rotate_memcg(lruvec, MEMCG_LRU_HEAD);
-
+ if (soft_limit_excess(memcg))
+ lru_gen_soft_reclaim(&memcg->nodeinfo[nid]->lruvec);
return;
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 74b4f9d660b56..ccde215c084ca 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4689,6 +4689,148 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
mem_cgroup_unlock_pages();
}
+/******************************************************************************
+ * memcg LRU
+ ******************************************************************************/
+
+/* see the comment on MEMCG_NR_GENS */
+enum {
+ MEMCG_LRU_NOP,
+ MEMCG_LRU_HEAD,
+ MEMCG_LRU_TAIL,
+ MEMCG_LRU_OLD,
+ MEMCG_LRU_YOUNG,
+};
+
+#ifdef CONFIG_MEMCG
+
+static int lru_gen_memcg_seg(struct lruvec *lruvec)
+{
+ return READ_ONCE(lruvec->lrugen.seg);
+}
+
+static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
+{
+ int seg;
+ int old, new;
+ int bin = get_random_u32_below(MEMCG_NR_BINS);
+ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+
+ spin_lock(&pgdat->memcg_lru.lock);
+
+ VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
+
+ seg = 0;
+ new = old = lruvec->lrugen.gen;
+
+ /* see the comment on MEMCG_NR_GENS */
+ if (op == MEMCG_LRU_HEAD)
+ seg = MEMCG_LRU_HEAD;
+ else if (op == MEMCG_LRU_TAIL)
+ seg = MEMCG_LRU_TAIL;
+ else if (op == MEMCG_LRU_OLD)
+ new = get_memcg_gen(pgdat->memcg_lru.seq);
+ else if (op == MEMCG_LRU_YOUNG)
+ new = get_memcg_gen(pgdat->memcg_lru.seq + 1);
+ else
+ VM_WARN_ON_ONCE(true);
+
+ hlist_nulls_del_rcu(&lruvec->lrugen.list);
+
+ if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD)
+ hlist_nulls_add_head_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
+ else
+ hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
+
+ pgdat->memcg_lru.nr_memcgs[old]--;
+ pgdat->memcg_lru.nr_memcgs[new]++;
+
+ lruvec->lrugen.gen = new;
+ WRITE_ONCE(lruvec->lrugen.seg, seg);
+
+ if (!pgdat->memcg_lru.nr_memcgs[old] && old == get_memcg_gen(pgdat->memcg_lru.seq))
+ WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
+
+ spin_unlock(&pgdat->memcg_lru.lock);
+}
+
+void lru_gen_online_memcg(struct mem_cgroup *memcg)
+{
+ int gen;
+ int nid;
+ int bin = get_random_u32_below(MEMCG_NR_BINS);
+
+ for_each_node(nid) {
+ struct pglist_data *pgdat = NODE_DATA(nid);
+ struct lruvec *lruvec = get_lruvec(memcg, nid);
+
+ spin_lock(&pgdat->memcg_lru.lock);
+
+ VM_WARN_ON_ONCE(!hlist_nulls_unhashed(&lruvec->lrugen.list));
+
+ gen = get_memcg_gen(pgdat->memcg_lru.seq);
+
+ hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]);
+ pgdat->memcg_lru.nr_memcgs[gen]++;
+
+ lruvec->lrugen.gen = gen;
+
+ spin_unlock(&pgdat->memcg_lru.lock);
+ }
+}
+
+void lru_gen_offline_memcg(struct mem_cgroup *memcg)
+{
+ int nid;
+
+ for_each_node(nid) {
+ struct lruvec *lruvec = get_lruvec(memcg, nid);
+
+ lru_gen_rotate_memcg(lruvec, MEMCG_LRU_OLD);
+ }
+}
+
+void lru_gen_release_memcg(struct mem_cgroup *memcg)
+{
+ int gen;
+ int nid;
+
+ for_each_node(nid) {
+ struct pglist_data *pgdat = NODE_DATA(nid);
+ struct lruvec *lruvec = get_lruvec(memcg, nid);
+
+ spin_lock(&pgdat->memcg_lru.lock);
+
+ VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
+
+ gen = lruvec->lrugen.gen;
+
+ hlist_nulls_del_rcu(&lruvec->lrugen.list);
+ pgdat->memcg_lru.nr_memcgs[gen]--;
+
+ if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))
+ WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
+
+ spin_unlock(&pgdat->memcg_lru.lock);
+ }
+}
+
+void lru_gen_soft_reclaim(struct lruvec *lruvec)
+{
+ /* see the comment on MEMCG_NR_GENS */
+ if (lru_gen_memcg_seg(lruvec) != MEMCG_LRU_HEAD)
+ lru_gen_rotate_memcg(lruvec, MEMCG_LRU_HEAD);
+}
+
+#else /* !CONFIG_MEMCG */
+
+static int lru_gen_memcg_seg(struct lruvec *lruvec)
+{
+ return 0;
+}
+
+#endif
+
/******************************************************************************
* the eviction
******************************************************************************/
@@ -5386,53 +5528,6 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *
pgdat->kswapd_failures = 0;
}
-#ifdef CONFIG_MEMCG
-void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
-{
- int seg;
- int old, new;
- int bin = get_random_u32_below(MEMCG_NR_BINS);
- struct pglist_data *pgdat = lruvec_pgdat(lruvec);
-
- spin_lock(&pgdat->memcg_lru.lock);
-
- VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
-
- seg = 0;
- new = old = lruvec->lrugen.gen;
-
- /* see the comment on MEMCG_NR_GENS */
- if (op == MEMCG_LRU_HEAD)
- seg = MEMCG_LRU_HEAD;
- else if (op == MEMCG_LRU_TAIL)
- seg = MEMCG_LRU_TAIL;
- else if (op == MEMCG_LRU_OLD)
- new = get_memcg_gen(pgdat->memcg_lru.seq);
- else if (op == MEMCG_LRU_YOUNG)
- new = get_memcg_gen(pgdat->memcg_lru.seq + 1);
- else
- VM_WARN_ON_ONCE(true);
-
- hlist_nulls_del_rcu(&lruvec->lrugen.list);
-
- if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD)
- hlist_nulls_add_head_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
- else
- hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
-
- pgdat->memcg_lru.nr_memcgs[old]--;
- pgdat->memcg_lru.nr_memcgs[new]++;
-
- lruvec->lrugen.gen = new;
- WRITE_ONCE(lruvec->lrugen.seg, seg);
-
- if (!pgdat->memcg_lru.nr_memcgs[old] && old == get_memcg_gen(pgdat->memcg_lru.seq))
- WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
-
- spin_unlock(&pgdat->memcg_lru.lock);
-}
-#endif
-
/******************************************************************************
* state change
******************************************************************************/
@@ -6078,67 +6173,6 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg)
}
}
-void lru_gen_online_memcg(struct mem_cgroup *memcg)
-{
- int gen;
- int nid;
- int bin = get_random_u32_below(MEMCG_NR_BINS);
-
- for_each_node(nid) {
- struct pglist_data *pgdat = NODE_DATA(nid);
- struct lruvec *lruvec = get_lruvec(memcg, nid);
-
- spin_lock(&pgdat->memcg_lru.lock);
-
- VM_WARN_ON_ONCE(!hlist_nulls_unhashed(&lruvec->lrugen.list));
-
- gen = get_memcg_gen(pgdat->memcg_lru.seq);
-
- hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]);
- pgdat->memcg_lru.nr_memcgs[gen]++;
-
- lruvec->lrugen.gen = gen;
-
- spin_unlock(&pgdat->memcg_lru.lock);
- }
-}
-
-void lru_gen_offline_memcg(struct mem_cgroup *memcg)
-{
- int nid;
-
- for_each_node(nid) {
- struct lruvec *lruvec = get_lruvec(memcg, nid);
-
- lru_gen_rotate_memcg(lruvec, MEMCG_LRU_OLD);
- }
-}
-
-void lru_gen_release_memcg(struct mem_cgroup *memcg)
-{
- int gen;
- int nid;
-
- for_each_node(nid) {
- struct pglist_data *pgdat = NODE_DATA(nid);
- struct lruvec *lruvec = get_lruvec(memcg, nid);
-
- spin_lock(&pgdat->memcg_lru.lock);
-
- VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
-
- gen = lruvec->lrugen.gen;
-
- hlist_nulls_del_rcu(&lruvec->lrugen.list);
- pgdat->memcg_lru.nr_memcgs[gen]--;
-
- if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))
- WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
-
- spin_unlock(&pgdat->memcg_lru.lock);
- }
-}
-
#endif /* CONFIG_MEMCG */
static int __init init_lru_gen(void)
--
2.40.1
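
Per the lru_gen_rotate_memcg() hunks above, MEMCG_LRU_OLD maps a lruvec to get_memcg_gen(seq) and MEMCG_LRU_YOUNG to get_memcg_gen(seq + 1), so with the two generations described in the design doc, bumping the sequence number is what turns the young generation into the old one. A standalone toy of that arithmetic (stand-in names, assuming two generations):

#include <stdio.h>

#define NR_GENS_STUB 2	/* stand-in for the number of memcg generations */

static unsigned long memcg_seq;	/* stand-in for pgdat->memcg_lru.seq */

static int get_gen_stub(unsigned long seq)
{
	return seq % NR_GENS_STUB;
}

int main(void)
{
	printf("old=%d young=%d\n", get_gen_stub(memcg_seq), get_gen_stub(memcg_seq + 1));

	memcg_seq++;	/* the previous young generation is now the old one */
	printf("old=%d young=%d\n", get_gen_stub(memcg_seq), get_gen_stub(memcg_seq + 1));
	return 0;
}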
View File

@ -0,0 +1,45 @@
From bec433f29537652ed054148edfd7e2183ddcf7c3 Mon Sep 17 00:00:00 2001
From: "T.J. Alumbaugh" <talumbau@google.com>
Date: Wed, 18 Jan 2023 00:18:25 +0000
Subject: [PATCH 16/19] UPSTREAM: mm: multi-gen LRU: improve
lru_gen_exit_memcg()
Add warnings and poison ->next.
Link: https://lkml.kernel.org/r/20230118001827.1040870-6-talumbau@google.com
Change-Id: I53de9e04c1ae941e122b33cd45d2bbb5f34aae0c
Signed-off-by: T.J. Alumbaugh <talumbau@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
(cherry picked from commit 37cc99979d04cca677c0ad5c0acd1149ec165d1b)
Bug: 274865848
Signed-off-by: T.J. Mercier <tjmercier@google.com>
---
mm/vmscan.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ccde215c084ca..d5d6f8d94f58a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6160,12 +6160,17 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg)
int i;
int nid;
+ VM_WARN_ON_ONCE(!list_empty(&memcg->mm_list.fifo));
+
for_each_node(nid) {
struct lruvec *lruvec = get_lruvec(memcg, nid);
+ VM_WARN_ON_ONCE(lruvec->mm_state.nr_walkers);
VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0,
sizeof(lruvec->lrugen.nr_pages)));
+ lruvec->lrugen.list.next = LIST_POISON1;
+
for (i = 0; i < NR_BLOOM_FILTERS; i++) {
bitmap_free(lruvec->mm_state.filters[i]);
lruvec->mm_state.filters[i] = NULL;
--
2.40.1
View File

@ -0,0 +1,140 @@
From fc0e3b06e0f19917b7ecad7967a72f61d4743644 Mon Sep 17 00:00:00 2001
From: "T.J. Alumbaugh" <talumbau@google.com>
Date: Wed, 18 Jan 2023 00:18:26 +0000
Subject: [PATCH 17/19] UPSTREAM: mm: multi-gen LRU: improve walk_pmd_range()
Improve readability of walk_pmd_range() and walk_pmd_range_locked().
Link: https://lkml.kernel.org/r/20230118001827.1040870-7-talumbau@google.com
Change-Id: Ia084fbf53fe989673b7804ca8ca520af12d7d52a
Signed-off-by: T.J. Alumbaugh <talumbau@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
(cherry picked from commit b5ff4133617d0eced35b685da0bd0929dd9fabb7)
Bug: 274865848
Signed-off-by: T.J. Mercier <tjmercier@google.com>
---
mm/vmscan.c | 40 ++++++++++++++++++++--------------------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d5d6f8d94f58a..8f496c2e670a9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3980,8 +3980,8 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
}
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
-static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area_struct *vma,
- struct mm_walk *args, unsigned long *bitmap, unsigned long *start)
+static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area_struct *vma,
+ struct mm_walk *args, unsigned long *bitmap, unsigned long *first)
{
int i;
pmd_t *pmd;
@@ -3994,18 +3994,19 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
VM_WARN_ON_ONCE(pud_leaf(*pud));
/* try to batch at most 1+MIN_LRU_BATCH+1 entries */
- if (*start == -1) {
- *start = next;
+ if (*first == -1) {
+ *first = addr;
+ bitmap_zero(bitmap, MIN_LRU_BATCH);
return;
}
- i = next == -1 ? 0 : pmd_index(next) - pmd_index(*start);
+ i = addr == -1 ? 0 : pmd_index(addr) - pmd_index(*first);
if (i && i <= MIN_LRU_BATCH) {
__set_bit(i - 1, bitmap);
return;
}
- pmd = pmd_offset(pud, *start);
+ pmd = pmd_offset(pud, *first);
ptl = pmd_lockptr(args->mm, pmd);
if (!spin_trylock(ptl))
@@ -4016,15 +4017,16 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
do {
unsigned long pfn;
struct folio *folio;
- unsigned long addr = i ? (*start & PMD_MASK) + i * PMD_SIZE : *start;
+
+ /* don't round down the first address */
+ addr = i ? (*first & PMD_MASK) + i * PMD_SIZE : *first;
pfn = get_pmd_pfn(pmd[i], vma, addr);
if (pfn == -1)
goto next;
if (!pmd_trans_huge(pmd[i])) {
- if (arch_has_hw_nonleaf_pmd_young() &&
- get_cap(LRU_GEN_NONLEAF_YOUNG))
+ if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG))
pmdp_test_and_clear_young(vma, addr, pmd + i);
goto next;
}
@@ -4053,12 +4055,11 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
arch_leave_lazy_mmu_mode();
spin_unlock(ptl);
done:
- *start = -1;
- bitmap_zero(bitmap, MIN_LRU_BATCH);
+ *first = -1;
}
#else
-static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area_struct *vma,
- struct mm_walk *args, unsigned long *bitmap, unsigned long *start)
+static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area_struct *vma,
+ struct mm_walk *args, unsigned long *bitmap, unsigned long *first)
{
}
#endif
@@ -4071,9 +4072,9 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
unsigned long next;
unsigned long addr;
struct vm_area_struct *vma;
- unsigned long pos = -1;
+ unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)];
+ unsigned long first = -1;
struct lru_gen_mm_walk *walk = args->private;
- unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {};
VM_WARN_ON_ONCE(pud_leaf(*pud));
@@ -4115,18 +4116,17 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
continue;
- walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
+ walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
continue;
}
#endif
walk->mm_stats[MM_NONLEAF_TOTAL]++;
- if (arch_has_hw_nonleaf_pmd_young() &&
- get_cap(LRU_GEN_NONLEAF_YOUNG)) {
+ if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) {
if (!pmd_young(val))
continue;
- walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
+ walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
}
if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
@@ -4143,7 +4143,7 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
update_bloom_filter(walk->lruvec, walk->max_seq + 1, pmd + i);
}
- walk_pmd_range_locked(pud, -1, vma, args, bitmap, &pos);
+ walk_pmd_range_locked(pud, -1, vma, args, bitmap, &first);
if (i < PTRS_PER_PMD && get_next_vma(PUD_MASK, PMD_SIZE, args, &start, &end))
goto restart;
--
2.40.1
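For readers following the renamed parameters in the patch above, here is a small userspace C sketch of the batching protocol walk_pmd_range_locked() implements after the change, with plain integer indices standing in for PMD addresses and simplified bookkeeping (no page tables, no locking): the first call only records the starting index and clears the bitmap, later calls set bit (i - 1), and passing -1 flushes the batch.

/* Userspace sketch; indices stand in for PMD addresses. */
#include <stdio.h>
#include <string.h>

#define MIN_LRU_BATCH 64

static long first = -1;
static unsigned char bitmap[MIN_LRU_BATCH];

static void process(long idx)
{
        printf("scan pmd %ld\n", idx);
}

/* idx == -1 means "flush whatever has been batched" */
static void batch_pmd(long idx)
{
        long i;

        /* first entry: record it and reset the bitmap for the new batch */
        if (first == -1) {
                first = idx;
                memset(bitmap, 0, sizeof(bitmap));
                return;
        }

        i = idx == -1 ? 0 : idx - first;
        if (i > 0 && i <= MIN_LRU_BATCH) {
                bitmap[i - 1] = 1;
                return;
        }

        /* flush: the first entry, then every index whose bit was set */
        process(first);
        for (i = 0; i < MIN_LRU_BATCH; i++)
                if (bitmap[i])
                        process(first + i + 1);

        first = -1;
}

int main(void)
{
        batch_pmd(100);         /* records first */
        batch_pmd(101);         /* sets bit 0 */
        batch_pmd(105);         /* sets bit 4 */
        batch_pmd(-1);          /* flushes 100, 101, 105 */
        return 0;
}

Running it prints the three batched indices; the bitmap_zero() move in the patch is visible here as well: the bitmap is reset when a new batch starts rather than when the previous one is flushed.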

View File

@ -0,0 +1,153 @@
From e604c3ccb4dfbdde2467fccef9bb36170a392695 Mon Sep 17 00:00:00 2001
From: "T.J. Alumbaugh" <talumbau@google.com>
Date: Wed, 18 Jan 2023 00:18:27 +0000
Subject: [PATCH 18/19] UPSTREAM: mm: multi-gen LRU: simplify
lru_gen_look_around()

Update the folio generation in place with or without
current->reclaim_state->mm_walk. The LRU lock is held for longer, if
mm_walk is NULL and the number of folios to update is more than
PAGEVEC_SIZE.

This causes a measurable regression from the LRU lock contention during a
microbenchmark. But a tiny regression is not worth the complexity.

Link: https://lkml.kernel.org/r/20230118001827.1040870-8-talumbau@google.com
Change-Id: I9ce18b4f4062e6c1c13c98ece9422478eb8e1846
Signed-off-by: T.J. Alumbaugh <talumbau@google.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
(cherry picked from commit abf086721a2f1e6897c57796f7268df1b194c750)
Bug: 274865848
Signed-off-by: T.J. Mercier <tjmercier@google.com>
---
mm/vmscan.c | 73 +++++++++++++++++------------------------------------
1 file changed, 23 insertions(+), 50 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8f496c2e670a9..f6ce7a1fd78a3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4571,13 +4571,12 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
{
int i;
- pte_t *pte;
unsigned long start;
unsigned long end;
- unsigned long addr;
struct lru_gen_mm_walk *walk;
int young = 0;
- unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {};
+ pte_t *pte = pvmw->pte;
+ unsigned long addr = pvmw->address;
struct folio *folio = pfn_folio(pvmw->pfn);
struct mem_cgroup *memcg = folio_memcg(folio);
struct pglist_data *pgdat = folio_pgdat(folio);
@@ -4594,25 +4593,28 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
/* avoid taking the LRU lock under the PTL when possible */
walk = current->reclaim_state ? current->reclaim_state->mm_walk : NULL;
- start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start);
- end = min(pvmw->address | ~PMD_MASK, pvmw->vma->vm_end - 1) + 1;
+ start = max(addr & PMD_MASK, pvmw->vma->vm_start);
+ end = min(addr | ~PMD_MASK, pvmw->vma->vm_end - 1) + 1;
if (end - start > MIN_LRU_BATCH * PAGE_SIZE) {
- if (pvmw->address - start < MIN_LRU_BATCH * PAGE_SIZE / 2)
+ if (addr - start < MIN_LRU_BATCH * PAGE_SIZE / 2)
end = start + MIN_LRU_BATCH * PAGE_SIZE;
- else if (end - pvmw->address < MIN_LRU_BATCH * PAGE_SIZE / 2)
+ else if (end - addr < MIN_LRU_BATCH * PAGE_SIZE / 2)
start = end - MIN_LRU_BATCH * PAGE_SIZE;
else {
- start = pvmw->address - MIN_LRU_BATCH * PAGE_SIZE / 2;
- end = pvmw->address + MIN_LRU_BATCH * PAGE_SIZE / 2;
+ start = addr - MIN_LRU_BATCH * PAGE_SIZE / 2;
+ end = addr + MIN_LRU_BATCH * PAGE_SIZE / 2;
}
}
- pte = pvmw->pte - (pvmw->address - start) / PAGE_SIZE;
+ /* folio_update_gen() requires stable folio_memcg() */
+ if (!mem_cgroup_trylock_pages(memcg))
+ return;
- rcu_read_lock();
arch_enter_lazy_mmu_mode();
+ pte -= (addr - start) / PAGE_SIZE;
+
for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) {
unsigned long pfn;
@@ -4637,56 +4639,27 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
!folio_test_swapcache(folio)))
folio_mark_dirty(folio);
+ if (walk) {
+ old_gen = folio_update_gen(folio, new_gen);
+ if (old_gen >= 0 && old_gen != new_gen)
+ update_batch_size(walk, folio, old_gen, new_gen);
+
+ continue;
+ }
+
old_gen = folio_lru_gen(folio);
if (old_gen < 0)
folio_set_referenced(folio);
else if (old_gen != new_gen)
- __set_bit(i, bitmap);
+ folio_activate(folio);
}
arch_leave_lazy_mmu_mode();
- rcu_read_unlock();
+ mem_cgroup_unlock_pages();
/* feedback from rmap walkers to page table walkers */
if (suitable_to_scan(i, young))
update_bloom_filter(lruvec, max_seq, pvmw->pmd);
-
- if (!walk && bitmap_weight(bitmap, MIN_LRU_BATCH) < PAGEVEC_SIZE) {
- for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
- folio = pfn_folio(pte_pfn(pte[i]));
- folio_activate(folio);
- }
- return;
- }
-
- /* folio_update_gen() requires stable folio_memcg() */
- if (!mem_cgroup_trylock_pages(memcg))
- return;
-
- if (!walk) {
- spin_lock_irq(&lruvec->lru_lock);
- new_gen = lru_gen_from_seq(lruvec->lrugen.max_seq);
- }
-
- for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
- folio = pfn_folio(pte_pfn(pte[i]));
- if (folio_memcg_rcu(folio) != memcg)
- continue;
-
- old_gen = folio_update_gen(folio, new_gen);
- if (old_gen < 0 || old_gen == new_gen)
- continue;
-
- if (walk)
- update_batch_size(walk, folio, old_gen, new_gen);
- else
- lru_gen_update_size(lruvec, folio, old_gen, new_gen);
- }
-
- if (!walk)
- spin_unlock_irq(&lruvec->lru_lock);
-
- mem_cgroup_unlock_pages();
}
/******************************************************************************
--
2.40.1
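A compact userspace C sketch of the control flow this patch leaves behind in lru_gen_look_around() (the names and types below are stand-ins, not kernel APIs): every folio's generation is updated in place; when a reclaim walker is present its batch absorbs the accounting, otherwise the folio is simply activated, so the deferred bitmap and the conditional LRU-lock handling are gone.

/* Userspace sketch; struct names and fields are illustrative stand-ins. */
#include <stdbool.h>
#include <stdio.h>

struct walk_ctx {               /* stand-in for lru_gen_mm_walk */
        int batched;
};

struct folio_stub {             /* stand-in for per-folio LRU state */
        int gen;
        bool active;
};

static void look_around(struct folio_stub *folios, int n, int new_gen,
                        struct walk_ctx *walk)
{
        for (int i = 0; i < n; i++) {
                int old_gen = folios[i].gen;

                if (walk) {
                        /* in-place update; the walker batches the accounting */
                        folios[i].gen = new_gen;
                        if (old_gen != new_gen)
                                walk->batched++;
                        continue;
                }

                /* no walker: leave the generation alone and just activate */
                if (old_gen != new_gen)
                        folios[i].active = true;
        }
}

int main(void)
{
        struct folio_stub f[3] = { { .gen = 1 }, { .gen = 2 }, { .gen = 2 } };
        struct walk_ctx walk = { 0 };

        look_around(f, 3, 2, &walk);    /* walker present: one batched update */
        printf("batched=%d\n", walk.batched);

        look_around(f, 3, 3, NULL);     /* no walker: stale folios activated */
        printf("f[0] active=%d\n", (int)f[0].active);
        return 0;
}

The second call models the slower path the commit message describes: without a walker the work is done directly, which in the kernel means holding the LRU lock for the whole window when more than PAGEVEC_SIZE folios need updating.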