0132-prezero-20220308.patch
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dave Hansen <[email protected]>
Date: Mon, 7 Mar 2022 11:59:20 -0800
Subject: [PATCH] prezero 20220308
---
include/linux/compaction.h | 2 +
include/linux/gfp.h | 2 +-
kernel/sysctl.c | 9 +
mm/compaction.c | 52 +++++
mm/huge_memory.c | 9 +-
mm/internal.h | 23 +-
mm/page_alloc.c | 416 +++++++++++++++++++++++++++++++++----
7 files changed, 468 insertions(+), 45 deletions(-)
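
What the patch does, in short: writing a page count to the new vm.zero_pages
sysctl walks every populated zone on every online node and clears high-order
free pages (order >= prezero_buddy_order, 9 by default), tagging them with a
BUDDY_ZEROED bit stored alongside the buddy order in page->private.  When such
a page is later allocated, kernel_init_free_pages() can skip clear_highpage()
(if prezero_really_skip is set), and GFP_TRANSHUGE_LIGHT gains __GFP_ZERO so
THP faults take their zeroing from the allocator instead of clear_huge_page().
A handful of debugfs knobs (prezero_really_skip, prezero_counter,
prezero_could_have_skipped, prezero_buddy_order, ...) tune and count the
behavior.

A minimal userspace sketch for exercising the new knobs; this is only an
illustration, assuming the patched kernel is booted, the program runs as root,
and debugfs is mounted at the usual /sys/kernel/debug:

	#include <stdio.h>

	int main(void)
	{
		/* Ask the kernel to pre-zero roughly 4096 free pages per zone. */
		FILE *f = fopen("/proc/sys/vm/zero_pages", "w");

		if (!f) {
			perror("vm.zero_pages");
			return 1;
		}
		fprintf(f, "4096\n");
		fclose(f);

		/* Read back how many page clears have been skipped so far. */
		f = fopen("/sys/kernel/debug/prezero_counter", "r");
		if (f) {
			unsigned long long skipped;

			if (fscanf(f, "%llu", &skipped) == 1)
				printf("clears skipped so far: %llu\n", skipped);
			fclose(f);
		}
		return 0;
	}
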
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 34bce35c808d..45407237e6c9 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -82,6 +82,8 @@ static inline unsigned long compact_gap(unsigned int order)
#ifdef CONFIG_COMPACTION
extern unsigned int sysctl_compaction_proactiveness;
+extern int sysctl_zero_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *length, loff_t *ppos);
extern int sysctl_compaction_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos);
extern int compaction_proactiveness_sysctl_handler(struct ctl_table *table,
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 80f63c862be5..f49971123e2f 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -334,7 +334,7 @@ struct vm_area_struct;
#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM)
#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | \
__GFP_SKIP_KASAN_POISON)
-#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
+#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | __GFP_ZERO |\
__GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 730ab56d9e92..95f4d4771470 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -94,6 +94,8 @@
#if defined(CONFIG_SYSCTL)
+extern int sysctl_zero_pages;
+
/* Constants used for minimum and maximum */
#ifdef CONFIG_PERF_EVENTS
@@ -2531,6 +2533,13 @@ static struct ctl_table vm_table[] = {
.extra1 = SYSCTL_ONE,
.extra2 = SYSCTL_FOUR,
},
+ {
+ .procname = "zero_pages",
+ .data = &sysctl_zero_pages,
+ .maxlen = sizeof(sysctl_zero_pages),
+ .mode = 0644,
+ .proc_handler = sysctl_zero_handler,
+ },
#ifdef CONFIG_COMPACTION
{
.procname = "compact_memory",
diff --git a/mm/compaction.c b/mm/compaction.c
index e9a36942c1fa..de6268560058 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -96,6 +96,16 @@ static void split_map_pages(struct list_head *list)
order = page_private(page);
nr_pages = 1 << order;
+ WARN_ON(PageBuddy(page));
+ // These pages recently came out of the buddy but
+ // they should have come via __isolate_free_page()
+ // which does del_page_from_free_list(). That
+ // should have left PageBuddy() clear.
+ // page_order() metadata was left presumably so
+ // that we could do this split and map here. It
+ // is likely no longer needed. Zap it to keep
+ // post_alloc_hook() from complaining.
+ page->private = 0;
post_alloc_hook(page, order, __GFP_MOVABLE);
if (order)
split_page(page, order);
@@ -2658,6 +2668,48 @@ static void proactive_compact_node(pg_data_t *pgdat)
}
}
+void zero_some_pages(struct zone *z, int pages);
+
+static void zero_nodes(int pages)
+{
+ int nid;
+
+ for_each_online_node(nid) {
+ pg_data_t *pgdat = NODE_DATA(nid);
+ int zoneid;
+
+ for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
+ struct zone *zone = &pgdat->node_zones[zoneid];
+ if (!populated_zone(zone))
+ continue;
+
+ zero_some_pages(zone, pages);
+ }
+ }
+}
+
+int sysctl_zero_pages;
+
+int sysctl_zero_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *length, loff_t *ppos)
+{
+ int rc;
+ int old = sysctl_zero_pages;
+
+ rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
+ if (rc)
+ return rc;
+
+
+
+ if (write)
+ zero_nodes(sysctl_zero_pages);
+
+ return 0;
+}
+
+
+
/* Compact all zones within a node */
static void compact_node(int nid)
{
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fb9163691705..ba8c298784c1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -617,7 +617,12 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
goto release;
}
- clear_huge_page(page, vmf->address, HPAGE_PMD_NR);
+ // Move the zeroing to use __GFP_ZERO in
+ // the allocator. Clearing here has the advantage of not
+ // wasting the clear operation if the cgroup charge or
+ // page table allocation fails.
+ //
+ //clear_huge_page(page, vmf->address, HPAGE_PMD_NR);
/*
* The memory barrier inside __SetPageUptodate makes sure that
* clear_huge_page writes become visible before the set_pmd_at()
@@ -774,7 +779,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
return ret;
}
gfp = vma_thp_gfp_mask(vma);
- page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+ page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
if (unlikely(!page)) {
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
diff --git a/mm/internal.h b/mm/internal.h
index d80300392a19..d6c7c26fe598 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -328,6 +328,22 @@ isolate_migratepages_range(struct compact_control *cc,
int find_suitable_fallback(struct free_area *area, unsigned int order,
int migratetype, bool only_stealable, bool *can_steal);
+/*
+ * Use the bit above the highest-possible buddy page
+ * order (MAX_ORDER-1).
+ */
+#define BUDDY_ZEROED (1UL << (ilog2(MAX_ORDER-1)+1))
+static inline unsigned int __buddy_order(struct page *page, bool unsafe)
+{
+ unsigned int ret;
+ if (unsafe)
+ ret = READ_ONCE(page_private(page));
+ else
+ ret = page_private(page);
+
+ return ret & ~BUDDY_ZEROED;
+}
+
/*
* This function returns the order of a free page in the buddy system. In
* general, page_zone(page)->lock must be held by the caller to prevent the
@@ -339,7 +355,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
static inline unsigned int buddy_order(struct page *page)
{
/* PageBuddy() must be checked by the caller */
- return page_private(page);
+ return __buddy_order(page, false);
}
/*
@@ -353,7 +369,10 @@ static inline unsigned int buddy_order(struct page *page)
* times, potentially observing different values in the tests and the actual
* use of the result.
*/
-#define buddy_order_unsafe(page) READ_ONCE(page_private(page))
+static inline unsigned int buddy_order_unsafe(struct page *page)
+{
+ return __buddy_order(page, true);
+}
/*
* These three helpers classifies VMAs for virtual memory accounting.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0ea48434ac7d..bddadb46bf78 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -15,6 +15,7 @@
* (lots of bits borrowed from Ingo Molnar & Andrew Morton)
*/
+#include <linux/debugfs.h>
#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/highmem.h>
@@ -758,6 +759,26 @@ void prep_compound_page(struct page *page, unsigned int order)
prep_compound_head(page, order);
}
+enum zero_state {
+ NOT_ZEROED,
+ PRE_ZEROED
+};
+
+static enum zero_state pre_zeroed(struct page *page)
+{
+ if (page_private(page) & BUDDY_ZEROED)
+ return PRE_ZEROED;
+ return NOT_ZEROED;
+}
+
+static void set_buddy_private(struct page *page, unsigned long value)
+{
+ WARN_ON(!PageBuddy(page));
+
+
+ set_page_private(page, value);
+}
+
#ifdef CONFIG_DEBUG_PAGEALLOC
unsigned int _debug_guardpage_minorder;
@@ -800,7 +821,7 @@ static inline bool set_page_guard(struct zone *zone, struct page *page,
__SetPageGuard(page);
INIT_LIST_HEAD(&page->lru);
- set_page_private(page, order);
+ set_buddy_private(page, order);
/* Guard pages are not available for any usage */
__mod_zone_freepage_state(zone, -(1 << order), migratetype);
@@ -815,7 +836,7 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
__ClearPageGuard(page);
- set_page_private(page, 0);
+ set_buddy_private(page, 0);
if (!is_migrate_isolate(migratetype))
__mod_zone_freepage_state(zone, (1 << order), migratetype);
}
@@ -880,12 +901,80 @@ void init_mem_debugging_and_hardening(void)
#endif
}
-static inline void set_buddy_order(struct page *page, unsigned int order)
+u64 prezero_really_skip = 1;
+u64 prezero_counter = 0;
+u64 prezero_could_have_skipped = 0;
+u64 prezero_check_zero_highpage = 0;
+u64 prezero_buddy_sane_checks = 0;
+u64 prezero_buddy_order = 9;
+static int prezero_debugfs(void)
{
- set_page_private(page, order);
+ debugfs_create_u64("prezero_really_skip", 0644, NULL, &prezero_really_skip);
+ debugfs_create_u64("prezero_counter", 0644, NULL, &prezero_counter);
+ debugfs_create_u64("prezero_check_zero_highpage", 0644, NULL, &prezero_check_zero_highpage);
+ debugfs_create_u64("prezero_could_have_skipped", 0644, NULL, &prezero_could_have_skipped);
+ debugfs_create_u64("prezero_buddy_sane_checks", 0644, NULL, &prezero_buddy_sane_checks);
+ debugfs_create_u64("prezero_buddy_order", 0644, NULL, &prezero_buddy_order);
+
+ return 0;
+}
+late_initcall(prezero_debugfs);
+
+void check_zero_highpage(struct page *page, int order, int numpages, int line, struct page *op)
+{
+ int nr;
+
+ if (!prezero_check_zero_highpage)
+ return;
+
+
+
+ if (!memchr_inv(page_address(page), 0, PAGE_SIZE<<order))
+ return;
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHMEM));
+
+ printk("check_zero_highpage() BAD pfn=0x%lx/%d numpages: %d from line %d\n", page_to_pfn(page), order, numpages, line);
+// trace_printk("check_zero_highpage() BAD pfn=0x%lx order=%d numpages: %d from line %d\n", page_to_pfn(page), order, numpages, line);
+// trace_printk("check_zero_highpage() real pfn=0x%lx\n", page_to_pfn(op));
+// tracing_off();
+ WARN_ON(1);
+ for (nr = 0; nr < 1<<order; nr++) {
+ struct page *tmp = &page[nr];
+ if (PageBuddy(tmp))
+ printk("page[0x%x] had PageBuddy pfn=0x%lx\n", nr, page_to_pfn(tmp));
+ clear_highpage(&page[nr]);
+ }
+}
+
+/*
+ * Only use this for pages which are new to the buddy allocator.
+ * They should not yet have PageBuddy() set.
+ */
+static inline void mark_new_buddy(struct page *page, unsigned int order,
+ enum zero_state zero)
+{
+ unsigned long private = order;
+
+ WARN_ON(PageBuddy(page));
+
+ if (zero == PRE_ZEROED) {
+ private |= BUDDY_ZEROED;
+ check_zero_highpage(page, order, 1<<order, __LINE__, page);
+ }
+
__SetPageBuddy(page);
+ set_buddy_private(page, private);
}
+/*
+static inline void change_buddy_order(struct page *page, unsigned int order)
+{
+ WARN_ON(!PageBuddy(page));
+ __SetPageBuddy(page);
+ set_page_private(page, order);
+}
+*/
+
/*
* This function checks whether a page is free && is the buddy
* we can coalesce a page and its buddy if
@@ -970,12 +1059,16 @@ compaction_capture(struct capture_control *capc, struct page *page,
}
#endif /* CONFIG_COMPACTION */
+#define list_check_buddy_is_sane(p, o) __list_check_buddy_is_sane(p, o, __LINE__)
+void __list_check_buddy_is_sane(struct page *page, int order, int line);
+
/* Used for pages not on another list */
static inline void add_to_free_list(struct page *page, struct zone *zone,
unsigned int order, int migratetype)
{
struct free_area *area = &zone->free_area[order];
+ list_check_buddy_is_sane(page, order);
list_add(&page->lru, &area->free_list[migratetype]);
area->nr_free++;
}
@@ -986,6 +1079,7 @@ static inline void add_to_free_list_tail(struct page *page, struct zone *zone,
{
struct free_area *area = &zone->free_area[order];
+ list_check_buddy_is_sane(page, order);
list_add_tail(&page->lru, &area->free_list[migratetype]);
area->nr_free++;
}
@@ -1000,6 +1094,7 @@ static inline void move_to_free_list(struct page *page, struct zone *zone,
{
struct free_area *area = &zone->free_area[order];
+ list_check_buddy_is_sane(page, order);
list_move_tail(&page->lru, &area->free_list[migratetype]);
}
@@ -1011,11 +1106,117 @@ static inline void del_page_from_free_list(struct page *page, struct zone *zone,
__ClearPageReported(page);
list_del(&page->lru);
+ set_buddy_private(page, 0);
__ClearPageBuddy(page);
- set_page_private(page, 0);
zone->free_area[order].nr_free--;
}
+bool __zero_one_page(struct zone *zone, int order)
+{
+ struct page *page;
+ int numpages = 1<<order;
+ int i;
+ int migratetype = MIGRATE_RECLAIMABLE;
+ struct free_area *area;
+ bool did_zero = false;
+ int got_mt;
+ int order_orig;
+
+ spin_lock(&zone->lock);
+ /* mostly ripped from __rmqueue_smallest() */
+ area = &(zone->free_area[order]);
+
+ /* Look for a page to zero in all migratetypes: */
+ while (migratetype >= 0) {
+ struct list_head *lh = &area->free_list[migratetype];
+ page = get_page_from_free_area(area, migratetype);
+ got_mt = migratetype;
+
+
+
+
+
+
+ /* Was a page located that needs to be zeroed? */
+ if (page && (pre_zeroed(page) == NOT_ZEROED))
+ break;
+
+ /* No page was found to zero. Try another migratetype. */
+ page = NULL;
+ migratetype--;
+ }
+ if (!page) {
+ spin_unlock(&zone->lock);
+ return did_zero;
+ }
+
+ order_orig = buddy_order(page);
+
+ del_page_from_free_list(page, zone, order);
+ spin_unlock(&zone->lock);
+
+ did_zero = true;
+ for (i = 0; i < numpages; i++) {
+ clear_highpage(page + i);
+ }
+
+ spin_lock(&zone->lock);
+ {
+ int pz_before = pre_zeroed(page);
+ int order_before = buddy_order(page);
+ int pz_after;
+ int order_after;
+
+ mark_new_buddy(page, order, PRE_ZEROED);
+ pz_after = pre_zeroed(page);
+ order_after = buddy_order(page);
+
+
+
+ }
+ add_to_free_list_tail(page, zone, order, migratetype);
+ //did_some_prezeroing = 1;
+ check_zero_highpage(page , order, 1<<order, __LINE__, page);
+ spin_unlock(&zone->lock);
+ return did_zero;
+}
+
+
+int zero_pages(struct zone *zone, int order, int do_count)
+{
+ int count = 0;
+
+ while (__zero_one_page(zone, order)) {
+ cond_resched();
+ count++;
+ // arbitrary limit to keep this from
+ // taking insane amounts of time:
+ if (count >= do_count)
+ break;
+ }
+
+
+
+
+
+
+ return count;
+}
+
+void zero_some_pages(struct zone *zone, int pages)
+{
+ int order;
+ long zero_count = 0;
+
+ for (order = MAX_ORDER-1; order >= prezero_buddy_order; order--) {
+ long did = zero_pages(zone, order, pages);
+ zero_count += did << order;
+ if (zero_count > pages)
+ break;
+ }
+
+}
+
/*
* If this is not the largest possible page, check if the buddy
* of the next-highest order is free. If it is, it's possible
@@ -1141,7 +1342,8 @@ static inline void __free_one_page(struct page *page,
}
done_merging:
- set_buddy_order(page, order);
+ list_check_buddy_is_sane(page, order);
+ mark_new_buddy(page, order, NOT_ZEROED);
if (fpi_flags & FPI_TO_TAIL)
to_tail = true;
@@ -1285,8 +1487,20 @@ static void kernel_init_free_pages(struct page *page, int numpages, bool zero_ta
kasan_disable_current();
for (i = 0; i < numpages; i++) {
u8 tag = page_kasan_tag(page + i);
+ bool need_to_zero = true;
+
page_kasan_tag_reset(page + i);
- clear_highpage(page + i);
+ if (pre_zeroed(page) == PRE_ZEROED) {
+ check_zero_highpage(page, ilog2(numpages), numpages, __LINE__, page);
+
+ if (prezero_really_skip)
+ need_to_zero = false;
+ prezero_could_have_skipped++;
+ }
+ if (need_to_zero)
+ clear_highpage(page + i);
+ else
+ prezero_counter++;
page_kasan_tag_set(page + i, tag);
}
kasan_enable_current();
@@ -1329,6 +1543,11 @@ static __always_inline bool free_pages_prepare(struct page *page,
ClearPageHasHWPoisoned(page);
}
for (i = 1; i < (1 << order); i++) {
+ /*
+ * This will leave BUDDY_ZEROED in place
+ * in tail pages. It should get cleared
+ * up before anyone notices in expand().
+ */
if (compound)
bad += free_tail_pages_check(page, page + i);
if (unlikely(check_free_page(page + i))) {
@@ -1393,44 +1612,58 @@ static __always_inline bool free_pages_prepare(struct page *page,
return true;
}
-#ifdef CONFIG_DEBUG_VM
/*
- * With DEBUG_VM enabled, order-0 pages are checked immediately when being freed
- * to pcp lists. With debug_pagealloc also enabled, they are also rechecked when
- * moved from pcp lists to free lists.
+ * Is extra page-free-time debugging needed? Returning true here will wreck
+ * performance, but adds extra sanity checks to pages at free time. Only
+ * turn on when debugging.
*/
-static bool free_pcp_prepare(struct page *page, unsigned int order)
+static inline bool extra_debug_free(void)
{
- return free_pages_prepare(page, order, true, FPI_NONE);
+ return IS_ENABLED(CONFIG_DEBUG_VM) || debug_pagealloc_enabled_static();
}
-static bool bulkfree_pcp_prepare(struct page *page)
-{
- if (debug_pagealloc_enabled_static())
- return check_free_page(page);
- else
- return false;
-}
-#else
/*
- * With DEBUG_VM disabled, order-0 pages being freed are checked only when
- * moving from pcp lists to free list in order to reduce overhead. With
- * debug_pagealloc enabled, they are checked also immediately when being freed
- * to the pcp lists.
+ * Called when pages are freed into the allocator but before being added to the
+ * pcp lists. Only do free page checking when some form of debugging is on to
+ * reduce overhead.
*/
static bool free_pcp_prepare(struct page *page, unsigned int order)
{
- if (debug_pagealloc_enabled_static())
- return free_pages_prepare(page, order, true, FPI_NONE);
- else
- return free_pages_prepare(page, order, false, FPI_NONE);
+
+ page->private = 0;
+
+
+ return free_pages_prepare(page, order, extra_debug_free(), FPI_NONE);
}
-static bool bulkfree_pcp_prepare(struct page *page)
+/*
+ * Called when pages are moved from the pcp lists to the main buddy free lists.
+ *
+ * These pages should have been checked when they were initially freed into the
+ * allocator via free_pcp_prepare(). Check them again if one of the extra free
+ * debugging checks is on.
+ */
+static bool bulkfree_pcp_prepare(struct page *page, int order)
{
- return check_free_page(page);
+ unsigned long private = page->private;
+
+
+ /*
+ * Only BUDDY_ZEROED should be set in page->private at
+ * this point. If any other bit is set, we have uno
+ * problemo.
+ */
+ if ((private & ~BUDDY_ZEROED) && printk_ratelimit()) {
+ printk("%s()::%d %lx\n", __func__, __LINE__, page->private);
+ page->private = 0;
+
+ }
+
+ if (extra_debug_free())
+ return check_free_page(page);
+ else
+ return false;
}
-#endif /* CONFIG_DEBUG_VM */
static inline void prefetch_buddy(struct page *page)
{
@@ -1493,7 +1726,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
nr_freed += 1 << order;
count -= 1 << order;
- if (bulkfree_pcp_prepare(page))
+ if (bulkfree_pcp_prepare(page, order))
continue;
/* Encode order with the migratetype */
@@ -2294,7 +2527,7 @@ void __init init_cma_reserved_pageblock(struct page *page)
* -- nyc
*/
static inline void expand(struct zone *zone, struct page *page,
- int low, int high, int migratetype)
+ int low, int high, int migratetype, enum zero_state page_prezeroed)
{
unsigned long size = 1 << high;
@@ -2312,8 +2545,8 @@ static inline void expand(struct zone *zone, struct page *page,
if (set_page_guard(zone, &page[size], high, migratetype))
continue;
+ mark_new_buddy(&page[size], high, page_prezeroed);
add_to_free_list(&page[size], zone, high, migratetype);
- set_buddy_order(&page[size], high);
}
}
@@ -2392,10 +2625,19 @@ static bool check_new_pages(struct page *page, unsigned int order)
return false;
}
-inline void post_alloc_hook(struct page *page, unsigned int order,
+noinline void post_alloc_hook(struct page *page, unsigned int order,
gfp_t gfp_flags)
{
- set_page_private(page, 0);
+ if ((page->private & ~BUDDY_ZEROED) && printk_ratelimit()) {
+ printk("%s()::%d BAD page private: priv=%lx\n", __func__, __LINE__, page->private);
+ page->private = 0;
+ /*
+ * PageBuddy() is clear. This trips the
+ * PageBuddy check in set_buddy_private().
+ */
+ //set_buddy_private(page, 0);
+ dump_stack();
+ }
set_page_refcounted(page);
arch_alloc_page(page, order);
@@ -2428,7 +2670,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
page_table_check_alloc(page, order);
}
-static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
+static noinline void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
unsigned int alloc_flags)
{
post_alloc_hook(page, order, gfp_flags);
@@ -2462,13 +2704,30 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
/* Find a page of the appropriate size in the preferred list */
for (current_order = order; current_order < MAX_ORDER; ++current_order) {
+ enum zero_state page_pz;
area = &(zone->free_area[current_order]);
page = get_page_from_free_area(area, migratetype);
if (!page)
continue;
+ /* stash this away before del_page_from_free_list() zaps it: */
+ page_pz = pre_zeroed(page);
+
del_page_from_free_list(page, zone, current_order);
- expand(zone, page, order, current_order, migratetype);
+ expand(zone, page, order, current_order, migratetype, page_pz);
set_pcppage_migratetype(page, migratetype);
+ /*
+ * This is a bit of a kludge. The state was zapped above
+ * and is restored here. We should probably
+ * think about if del_page_from_free_list()
+ * leaves BUDDY_ZEROED in place and what the
+ * implications are.
+ *
+ * Without this, pages leaving the buddy always
+ * have page->private=0.
+ */
+ if (page_pz == PRE_ZEROED) {
+ page->private = BUDDY_ZEROED;
+ }
return page;
}
@@ -9490,7 +9749,9 @@ static void break_down_buddy_pages(struct zone *zone, struct page *page,
if (current_buddy != target) {
add_to_free_list(current_buddy, zone, high, migratetype);
- set_buddy_order(current_buddy, high);
+ // This is very rare. Do not bother
+ // trying to preserve zero state:
+ mark_new_buddy(current_buddy, high, NOT_ZEROED);
page = next_page;
}
}
@@ -9573,3 +9834,78 @@ bool has_managed_dma(void)
return false;
}
#endif /* CONFIG_ZONE_DMA */
+
+void __list_check_buddy_low_orders(struct page *page, int order, int line)
+{
+ int nr_pages = 1 << order;
+ int i;
+
+ for (i = 1; i < nr_pages; i++) {
+ struct page *child = &page[i];
+ unsigned long child_pfn = page_to_pfn(child);
+ unsigned long pfn = page_to_pfn(page);
+ if (!PageBuddy(child))
+ continue;
+
+ printk("bad low order: %d pfns: 0x%lx 0x%lx buddy: %d/%d line=%d bo=%d\n",
+ order, pfn, child_pfn,
+ PageBuddy(page),
+ PageBuddy(child),
+ line, buddy_order(child));
+ }
+}
+
+void __list_check_buddy_high_orders(struct page *page, int order, int line)
+{
+ unsigned long pfn = page_to_pfn(page);
+
+ // Highest-order buddy pages (MAX_ORDER-1) are not
+ // merged together and can be on lists together
+ if (order >= MAX_ORDER-1)
+ return;
+
+ while (order < MAX_ORDER-1) {
+ unsigned long buddy_pfn = __find_buddy_pfn(pfn, order);
+ struct page *buddy = pfn_to_page(buddy_pfn);
+ bool bad;
+
+ // not in the buddy, don't care
+ if (!PageBuddy(buddy))
+ goto next;
+
+ // starts after me, can't possibly overlap, don't care
+ if (buddy_pfn >= pfn + (1<<order))
+ goto next;
+
+ // Starts before me. Does it cover me?
+ if (buddy_pfn + (1<<buddy_order(buddy)) <= pfn)
+ goto next;
+
+ bad = 1;
+ if (bad) {
+ printk("bad high order: %d pfns: 0x%lx 0x%lx buddy: %d/%d pib=%d line=%d bo=%d bad=%d\n",
+ order, pfn, buddy_pfn, PageBuddy(page),
+ PageBuddy(buddy),
+ page_is_buddy(page, buddy, order),
+ line,
+ buddy_order(buddy),
+ bad);
+ //WARN_ON(1);
+ }
+
+ // combine the PFNs to "move up" one order:
+ pfn = buddy_pfn & pfn;
+ page = pfn_to_page(pfn);
+ next:
+ order++;
+ }
+}
+
+
+void __list_check_buddy_is_sane(struct page *page, int order, int line)
+{
+ if (!prezero_buddy_sane_checks)
+ return;
+ __list_check_buddy_high_orders(page, order, line);
+ __list_check_buddy_low_orders(page, order, line);
+}
--
https://clearlinux.org