Sync up

parent f059736e2d
commit 905f90200e

4 changed files with 1149 additions and 1 deletions
scratch/01-mm_struct.md (new file, +275)

@@ -0,0 +1,275 @@
# `#include <linux/mm_types.h>`

> **TODO**
> - `do_user_addr_fault`
> - `handle_mm_fault`

```c
// From v6.7-rc6
struct mm_struct {
    struct {
        /*
         * Fields which are often written to are placed in a separate
         * cache line.
         */
        // Zk.
        // -- that is, the field below is written to frequently, so it is best
        // kept in its own cache line.
        struct {
            /**
             * @mm_count: The number of references to &struct
             * mm_struct (@mm_users count as 1).
             *
             * Use mmgrab()/mmdrop() to modify. When this drops to
             * 0, the &struct mm_struct is freed.
             */
            // Zk.
            // mmgrab() -- pin a &struct mm_struct for a longer/unbounded amount of time.
            // mmdrop() -- undo the above. (See the usage sketch after this listing.)
            atomic_t mm_count;
        } ____cacheline_aligned_in_smp; // Zk. -- on SMP builds, equivalent to `__aligned__(64)` on x86

        // Zk.
        // Maple tree storing the VMAs (Virtual Memory Areas) -- each mm_struct
        // will have many of them (see /proc/[0-9]*/maps). Roughly, they correspond
        // to userspace calls to `mmap`. (See the iteration sketch after this listing.)
        struct maple_tree mm_mt;
#ifdef CONFIG_MMU
        unsigned long (*get_unmapped_area) (struct file *filp,
                unsigned long addr, unsigned long len,
                unsigned long pgoff, unsigned long flags);
#endif
        unsigned long mmap_base;        /* base of mmap area */
        unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
        /* Base addresses for compatible mmap() */
        unsigned long mmap_compat_base;
        unsigned long mmap_compat_legacy_base;
#endif
        unsigned long task_size;        /* size of task vm space */

        // Zk.
        // Top-level page table of this address space (pgd = Page Global Directory).
        pgd_t * pgd;

#ifdef CONFIG_MEMBARRIER
        /**
         * @membarrier_state: Flags controlling membarrier behavior.
         *
         * This field is close to @pgd to hopefully fit in the same
         * cache-line, which needs to be touched by switch_mm().
         */
        atomic_t membarrier_state;
#endif

        /**
         * @mm_users: The number of users including userspace.
         *
         * Use mmget()/mmget_not_zero()/mmput() to modify. When this
         * drops to 0 (i.e. when the task exits and there are no other
         * temporary reference holders), we also release a reference on
         * @mm_count (which may then free the &struct mm_struct if
         * @mm_count also drops to 0).
         */
        atomic_t mm_users;

#ifdef CONFIG_SCHED_MM_CID
        /**
         * @pcpu_cid: Per-cpu current cid.
         *
         * Keep track of the currently allocated mm_cid for each cpu.
         * The per-cpu mm_cid values are serialized by their respective
         * runqueue locks.
         */
        struct mm_cid __percpu *pcpu_cid;
        /*
         * @mm_cid_next_scan: Next mm_cid scan (in jiffies).
         *
         * When the next mm_cid scan is due (in jiffies).
         */
        unsigned long mm_cid_next_scan;
#endif
#ifdef CONFIG_MMU
        atomic_long_t pgtables_bytes;   /* size of all page tables */
#endif
        int map_count;                  /* number of VMAs */

        spinlock_t page_table_lock; /* Protects page tables and some
                                     * counters
                                     */
        /*
         * With some kernel config, the current mmap_lock's offset
         * inside 'mm_struct' is at 0x120, which is very optimal, as
         * its two hot fields 'count' and 'owner' sit in 2 different
         * cachelines, and when mmap_lock is highly contended, both
         * of the 2 fields will be accessed frequently, current layout
         * will help to reduce cache bouncing.
         *
         * So please be careful with adding new fields before
         * mmap_lock, which can easily push the 2 fields into one
         * cacheline.
         */
        struct rw_semaphore mmap_lock;

        struct list_head mmlist; /* List of maybe swapped mm's. These
                                  * are globally strung together off
                                  * init_mm.mmlist, and are protected
                                  * by mmlist_lock
                                  */
#ifdef CONFIG_PER_VMA_LOCK
        /*
         * This field has lock-like semantics, meaning it is sometimes
         * accessed with ACQUIRE/RELEASE semantics.
         * Roughly speaking, incrementing the sequence number is
         * equivalent to releasing locks on VMAs; reading the sequence
         * number can be part of taking a read lock on a VMA.
         *
         * Can be modified under write mmap_lock using RELEASE
         * semantics.
         * Can be read with no other protection when holding write
         * mmap_lock.
         * Can be read with ACQUIRE semantics if not holding write
         * mmap_lock.
         */
        int mm_lock_seq;
#endif

        unsigned long hiwater_rss; /* High-watermark of RSS usage */
        unsigned long hiwater_vm;  /* High-water virtual memory usage */

        unsigned long total_vm;    /* Total pages mapped */
        unsigned long locked_vm;   /* Pages that have PG_mlocked set */
        atomic64_t    pinned_vm;   /* Refcount permanently increased */
        unsigned long data_vm;     /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
        unsigned long exec_vm;     /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
        unsigned long stack_vm;    /* VM_STACK */
        unsigned long def_flags;

        /**
         * @write_protect_seq: Locked when any thread is write
         * protecting pages mapped by this mm to enforce a later COW,
         * for instance during page table copying for fork().
         */
        seqcount_t write_protect_seq;

        spinlock_t arg_lock; /* protect the below fields */

        unsigned long start_code, end_code, start_data, end_data;
        unsigned long start_brk, brk, start_stack;
        unsigned long arg_start, arg_end, env_start, env_end;

        unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */

        struct percpu_counter rss_stat[NR_MM_COUNTERS];

        struct linux_binfmt *binfmt;

        /* Architecture-specific MM context */
        mm_context_t context;

        unsigned long flags; /* Must use atomic bitops to access */

#ifdef CONFIG_AIO
        spinlock_t                ioctx_lock;
        struct kioctx_table __rcu *ioctx_table;
#endif
#ifdef CONFIG_MEMCG
        /*
         * "owner" points to a task that is regarded as the canonical
         * user/owner of this mm. All of the following must be true in
         * order for it to be changed:
         *
         * current == mm->owner
         * current->mm != mm
         * new_owner->mm == mm
         * new_owner->alloc_lock is held
         */
        struct task_struct __rcu *owner;
#endif
        struct user_namespace *user_ns;

        /* store ref to file /proc/<pid>/exe symlink points to */
        struct file __rcu *exe_file;
#ifdef CONFIG_MMU_NOTIFIER
        struct mmu_notifier_subscriptions *notifier_subscriptions;
#endif
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
        pgtable_t pmd_huge_pte; /* protected by page_table_lock */
#endif
#ifdef CONFIG_NUMA_BALANCING
        /*
         * numa_next_scan is the next time that PTEs will be remapped
         * PROT_NONE to trigger NUMA hinting faults; such faults gather
         * statistics and migrate pages to new nodes if necessary.
         */
        unsigned long numa_next_scan;

        /* Restart point for scanning and remapping PTEs. */
        unsigned long numa_scan_offset;

        /* numa_scan_seq prevents two threads remapping PTEs. */
        int numa_scan_seq;
#endif
        /*
         * An operation with batched TLB flushing is going on. Anything
         * that can move process memory needs to flush the TLB when
         * moving a PROT_NONE mapped page.
         */
        atomic_t tlb_flush_pending;
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
        /* See flush_tlb_batched_pending() */
        atomic_t tlb_flush_batched;
#endif
        struct uprobes_state uprobes_state;
#ifdef CONFIG_PREEMPT_RT
        struct rcu_head delayed_drop;
#endif
#ifdef CONFIG_HUGETLB_PAGE
        atomic_long_t hugetlb_usage;
#endif
        struct work_struct async_put_work;

#ifdef CONFIG_IOMMU_SVA
        u32 pasid;
#endif
#ifdef CONFIG_KSM
        /*
         * Represent how many pages of this process are involved in KSM
         * merging (not including ksm_zero_pages).
         */
        unsigned long ksm_merging_pages;
        /*
         * Represent how many pages are checked for ksm merging
         * including merged and not merged.
         */
        unsigned long ksm_rmap_items;
        /*
         * Represent how many empty pages are merged with kernel zero
         * pages when enabling KSM use_zero_pages.
         */
        unsigned long ksm_zero_pages;
#endif /* CONFIG_KSM */
#ifdef CONFIG_LRU_GEN
        struct {
            /* this mm_struct is on lru_gen_mm_list */
            struct list_head list;
            /*
             * Set when switching to this mm_struct, as a hint of
             * whether it has been used since the last time per-node
             * page table walkers cleared the corresponding bits.
             */
            unsigned long bitmap;
#ifdef CONFIG_MEMCG
            /* points to the memcg of "owner" above */
            struct mem_cgroup *memcg;
#endif
        } lru_gen;
#endif /* CONFIG_LRU_GEN */
    } __randomize_layout;

    /*
     * The mm_cpumask needs to be at the end of mm_struct, because it
     * is dynamically sized based on nr_cpu_ids.
     */
    unsigned long cpu_bitmap[];
};
```
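
The two reference counts annotated above split "users of the address space" (`mm_users`) from "anything that needs the `struct mm_struct` allocation itself to survive" (`mm_count`). They are manipulated through helpers from `<linux/sched/mm.h>`. A minimal usage sketch, with a hypothetical helper name (`zk_inspect_mm` is not a kernel function):

```c
#include <linux/mm_types.h>  /* struct mm_struct */
#include <linux/sched.h>     /* struct task_struct */
#include <linux/sched/mm.h>  /* get_task_mm(), mmput(), mmgrab(), mmdrop() */

/*
 * Hypothetical helper: temporarily use another task's address space.
 * get_task_mm() takes an mm_users reference (and returns NULL for kernel
 * threads or exiting tasks); mmput() drops it again, which may in turn
 * drop the mm_count reference and free the whole mm_struct.
 */
static void zk_inspect_mm(struct task_struct *task)
{
    struct mm_struct *mm = get_task_mm(task);

    if (!mm)
        return;

    /* ... e.g. read mm->total_vm, walk the VMA tree, ... */

    mmput(mm);
}

/*
 * If only the struct itself must stay allocated (the VMAs and page tables
 * may already be torn down), pin it via mm_count instead:
 *
 *     mmgrab(mm);   ... keep a long-lived pointer ...   mmdrop(mm);
 */
```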
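
Because the VMAs live in the `mm_mt` maple tree, walking a process's mappings (roughly what `/proc/<pid>/maps` prints) goes through the VMA iterator rather than the old `mm->mmap` linked list. A sketch, assuming the read side of `mmap_lock` is sufficient for the walk; the helper name is made up:

```c
#include <linux/mm.h>        /* VMA_ITERATOR, for_each_vma(), struct vm_area_struct */
#include <linux/mmap_lock.h> /* mmap_read_lock()/mmap_read_unlock() */
#include <linux/printk.h>

/* Hypothetical helper: print each mapping's range and flags. */
static void zk_dump_vmas(struct mm_struct *mm)
{
    struct vm_area_struct *vma;
    VMA_ITERATOR(vmi, mm, 0);   /* start the walk at address 0 */

    mmap_read_lock(mm);         /* keep the tree stable during the walk */
    for_each_vma(vmi, vma)
        pr_info("%lx-%lx flags=%lx\n",
                vma->vm_start, vma->vm_end, vma->vm_flags);
    mmap_read_unlock(mm);
}
```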
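
The `mm_lock_seq` / `CONFIG_PER_VMA_LOCK` machinery is what lets the page-fault path (the `do_user_addr_fault` / `handle_mm_fault` TODO items above) try to service a fault without taking `mmap_lock` at all. A heavily simplified paraphrase of that fast path follows; this is a sketch of the idea, not the kernel's actual code, and it omits the access checks and retry accounting:

```c
#include <linux/mm.h>   /* lock_vma_under_rcu(), vma_end_read(), handle_mm_fault() */

/*
 * Paraphrased CONFIG_PER_VMA_LOCK fast path: look the VMA up under RCU and
 * take its per-VMA read lock (validated against mm_lock_seq); fall back to
 * the mmap_lock slow path only if that fails.
 */
static vm_fault_t zk_fault_fast_path(struct mm_struct *mm, unsigned long address,
                                     unsigned int flags, struct pt_regs *regs)
{
    struct vm_area_struct *vma;
    vm_fault_t fault;

    vma = lock_vma_under_rcu(mm, address);
    if (!vma)
        return VM_FAULT_RETRY;  /* sketch convention: caller retries under mmap_lock */

    fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
    if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
        vma_end_read(vma);      /* otherwise the per-VMA lock was already released */

    return fault;
}
```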