...
This commit is contained in:
parent
6111e79686
commit
61ec001df6
3 changed files with 149 additions and 5 deletions
|
|
@ -621,3 +621,10 @@
|
||||||
pages = {1--11},
|
pages = {1--11},
|
||||||
year = {2014}
|
year = {2014}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@book{Corbet_Rubini_K-Hartman.LDD3.2005,
|
||||||
|
title={Linux device drivers},
|
||||||
|
author={Corbet, Jonathan and Rubini, Alessandro and Kroah-Hartman, Greg},
|
||||||
|
year={2005},
|
||||||
|
publisher={O'Reilly Media, Inc.}
|
||||||
|
}
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -21,12 +21,12 @@
|
||||||
\usepackage{biblatex}
|
\usepackage{biblatex}
|
||||||
\addbibresource{mybibfile.bib}
|
\addbibresource{mybibfile.bib}
|
||||||
% <- biblatex
|
% <- biblatex
|
||||||
% -> nice definition listings
|
% -> definition & quotes
|
||||||
\usepackage{csquotes}
|
\usepackage{csquotes}
|
||||||
\usepackage{amsthm}
|
\usepackage{amsthm}
|
||||||
\theoremstyle{definition}
|
\theoremstyle{definition}
|
||||||
\newtheorem{definition}{Definition}
|
\newtheorem{definition}{Definition}
|
||||||
% <- definition
|
% <- definition & quotes
|
||||||
% -> code listing
|
% -> code listing
|
||||||
% [!] Requires external program: pypi:pygments
|
% [!] Requires external program: pypi:pygments
|
||||||
\usepackage{minted}
|
\usepackage{minted}
|
||||||
|
|
@ -364,7 +364,7 @@ Notably, kernel (driver) programming warrants programmer attention to software-m
|
||||||
However, it does not preclude CPU store reordering, so memory barriers remain necessary in a multiprocessing context.
|
However, it does not preclude CPU store reordering, so memory barriers remain necessary in a multiprocessing context.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
\item {
|
\item {\label{def:streaming-dma-map}
|
||||||
\textit{Streaming} DMA mappings:
|
\textit{Streaming} DMA mappings:
|
||||||
|
|
||||||
They provide no guarantee of coherency between concurrent CPU/DMA accesses. Programmers need to manually apply coherency maintenance subroutines for synchronization.
|
They provide no guarantee of coherency between concurrent CPU/DMA accesses. Programmers need to manually apply coherency maintenance subroutines for synchronization.
|
||||||
|
|
@ -597,7 +597,7 @@ The primary source of experimental data come from a virtualized machine: a virtu
|
||||||
\centering
|
\centering
|
||||||
\begin{tabular}{|c|c|}
|
\begin{tabular}{|c|c|}
|
||||||
\hline
|
\hline
|
||||||
Processors & AMD Ryzen 7 4800HS (8-core, 2-way SMT) \\
|
Processors & AMD Ryzen 7 4800HS (8 $\times$ 2-way SMT) \\
|
||||||
\hline
|
\hline
|
||||||
Frequency & 2.9 GHz (4.2 GHz Turbo) \\
|
Frequency & 2.9 GHz (4.2 GHz Turbo) \\
|
||||||
\hline
|
\hline
|
||||||
|
|
@ -670,7 +670,7 @@ In order to convert \texttt{dcache\_clean\_poc} to a traceable equivalent, a wra
|
||||||
|
|
||||||
void __dcache_clean_poc(ulong start, ulong end)
|
void __dcache_clean_poc(ulong start, ulong end)
|
||||||
{
|
{
|
||||||
dcache_clean_poc(start, end); // see $\ref{code:dcache_clean_poc}$
|
dcache_clean_poc(start, end); // see $\hyperref[code:dcache_clean_poc]{\texttt{arch/arm64/mm/cache.S}}$
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(__dcache_clean_poc);
|
EXPORT_SYMBOL(__dcache_clean_poc);
|
||||||
\end{minted}
|
\end{minted}
|
||||||
|
|
@ -683,7 +683,144 @@ To simulate module-initiated cache coherence behavior over allocated kernel buff
|
||||||
\subsubsection{\texttt{my\_shmem}: Design}
|
\subsubsection{\texttt{my\_shmem}: Design}
|
||||||
The \texttt{my\_shmem} module is a utility for (lazily) allocating one or more kernel-space pages, re-mapping them into the userspace for reading/writing operations, and invoking cache-coherency operations \emph{as if} accessed via DMA on unmap.
|
The \texttt{my\_shmem} module is a utility for (lazily) allocating one or more kernel-space pages, re-mapping them into the userspace for reading/writing operations, and invoking cache-coherency operations \emph{as if} accessed via DMA on unmap.
|
||||||
|
|
||||||
|
To emulate \hyperref[def:streaming-dma-map]{streaming DMA mapping} allocation, the module is designed to allocate memory directly from the \textit{page allocator}, as required by the kernel documentation's guideline, \textit{What Memory is DMA'able?}\cite{Miller_Henderson_Jelinek.Kernelv6.7-DMA_guide.2024}:
|
||||||
|
\begin{displayquote}
|
||||||
|
If you acquired your memory via the page allocator (i.e. \texttt{\_\_get\_free\_page*()}) or the generic memory allocators (i.e. \texttt{kmalloc()} or \texttt{kmem\_cache\_alloc()}) then you may DMA to/from that memory using the addresses returned from those routines.
|
||||||
|
\end{displayquote}
|
||||||
|
|
||||||
|
To enable page sharing between user-space processes, the module implements an allocation accounting mechanism for re-mapping existing allocations to multiple user-space address spaces on-demand. Specifically, it involves:
|
||||||
|
\begin{itemize}
|
||||||
|
\item {
|
||||||
|
Allocation of contiguous pages to some user-specified order (i.e., $2^{\mathrm{order}}$ pages).
|
||||||
|
}
|
||||||
|
\item {
|
||||||
|
Correct re-mapping behavior of existing allocations, for example computing the correct offset when re-mapping a multi-page allocation during any given page-fault, which may not be aligned with the first page in the allocation.
|
||||||
|
}
|
||||||
|
\item {
|
||||||
|
Software cache coherency maintenance on removal of mapping from any user-space program. This is intended to simulate the behavior of DMA API in a system without any specific DMA hardware.
|
||||||
|
}
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
The module should hence allow userspace programs to perform the following steps:
|
||||||
|
\begin{enumerate}
|
||||||
|
\item {
|
||||||
|
Open the ``device'' file as exposed by the kernel module.
|
||||||
|
}
|
||||||
|
\item {
|
||||||
|
\texttt{mmap} on the opened file descriptor, as per POSIX syscall API.
|
||||||
|
}
|
||||||
|
\item {
|
||||||
|
Allocate memory due to load/store actions within the \texttt{mmap}-ed memory mapping.
|
||||||
|
}
|
||||||
|
\item {
|
||||||
|
Close the memory mapping, which initiates a simulated software cache coherency maintenance operation.
|
||||||
|
}
|
||||||
|
\end{enumerate}
|
||||||
|
|
||||||
\subsubsection{\texttt{my\_shmem}: Implementation}
|
\subsubsection{\texttt{my\_shmem}: Implementation}
|
||||||
|
To implement the features as specified, \texttt{my\_shmem} exposes itself as a character device file \texttt{/dev/my\_shmem}; implements \textit{file operations} \texttt{open}, \texttt{mmap}, and \texttt{release}; and implements \textit{vm operations} \texttt{close} and \texttt{fault}.
|
||||||
|
|
||||||
|
Additionally, the parameter \texttt{max\_contiguous\_alloc\_order} is exposed as a writable parameter file inside \textit{sysfs} to manually control the number of contiguous pages allocated per module allocation.
|
||||||
|
|
||||||
|
\paragraph{Static Data} \dots
|
||||||
|
|
||||||
|
\paragraph{File Operations}
|
||||||
|
The Linux kernel defines \textit{file operations} as a series of module-specific callbacks whenever the userspace invokes a corresponding syscall on the (character) device file. These callbacks may be declared inside a \texttt{file\_operations} struct\cite{Corbet_Rubini_K-Hartman.LDD3.2005}, which provides an interface for modules on file-related syscalls:
|
||||||
|
\begin{minted}[linenos, bgcolor=code-bg, mathescape]{c}
|
||||||
|
/* In include/linux/fs.h */
|
||||||
|
struct file_operations {
|
||||||
|
struct module *owner;
|
||||||
|
/* ... */
|
||||||
|
int (*mmap) (
|
||||||
|
struct file *, // opened (device) file
|
||||||
|
struct vm_area_struct * // kernel repr of mapping
|
||||||
|
); // Downstream of syscall: mmap
|
||||||
|
/* ... */
|
||||||
|
int (*open) (
|
||||||
|
struct inode *, // inode of file to be opened
|
||||||
|
struct file * // opened (generic) file
|
||||||
|
); // Downstream of libc: open
|
||||||
|
/* ... */
|
||||||
|
int (*release) (
|
||||||
|
struct inode *, // inode of file to be closed
|
||||||
|
struct file * // to be closed
|
||||||
|
); // Downstream of libc: close
|
||||||
|
/* ... */
|
||||||
|
} __randomize_layout;
|
||||||
|
\end{minted}
|
||||||
|
|
||||||
|
The corresponding structure for the particular module is hence defined as follows:
|
||||||
|
\begin{minted}[linenos, bgcolor=code-bg, mathescape]{c}
|
||||||
|
/* In my_shmem.c */
|
||||||
|
static const struct file_operations my_shmem_fops = {
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
.open = my_shmem_fops_open,
|
||||||
|
.mmap = my_shmem_fops_mmap,
|
||||||
|
.release = my_shmem_fops_release,
|
||||||
|
};
|
||||||
|
\end{minted}
|
||||||
|
|
||||||
|
Implementation of \texttt{.open} is simple. It suffices to install the module-specific \texttt{struct file\_operations} (i.e., \texttt{my\_shmem\_fops}) into the \texttt{struct file} passed as an argument, which is constructed downstream via the kernel's generic file opening mechanisms.
|
||||||
|
|
||||||
|
Likewise for \texttt{.release}, which does nothing except to print a debug message into the kernel ring buffer.
|
||||||
|
|
||||||
|
To implement \texttt{.mmap}, the kernel module attempts to \emph{re-map as many existing allocations into the given \texttt{struct vm\_area\_struct} as possible without making any new allocation}. This centralizes allocation logic into the page fault handler, which is described later in \textcolor{red}{???}:
|
||||||
|
\begin{minted}[linenos, bgcolor=code-bg, mathescape]{c}
|
||||||
|
static int my_shmem_fops_mmap(
|
||||||
|
struct file *filp,
|
||||||
|
struct vm_area_struct *vma
|
||||||
|
) {
|
||||||
|
int ret = 0;
|
||||||
|
const ulong vma_pg_count =
|
||||||
|
(vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
|
||||||
|
struct page *pg;
|
||||||
|
ulong tgt_addr = vma->vm_start; // Current remap target addr
|
||||||
|
ulong src_head_pfn; // Current remap source: head PFN
|
||||||
|
ulong src_pg_nr; // Current remap source: length
|
||||||
|
ulong vma_remainder_count = vma_pg_count; // vma: remain pgs
|
||||||
|
|
||||||
|
/* Lock mutex... */
|
||||||
|
/* Iterate over allocations, remap as much as possible */
|
||||||
|
struct my_shmem_alloc *curr;
|
||||||
|
list_for_each_entry(curr, &my_shmem_allocs, list) {
|
||||||
|
/* exit if all of vma is mapped */
|
||||||
|
if (tgt_addr >= vma->vm_end)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* decrement page offset until alloc intersects */
|
||||||
|
if (vma_pgoff > ORDER_TO_PAGE_NR(curr->alloc_order)) {
|
||||||
|
vma_pgoff -= ORDER_TO_PAGE_NR(curr->alloc_order);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* intersects, hence compute PFN to remap */
|
||||||
|
pg = curr->page;
|
||||||
|
get_page(pg); // increment alloc. refcount
|
||||||
|
src_head_pfn = page_to_pfn(pg) + vma_pgoff;
|
||||||
|
src_pg_nr = min(
|
||||||
|
vma_remainder_count,
|
||||||
|
ORDER_TO_PAGE_NR(curr->alloc_order) - vma_pgoff
|
||||||
|
);
|
||||||
|
ret = remap_pfn_range(
|
||||||
|
vma, // remap target VM area
|
||||||
|
tgt_addr, // page-aligned tgt addr
|
||||||
|
src_head_pfn, // kernel PFN as source
|
||||||
|
src_pg_nr * PAGE_SIZE, // size of remap region
|
||||||
|
vma->vm_page_prot // page protection flags
|
||||||
|
);
|
||||||
|
/* if (ret): goto error handling... */
|
||||||
|
/* Prepare for next iteration */
|
||||||
|
tgt_addr += src_pg_nr * PAGE_SIZE;
|
||||||
|
vma_remainder_count -= src_pg_nr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* return or error handling... */
|
||||||
|
}
|
||||||
|
\end{minted}
|
||||||
|
|
||||||
|
\paragraph{VM Operations} \dots
|
||||||
|
|
||||||
|
\paragraph{\textit{sysfs} Parameter} \dots
|
||||||
|
|
||||||
\subsection{Instrumentation: \texttt{ftrace} and \textit{eBPF}}
|
\subsection{Instrumentation: \texttt{ftrace} and \textit{eBPF}}
|
||||||
\subsection{Userspace Programs}
|
\subsection{Userspace Programs}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue