diff --git a/src/aarch64-linux-flush-dcache/my_shmem.c b/src/aarch64-linux-flush-dcache/my_shmem.c index b170101..62fda37 100644 --- a/src/aarch64-linux-flush-dcache/my_shmem.c +++ b/src/aarch64-linux-flush-dcache/my_shmem.c @@ -174,6 +174,7 @@ ok: return ret; } +// FIXME: bugs in order == 12, alloc pg cnt == 32768 static vm_fault_t my_shmem_vmops_fault(struct vm_fault *vmf) { pr_info("[%s] vm_fault @ 0x%lx (vma + %ld pages).\n", @@ -184,10 +185,10 @@ static vm_fault_t my_shmem_vmops_fault(struct vm_fault *vmf) mutex_lock(&my_shmem_allocs_mtx); locked_retry: - if (fault_pg_offset < my_shmem_page_count) { + if (fault_pg_offset < my_shmem_page_count) // => Already present, remap return __my_shmem_fault_remap(vmf); - } + // else => allocate `1 << order` pages opportunistically... struct my_shmem_alloc *new_alloc_handle = kzalloc( sizeof(struct my_shmem_alloc), GFP_KERNEL @@ -201,19 +202,23 @@ locked_retry: if (!new_alloc_pg) goto err_alloc_pages; - // get_page(new_alloc_pg); new_alloc_handle->page = new_alloc_pg; new_alloc_handle->alloc_order = max_contiguous_alloc_order; list_add_tail(&new_alloc_handle->list, &my_shmem_allocs); my_shmem_page_count += ORDER_TO_PAGE_NR(new_alloc_handle->alloc_order); pr_info("[%s] Allocated 1 << %ld pages: 0x%lx - 0x%lx. 
Current page count: %ld\n", __func__, max_contiguous_alloc_order, - page_to_pfn(new_alloc_pg), page_to_pfn(new_alloc_pg) + (1 << max_contiguous_alloc_order), - my_shmem_page_count); + page_to_pfn(new_alloc_pg), + page_to_pfn(new_alloc_pg) + (1 << max_contiguous_alloc_order), + my_shmem_page_count + ); goto locked_retry; err_alloc_pages: + pr_err("[%s] Allocation (ord: %ld) failed...\n", + __func__, max_contiguous_alloc_order + ); err_kzalloc_handle: ret |= VM_FAULT_OOM; err_generic: diff --git a/tex/draft/mybibfile.bib b/tex/draft/mybibfile.bib index 1cfc66a..f11c6e0 100644 --- a/tex/draft/mybibfile.bib +++ b/tex/draft/mybibfile.bib @@ -628,3 +628,19 @@ year={2005}, publisher={" O'Reilly Media, Inc."} } + +@misc{Rostedt.Kernelv6.7-ftrace.2023, + title={ftrace - Function Tracer}, + url={https://www.kernel.org/doc/html/v6.7/trace/ftrace.html#dynamic-ftrace}, + journal={The Linux Kernel documentation}, + author={Rostedt, Steven}, + editor={Changbin, Du}, + year={2023} +} + +@misc{N/A.Kernelv6.7-libbpf.2023, + title={libbpf Overview}, + url={https://www.kernel.org/doc/html/v6.7/bpf/libbpf/libbpf_overview.html}, + journal={The Linux Kernel documentation}, + year={2023} +} diff --git a/tex/draft/skeleton.pdf b/tex/draft/skeleton.pdf index 7f082ab..22b5350 100644 Binary files a/tex/draft/skeleton.pdf and b/tex/draft/skeleton.pdf differ diff --git a/tex/draft/skeleton.tex b/tex/draft/skeleton.tex index 4a99490..c5f40c8 100644 --- a/tex/draft/skeleton.tex +++ b/tex/draft/skeleton.tex @@ -661,7 +661,7 @@ The specifications of \texttt{rose} is listed in table \ref{table:rose}. \section{Methodology}\label{sec:sw-coherency-method} \subsection{Exporting \texttt{dcache\_clean\_poc}} -As established in subsection \ref{subsec:armv8a-swcoherency}, software cache-coherence maintenance operations (e.g., \texttt{dcache\_[clean|inval]\_poc}) are wrapped behind DMA API function calls and are hence unavailable for direct use in drivers. 
Moreover, instrumentation of assembly code becomes non-trivial when compared to instrumenting C function symbols, likely due to automatically stripped assembly symbols during kernel linkage. Consequently, it becomes impossible to utilize the existing instrumentation tools available in the Linux kernel (e.g., \texttt{ftrace}) to trace assembly routines. +As established in subsection \ref{subsec:armv8a-swcoherency}, software cache-coherence maintenance operations (e.g., \texttt{dcache\_[clean|inval]\_poc}) are wrapped behind DMA API function calls and are hence unavailable for direct use in drivers. Moreover, instrumentation of assembly code becomes non-trivial when compared to instrumenting C function symbols, likely due to automatically stripped assembly symbols in C object files. Consequently, it becomes impossible to utilize the existing instrumentation tools available in the Linux kernel (e.g., \texttt{ftrace}) to trace assembly routines. In order to convert \texttt{dcache\_clean\_poc} to a traceable equivalent, a wrapper function \texttt{\_\_dcache\_clean\_poc} is created as follows: \begin{minted}[mathescape, linenos, bgcolor=code-bg]{c} @@ -988,10 +988,25 @@ $ echo 2 > \ /sys/module/my_shmem/parameters/max_contiguous_alloc_order \end{minted} -Consequently, all allocations occuring after this change will be allocated with a 4-page contiguous granularity. +Consequently, all allocations occurring after this change will be allocated with a 4-page contiguous granularity. Upon further testing, the maximum value allowed here is 10 (i.e., $2^{10} = 1024$ 4K pages). + +\subsection{Instrumentation: \texttt{ftrace} and \texttt{bcc-tools}} +We use two instrumentation frameworks to evaluate the latency of software-initiated coherency operations. 
\texttt{ftrace} is the primary kernel tracing mechanism across multiple (supporting) architectures, which supports both \textit{static} tracing of tracepoints and \textit{dynamic} tracing of function symbols: +\begin{itemize} + \item { + \textbf{Static} tracepoints describe tracepoints compiled into the Linux kernel. They are defined by kernel programmers and are otherwise known as \textit{event tracing}. + } + \item { + \textbf{Dynamic} \texttt{ftrace} support is enabled by self-modifying the kernel code to replace injected placeholder nop-routines with \texttt{ftrace} infrastructure calls. This allows for function tracing of all function symbols present in C object files created for linkage. \cite{Rostedt.Kernelv6.7-ftrace.2023} + } +\end{itemize} + +Because we do not inline \texttt{\_\_dcache\_clean\_poc}, we are able to include its symbol inside compiled C object files and hence expose its internals for dynamic tracing. + +\texttt{bcc-tools}, on the other hand, provide an array of handy instrumentation tools that are compiled just-in-time into \textit{BPF} programs and run inside an in-kernel virtual machine. A description of how BPF programs are parsed and run inside the Linux kernel is given in the kernel documentation \cite{N/A.Kernelv6.7-libbpf.2023}. The ability of \texttt{bcc}/\texttt{libbpf} programs to interface with both userspace and kernelspace function tracing mechanisms makes \texttt{bcc-tools} ideal as an easy tracing interface for both userspace and kernelspace tracing. -\subsection{Instrumentation: \texttt{ftrace} and \textit{eBPF}} \subsection{Userspace Programs} +Finally, two simple userspace programs are written to invoke the corresponding kernelspace callback operations -- namely, allocation and cleaning of kernel buffers for simulating DMA behaviors. To achieve this, each program simply \texttt{mmap}s the number of pages passed in as an argument and either reads or writes the entirety of the buffer (which differentiates the two programs). 
A listing of their logic is at \textcolor{red}{Appendix ???}. \section{Results}\label{sec:sw-coherency-results} \subsection{Controlled Allocation Size; Variable Page Count}