diff --git a/tex/misc/w15_slides.pdf b/tex/misc/w15_slides.pdf index efbaabc..67cac67 100644 Binary files a/tex/misc/w15_slides.pdf and b/tex/misc/w15_slides.pdf differ diff --git a/tex/misc/w15_slides.tex b/tex/misc/w15_slides.tex index 6dd12b7..5a10121 100644 --- a/tex/misc/w15_slides.tex +++ b/tex/misc/w15_slides.tex @@ -108,7 +108,13 @@ synchronization with CPU cache. } \item { - We cannot assume MMU to magically automatically maintain coherence. + We cannot assume that the MMU magically maintains coherence. + \begin{itemize} + \item { + This seems to be the case for x86\_64 (cache-coherent DMA), but + not ARM64. + } + \end{itemize} } \item { At transportation time: @@ -379,7 +385,7 @@ \section{3. Progress} \begin{frame} - \frametitle{Progress} + \frametitle{3. Progress} \begin{itemize} \item { Goal: in-kernel implementation of software cache-coherency via @@ -408,10 +414,35 @@ ARMv8 defines two levels of cache coherence: \begin{itemize} \item { - \textit{Point-of-Unification}: + \textit{Point-of-Unification}: Within a core, instruction + cache, data cache, and TLB all agree on the copy seen for a + particular address. + \begin{itemize} + \item Notably, changing PTE requires PoU. + \end{itemize} } \item { - \textit{Point-of-Coherence}: + \textit{Point-of-Coherence}: Between all DMA-capable + peripherals (CPU or otherwise), they all agree on the copy + seen for a particular address. + } + \end{itemize} + For this thesis's purposes, strive for PoC. + } + \item { + Operations to achieve the latter are encapsulated in the Linux + kernel as \texttt{(d|i)cache\_(clean|inval)\_poc}. + \begin{itemize} + \item Declared under \texttt{arch/arm64/include/asm/cacheflush.h}. + \item Defined in \texttt{arch/arm64/mm/cache.S}. + \item { + Takes a virtual address w.r.t.\ the \textit{current} address space to + writeback/invalidate cache entries. + } + \item { + Problem: Can only be called in process context (for userspace + virtual addresses) or in all contexts + (for kernel virtual addresses)? 
} \end{itemize} } @@ -420,21 +451,106 @@ \begin{frame} \frametitle{Kernel Patch for On-demand Coherency} - - - + \begin{itemize} + \item { + Problem: These symbols are not exported -- not intended for driver + use. + } + \item { + Temporary solution: re-export them via patching the kernel. + \begin{itemize} + \item Note: Kernel v6.7.0. + \item { + Longish-term solution: arrange kernel module code in a way + that takes advantage of the existing driver API + (e.g., via the DMA API, as in \textit{smbdirect}). + } + \end{itemize} + } + \item { + Implements wrapper function \texttt{\_\_dcache\_clean\_poc} to + re-export \texttt{dcache\_clean\_poc} into the driver namespace. + } + \item { + Exports the symbol into a separate header file. + } + \end{itemize} \end{frame} \begin{frame} \frametitle{Proof-of-Concept Kernel Module} - - - + \begin{itemize} + \item { + Dynamically allocates \texttt{GFP\_USER} pages and remaps to + userspace on \texttt{mmap}. + \begin{itemize} + \item { + \texttt{GFP\_USER} so that (for convenience) pages are + directly addressable in kernelspace (via kernel page table). + } + \item { + Pages are lazily allocated and shared between multiple + processes (i.e., user address spaces). + } + \item { + Exposed as character device \texttt{/dev/my\_shmem}. + } + \end{itemize} + } + \item Around 300 LoC. + \item { + Problem: flawed premise for testing cache writeback! + \begin{itemize} + \item { + Summary: CPU datapath differs from DMA datapath; common cache + coherency maintenance operations are already performed + in common file/virtual memory area operation code. + } + \item { + Idea: perform cache writeback on \texttt{vm\_ops->close}. + } + \item { + Reality: virtual memory area is already cleaned from cache and + removed from the address space prior to calling + \texttt{vm\_ops->close}. + } + \item { + Fix: Implement custom \texttt{ioctl}? 
+ } + \end{itemize} + } + \end{itemize} \end{frame} % Part 4: Future Work % ============================================================================= +\section{4. Future Work} + +\begin{frame} + \frametitle{4. Future Work} + \begin{enumerate} + \item { + Incorporate the cache coherence mechanism into the larger project. + } + \item { + Implement the memory model within the larger project. This involves: + \begin{itemize} + \item { + Making adjustments to message type and structure specifications + for better inter-operation with RDMA. + } + \item { + Implementing the memory model programmatically. + } + \end{itemize} + } + \end{enumerate} +\end{frame} % References +\begin{frame} + \frametitle{References} + \printbibliography +\end{frame} \end{document} \ No newline at end of file