Can begin Results
This commit is contained in:
parent
f9adbf1f1d
commit
fc777526ce
4 changed files with 44 additions and 8 deletions
|
|
@ -174,6 +174,7 @@ ok:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: bugs in order == 12, alloc pg cnt == 32768
|
||||||
static vm_fault_t my_shmem_vmops_fault(struct vm_fault *vmf)
|
static vm_fault_t my_shmem_vmops_fault(struct vm_fault *vmf)
|
||||||
{
|
{
|
||||||
pr_info("[%s] vm_fault @ 0x%lx (vma + %ld pages).\n",
|
pr_info("[%s] vm_fault @ 0x%lx (vma + %ld pages).\n",
|
||||||
|
|
@ -184,10 +185,10 @@ static vm_fault_t my_shmem_vmops_fault(struct vm_fault *vmf)
|
||||||
|
|
||||||
mutex_lock(&my_shmem_allocs_mtx);
|
mutex_lock(&my_shmem_allocs_mtx);
|
||||||
locked_retry:
|
locked_retry:
|
||||||
if (fault_pg_offset < my_shmem_page_count) {
|
if (fault_pg_offset < my_shmem_page_count)
|
||||||
// => Already present, remap
|
// => Already present, remap
|
||||||
return __my_shmem_fault_remap(vmf);
|
return __my_shmem_fault_remap(vmf);
|
||||||
}
|
|
||||||
// else => allocate `1 << order` pages opportunistically...
|
// else => allocate `1 << order` pages opportunistically...
|
||||||
struct my_shmem_alloc *new_alloc_handle = kzalloc(
|
struct my_shmem_alloc *new_alloc_handle = kzalloc(
|
||||||
sizeof(struct my_shmem_alloc), GFP_KERNEL
|
sizeof(struct my_shmem_alloc), GFP_KERNEL
|
||||||
|
|
@ -201,19 +202,23 @@ locked_retry:
|
||||||
if (!new_alloc_pg)
|
if (!new_alloc_pg)
|
||||||
goto err_alloc_pages;
|
goto err_alloc_pages;
|
||||||
|
|
||||||
// get_page(new_alloc_pg);
|
|
||||||
new_alloc_handle->page = new_alloc_pg;
|
new_alloc_handle->page = new_alloc_pg;
|
||||||
new_alloc_handle->alloc_order = max_contiguous_alloc_order;
|
new_alloc_handle->alloc_order = max_contiguous_alloc_order;
|
||||||
list_add_tail(&new_alloc_handle->list, &my_shmem_allocs);
|
list_add_tail(&new_alloc_handle->list, &my_shmem_allocs);
|
||||||
my_shmem_page_count += ORDER_TO_PAGE_NR(new_alloc_handle->alloc_order);
|
my_shmem_page_count += ORDER_TO_PAGE_NR(new_alloc_handle->alloc_order);
|
||||||
pr_info("[%s] Allocated 1 << %ld pages: 0x%lx - 0x%lx. Current page count: %ld\n",
|
pr_info("[%s] Allocated 1 << %ld pages: 0x%lx - 0x%lx. Current page count: %ld\n",
|
||||||
__func__, max_contiguous_alloc_order,
|
__func__, max_contiguous_alloc_order,
|
||||||
page_to_pfn(new_alloc_pg), page_to_pfn(new_alloc_pg) + (1 << max_contiguous_alloc_order),
|
page_to_pfn(new_alloc_pg),
|
||||||
my_shmem_page_count);
|
page_to_pfn(new_alloc_pg) + (1 << max_contiguous_alloc_order),
|
||||||
|
my_shmem_page_count
|
||||||
|
);
|
||||||
|
|
||||||
goto locked_retry;
|
goto locked_retry;
|
||||||
|
|
||||||
err_alloc_pages:
|
err_alloc_pages:
|
||||||
|
pr_err("[%s] Allocation (ord: %ld) failed...\n",
|
||||||
|
__func__, max_contiguous_alloc_order
|
||||||
|
);
|
||||||
err_kzalloc_handle:
|
err_kzalloc_handle:
|
||||||
ret |= VM_FAULT_OOM;
|
ret |= VM_FAULT_OOM;
|
||||||
err_generic:
|
err_generic:
|
||||||
|
|
|
||||||
|
|
@ -628,3 +628,19 @@
|
||||||
year={2005},
|
year={2005},
|
||||||
publisher={" O'Reilly Media, Inc."}
|
publisher={" O'Reilly Media, Inc."}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@misc{Rostedt.Kernelv6.7-ftrace.2023,
|
||||||
|
title={ftrace - Function Tracer},
|
||||||
|
url={https://www.kernel.org/doc/html/v6.7/trace/ftrace.html#dynamic-ftrace},
|
||||||
|
journal={The Linux Kernel documentation},
|
||||||
|
author={Rostedt, Steven},
|
||||||
|
editor={Changbin, Du},
|
||||||
|
year={2023}
|
||||||
|
}
|
||||||
|
|
||||||
|
@misc{N/A.Kernelv6.7-libbpf.2023,
|
||||||
|
title={libbpf Overview},
|
||||||
|
url={https://www.kernel.org/doc/html/v6.7/bpf/libbpf/libbpf_overview.html},
|
||||||
|
journal={The Linux Kernel documentation},
|
||||||
|
year={2023}
|
||||||
|
}
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -661,7 +661,7 @@ The specifications of \texttt{rose} is listed in table \ref{table:rose}.
|
||||||
|
|
||||||
\section{Methodology}\label{sec:sw-coherency-method}
|
\section{Methodology}\label{sec:sw-coherency-method}
|
||||||
\subsection{Exporting \texttt{dcache\_clean\_poc}}
|
\subsection{Exporting \texttt{dcache\_clean\_poc}}
|
||||||
As established in subsection \ref{subsec:armv8a-swcoherency}, software cache-coherence maintenance operations (e.g., \texttt{dcache\_[clean|inval]\_poc}) are wrapped behind DMA API function calls and are hence unavailable for direct use in drivers. Moreover, instrumentation of assembly code becomes non-trivial when compared to instrumenting C function symbols, likely due to automatically stripped assembly symbols during kernel linkage. Consequently, it becomes impossible to utilize the existing instrumentation tools available in the Linux kernel (e.g., \texttt{ftrace}) to trace assembly routines.
|
As established in subsection \ref{subsec:armv8a-swcoherency}, software cache-coherence maintenance operations (e.g., \texttt{dcache\_[clean|inval]\_poc}) are wrapped behind DMA API function calls and are hence unavailable for direct use in drivers. Moreover, instrumentation of assembly code becomes non-trivial when compared to instrumenting C function symbols, likely due to automatically stripped assembly symbols in C object files. Consequently, it becomes impossible to utilize the existing instrumentation tools available in the Linux kernel (e.g., \texttt{ftrace}) to trace assembly routines.
|
||||||
|
|
||||||
In order to convert \texttt{dcache\_clean\_poc} to a traceable equivalent, a wrapper function \texttt{\_\_dcache\_clean\_poc} is created as follows:
|
In order to convert \texttt{dcache\_clean\_poc} to a traceable equivalent, a wrapper function \texttt{\_\_dcache\_clean\_poc} is created as follows:
|
||||||
\begin{minted}[mathescape, linenos, bgcolor=code-bg]{c}
|
\begin{minted}[mathescape, linenos, bgcolor=code-bg]{c}
|
||||||
|
|
@ -988,10 +988,25 @@ $ echo 2 > \
|
||||||
/sys/module/my_shmem/parameters/max_contiguous_alloc_order
|
/sys/module/my_shmem/parameters/max_contiguous_alloc_order
|
||||||
\end{minted}
|
\end{minted}
|
||||||
|
|
||||||
Consequently, all allocations occuring after this change will be allocated with a 4-page contiguous granularity.
|
Consequently, all allocations occurring after this change will be allocated with a 4-page contiguous granularity. Upon further testing, the maximum value allowed here is 10 (i.e., $2^{10} = 1024$ 4K pages).
|
||||||
|
|
||||||
|
\subsection{Instrumentation: \texttt{ftrace} and \texttt{bcc-tools}}
|
||||||
|
We use two instrumentation frameworks to evaluate the latency of software-initiated coherency operations. \texttt{ftrace} is the primary kernel tracing mechanism across multiple (supporting) architectures, which supports both \textit{static} tracing of tracepoints and \textit{dynamic} tracing of function symbols:
|
||||||
|
\begin{itemize}
|
||||||
|
\item {
|
||||||
|
\textbf{Static} tracepoints are tracepoints compiled into the Linux kernel. They are defined by kernel programmers, and tracing them is otherwise known as \textit{event tracing}.
|
||||||
|
}
|
||||||
|
\item {
|
||||||
|
\textbf{Dynamic} \texttt{ftrace} support is enabled by self-modifying the kernel code to replace injected placeholder nop-routines with \texttt{ftrace} infrastructure calls. This allows for function tracing of all function symbols present in C object files created for linkage \cite{Rostedt.Kernelv6.7-ftrace.2023}.
|
||||||
|
}
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
Because we do not inline \texttt{\_\_dcache\_clean\_poc}, we are able to include its symbol inside compiled C object files and hence expose its internals for dynamic tracing.
|
||||||
|
|
||||||
|
\texttt{bcc-tools}, on the other hand, provides an array of handy instrumentation tools that are compiled just-in-time into \textit{BPF} programs and run inside an in-kernel virtual machine. A description of how BPF programs are parsed and run inside the Linux kernel is given in the kernel documentation \cite{N/A.Kernelv6.7-libbpf.2023}. The ability of \texttt{bcc}/\texttt{libbpf} programs to interface with both userspace and kernelspace function tracing mechanisms makes \texttt{bcc-tools} ideal as an easy interface for both userspace and kernelspace tracing.
|
||||||
|
|
||||||
\subsection{Instrumentation: \texttt{ftrace} and \textit{eBPF}}
|
|
||||||
\subsection{Userspace Programs}
|
\subsection{Userspace Programs}
|
||||||
|
Finally, two simple userspace programs are written to invoke the corresponding kernelspace callback operations -- namely, allocation and cleaning of kernel buffers for simulating DMA behaviors. To achieve this, each program simply \texttt{mmap}s the number of pages passed in as an argument and either reads or writes the entirety of the buffer (which differentiates the two programs). A listing of their logic is at \textcolor{red}{Appendix ???}.
|
||||||
|
|
||||||
\section{Results}\label{sec:sw-coherency-results}
|
\section{Results}\label{sec:sw-coherency-results}
|
||||||
\subsection{Controlled Allocation Size; Variable Page Count}
|
\subsection{Controlled Allocation Size; Variable Page Count}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue