diff --git a/results/lshw-star.out b/results/lshw-star.out new file mode 100644 index 0000000..adf18f1 --- /dev/null +++ b/results/lshw-star.out @@ -0,0 +1,513 @@ +star + description: Computer + product: QEMU Virtual Machine + vendor: QEMU + version: virt-8.2 + width: 64 bits + capabilities: smbios-3.0.0 dmi-3.0.0 smp cp15_barrier setend swp tagged_addr_disabled + configuration: boot=normal uuid=e2cd944b-dd7d-4dbd-8957-fc775e5a6220 + *-core + description: Motherboard + physical id: 0 + *-cpu + description: CPU + vendor: QEMU + physical id: 400 + bus info: cpu@0 + version: virt-8.2 + slot: CPU 0 + size: 2GHz + capacity: 2GHz + configuration: cores=3 enabledcores=3 threads=6 + *-memory + description: System Memory + physical id: 1000 + size: 4GiB + capabilities: ecc + configuration: errordetection=multi-bit-ecc + *-bank + description: DIMM RAM + vendor: QEMU + physical id: 0 + slot: DIMM 0 + size: 4GiB + *-firmware + description: BIOS + vendor: EDK II + physical id: 0 + version: unknown + date: 2/2/2022 + size: 96KiB + capabilities: uefi virtualmachine + *-pci + description: Host bridge + product: QEMU PCIe Host bridge + vendor: Red Hat, Inc. + physical id: 100 + bus info: pci@0000:00:00.0 + version: 00 + width: 32 bits + clock: 33MHz + *-pci:0 + description: PCI bridge + product: QEMU PCIe Root port + vendor: Red Hat, Inc. + physical id: 1 + bus info: pci@0000:00:01.0 + version: 00 + width: 32 bits + clock: 33MHz + capabilities: pci pciexpress msix normal_decode bus_master cap_list + configuration: driver=pcieport + resources: irq:46 memory:11c00000-11c00fff ioport:1000(size=4096) memory:10000000-101fffff ioport:8000000000(size=2097152) + *-network + description: Ethernet controller + product: Virtio 1.0 network device + vendor: Red Hat, Inc. 
+ physical id: 0 + bus info: pci@0000:01:00.0 + logical name: /dev/fb0 + version: 01 + width: 64 bits + clock: 33MHz + capabilities: msix pm pciexpress bus_master cap_list rom fb + configuration: depth=32 driver=virtio-pci latency=0 mode=1280x800 visual=truecolor xres=1280 yres=800 + resources: iomemory:800-7ff irq:46 memory:10040000-10040fff memory:8000000000-8000003fff memory:10000000-1003ffff + *-virtio0 + description: Ethernet interface + physical id: 0 + bus info: virtio@0 + logical name: enp1s0 + serial: 52:54:00:e8:3f:58 + capabilities: ethernet physical + configuration: autonegotiation=off broadcast=yes driver=virtio_net driverversion=1.0.0 ip=192.168.100.206 link=yes multicast=yes + *-pci:1 + description: PCI bridge + product: QEMU PCIe Root port + vendor: Red Hat, Inc. + physical id: 1.1 + bus info: pci@0000:00:01.1 + version: 00 + width: 32 bits + clock: 33MHz + capabilities: pci pciexpress msix normal_decode bus_master cap_list + configuration: driver=pcieport + resources: irq:46 memory:11c01000-11c01fff ioport:2000(size=4096) memory:10200000-103fffff ioport:8000200000(size=2097152) + *-usb + description: USB controller + product: QEMU XHCI Host Controller + vendor: Red Hat, Inc. 
+ physical id: 0 + bus info: pci@0000:02:00.0 + version: 01 + width: 64 bits + clock: 33MHz + capabilities: msix pciexpress xhci bus_master cap_list + configuration: driver=xhci_hcd latency=0 + resources: irq:46 memory:10200000-10203fff + *-usbhost:0 + product: xHCI Host Controller + vendor: Linux 6.7.0-thesis-dirty xhci-hcd + physical id: 0 + bus info: usb@1 + logical name: usb1 + version: 6.07 + capabilities: usb-2.00 + configuration: driver=hub slots=15 speed=480Mbit/s + *-usb:0 + description: Keyboard + product: QEMU QEMU USB Keyboard + vendor: QEMU + physical id: 1 + bus info: usb@1:1 + logical name: input1 + logical name: /dev/input/event1 + logical name: input1::capslock + logical name: input1::compose + logical name: input1::kana + logical name: input1::numlock + logical name: input1::scrolllock + version: 0.00 + serial: 68284-0000:00:01.1:00.0-1 + capabilities: usb-2.00 usb + configuration: driver=usbhid maxpower=100mA speed=480Mbit/s + *-usb:1 + description: Human interface device + product: QEMU QEMU USB Tablet + vendor: QEMU + physical id: 2 + bus info: usb@1:2 + logical name: input2 + logical name: /dev/input/event2 + logical name: /dev/input/mouse0 + version: 0.00 + serial: 28754-0000:00:01.1:00.0-2 + capabilities: usb-2.00 usb + configuration: driver=usbhid maxpower=100mA speed=480Mbit/s + *-usbhost:1 + product: xHCI Host Controller + vendor: Linux 6.7.0-thesis-dirty xhci-hcd + physical id: 1 + bus info: usb@2 + logical name: usb2 + version: 6.07 + capabilities: usb-3.00 + configuration: driver=hub slots=15 speed=5000Mbit/s + *-pci:2 + description: PCI bridge + product: QEMU PCIe Root port + vendor: Red Hat, Inc. 
+ physical id: 1.2 + bus info: pci@0000:00:01.2 + version: 00 + width: 32 bits + clock: 33MHz + capabilities: pci pciexpress msix normal_decode bus_master cap_list + configuration: driver=pcieport + resources: irq:46 memory:11c02000-11c02fff ioport:3000(size=4096) memory:10400000-105fffff ioport:8000400000(size=2097152) + *-scsi + description: SCSI storage controller + product: Virtio 1.0 SCSI + vendor: Red Hat, Inc. + physical id: 0 + bus info: pci@0000:03:00.0 + version: 01 + width: 64 bits + clock: 33MHz + capabilities: scsi msix pm pciexpress bus_master cap_list + configuration: driver=virtio-pci latency=0 + resources: iomemory:800-7ff irq:46 memory:10400000-10400fff memory:8000400000-8000403fff + *-virtio1 + description: Virtual I/O device + physical id: 0 + bus info: virtio@1 + logical name: scsi0 + configuration: driver=virtio_scsi + *-cdrom + description: DVD reader + product: QEMU CD-ROM + vendor: QEMU + physical id: 0.0.0 + bus info: scsi@0:0.0.0 + logical name: /dev/cdrom + logical name: /dev/sr0 + version: 2.5+ + capabilities: removable audio dvd + configuration: ansiversion=5 status=ready + *-medium + physical id: 0 + logical name: /dev/cdrom + capabilities: partitioned partitioned:dos + *-volume UNCLAIMED + description: Windows FAT volume + vendor: mkfs.fat + physical id: 2 + version: FAT12 + serial: deb0-0001 + size: 15EiB + capabilities: primary boot fat initialized + configuration: FATs=2 filesystem=fat + *-pci:3 + description: PCI bridge + product: QEMU PCIe Root port + vendor: Red Hat, Inc. + physical id: 1.3 + bus info: pci@0000:00:01.3 + version: 00 + width: 32 bits + clock: 33MHz + capabilities: pci pciexpress msix normal_decode bus_master cap_list + configuration: driver=pcieport + resources: irq:46 memory:11c03000-11c03fff ioport:4000(size=4096) memory:10600000-107fffff ioport:8000600000(size=2097152) + *-communication + description: Communication controller + product: Virtio 1.0 console + vendor: Red Hat, Inc. 
+ physical id: 0 + bus info: pci@0000:04:00.0 + version: 01 + width: 64 bits + clock: 33MHz + capabilities: msix pm pciexpress bus_master cap_list + configuration: driver=virtio-pci latency=0 + resources: iomemory:800-7ff irq:46 memory:10600000-10600fff memory:8000600000-8000603fff + *-virtio2 UNCLAIMED + description: Virtual I/O device + physical id: 0 + bus info: virtio@2 + configuration: driver=virtio_console + *-pci:4 + description: PCI bridge + product: QEMU PCIe Root port + vendor: Red Hat, Inc. + physical id: 1.4 + bus info: pci@0000:00:01.4 + version: 00 + width: 32 bits + clock: 33MHz + capabilities: pci pciexpress msix normal_decode bus_master cap_list + configuration: driver=pcieport + resources: irq:46 memory:11c04000-11c04fff ioport:5000(size=4096) memory:10800000-109fffff ioport:8000800000(size=2097152) + *-scsi + description: SCSI storage controller + product: Virtio 1.0 block device + vendor: Red Hat, Inc. + physical id: 0 + bus info: pci@0000:05:00.0 + version: 01 + width: 64 bits + clock: 33MHz + capabilities: scsi msix pm pciexpress bus_master cap_list + configuration: driver=virtio-pci latency=0 + resources: iomemory:800-7ff irq:46 memory:10800000-10800fff memory:8000800000-8000803fff + *-virtio3 + description: Virtual I/O device + physical id: 0 + bus info: virtio@3 + logical name: /dev/vda + size: 32GiB (34GB) + capabilities: gpt-1.00 partitioned partitioned:gpt + configuration: driver=virtio_blk guid=ac22236a-71e4-471c-8910-a785c75d1fe8 logicalsectorsize=512 sectorsize=512 + *-volume:0 UNCLAIMED + description: Windows FAT volume + vendor: mkfs.fat + physical id: 1 + bus info: virtio@3,1 + version: FAT16 + serial: 2186-fc7b + size: 510MiB + capacity: 511MiB + capabilities: boot fat initialized + configuration: FATs=2 filesystem=fat + *-volume:1 + description: EXT4 volume + vendor: Linux + physical id: 2 + bus info: virtio@3,2 + logical name: /dev/vda2 + logical name: / + version: 1.0 + serial: d62516dc-8d27-4424-836b-35d5868e20fd + size: 30GiB 
+ capabilities: journaled extended_attributes large_files huge_files dir_nlink recover 64bit extents ext4 ext2 initialized + configuration: created=2024-01-22 03:02:12 filesystem=ext4 lastmountpoint=/ modified=2024-03-16 22:54:43 mount.fstype=ext4 mount.options=rw,relatime,errors=remount-ro mounted=2024-03-16 20:54:10 state=mounted + *-volume:2 + description: Linux swap volume + vendor: Linux + physical id: 3 + bus info: virtio@3,3 + logical name: /dev/vda3 + version: 1 + serial: 79d589e9-326e-492c-88de-68728c399cad + size: 975MiB + capacity: 975MiB + capabilities: nofs swap initialized + configuration: filesystem=swap pagesize=4095 + *-pci:5 + description: PCI bridge + product: QEMU PCIe Root port + vendor: Red Hat, Inc. + physical id: 1.5 + bus info: pci@0000:00:01.5 + version: 00 + width: 32 bits + clock: 33MHz + capabilities: pci pciexpress msix normal_decode bus_master cap_list + configuration: driver=pcieport + resources: irq:46 memory:11c05000-11c05fff ioport:6000(size=4096) memory:10a00000-10bfffff ioport:8000a00000(size=2097152) + *-generic + description: Unclassified device + product: Virtio 1.0 memory balloon + vendor: Red Hat, Inc. + physical id: 0 + bus info: pci@0000:06:00.0 + version: 01 + width: 64 bits + clock: 33MHz + capabilities: pm pciexpress bus_master cap_list + configuration: driver=virtio-pci latency=0 + resources: iomemory:800-7ff irq:46 memory:8000a00000-8000a03fff + *-virtio4 UNCLAIMED + description: Virtual I/O device + physical id: 0 + bus info: virtio@4 + configuration: driver=virtio_balloon + *-pci:6 + description: PCI bridge + product: QEMU PCIe Root port + vendor: Red Hat, Inc. 
+ physical id: 1.6 + bus info: pci@0000:00:01.6 + version: 00 + width: 32 bits + clock: 33MHz + capabilities: pci pciexpress msix normal_decode bus_master cap_list + configuration: driver=pcieport + resources: irq:46 memory:11c06000-11c06fff ioport:7000(size=4096) memory:10c00000-10dfffff ioport:8000c00000(size=2097152) + *-generic + description: Unclassified device + product: Virtio 1.0 RNG + vendor: Red Hat, Inc. + physical id: 0 + bus info: pci@0000:07:00.0 + version: 01 + width: 64 bits + clock: 33MHz + capabilities: msix pm pciexpress bus_master cap_list + configuration: driver=virtio-pci latency=0 + resources: iomemory:800-7ff irq:46 memory:10c00000-10c00fff memory:8000c00000-8000c03fff + *-virtio5 UNCLAIMED + description: Virtual I/O device + physical id: 0 + bus info: virtio@5 + configuration: driver=virtio_rng + *-pci:7 + description: PCI bridge + product: QEMU PCIe Root port + vendor: Red Hat, Inc. + physical id: 1.7 + bus info: pci@0000:00:01.7 + version: 00 + width: 32 bits + clock: 33MHz + capabilities: pci pciexpress msix normal_decode bus_master cap_list + configuration: driver=pcieport + resources: irq:46 memory:11c07000-11c07fff ioport:8000(size=4096) memory:10e00000-10ffffff ioport:8000e00000(size=2097152) + *-display + description: Display controller + product: Virtio 1.0 GPU + vendor: Red Hat, Inc. + physical id: 0 + bus info: pci@0000:08:00.0 + logical name: /dev/fb0 + version: 01 + width: 64 bits + clock: 33MHz + capabilities: msix pm pciexpress bus_master cap_list fb + configuration: depth=32 driver=virtio-pci latency=0 resolution=1280,800 + resources: iomemory:800-7ff irq:46 memory:10e00000-10e00fff memory:8000e00000-8000e03fff + *-virtio6 UNCLAIMED + description: Virtual I/O device + physical id: 0 + bus info: virtio@6 + configuration: driver=virtio_gpu + *-pci:8 + description: PCI bridge + product: QEMU PCIe Root port + vendor: Red Hat, Inc. 
+ physical id: 2 + bus info: pci@0000:00:02.0 + version: 00 + width: 32 bits + clock: 33MHz + capabilities: pci pciexpress msix normal_decode bus_master cap_list + configuration: driver=pcieport + resources: irq:47 memory:11c08000-11c08fff ioport:9000(size=4096) memory:11000000-111fffff ioport:8001000000(size=2097152) + *-storage + description: Mass storage controller + product: Virtio file system + vendor: Red Hat, Inc. + physical id: 0 + bus info: pci@0000:09:00.0 + version: 01 + width: 64 bits + clock: 33MHz + capabilities: storage msix pm pciexpress bus_master cap_list + configuration: driver=virtio-pci latency=0 + resources: iomemory:800-7ff irq:47 memory:11000000-11000fff memory:8001000000-8001003fff + *-virtio7 UNCLAIMED + description: Virtual I/O device + physical id: 0 + bus info: virtio@7 + configuration: driver=virtiofs + *-pci:9 + description: PCI bridge + product: QEMU PCIe Root port + vendor: Red Hat, Inc. + physical id: 2.1 + bus info: pci@0000:00:02.1 + version: 00 + width: 32 bits + clock: 33MHz + capabilities: pci pciexpress msix normal_decode bus_master cap_list + configuration: driver=pcieport + resources: irq:47 memory:11c09000-11c09fff ioport:a000(size=4096) memory:11200000-113fffff ioport:8001200000(size=2097152) + *-network + description: Ethernet controller + product: Virtio 1.0 network device + vendor: Red Hat, Inc. 
+ physical id: 0 + bus info: pci@0000:0a:00.0 + version: 01 + width: 64 bits + clock: 33MHz + capabilities: msix pm pciexpress bus_master cap_list rom + configuration: driver=virtio-pci latency=0 + resources: iomemory:800-7ff irq:47 memory:11240000-11240fff memory:8001200000-8001203fff memory:11200000-1123ffff + *-virtio8 DISABLED + description: Ethernet interface + physical id: 0 + bus info: virtio@8 + logical name: enp10s0 + serial: 52:54:00:96:60:df + capabilities: ethernet physical + configuration: autonegotiation=off broadcast=yes driver=virtio_net driverversion=1.0.0 link=no multicast=yes + *-pci:10 + description: PCI bridge + product: QEMU PCIe Root port + vendor: Red Hat, Inc. + physical id: 2.2 + bus info: pci@0000:00:02.2 + version: 00 + width: 32 bits + clock: 33MHz + capabilities: pci pciexpress msix normal_decode bus_master cap_list + configuration: driver=pcieport + resources: irq:47 memory:11c0a000-11c0afff ioport:b000(size=4096) memory:11400000-115fffff ioport:8001400000(size=2097152) + *-pci:11 + description: PCI bridge + product: QEMU PCIe Root port + vendor: Red Hat, Inc. + physical id: 2.3 + bus info: pci@0000:00:02.3 + version: 00 + width: 32 bits + clock: 33MHz + capabilities: pci pciexpress msix normal_decode bus_master cap_list + configuration: driver=pcieport + resources: irq:47 memory:11c0b000-11c0bfff ioport:c000(size=4096) memory:11600000-117fffff ioport:8001600000(size=2097152) + *-pci:12 + description: PCI bridge + product: QEMU PCIe Root port + vendor: Red Hat, Inc. + physical id: 2.4 + bus info: pci@0000:00:02.4 + version: 00 + width: 32 bits + clock: 33MHz + capabilities: pci pciexpress msix normal_decode bus_master cap_list + configuration: driver=pcieport + resources: irq:47 memory:11c0c000-11c0cfff ioport:d000(size=4096) memory:11800000-119fffff ioport:8001800000(size=2097152) + *-pci:13 + description: PCI bridge + product: QEMU PCIe Root port + vendor: Red Hat, Inc. 
+ physical id: 2.5 + bus info: pci@0000:00:02.5 + version: 00 + width: 32 bits + clock: 33MHz + capabilities: pci pciexpress msix normal_decode bus_master cap_list + configuration: driver=pcieport + resources: irq:47 memory:11c0d000-11c0dfff ioport:e000(size=4096) memory:11a00000-11bfffff ioport:8001a00000(size=2097152) + *-pnp00:00 + product: PnP device PNP0c02 + physical id: 1 + capabilities: pnp + configuration: driver=system + *-input + product: Power Button + physical id: 1 + logical name: input0 + logical name: /dev/input/event0 + capabilities: platform diff --git a/tex/draft/skeleton.pdf b/tex/draft/skeleton.pdf index 44d8a33..2953346 100644 Binary files a/tex/draft/skeleton.pdf and b/tex/draft/skeleton.pdf differ diff --git a/tex/draft/skeleton.tex b/tex/draft/skeleton.tex index 7743bf2..066e424 100644 --- a/tex/draft/skeleton.tex +++ b/tex/draft/skeleton.tex @@ -10,7 +10,7 @@ % you should catch most accidental changes of page layout though. \usepackage{microtype} % recommended, but you can remove if it causes problems -% \usepackage{natbib} % recommended for citations +% \usepackage{natbib} % recommended for citations % but I have no experience with natbib... \usepackage[utf8]{inputenc} \usepackage[dvipsnames]{xcolor} \usepackage{hyperref} @@ -18,7 +18,7 @@ \usepackage{graphicx} \usepackage[english]{babel} % -> biblatex -\usepackage{biblatex} % full of mischief +\usepackage{biblatex} \addbibresource{mybibfile.bib} % <- biblatex % -> nice definition listings @@ -30,8 +30,12 @@ % -> code listing % [!] Requires external program: pypi:pygment \usepackage{minted} -\usemintedstyle{vs} +\usemintedstyle{xcode} +\definecolor{code-bg}{rgb}{0.98, 0.98, 0.99} % <- code listing +% -> draw textbook-style frames +\usepackage{mdframed} +% <- frames \begin{document} \begin{preliminary} @@ -64,12 +68,7 @@ \date{\today} \abstract{ -This skeleton demonstrates how to use the \texttt{infthesis} style for -undergraduate dissertations in the School of Informatics. 
It also emphasises the -page limit, and that you must not deviate from the required style. -The file \texttt{skeleton.tex} generates this document and should be used as a -starting point for your thesis. Replace this abstract text with a concise -summary of your report. + \textcolor{red}{To be done\dots} } \maketitle @@ -339,13 +338,14 @@ Using these definitions, a vendor could build \textit{heterogeneous} and \textit \end{definition} \subsection{ARMv8-A Software Cache Coherence in Linux Kernel} +\label{subsec:armv8a-swcoherency} Because of the lack of hardware guarantee on hardware DMA coherency (though such support exists \cite{Parris.AMBA_4_ACE-Lite.2013}), programmers need to invoke architecture-specific cache-coherency instructions when porting DMA hardware support over a diverse range of ARMv8 microarchitectures, often encapsulated in problem-specific subroutines. Notably, kernel (driver) programming warrants programmer attention to software-maintained coherency when userspace programmers downstream expect data-flow, interspersed between CPU and DMA operations, to follow program ordering and (driver vendor) specifications. One such example arises in the Linux kernel implementation of DMA memory management API \cite{Miller_Henderson_Jelinek.Kernelv6.7-DMA_guide.2024}\footnote[1]{Based on Linux kernel v6.7.0.}: \begin{definition}[DMA Mappings] The Linux kernel DMA memory allocation API, imported via - \begin{minted}[linenos]{c} + \begin{minted}[linenos, bgcolor=code-bg]{c} #include \end{minted} defines two variants of DMA mappings: @@ -370,7 +370,7 @@ Notably, kernel (driver) programming warrants programmer attention to software-m Consistent DMA mappings could be trivially created via allocating non-cacheable memory, which guarantees \textit{PoC} for all memory observers (though system-specific fastpaths exist). On the other hand, streaming DMA mappings require manual synchronization upon programmed CPU/DMA access. 
Take single-buffer synchronization on CPU after DMA access for example: -\begin{minted}[linenos, mathescape]{c} +\begin{minted}[linenos, mathescape, bgcolor=code-bg]{c} /* In kernel/dma/mapping.c $\label{code:dma_sync_single_for_cpu}$*/ void dma_sync_single_for_cpu( struct device *dev, // kernel repr for DMA device @@ -386,11 +386,11 @@ void dma_sync_single_for_cpu( arch_sync_dma_for_cpu_all(); // MIPS quirks... } - /* Miscellaneous cases... */ + /* Miscellaneous cases...*/ } \end{minted} -\begin{minted}[linenos]{c} +\begin{minted}[linenos, mathescape, bgcolor=code-bg]{c} /* In arch/arm64/mm/dma-mapping.c */ void arch_sync_dma_for_cpu( phys_addr_t paddr, @@ -411,7 +411,7 @@ void arch_sync_dma_for_cpu( This call-chain, as well as its mirror case which maintains cache coherency for the DMA device after CPU access: \mint[breaklines=true]{c}|dma_sync_single_for_device(struct device *, dma_addr_t, size_t, enum dma_data_direction)|, call into the following procedures, respectively: -\begin{minted}[linenos]{c} +\begin{minted}[linenos, mathescape, bgcolor=code-bg]{c} /* Exported @ arch/arm64/include/asm/cacheflush.h */ /* Defined @ arch/arm64/mm/cache.S */ /* All functions accept virtual start, end addresses. */ @@ -425,7 +425,7 @@ extern void dcache_inval_poc( unsigned long start, unsigned long end ); -/* Clean data cache region [start, end) to PoC. +/* Clean data cache region [start, end) to PoC. $\ref{code:dcache_clean_poc}$ * * Write-back CPU cache entries that intersect with [start, end), * such that data from CPU becomes visible to external writers. @@ -438,7 +438,7 @@ extern void dcache_clean_poc( \subsubsection{Addendum: \texttt{enum dma\_data\_direction}} The Linux kernel defines 4 direction \texttt{enum} values for fine-tuning synchronization behaviors: -\begin{minted}[linenos]{c} +\begin{minted}[linenos, bgcolor=code-bg]{c} /* In include/linux/dma-direction.h */ enum dma_data_direction { DMA_BIDIRECTION = 0, // data transfer direction uncertain. 
@@ -452,25 +452,21 @@ These values allow for certain fast-paths to be taken at runtime. For example, \ % TODO: Move to addendum section. \subsubsection{Use-case: Kernel-space \textit{SMBDirect} Driver} -\textit{SMBDirect} is an extension of the \textit{SMB} (\textit{Server Message Block}) protocol for opportunistically establishing the communication protocol over RDMA-capable network interfaces \cite{many.MSFTLearn-SMBDirect.2024}. +An example of a cache-coherent in-kernel RDMA networking module over heterogeneous ISAs can be found in the Linux implementation of \textit{SMBDirect}. \textit{SMBDirect} is an extension of the \textit{SMB} (\textit{Server Message Block}) protocol for opportunistically establishing the communication protocol over RDMA-capable network interfaces \cite{many.MSFTLearn-SMBDirect.2024}. We focus on two procedures inside the in-kernel SMBDirect implementation: \paragraph{Before send: \texttt{smbd\_post\_send}} -\begin{minted}[linenos]{c} +\texttt{smbd\_post\_send} is a function downstream of the call-chain of \texttt{smbd\_send}, which sends SMBDirect payload for transport over network. Payloads are constructed and batched for maximized bandwidth, then \texttt{smbd\_post\_send} is called to signal the RDMA NIC for transport. + +The function body is roughly as follows: +\begin{minted}[linenos, mathescape, bgcolor=code-bg]{c} /* In fs/smb/client/smbdirect.c */ static int smbd_post_send( struct smbd_connection *info, // SMBDirect transport context struct smbd_request *request, // SMBDirect request context -) // ... -\end{minted} - -Downstream of \texttt{smbd\_send}, which sends SMBDirect payload for transport over network. Payloads are constructed and batched for maximized bandwidth, then \texttt{smbd\_post\_send} is called to signal the RDMA NIC for transport. 
- -The function body is roughly as follows: -\begin{minted}[linenos, firstnumber=last, mathescape]{c} -{ - struct ib_send_wr send_wr; // "Write Request" for entire payload +) { + struct ib_send_wr send_wr; // Ib "Write Request" for payload int rc, i; /* For each message in batched payload */ @@ -503,18 +499,16 @@ The function body is roughly as follows: Line \ref{code:ib_dma_sync_single_for_device} writes back CPU cache lines to be visible for RDMA NIC in preparation for DMA operations when the posted \textit{send request} is worked upon. \paragraph{Upon reception: \texttt{recv\_done}} -\begin{minted}[linenos]{c} +\texttt{recv\_done} is called when the RDMA subsystem works on the received payload over RDMA. + +Mirroring the case for \texttt{smbd\_post\_send}, it invalidates CPU cache lines for DMA-ed data to be visible at CPU cores prior to any operations on received data: + +\begin{minted}[linenos, mathescape, bgcolor=code-bg]{c} /* In fs/smb/client/smbdirect.c */ static void recv_done( struct ib_cq *cq, // "Completion Queue" struct ib_wc *wc, // "Work Completion" -) // ... -\end{minted} - -Called when the RDMA subsystem works on the received payload over RDMA. Mirroring the case for \texttt{smbd\_post\_send}, it invalidates CPU cache lines for DMA-ed data to be visible at CPU cores prior to any operations on received data: - -\begin{minted}[linenos, firstnumber=last, mathescape]{c} -{ +) { struct smbd_data_transfer *data_transfer; struct smbd_response *response = container_of( wc->wr_cqe, // ptr: pointer to member @@ -539,9 +533,83 @@ Called when the RDMA subsystem works on the received payload over RDMA. Mirrorin \end{minted} \chapter{Software Coherency Latency} +Coherency must be maintained at software level when hardware cache coherency cannot be guaranteed for some specific ISA (as established in subsection \ref{subsec:armv8a-swcoherency}). 
There is, therefore, interest in knowing the latency of coherence-maintenance operations for performance engineering purposes, for example OS jitter analysis for scientific computing in heterogeneous clusters and, more pertinently, comparative analysis between software and hardware-backed DSM systems (e.g. \cites{Masouros_etal.Adrias.2023}{Wang_etal.Concordia.2021}). + +The purpose of this chapter is hence to provide a statistical analysis over software coherency latency in ARM64 systems by instrumenting hypothetical scenarios of software-initiated coherence maintenance in ARM64 test-benches. + +The rest of the chapter is structured as follows: +\begin{itemize} + \item { + \hyperref[sec:sw-coherency-setup]{\textbf{Experiment Setup}} covers the test-benches used for instrumentation, including the kernel version, distribution, and the specifications of the instrumented (bare-metal/virtual) machine. + } + \item { + \hyperref[sec:sw-coherency-method]{\textbf{Methodology}} covers the kernel module and workload used for instrumentation and experimentation, including changes made to the kernel, the kernel module, and userspace programs used for experimentation. + } + \item { + \hyperref[sec:sw-coherency-results]{\textbf{Results}} covers the results gathered during instrumentation from various test-benches, segmented by experiment. + } + \item { + \hyperref[sec:sw-coherency-discuss]{\textbf{Discussion}} identifies key insights from experimental results, as well as deficiencies in the research method and possible directions of future work. + } +\end{itemize} + +\section{Experiment Setup}\label{sec:sw-coherency-setup} +\subsection{QEMU-over-x86: \texttt{star}} +The primary source of experimental data comes from a virtualized machine: a virtualized guest running a lightly-customized Linux v6.7.0 preemptive kernel with standard non-graphical Debian 12 distribution installed to provide userspace support. 
The specifics of this QEMU-emulated ARM64 test-bench, running atop an x86-64 host PC, are listed in Table~\ref{table:2}. + +\begin{table}[h] + \centering + \begin{tabular}{|c|c|} + \hline + Processors & 3x QEMU virt-8.2 (2-way SMT; emulates Cortex-A76) \\ + \hline + CPU Flags & + \begin{tabular}{@{}cccccc@{}} + % 1 2 3 4 5 6 + fp & asimd & evtstrm & aes & pmull & sha1 \\ + sha2 & crc32 & atomics & fphp & asimdhp & cpuid \\ + asimdrdm & lrcpc & dcpop & asimddp & & \\ + \end{tabular} \\ + \hline + NUMA Nodes & 1: $\{P_0, \dots, P_5\}$ \\ + \hline + Memory & 4GiB \\ + \hline + \end{tabular} + \caption{Specification of \texttt{star}} + \label{table:2} +\end{table} + +\begin{table}[h] + \centering + \begin{tabular}{|c|c|} + \hline + \end{tabular} + \caption{Specification of Host} + \label{table:3} +\end{table} + +\subsection{\textit{Neoverse N1}: \texttt{rose}} +% - QEMU-over-x86; preemptive-on-preemptive +% - Native server-ready ARM64 (preemptive), which I didn't run for long ngl + +\section{Methodology}\label{sec:sw-coherency-method} +\subsection{Exporting \texttt{dcache\_clean\_poc}} +\subsection{Kernel Module: \texttt{my\_shmem}} +\subsection{Instrumentation: \texttt{ftrace} and \textit{eBPF}} +\subsection{Userspace Programs} + +\section{Results}\label{sec:sw-coherency-results} +\subsection{Controlled Allocation Size; Variable Page Count} +\subsection{Controlled Page Count; Variable Allocation Size} + +\section{Discussion}\label{sec:sw-coherency-discuss} +% - you should also measure the access latency after coherency operation, though this is impl-specific (e.g., one vendor can have a simple PoC mechanism where e.g. you have a shared L2-cache that is snooped by DMA engine, hence flush to L2-cache and call it a day for PoC; but another can just as well call main mem the PoC, dep. on impl.) \chapter{DSM System Design} +\chapter{Summary} + % \bibliographystyle{plain} % \bibliographystyle{plainnat} % \bibliography{mybibfile}