Can't remember... whatever rollback is possible anyways
This commit is contained in:
parent
15649fa0c7
commit
1fbefd49f0
6 changed files with 234 additions and 5 deletions
36
.vscode-ctags
Normal file
36
.vscode-ctags
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
!_TAG_EXTRA_DESCRIPTION anonymous /Include tags for non-named objects like lambda/
|
||||
!_TAG_EXTRA_DESCRIPTION fileScope /Include tags of file scope/
|
||||
!_TAG_EXTRA_DESCRIPTION pseudo /Include pseudo tags/
|
||||
!_TAG_EXTRA_DESCRIPTION subparser /Include tags generated by subparsers/
|
||||
!_TAG_FIELD_DESCRIPTION epoch /the last modified time of the input file (only for F\/file kind tag)/
|
||||
!_TAG_FIELD_DESCRIPTION file /File-restricted scoping/
|
||||
!_TAG_FIELD_DESCRIPTION input /input file/
|
||||
!_TAG_FIELD_DESCRIPTION name /tag name/
|
||||
!_TAG_FIELD_DESCRIPTION pattern /pattern/
|
||||
!_TAG_FIELD_DESCRIPTION typeref /Type and name of a variable or typedef/
|
||||
!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/
|
||||
!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/
|
||||
!_TAG_KIND_DESCRIPTION!C d,macro /macro definitions/
|
||||
!_TAG_KIND_DESCRIPTION!C e,enumerator /enumerators (values inside an enumeration)/
|
||||
!_TAG_KIND_DESCRIPTION!C f,function /function definitions/
|
||||
!_TAG_KIND_DESCRIPTION!C g,enum /enumeration names/
|
||||
!_TAG_KIND_DESCRIPTION!C h,header /included header files/
|
||||
!_TAG_KIND_DESCRIPTION!C m,member /struct, and union members/
|
||||
!_TAG_KIND_DESCRIPTION!C s,struct /structure names/
|
||||
!_TAG_KIND_DESCRIPTION!C t,typedef /typedefs/
|
||||
!_TAG_KIND_DESCRIPTION!C u,union /union names/
|
||||
!_TAG_KIND_DESCRIPTION!C v,variable /variable definitions/
|
||||
!_TAG_OUTPUT_EXCMD mixed /number, pattern, mixed, or combineV2/
|
||||
!_TAG_OUTPUT_FILESEP slash /slash or backslash/
|
||||
!_TAG_OUTPUT_MODE u-ctags /u-ctags or e-ctags/
|
||||
!_TAG_OUTPUT_VERSION 0.0 /current.age/
|
||||
!_TAG_PARSER_VERSION!C 0.0 /current.age/
|
||||
!_TAG_PATTERN_LENGTH_LIMIT 96 /0 for no limit/
|
||||
!_TAG_PROC_CWD /home/rubberhead/Git/00-UOE/unnamed_ba_thesis/ //
|
||||
!_TAG_PROGRAM_AUTHOR Universal Ctags Team //
|
||||
!_TAG_PROGRAM_NAME Universal Ctags /Derived from Exuberant Ctags/
|
||||
!_TAG_PROGRAM_URL https://ctags.io/ /official site/
|
||||
!_TAG_PROGRAM_VERSION 6.0.0 /p6.0.20221218.0/
|
||||
!_TAG_ROLE_DESCRIPTION!C!header local /local header/
|
||||
!_TAG_ROLE_DESCRIPTION!C!header system /system header/
|
||||
!_TAG_ROLE_DESCRIPTION!C!macro undef /undefined/
|
||||
22
tex/main.bib
22
tex/main.bib
|
|
@ -158,3 +158,25 @@ abstract = {In this paper we describe the way thread migration can be carried in
|
|||
publisher={VLDB Endowment}
|
||||
}
|
||||
|
||||
@inproceedings{shan2017distributed,
|
||||
title={Distributed shared persistent memory},
|
||||
author={Shan, Yizhou and Tsai, Shin-Yeh and Zhang, Yiying},
|
||||
booktitle={Proceedings of the 2017 Symposium on Cloud Computing},
|
||||
pages={323--337},
|
||||
year={2017}
|
||||
}
|
||||
|
||||
@inproceedings{EndoWataru2020MADD,
|
||||
abstract = {The spread of RDMA-capable interconnects on supercomputers has enabled the middleware developers to explore new design options for runtime systems based on efficient communications. Observing low-latency networks and shared-memory infrastructure for multi-core processors, we have focused on extending shared-memory abstraction into multiple nodes exploiting RDMA, i.e., Distributed Shared Memory (DSM). We have found that the traditional protocols of DSM designed for two-sided communications cannot fully exploit the performance of RDMA, which necessitates decentralization and coarse-grained communications. To solve this problem, we introduced two methods for the DSM coherence protocol to exploit RDMA and implemented a DSM library MENPS using this protocol. Our evaluation shows that MENPS could accelerate two of five shared-memory applications with minimal modifications and beat an existing RDMA-based DSM runtime.},
|
||||
author = {Endo, Wataru and Sato, Shigeyuki and Taura, Kenjiro},
|
||||
address = {LOS ALAMITOS},
|
||||
booktitle = {2020 IEEE/ACM Fourth Annual Workshop on Emerging Parallel and Distributed Runtime Systems and Middleware (IPDRM)},
|
||||
isbn = {1665422769},
|
||||
keywords = {cache coherence protocol ; coarse-grained communications ; Coherence ; Computer Science ; Computer Science, Hardware & Architecture ; Computer Science, Software Engineering ; Computer Science, Theory & Methods ; decentralized distributed shared memory ; design options ; distributed shared memory ; distributed shared memory systems ; DSM coherence protocol ; DSM library MENPS ; efficient communications ; existing RDMA-based DSM runtime ; home migration ; Libraries ; Merging ; message passing ; middleware ; middleware developers ; multicore processors ; multiple nodes ; Program processors ; protocols ; RDMA ; RDMA-capable interconnects ; Runtime ; runtime systems ; Science & Technology ; shared memory systems ; shared-memory abstraction ; shared-memory applications ; shared-memory infrastructure ; Synchronization ; Technology ; timestamp based coherence ; traditional protocols ; two-sided communications},
|
||||
language = {eng},
|
||||
organization = {IEEE Comp Soc},
|
||||
pages = {9-16},
|
||||
publisher = {IEEE},
|
||||
title = {MENPS: A Decentralized Distributed Shared Memory Exploiting RDMA},
|
||||
year = {2020},
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -1,3 +1,4 @@
|
|||
% Yeah "slices" whatever lol
|
||||
\documentclass{beamer}
|
||||
\usepackage[style=authortitle-comp]{biblatex}
|
||||
\usepackage[export]{adjustbox}
|
||||
|
|
@ -12,6 +13,129 @@
|
|||
% Title page
|
||||
\frame{\titlepage}
|
||||
|
||||
% Page -2
|
||||
\begin{frame}
|
||||
\frametitle{
|
||||
Literature Review: (Shan, Tsai, \& Zhang. 2017\footcite{shan2017distributed})
|
||||
}
|
||||
\begin{itemize}
|
||||
\item {
|
||||
Concerns with the sharing of persistent memory --
|
||||
\begin{itemize}
|
||||
\item More or less similar to sharing regular memory, but\dots
|
||||
\item Data replication is key $\Rightarrow$ Multiple data providers.
|
||||
\end{itemize}
|
||||
}
|
||||
\item {
|
||||
Supports both Multi-Reader Multi-Writer and Multi-Reader Single-Writer Protocols
|
||||
\begin{itemize}
|
||||
\item MRMW ``support(s) great parallelism''
|
||||
\item MRSW enables ``stronger consistency''
|
||||
\end{itemize}
|
||||
}
|
||||
\item {
|
||||
Makes distinction between 3 variants of nodes:
|
||||
\begin{itemize}
|
||||
\item Commit Node -- Node who wishes to commit changes wrt. the system.
|
||||
\item Owner Node -- Node(s) who act as data provider for latest page content.
|
||||
\item Manager Node -- Node who provides (serialized) write access control to page.
|
||||
\end{itemize}
|
||||
}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{
|
||||
Literature Review: (Shan, Tsai, \& Zhang. 2017\footcite{shan2017distributed})
|
||||
}
|
||||
\begin{itemize}
|
||||
\item {
|
||||
For data replication and fault tolerance, necessitates:
|
||||
\begin{enumerate}
|
||||
\item Commit status logging (akin to journaled file system)
|
||||
\item Persistent Commit ID
|
||||
\item \textbf{Required} deg. of replication -- each ON shares to $N$ nodes.
|
||||
\end{enumerate}
|
||||
}
|
||||
\item {
|
||||
Fault tolerance is out of this thesis's scope. However\dots
|
||||
\begin{itemize}
|
||||
\item Prob. no need for requiring any degree of data replication.
|
||||
\item Dropping data replication req. $\Rightarrow$ no need for replication comms.
|
||||
\item Commit status logging \& persistent CID can be helpful \& should not introduce additional comms.
|
||||
\end{itemize}
|
||||
}
|
||||
\item {
|
||||
MRSW provides ``simpler and more efficient'' commits than MRMW -- no concurrent
|
||||
commits to the same shared memory object exist.
|
||||
\begin{itemize}
|
||||
\item Also makes more sense from a CPU-accelerator dichotomy outlook (ofc. wrt. this thesis's system).
|
||||
\end{itemize}
|
||||
}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{MRSW: (Shan, Tsai, \& Zhang. 2017\footcite{shan2017distributed})}
|
||||
\begin{figure}
|
||||
\includegraphics[width=\linewidth]{w12_slides_resources/dspm.fig8.png}
|
||||
\end{figure}
|
||||
Note: CN: Node 1, MN: Node 2, ON: Node 2 \& 3. Node 4 may or may not already
|
||||
share the committed page prior to acquire.
|
||||
\end{frame}
|
||||
|
||||
% Page 0
|
||||
\begin{frame}
|
||||
\frametitle{Literature Review: (Ramesh. 2023)}
|
||||
\begin{itemize}
|
||||
\item Popcorn-derived.
|
||||
\item {
|
||||
Sequential consistency, MRSW protocol offloaded onto sNIC:
|
||||
\begin{itemize}
|
||||
\item DSM protocol processor implemented on sNIC FPGA core.
|
||||
\item sNIC \textbf{keeps track of memory ownership, status, R/W permissions} at page level granularity.
|
||||
\item Removes the need for distinct memory management nodes.
|
||||
\item (i.e., the sNIC IS the memory management node -- except of course allocation).
|
||||
\end{itemize}
|
||||
}
|
||||
\item {
|
||||
Similar idea occurred in \textit{Concordia}\footcite{wang2021concordia}:
|
||||
\begin{itemize}
|
||||
\item Concurrency control and multicast offloaded to network switch.
|
||||
\item Authors claim this is more scalable (?)
|
||||
\end{itemize}
|
||||
}
|
||||
\end{itemize}
|
||||
\footnote{
|
||||
Ramesh, ``SNIC-DSM: SmartNIC based DSM Infrastructure for Heterogeneous-ISA Machines''
|
||||
}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Literature Review: (Endo, Sato, \& Taura. 2020)\footcite{EndoWataru2020MADD}}
|
||||
\begin{itemize}
|
||||
\item MRMW: use timestamps to store reader ``intervals''.
|
||||
\item {
|
||||
Introduces the home-migration concept:
|
||||
\begin{itemize}
|
||||
\item At commit, make the CN the home node instead of invalidating the home node.
|
||||
\item This removes communications needed for diff-merging at home node -- this can be done locally.
|
||||
\item No support for multiple home nodes.
|
||||
\end{itemize}
|
||||
}
|
||||
\item {
|
||||
No performance improvement over PGAS programming framework (OpenMPI).
|
||||
}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Literature Review: (Endo, Sato, \& Taura. 2020)\footcite{EndoWataru2020MADD}}
|
||||
\begin{figure}
|
||||
\includegraphics[width=\linewidth]{w12_slides_resources/menps.fig5.png}
|
||||
\end{figure}
|
||||
\end{frame}
|
||||
|
||||
% Page 1
|
||||
\begin{frame}
|
||||
\frametitle{The System}
|
||||
|
|
@ -97,7 +221,8 @@
|
|||
w12_slides_resources/Fig-RwLockProtocol 2023-12-04 21_03_50.pdf
|
||||
}
|
||||
\end{figure}
|
||||
Note: The blue arrow should be acknowledged by P3 -- forgot to put the ack. arrow in.
|
||||
Note: The blue arrow should be acknowledged via commit by P3 to P1 --
|
||||
forgot to put the ack. arrow in.
|
||||
\end{frame}
|
||||
|
||||
% Page 5
|
||||
|
|
@ -124,6 +249,8 @@
|
|||
i.e., Instead of write-invalidate, perform acquire-invalidate.
|
||||
}
|
||||
\end{itemize}
|
||||
|
||||
This may require pages to be marked as CoW if the sharer wants also to act as a home node.
|
||||
\end{frame}
|
||||
|
||||
% Page 6
|
||||
|
|
@ -134,11 +261,13 @@
|
|||
\item {
|
||||
Multi-home Protocol: instead of having one home at a time, have
|
||||
multiple homes (e.g., when writer commits) to prevent network bottleneck.
|
||||
\begin{itemize}
|
||||
\item Extra metadata can limit scalability (e.g., granularity of directories)
|
||||
\end{itemize}
|
||||
}
|
||||
\item {
|
||||
Auto-share: Mark pages shared via \texttt{/dev/rshm} as automatically
|
||||
shared to some remote nodes such that 1-way communications suffice to
|
||||
re-validate invalidated pages.
|
||||
Auto-share: Automatically share pages at commit time using 1-way
|
||||
communications.
|
||||
\begin{itemize}
|
||||
\item Potential for communication reduction -- debatable.
|
||||
\end{itemize}
|
||||
|
|
@ -146,6 +275,48 @@
|
|||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Why this design?}
|
||||
\begin{itemize}
|
||||
\item Largely inspired by DSPM\footcite{shan2017distributed}.
|
||||
\item Removed arrows for enforced data duplication -- duplication is solely on-demand.
|
||||
\item {
|
||||
Introduces transitional state ``T'':
|
||||
\begin{itemize}
|
||||
\item Used to flag a page as unserviceable -- visible only at MN.
|
||||
\item All read/write access to T-page is kept on hold until MN receives commit msg.
|
||||
\item After commit, MN forwards queued R/W access to moved home.
|
||||
\item This (at least) maintains RAW, WAW data dependency for whichever issue serialization.
|
||||
\item Removing T allows stale data to be served -- violates RAW for better throughput.
|
||||
\end{itemize}
|
||||
}
|
||||
\item Extensible (as mentioned in prior page).
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Why not this design?}
|
||||
At the very least\dots
|
||||
\begin{itemize}
|
||||
\item {
|
||||
De-coupled home and access-management nodes require:
|
||||
\begin{itemize}
|
||||
\item Each home node needs to be MN-aware (easy).
|
||||
\item {
|
||||
MN needs to be home-aware (also easy with single-writer, but spatial complexity is a concern):
|
||||
\begin{itemize}
|
||||
\item Naive directory scheme is not scalable.
|
||||
\item Coarse directory scheme (e.g., SGI Origin 2000) is wasteful (but may be the fastest in practice).
|
||||
\item Distributed directory scheme may provide terrible latency.
|
||||
\item More sophisticated schemes are possible but need work \& experimentation.
|
||||
\end{itemize}
|
||||
}
|
||||
\end{itemize}
|
||||
}
|
||||
\item Strict consistency limits throughput.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Page 7
|
||||
\begin{frame}
|
||||
\frametitle{What about Consistency \textbf{Model}?}
|
||||
|
|
@ -155,7 +326,7 @@
|
|||
\begin{itemize}
|
||||
\item {
|
||||
Weak ordering architectures (e.g., ARMv8) more or less depend on
|
||||
compiler/interpreter to emit barriers as see fit \cite{Haynes_2022}.
|
||||
compiler/interpreter to emit barriers as it sees fit \footcite{Haynes_2022}.
|
||||
}
|
||||
\item {
|
||||
Bad for usability/portability -- programs may need
|
||||
|
|
|
|||
BIN
tex/misc/w12_slides_resources/dspm.fig8.png
Normal file
BIN
tex/misc/w12_slides_resources/dspm.fig8.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 136 KiB |
BIN
tex/misc/w12_slides_resources/menps.fig5.png
Normal file
BIN
tex/misc/w12_slides_resources/menps.fig5.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 204 KiB |
Loading…
Add table
Add a link
Reference in a new issue