Can't remember... whatever rollback is possible anyways

This commit is contained in:
Zhengyi Chen 2023-12-06 02:20:03 +00:00
parent 15649fa0c7
commit 1fbefd49f0
6 changed files with 234 additions and 5 deletions

36
.vscode-ctags Normal file
View file

@ -0,0 +1,36 @@
!_TAG_EXTRA_DESCRIPTION anonymous /Include tags for non-named objects like lambda/
!_TAG_EXTRA_DESCRIPTION fileScope /Include tags of file scope/
!_TAG_EXTRA_DESCRIPTION pseudo /Include pseudo tags/
!_TAG_EXTRA_DESCRIPTION subparser /Include tags generated by subparsers/
!_TAG_FIELD_DESCRIPTION epoch /the last modified time of the input file (only for F\/file kind tag)/
!_TAG_FIELD_DESCRIPTION file /File-restricted scoping/
!_TAG_FIELD_DESCRIPTION input /input file/
!_TAG_FIELD_DESCRIPTION name /tag name/
!_TAG_FIELD_DESCRIPTION pattern /pattern/
!_TAG_FIELD_DESCRIPTION typeref /Type and name of a variable or typedef/
!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/
!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/
!_TAG_KIND_DESCRIPTION!C d,macro /macro definitions/
!_TAG_KIND_DESCRIPTION!C e,enumerator /enumerators (values inside an enumeration)/
!_TAG_KIND_DESCRIPTION!C f,function /function definitions/
!_TAG_KIND_DESCRIPTION!C g,enum /enumeration names/
!_TAG_KIND_DESCRIPTION!C h,header /included header files/
!_TAG_KIND_DESCRIPTION!C m,member /struct, and union members/
!_TAG_KIND_DESCRIPTION!C s,struct /structure names/
!_TAG_KIND_DESCRIPTION!C t,typedef /typedefs/
!_TAG_KIND_DESCRIPTION!C u,union /union names/
!_TAG_KIND_DESCRIPTION!C v,variable /variable definitions/
!_TAG_OUTPUT_EXCMD mixed /number, pattern, mixed, or combineV2/
!_TAG_OUTPUT_FILESEP slash /slash or backslash/
!_TAG_OUTPUT_MODE u-ctags /u-ctags or e-ctags/
!_TAG_OUTPUT_VERSION 0.0 /current.age/
!_TAG_PARSER_VERSION!C 0.0 /current.age/
!_TAG_PATTERN_LENGTH_LIMIT 96 /0 for no limit/
!_TAG_PROC_CWD /home/rubberhead/Git/00-UOE/unnamed_ba_thesis/ //
!_TAG_PROGRAM_AUTHOR Universal Ctags Team //
!_TAG_PROGRAM_NAME Universal Ctags /Derived from Exuberant Ctags/
!_TAG_PROGRAM_URL https://ctags.io/ /official site/
!_TAG_PROGRAM_VERSION 6.0.0 /p6.0.20221218.0/
!_TAG_ROLE_DESCRIPTION!C!header local /local header/
!_TAG_ROLE_DESCRIPTION!C!header system /system header/
!_TAG_ROLE_DESCRIPTION!C!macro undef /undefined/

View file

@ -158,3 +158,25 @@ abstract = {In this paper we describe the way thread migration can be carried in
publisher={VLDB Endowment} publisher={VLDB Endowment}
} }
@inproceedings{shan2017distributed,
title={Distributed shared persistent memory},
author={Shan, Yizhou and Tsai, Shin-Yeh and Zhang, Yiying},
booktitle={Proceedings of the 2017 Symposium on Cloud Computing},
pages={323--337},
year={2017}
}
@inproceedings{EndoWataru2020MADD,
abstract = {The spread of RDMA-capable interconnects on supercomputers has enabled the middleware developers to explore new design options for runtime systems based on efficient communications. Observing low-latency networks and shared-memory infrastructure for multi-core processors, we have focused on extending shared-memory abstraction into multiple nodes exploiting RDMA, i.e., Distributed Shared Memory (DSM). We have found that the traditional protocols of DSM designed for two-sided communications cannot fully exploit the performance of RDMA, which necessitates decentralization and coarse-grained communications. To solve this problem, we introduced two methods for the DSM coherence protocol to exploit RDMA and implemented a DSM library MENPS using this protocol. Our evaluation shows that MENPS could accelerate two of five shared-memory applications with minimal modifications and beat an existing RDMA-based DSM runtime.},
author = {Endo, Wataru and Sato, Shigeyuki and Taura, Kenjiro},
address = {LOS ALAMITOS},
booktitle = {2020 IEEE/ACM Fourth Annual Workshop on Emerging Parallel and Distributed Runtime Systems and Middleware (IPDRM)},
isbn = {1665422769},
keywords = {cache coherence protocol ; coarse-grained communications ; Coherence ; Computer Science ; Computer Science, Hardware & Architecture ; Computer Science, Software Engineering ; Computer Science, Theory & Methods ; decentralized distributed shared memory ; design options ; distributed shared memory ; distributed shared memory systems ; DSM coherence protocol ; DSM library MENPS ; efficient communications ; existing RDMA-based DSM runtime ; home migration ; Libraries ; Merging ; message passing ; middleware ; middleware developers ; multicore processors ; multiple nodes ; Program processors ; protocols ; RDMA ; RDMA-capable interconnects ; Runtime ; runtime systems ; Science & Technology ; shared memory systems ; shared-memory abstraction ; shared-memory applications ; shared-memory infrastructure ; Synchronization ; Technology ; timestamp based coherence ; traditional protocols ; two-sided communications},
language = {eng},
organization = {IEEE Comp Soc},
pages = {9--16},
publisher = {IEEE},
title = {MENPS: A Decentralized Distributed Shared Memory Exploiting RDMA},
year = {2020},
}

Binary file not shown.

View file

@ -1,3 +1,4 @@
% Yeah "slices" whatever lol
\documentclass{beamer} \documentclass{beamer}
\usepackage[style=authortitle-comp]{biblatex} \usepackage[style=authortitle-comp]{biblatex}
\usepackage[export]{adjustbox} \usepackage[export]{adjustbox}
@ -12,6 +13,129 @@
% Title page % Title page
\frame{\titlepage} \frame{\titlepage}
% Page -2
\begin{frame}
\frametitle{
Literature Review: (Shan, Tsai, \& Zhang. 2017\footcite{shan2017distributed})
}
\begin{itemize}
\item {
Concerns with the sharing of persistent memory --
\begin{itemize}
\item More or less similar to sharing regular memory, but\dots
\item Data replication is key $\Rightarrow$ Multiple data providers.
\end{itemize}
}
\item {
Supports both Multi-Reader Multi-Writer (MRMW) and Multi-Reader Single-Writer (MRSW) protocols
\begin{itemize}
\item MRMW ``support(s) great parallelism''
\item MRSW enables ``stronger consistency''
\end{itemize}
}
\item {
Makes distinction between 3 variants of nodes:
\begin{itemize}
\item Commit Node -- Node that wishes to commit changes wrt.\ the system.
\item Owner Node -- Node(s) that act as data providers for the latest page content.
\item Manager Node -- Node that provides (serialized) write access control to a page.
\end{itemize}
}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{
Literature Review: (Shan, Tsai, \& Zhang. 2017\footcite{shan2017distributed})
}
\begin{itemize}
\item {
For data replication and fault tolerance, necessitates:
\begin{enumerate}
\item Commit status logging (akin to journaled file system)
\item Persistent Commit ID
\item \textbf{Required} deg. of replication -- each ON shares to $N$ nodes.
\end{enumerate}
}
\item {
Fault tolerance is out of this thesis's scope. However\dots
\begin{itemize}
\item Prob. no need for requiring any degree of data replication.
\item Dropping data replication req. $\Rightarrow$ no need for replication comms.
\item Commit status logging \& persistent CID can be helpful \& should not introduce additional comms.
\end{itemize}
}
\item {
MRSW provides ``simpler and more efficient'' commits than MRMW -- no concurrent
commits to the same shared memory object exist.
\begin{itemize}
\item Also makes more sense from a CPU-accelerator dichotomy outlook (ofc. wrt. this thesis's system).
\end{itemize}
}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{MRSW: (Shan, Tsai, \& Zhang. 2017\footcite{shan2017distributed})}
\begin{figure}
\includegraphics[width=\linewidth]{w12_slides_resources/dspm.fig8.png}
\end{figure}
Note: CN: Node 1, MN: Node 2, ON: Node 2 \& 3. Node 4 may or may not already
share the committed page prior to acquire.
\end{frame}
% Page 0
\begin{frame}
\frametitle{Literature Review: (Ramesh. 2023)}
\begin{itemize}
\item Popcorn-derived.
\item {
Sequential consistency, MRSW protocol offloaded onto sNIC:
\begin{itemize}
\item DSM protocol processor implemented on sNIC FPGA core.
\item sNIC \textbf{keeps track of memory ownership, status, R/W permissions} at page level granularity.
\item Removes the need for distinct memory management nodes.
\item (i.e., the sNIC IS the memory management node -- except of course allocation).
\end{itemize}
}
\item {
Similar idea occurred in \textit{Concordia}\footcite{wang2021concordia}:
\begin{itemize}
\item Concurrency control and multicast offloaded to network switch.
\item Authors claim this is more scalable (?)
\end{itemize}
}
\end{itemize}
\footnote{
Ramesh, ``SNIC-DSM: SmartNIC based DSM Infrastructure for Heterogeneous-ISA Machines''
}
\end{frame}
\begin{frame}
\frametitle{Literature Review: (Endo, Sato, \& Taura. 2020)\footcite{EndoWataru2020MADD}}
\begin{itemize}
\item MRMW: use timestamps to store reader ``intervals''.
\item {
Introduces the home-migration concept:
\begin{itemize}
\item At commit, make the CN the home node instead of invalidating the home node.
\item This removes communications needed for diff-merging at home node -- this can be done locally.
\item No support for multiple home nodes.
\end{itemize}
}
\item {
No performance improvement over PGAS programming framework (OpenMPI).
}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Literature Review: (Endo, Sato, \& Taura. 2020)\footcite{EndoWataru2020MADD}}
\begin{figure}
\includegraphics[width=\linewidth]{w12_slides_resources/menps.fig5.png}
\end{figure}
\end{frame}
% Page 1 % Page 1
\begin{frame} \begin{frame}
\frametitle{The System} \frametitle{The System}
@ -97,7 +221,8 @@
w12_slides_resources/Fig-RwLockProtocol 2023-12-04 21_03_50.pdf w12_slides_resources/Fig-RwLockProtocol 2023-12-04 21_03_50.pdf
} }
\end{figure} \end{figure}
Note: The blue arrow should be acknowledged by P3 -- forgot to put the ack. arrow in. Note: The blue arrow should be acknowledged via commit by P3 to P1 --
forgot to put the ack. arrow in.
\end{frame} \end{frame}
% Page 5 % Page 5
@ -124,6 +249,8 @@
i.e., Instead of write-invalidate, perform acquire-invalidate. i.e., Instead of write-invalidate, perform acquire-invalidate.
} }
\end{itemize} \end{itemize}
This may require pages to be marked as CoW if the sharer also wants to act as a home node.
\end{frame} \end{frame}
% Page 6 % Page 6
@ -134,11 +261,13 @@
\item { \item {
Multi-home Protocol: instead of having one home at a time, have Multi-home Protocol: instead of having one home at a time, have
multiple homes (e.g., when writer commits) to prevent network bottleneck. multiple homes (e.g., when writer commits) to prevent network bottleneck.
\begin{itemize}
\item Extra metadata can limit scalability (e.g., granularity of directories)
\end{itemize}
} }
\item { \item {
Auto-share: Mark pages shared via \texttt{/dev/rshm} as automatically Auto-share: Automatically share pages at commit time using 1-way
shared to some remote nodes such that 1-way communications suffice to communications.
re-validate invalidated pages.
\begin{itemize} \begin{itemize}
\item Potential for communication reduction -- debatable. \item Potential for communication reduction -- debatable.
\end{itemize} \end{itemize}
@ -146,6 +275,48 @@
\end{itemize} \end{itemize}
\end{frame} \end{frame}
\begin{frame}
\frametitle{Why this design?}
\begin{itemize}
\item Largely inspired by DSPM\footcite{shan2017distributed}.
\item Removed arrows for enforced data duplication -- duplication is solely on-demand.
\item {
Introduces transitional state ``T'':
\begin{itemize}
\item Used to flag a page as unserviceable -- visible only at MN.
\item All read/write access to T-page is kept on hold until MN receives commit msg.
\item After commit, MN forwards queued R/W access to moved home.
\item This (at least) preserves RAW and WAW data dependencies, whichever way the issued accesses are serialized.
\item Removing T allows stale data to be served -- violates RAW for better throughput.
\end{itemize}
}
\item Extensible (as mentioned in prior page).
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Why not this design?}
At the very least\dots
\begin{itemize}
\item {
De-coupled home and access-management nodes require:
\begin{itemize}
\item Each home node needs to be MN-aware (easy).
\item {
MN needs to be home-aware (also easy with single-writer, but spatial complexity is a concern):
\begin{itemize}
\item Naive directory scheme is not scalable.
\item Coarse directory scheme (e.g., SGI Origin 2000) is wasteful (but may be the fastest in practice).
\item Distributed directory scheme may provide terrible latency.
\item More sophisticated schemes are possible but need work \& experimentation.
\end{itemize}
}
\end{itemize}
}
\item Strict consistency limits throughput.
\end{itemize}
\end{frame}
% Page 7 % Page 7
\begin{frame} \begin{frame}
\frametitle{What about Consistency \textbf{Model}?} \frametitle{What about Consistency \textbf{Model}?}
@ -155,7 +326,7 @@
\begin{itemize} \begin{itemize}
\item { \item {
Weak ordering architectures (e.g., ARMv8) more or less depends on Weak ordering architectures (e.g., ARMv8) more or less depends on
compiler/interpreter to emit barriers as see fit \cite{Haynes_2022}. compiler/interpreter to emit barriers as see fit \footcite{Haynes_2022}.
} }
\item { \item {
Bad for usability/portability -- programs may need Bad for usability/portability -- programs may need

Binary file not shown.

After

Width:  |  Height:  |  Size: 136 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 204 KiB