w15 slices around 2/3 ~ 3/4 done?
This commit is contained in:
parent
169382407d
commit
a7eb8e8214
4 changed files with 184 additions and 13 deletions
|
|
@ -204,24 +204,174 @@
|
|||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Consistency Model}
|
||||
|
||||
|
||||
|
||||
\frametitle{Protocol Excerpt: Write-Invalidate}
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics[width=\linewidth]{
|
||||
w12_slides_resources/Fig-RwlockProtocol 2023-12-06 19_05_06.pdf
|
||||
}
|
||||
\end{figure}
|
||||
The \textit{T}-state indicates a transitional state for some shared page.
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Coherence Protocol}
|
||||
|
||||
|
||||
|
||||
\frametitle{Consistency Model: TSO}
|
||||
\begin{itemize}
|
||||
\item {
|
||||
Total Store Ordering allows Reads to bypass Stores.
|
||||
}
|
||||
\item {
|
||||
Assuming correct use of node-local synchronization on all nodes,
|
||||
applying TSO in a home-based DSM allows for:
|
||||
\begin{itemize}
|
||||
\item {
|
||||
When another node tries to read T-page from access-control
|
||||
node: W$\rightarrow$R violation.
|
||||
}
|
||||
\item {
|
||||
When another node tries to read S-page from data-provider
|
||||
nodes: W$\rightarrow$R violation (e.g., if the invalidation
|
||||
message from access-control node was received afterwards).
|
||||
}
|
||||
\item {
|
||||
Data-provider and access-control nodes work on one request
|
||||
at a time: no R$\rightarrow$W violation.
|
||||
}
|
||||
\item {
|
||||
Write-accesses serialized at access-control node: no
|
||||
W$\rightarrow$W violation.
|
||||
}
|
||||
\end{itemize}
|
||||
}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Stateful Nodes}
|
||||
\frametitle{Consistency Model: Strengthen to Sequential}
|
||||
\begin{itemize}
|
||||
\item {
|
||||
By corollary, can reverse the previous page's statements to
|
||||
strengthen to sequential consistency:
|
||||
\begin{itemize}
|
||||
\item {
|
||||
Disallow T-pages from being serviced until new page content
|
||||
is installed: lengthens critical section.
|
||||
}
|
||||
\item {
|
||||
Abolish data-provider nodes: access-control nodes become
|
||||
bottleneck.
|
||||
}
|
||||
\end{itemize}
|
||||
}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Coherence Protocol: Possible Features}
|
||||
\begin{itemize}
|
||||
\item {
|
||||
Multi-data-provider Protocol: Instead of having one data-provider,
|
||||
have multiple data-provider nodes that automatically receive write-backs
|
||||
to prevent network bottleneck.
|
||||
\begin{itemize}
|
||||
\item Data-provider nodes may be dynamically assigned.
|
||||
\item Extra metadata can limit scalability.
|
||||
\end{itemize}
|
||||
}
|
||||
\item {
|
||||
Auto-share: likewise, write back pages to non-data-provider nodes to
|
||||
take advantage of 1-sided communications.
|
||||
}
|
||||
\item {
|
||||
Request aggregation: aggregate RDMA transfers for optimal transfer
|
||||
performance.
|
||||
\begin{itemize}
|
||||
\item Need to be coherent with program sequence!
|
||||
\item Enables write-request merging.
|
||||
\end{itemize}
|
||||
}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Stateful Nodes \& Transitions (Provisional)}
|
||||
\begin{itemize}
|
||||
\item {
|
||||
Nodes (e.g., within the cluster) become tightly bound with the
|
||||
properties of each shared page.
|
||||
}
|
||||
\end{itemize}
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics[width=\linewidth]{
|
||||
w15_resources/截屏 2024-01-30 19.15.45 2024-01-30 19_16_19.png
|
||||
}
|
||||
\end{figure}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Stateful Nodes \& Transitions (Provisional) (Cont.)}
|
||||
\begin{itemize}
|
||||
\item {
|
||||
MN (Manager Nodes): Provide access-control and (fallback)
|
||||
data-provision.
|
||||
}
|
||||
\item {
|
||||
HN (Home Nodes): Provide data-provision. Can be write-back or
|
||||
write-invalidate.
|
||||
}
|
||||
\item {
|
||||
SN (Sharer Nodes): Share data within a reader-only ``epoch''. Can be
|
||||
write-back or write-invalidate.
|
||||
}
|
||||
\item {
|
||||
NSN (Non-sharer Nodes): Nodes in network without sharing the
|
||||
particular page(s).
|
||||
}
|
||||
\item {
|
||||
CN (Commit Node): Node that has acquired single-writer access to the
|
||||
shared page.
|
||||
}
|
||||
\item {
|
||||
Message variants are not finalized:
|
||||
\begin{itemize}
|
||||
\item {
|
||||
Goal: Composable message chains that allow for
|
||||
``piggy-backing'' of multiple procedures.
|
||||
}
|
||||
\end{itemize}
|
||||
}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
\frametitle{Stateful Nodes: Transition Paths}
|
||||
\begin{itemize}
|
||||
\item {
|
||||
Filled-line transitions indicate that the local node requests a remote node to perform a
|
||||
state transition.
|
||||
}
|
||||
\item {
|
||||
Dashed-line transitions indicate that the local node implicitly transitions prior
|
||||
to sending a request to the remote node.
|
||||
}
|
||||
\item {
|
||||
\textit{Non-committal} path concerns read-only and
|
||||
copy-on-write sharing. Sharers cannot make global modification to
|
||||
cached local data.
|
||||
}
|
||||
\item {
|
||||
\textit{Invalidation} path is paired with commit operations (due to
|
||||
write-invalidation).
|
||||
}
|
||||
\item {
|
||||
\textit{Committal} path concerns global write sharing. Only
|
||||
one writer is allowed to write and commit at one time.
|
||||
}
|
||||
\item {
|
||||
Problem: How exactly to integrate RDMA remote read/write into this?
|
||||
}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Part 3: Progress
|
||||
|
|
@ -230,9 +380,25 @@
|
|||
|
||||
\begin{frame}
|
||||
\frametitle{Progress}
|
||||
|
||||
|
||||
|
||||
\begin{itemize}
|
||||
\item {
|
||||
Goal: in-kernel implementation of software cache-coherency via
|
||||
non-coherent RDMA hardware.
|
||||
}
|
||||
\item {
|
||||
Optimistic Goal: in-kernel implementation of memory model in DSM.
|
||||
}
|
||||
\item {
|
||||
Progress: studied and isolated mechanism for data cache
|
||||
invalidation/flushing in ARM64, which allows the DSM to run in
|
||||
heterogeneous ISA clusters.
|
||||
}
|
||||
\item {
|
||||
Integration with kernel \& main DSM kernel module remains at hand:
|
||||
is it absolutely necessary to export new symbols for such an
|
||||
important operation?
|
||||
}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}
|
||||
|
|
@ -266,5 +432,9 @@
|
|||
|
||||
\end{frame}
|
||||
|
||||
% Part 4: Future Work
|
||||
% =============================================================================
|
||||
|
||||
% References
|
||||
|
||||
\end{document}
|
||||
Loading…
Add table
Add a link
Reference in a new issue