...
This commit is contained in:
parent
dbc01147fd
commit
816a29d462
3 changed files with 118 additions and 5 deletions
|
|
@ -451,3 +451,63 @@
|
||||||
organization={IEEE}
|
organization={IEEE}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@inproceedings{Wang_etal.Concordia.2021,
|
||||||
|
author = {Qing Wang and Youyou Lu and Erci Xu and Junru Li and Youmin Chen and Jiwu Shu},
|
||||||
|
title = {Concordia: Distributed Shared Memory with {In-Network} Cache Coherence},
|
||||||
|
booktitle = {19th USENIX Conference on File and Storage Technologies (FAST 21)},
|
||||||
|
year = {2021},
|
||||||
|
isbn = {978-1-939133-20-5},
|
||||||
|
pages = {277--292},
|
||||||
|
url = {https://www.usenix.org/conference/fast21/presentation/wang},
|
||||||
|
publisher = {USENIX Association},
|
||||||
|
month = feb
|
||||||
|
}
|
||||||
|
|
||||||
|
@INPROCEEDINGS{Kim_etal.DeX-upon-Linux.2020,
|
||||||
|
author={Kim, Sang-Hoon and Chuang, Ho-Ren and Lyerly, Robert and Olivier, Pierre and Min, Changwoo and Ravindran, Binoy},
|
||||||
|
booktitle={2020 IEEE 40th International Conference on Distributed Computing Systems (ICDCS)},
|
||||||
|
title={{DeX}: Scaling Applications Beyond Machine Boundaries},
|
||||||
|
year={2020},
|
||||||
|
volume={},
|
||||||
|
number={},
|
||||||
|
pages={864--876},
|
||||||
|
keywords={Protocols;Instruction sets;Linux;Prototypes;Distributed databases;Programming;Kernel;Thread migration;distributed execution;distributed memory;RDMA},
|
||||||
|
doi={10.1109/ICDCS47774.2020.00021}
|
||||||
|
}
|
||||||
|
|
||||||
|
@inproceedings{Chaiken_Kubiatowicz_Agarwal.LimitLESS-with-Alewife.1991,
|
||||||
|
author = {Chaiken, David and Kubiatowicz, John and Agarwal, Anant},
|
||||||
|
title = {{LimitLESS} Directories: A Scalable Cache Coherence Scheme},
|
||||||
|
year = {1991},
|
||||||
|
isbn = {0897913809},
|
||||||
|
publisher = {Association for Computing Machinery},
|
||||||
|
address = {New York, NY, USA},
|
||||||
|
url = {https://doi.org/10.1145/106972.106995},
|
||||||
|
doi = {10.1145/106972.106995},
|
||||||
|
booktitle = {Proceedings of the Fourth International Conference on Architectural Support for Programming Languages and Operating Systems},
|
||||||
|
pages = {224--234},
|
||||||
|
numpages = {11},
|
||||||
|
location = {Santa Clara, California, USA},
|
||||||
|
series = {ASPLOS IV}
|
||||||
|
}
|
||||||
|
|
||||||
|
@INPROCEEDINGS{Ding.vDSM.2018,
|
||||||
|
author={Ding, Zhuocheng},
|
||||||
|
booktitle={2018 IEEE 9th International Conference on Software Engineering and Service Science (ICSESS)},
|
||||||
|
title={{vDSM}: Distributed Shared Memory in Virtualized Environments},
|
||||||
|
year={2018},
|
||||||
|
volume={},
|
||||||
|
number={},
|
||||||
|
pages={1112--1115},
|
||||||
|
keywords={Virtual machine monitors;Optimization;Protocols;Virtualization;Operating systems;Stress;Analytical models;component;distributed shared memory;virtualization;low-latency network},
|
||||||
|
doi={10.1109/ICSESS.2018.8663720}
|
||||||
|
}
|
||||||
|
|
||||||
|
@misc{ARM.ARMv8-A.v1.0.2015,
|
||||||
|
title={ARM® Cortex®-A Series Programmer's Guide for ARMv8-A},
|
||||||
|
url={https://developer.arm.com/documentation/den0024/a},
|
||||||
|
journal={Documentation - arm developer},
|
||||||
|
publisher={ARM},
|
||||||
|
author={ARM},
|
||||||
|
year={2015}
|
||||||
|
}
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -506,20 +506,73 @@ is a major part of many studies in DSM systems throughout history
|
||||||
% and really just serves as a means to increase word count
|
% and really just serves as a means to increase word count
|
||||||
|
|
||||||
\subsection{Consistency Model in DSM}
|
\subsection{Consistency Model in DSM}
|
||||||
While distributed shared memory systems with node-local caching naturally
|
Distributed shared memory systems with node-local caching naturally imply the
|
||||||
implies the existence of a corresponding memory model, only a subset of DSM
|
existence of the consistency problem with regards to contending read/write
|
||||||
studies (cites\dots) characterize their own system to one of the few well-known memory
|
accesses. Indeed, a significant subset of DSM studies explicitly characterize
|
||||||
models
|
themselves as adhering to one of the well-known consistency models to better
|
||||||
|
understand system behavior and to provide optimizations in coherence protocols
|
||||||
|
\cites{Amza_etal.Treadmarks.1996}{Hu_Shi_Tang.JIAJIA.1999}
|
||||||
|
{Carter_Bennett_Zwaenepoel.Munin.1991}{Endo_Sato_Taura.MENPS_DSM.2020}
|
||||||
|
{Wang_etal.Concordia.2021}{Cai_etal.Distributed_Memory_RDMA_Cached.2018}
|
||||||
|
{Kim_etal.DeX-upon-Linux.2020}, each adhering to a different consistency model
|
||||||
|
to balance between communication costs and ease of programming.
|
||||||
|
|
||||||
Notably, \dots % about Munin to Spark to access-pattern vs. consistency
|
In particular, we note that DSM studies tend to conform to either release
|
||||||
|
consistency \cites{Amza_etal.Treadmarks.1996}{Endo_Sato_Taura.MENPS_DSM.2020}
|
||||||
|
{Carter_Bennett_Zwaenepoel.Munin.1991} or weaker \cite{Hu_Shi_Tang.JIAJIA.1999},
|
||||||
|
or sequential consistency
|
||||||
|
\cites{Chaiken_Kubiatowicz_Agarwal.LimitLESS-with-Alewife.1991}
|
||||||
|
{Wang_etal.Concordia.2021}{Kim_etal.DeX-upon-Linux.2020}{Ding.vDSM.2018}, with
|
||||||
|
few works \cite{Cai_etal.Distributed_Memory_RDMA_Cached.2018} pertaining to
|
||||||
|
moderately constrained consistency models in-between. While older works, as
|
||||||
|
well as works which emphasize the performance of their proposed DSM systems over
|
||||||
|
existing approaches \cites{Endo_Sato_Taura.MENPS_DSM.2020}
|
||||||
|
{Cai_etal.Distributed_Memory_RDMA_Cached.2018}, favor release consistency due
|
||||||
|
to its performance benefits (e.g., in terms of coherence costs
|
||||||
|
\cite{Endo_Sato_Taura.MENPS_DSM.2020}), newer works tend to adopt stricter
|
||||||
|
consistency models, sometimes due to improved productivity offered to
|
||||||
|
programmers \cite{Kim_etal.DeX-upon-Linux.2020}.
|
||||||
|
|
||||||
|
We especially note the role of balancing productivity and performance in terms
|
||||||
|
of selecting the ideal consistency model for a system. It is common knowledge
|
||||||
|
that weaker consistency models are harder to program with, with the benefit of
|
||||||
|
fewer (implied) coherence communications, resulting in better throughput overall
|
||||||
|
-- provided that the programmer could guarantee correctness, a weaker
|
||||||
|
consistency model allows for less invalidation of node-local cache entries,
|
||||||
|
thereby allowing multiple nodes to compute in parallel on (likely) outdated
|
||||||
|
local copies of data such that the result of the computation remains semantically
|
||||||
|
correct with regard to the program. This point was made explicit in \textit{Munin}
|
||||||
|
\cite{Carter_Bennett_Zwaenepoel.Munin.1991}, which (to reiterate) introduces
|
||||||
|
the concept of consistency ``protocol parameters'' to annotate shared memory
|
||||||
|
access patterns, in order to reduce the amount of coherence communication
|
||||||
|
necessary between nodes computing in distributed shared memory. For example, a
|
||||||
|
DSM object (memory object accounted for by the DSM system) can be annotated
|
||||||
|
with ``delayed operations'' to delay coherence operations beyond any
|
||||||
|
write-access, or shared without ``write'' annotation to disable write-access
|
||||||
|
over shared nodes, thereby disabling all coherence operations with regard to
|
||||||
|
this DSM object. Via programmer annotation of DSM objects, the Munin DSM system
|
||||||
|
explicates the effect of weaker consistency in relation to the amount of
|
||||||
|
synchronization overhead necessary among shared memory nodes. To our knowledge,
|
||||||
|
no more recent DSM work has explored this interaction between
|
||||||
|
consistency and coherence costs on DSM objects, though relatedly
|
||||||
|
\textit{Resilient Distributed Dataset (RDD)} \cite{Zaharia_etal.RDD.2012} also
|
||||||
|
highlights its performance and flexibility benefits in opting for an immutable
|
||||||
|
data representation over disaggregated memory over the network when compared to
|
||||||
|
contemporary DSM approaches.
|
||||||
|
|
||||||
\subsection{Coherence Protocol}
|
\subsection{Coherence Protocol}
|
||||||
|
Coherence protocols, then, become the means over which DSM systems implement
|
||||||
|
their consistency model guarantees.
|
||||||
|
|
||||||
\subsection{DMA and Cache Coherence}
|
\subsection{DMA and Cache Coherence}
|
||||||
|
% Because this thesis specifically studies cache coherence in ARMv8, we
|
||||||
|
|
||||||
\subsection{Cache Coherence in ARMv8}
|
\subsection{Cache Coherence in ARMv8}
|
||||||
|
|
||||||
|
|
||||||
|
% Experiment: ...
|
||||||
|
% Discussion: (1) Linux and DMA and RDMA (2) replacement and other ideas...
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(I need to read more into this. Most of the contribution comes from CPU caches,
|
(I need to read more into this. Most of the contribution comes from CPU caches,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue