This commit is contained in:
Zhengyi Chen 2024-03-01 22:10:34 +00:00
parent dbc01147fd
commit 816a29d462
3 changed files with 118 additions and 5 deletions

View file

@ -451,3 +451,63 @@
organization={IEEE}
}
% Concordia (USENIX FAST 21): distributed shared memory with in-network cache coherence.
@inproceedings{Wang_etal.Concordia.2021,
  author    = {Qing Wang and Youyou Lu and Erci Xu and Junru Li and Youmin Chen and Jiwu Shu},
  title     = {Concordia: Distributed Shared Memory with {In-Network} Cache Coherence},
  booktitle = {19th USENIX Conference on File and Storage Technologies (FAST 21)},
  publisher = {USENIX Association},
  year      = {2021},
  month     = feb,
  pages     = {277--292},
  isbn      = {978-1-939133-20-5},
  url       = {https://www.usenix.org/conference/fast21/presentation/wang}
}
% DeX (IEEE ICDCS 2020): scaling applications beyond machine boundaries.
% Empty volume/number fields from the IEEE export are dropped; the page range uses an en-dash (--).
@inproceedings{Kim_etal.DeX-upon-Linux.2020,
  author    = {Kim, Sang-Hoon and Chuang, Ho-Ren and Lyerly, Robert and Olivier, Pierre and Min, Changwoo and Ravindran, Binoy},
  title     = {{DeX}: Scaling Applications Beyond Machine Boundaries},
  booktitle = {2020 IEEE 40th International Conference on Distributed Computing Systems (ICDCS)},
  year      = {2020},
  pages     = {864--876},
  keywords  = {Protocols;Instruction sets;Linux;Prototypes;Distributed databases;Programming;Kernel;Thread migration;distributed execution;distributed memory;RDMA},
  doi       = {10.1109/ICDCS47774.2020.00021}
}
% LimitLESS directories (ASPLOS IV, 1991): a scalable cache coherence scheme.
% The page range is restored to 224--234 (the export lost the dash; numpages = 11 agrees with it).
% NOTE(review): both address and location are present; under biblatex, address is an
% alias of location -- confirm which value should be printed.
@inproceedings{Chaiken_Kubiatowicz_Agarwal.LimitLESS-with-Alewife.1991,
  author    = {Chaiken, David and Kubiatowicz, John and Agarwal, Anant},
  title     = {{LimitLESS} directories: A scalable cache coherence scheme},
  year      = {1991},
  isbn      = {0897913809},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/106972.106995},
  doi       = {10.1145/106972.106995},
  booktitle = {Proceedings of the Fourth International Conference on Architectural Support for Programming Languages and Operating Systems},
  pages     = {224--234},
  numpages  = {11},
  location  = {Santa Clara, California, USA},
  series    = {ASPLOS IV}
}
% vDSM (IEEE ICSESS 2018): distributed shared memory in virtualized environments.
% Empty volume/number fields are dropped, the page range uses an en-dash (--), and the
% line-break hyphen in the keyword "virtualization" is repaired.
@inproceedings{Ding.vDSM.2018,
  author    = {Ding, Zhuocheng},
  title     = {{vDSM}: Distributed Shared Memory in Virtualized Environments},
  booktitle = {2018 IEEE 9th International Conference on Software Engineering and Service Science (ICSESS)},
  year      = {2018},
  pages     = {1112--1115},
  keywords  = {Virtual machine monitors;Optimization;Protocols;Virtualization;Operating systems;Stress;Analytical models;component;distributed shared memory;virtualization;low-latency network},
  doi       = {10.1109/ICSESS.2018.8663720}
}
% ARM Cortex-A Series Programmer's Guide for ARMv8-A (vendor documentation, 2015).
% The corporate author is double-braced so it is treated as one indivisible name;
% acronyms and product names in the title are protected from style recasing.
% NOTE(review): journal is not a standard field for misc entries and is likely ignored
% by the style -- kept unchanged; confirm whether it should be removed or moved.
@misc{ARM.ARMv8-A.v1.0.2015,
  author    = {{ARM}},
  title     = {{ARM}® {Cortex}®-A Series Programmer's Guide for {ARMv8-A}},
  year      = {2015},
  publisher = {ARM},
  journal   = {Documentation - arm developer},
  url       = {https://developer.arm.com/documentation/den0024/a}
}

Binary file not shown.

View file

@ -506,20 +506,73 @@ is a major part of many studies in DSM systems throughout history
% and really just serves as a means to increase word count
\subsection{Consistency Model in DSM}
While distributed shared memory systems with node-local caching naturally
implies the existence of a corresponding memory model, only a subset of DSM
studies (cites\dots) characterize their own system to one of the few well-known memory
models
Distributed shared memory systems with node-local caching naturally imply the
existence of the consistency problem with regard to contending read/write
accesses. Indeed, a significant subset of DSM studies explicitly characterize
themselves as adhering to one of the well-known consistency models to better
understand system behavior and to provide optimizations in coherence protocols
\cites{Amza_etal.Treadmarks.1996}{Hu_Shi_Tang.JIAJIA.1999}
{Carter_Bennett_Zwaenepoel.Munin.1991}{Endo_Sato_Taura.MENPS_DSM.2020}
{Wang_etal.Concordia.2021}{Cai_etal.Distributed_Memory_RDMA_Cached.2018}
{Kim_etal.DeX-upon-Linux.2020}, each adhering to a different consistency model
to balance between communication costs and ease of programming.
Notably, \dots % about Munin to Spark to access-pattern vs. consistency
In particular, we note that DSM studies tend to conform to either release
consistency \cites{Amza_etal.Treadmarks.1996}{Endo_Sato_Taura.MENPS_DSM.2020}
{Carter_Bennett_Zwaenepoel.Munin.1991} or weaker \cite{Hu_Shi_Tang.JIAJIA.1999},
or sequential consistency
\cites{Chaiken_Kubiatowicz_Agarwal.LimitLESS-with-Alewife.1991}
{Wang_etal.Concordia.2021}{Kim_etal.DeX-upon-Linux.2020}{Ding.vDSM.2018}, with
few works \cite{Cai_etal.Distributed_Memory_RDMA_Cached.2018} pertaining to
moderately constrained consistency models in between. While older works, as
well as works that focus on the performance of their proposed DSM systems
relative to existing approaches \cites{Endo_Sato_Taura.MENPS_DSM.2020}
{Cai_etal.Distributed_Memory_RDMA_Cached.2018}, favor release consistency due
to its performance benefits (e.g., in terms of coherence costs
\cite{Endo_Sato_Taura.MENPS_DSM.2020}), newer works tend to adopt stricter
consistency models, sometimes due to improved productivity offered to
programmers \cite{Kim_etal.DeX-upon-Linux.2020}.
We especially note the role of balancing productivity and performance in terms
of selecting the ideal consistency model for a system. It is common knowledge
that weaker consistency models are harder to program with, with the benefit of
fewer (implied) coherence communications, resulting in better overall
throughput -- provided that the programmer can guarantee correctness, a weaker
consistency model allows for fewer invalidations of node-local cache entries,
thereby allowing multiple nodes to compute in parallel on (likely) outdated
local copies of data such that the result of the computation remains
semantically correct with regard to the program. This point was made explicit
in \textit{Munin}
\cite{Carter_Bennett_Zwaenepoel.Munin.1991}, which (to reiterate) introduces
the concept of consistency ``protocol parameters'' to annotate shared memory
access patterns, in order to reduce the amount of coherence communication
necessary between nodes computing in distributed shared memory. For example, a
DSM object (memory object accounted for by the DSM system) can be annotated
with ``delayed operations'' to delay coherence operations beyond any
write-access, or shared without the ``write'' annotation to disable write-access
over shared nodes, thereby disabling all coherence operations with regard to
this DSM object. Via programmer annotation of DSM objects, the Munin DSM system
explicates the effect of weaker consistency in relation to the amount of
synchronization overhead necessary among shared memory nodes. To our knowledge,
no more recent DSM work has explored this interaction between
consistency and coherence costs on DSM objects, though relatedly
\textit{Resilient Distributed Dataset (RDD)} \cite{Zaharia_etal.RDD.2012} also
highlights its performance and flexibility benefits in opting for an immutable
data representation over disaggregated memory over network when compared to
contemporary DSM approaches.
\subsection{Coherence Protocol}
Coherence protocols, then, become the means by which DSM systems implement
their consistency model guarantees.
\subsection{DMA and Cache Coherence}
% Because this thesis specifically studies cache coherence in ARMv8, we
\subsection{Cache Coherence in ARMv8}
% Experiment: ...
% Discussion: (1) Linux and DMA and RDMA (2) replacement and other ideas...
(I need to read more into this. Most of the contribution comes from CPU caches,