...

2024-03-01 22:10:34 +00:00 · 2024-03-01 22:10:34 +00:00 · 816a29d462
commit 816a29d462
parent dbc01147fd
3 changed files with 118 additions and 5 deletions
--- a/tex/misc/background_draft.bib
+++ b/tex/misc/background_draft.bib
@ -451,3 +451,63 @@
  organization={IEEE}
 }

+@inproceedings{Wang_etal.Concordia.2021,
+  author    = {Wang, Qing and Lu, Youyou and Xu, Erci and Li, Junru and Chen, Youmin and Shu, Jiwu},
+  title     = {Concordia: Distributed Shared Memory with {In-Network} Cache Coherence},
+  booktitle = {19th USENIX Conference on File and Storage Technologies (FAST 21)},
+  publisher = {USENIX Association},
+  year      = {2021},
+  month     = feb,
+  pages     = {277--292},
+  isbn      = {978-1-939133-20-5},
+  url       = {https://www.usenix.org/conference/fast21/presentation/wang}
+}
+
+@INPROCEEDINGS{Kim_etal.DeX-upon-Linux.2020,
+  author={Kim, Sang-Hoon and Chuang, Ho-Ren and Lyerly, Robert and Olivier, Pierre and Min, Changwoo and Ravindran, Binoy},
+  booktitle={2020 IEEE 40th International Conference on Distributed Computing Systems (ICDCS)},
+  title={{DeX}: Scaling Applications Beyond Machine Boundaries},
+  year={2020},
+  volume={},
+  number={},
+  pages={864--876},
+  keywords={Protocols;Instruction sets;Linux;Prototypes;Distributed databases;Programming;Kernel;Thread migration;distributed execution;distributed memory;RDMA},
+  doi={10.1109/ICDCS47774.2020.00021}
+}
+
+@inproceedings{Chaiken_Kubiatowicz_Agarwal.LimitLESS-with-Alewife.1991,
+  author = {Chaiken, David and Kubiatowicz, John and Agarwal, Anant},
+  title = {{LimitLESS} directories: A scalable cache coherence scheme},
+  year = {1991},
+  isbn = {0897913809},
+  publisher = {Association for Computing Machinery},
+  address = {New York, NY, USA},
+  url = {https://doi.org/10.1145/106972.106995},
+  doi = {10.1145/106972.106995},
+  booktitle = {Proceedings of the Fourth International Conference on Architectural Support for Programming Languages and Operating Systems},
+  pages = {224--234},
+  numpages = {11},
+  venue = {Santa Clara, California, USA},
+  series = {ASPLOS IV}
+}
+
+@INPROCEEDINGS{Ding.vDSM.2018,
+  author={Ding, Zhuocheng},
+  booktitle={2018 IEEE 9th International Conference on Software Engineering and Service Science (ICSESS)},
+  title={{vDSM}: Distributed Shared Memory in Virtualized Environments},
+  year={2018},
+  volume={},
+  number={},
+  pages={1112--1115},
+  keywords={Virtual machine monitors;Optimization;Protocols;Virtualization;Operating systems;Stress;Analytical models;component;distributed shared memory;virtualization;low-latency network},
+  doi={10.1109/ICSESS.2018.8663720}
+}
+
+@misc{ARM.ARMv8-A.v1.0.2015,
+  title={{ARM}® {Cortex®-A} Series Programmer's Guide for {ARMv8-A}},
+  url={https://developer.arm.com/documentation/den0024/a},
+  journal={Documentation - arm developer},
+  publisher={ARM},
+  author={{ARM}},
+  year={2015}
+}
--- a/tex/misc/background_draft.pdf
+++ b/tex/misc/background_draft.pdf
--- a/tex/misc/background_draft.tex
+++ b/tex/misc/background_draft.tex
@ -506,20 +506,73 @@ is a major part of many studies in DSM systems throughout history
 % and really just serves as a means to increase word count

 \subsection{Consistency Model in DSM}
-While distributed shared memory systems with node-local caching naturally
-implies the existence of a corresponding memory model, only a subset of DSM
-studies (cites\dots) characterize their own system to one of the few well-known memory
-models
+Distributed shared memory systems with node-local caching naturally imply the
+existence of the consistency problem with regard to contending read/write
+accesses. Indeed, a significant subset of DSM studies explicitly characterize
+themselves as adhering to one of the well-known consistency models to better
+understand system behavior and to provide optimizations in coherence protocols
+\cites{Amza_etal.Treadmarks.1996}{Hu_Shi_Tang.JIAJIA.1999}
+{Carter_Bennett_Zwaenepoel.Munin.1991}{Endo_Sato_Taura.MENPS_DSM.2020}
+{Wang_etal.Concordia.2021}{Cai_etal.Distributed_Memory_RDMA_Cached.2018}
+{Kim_etal.DeX-upon-Linux.2020}, each adhering to a different consistency model
+to balance between communication costs and ease of programming.

-Notably, \dots % about Munin to Spark to access-pattern vs. consistency
+In particular, we note that DSM studies tend to conform to either release
+consistency \cites{Amza_etal.Treadmarks.1996}{Endo_Sato_Taura.MENPS_DSM.2020}
+{Carter_Bennett_Zwaenepoel.Munin.1991} or weaker \cite{Hu_Shi_Tang.JIAJIA.1999},
+or sequential consistency
+\cites{Chaiken_Kubiatowicz_Agarwal.LimitLESS-with-Alewife.1991}
+{Wang_etal.Concordia.2021}{Kim_etal.DeX-upon-Linux.2020}{Ding.vDSM.2018}, with
+few works \cite{Cai_etal.Distributed_Memory_RDMA_Cached.2018} pertaining to
+moderately constrained consistency models in-between. While older works, as
+well as works which center performance of their proposed DSM systems over
+existing approaches \cites{Endo_Sato_Taura.MENPS_DSM.2020}
+{Cai_etal.Distributed_Memory_RDMA_Cached.2018}, favor release consistency due
+to its performance benefits (e.g., in terms of coherence costs
+\cite{Endo_Sato_Taura.MENPS_DSM.2020}), newer works tend to adopt stricter
+consistency models, sometimes due to improved productivity offered to
+programmers \cite{Kim_etal.DeX-upon-Linux.2020}.
+
+We especially note the role of balancing productivity and performance in terms
+of selecting the ideal consistency model for a system. It is common knowledge
+that weaker consistency models are harder to program with, but offer the benefit
+of less (implied) coherence communication, resulting in better throughput overall
+-- provided that the programmer can guarantee correctness, a weaker
+consistency model allows for less invalidation of node-local cache entries,
+thereby allowing multiple nodes to compute in parallel on (likely) outdated
+local copies of data such that the result of the computation remains semantically
+correct with regard to the program. This point was made explicit in \textit{Munin}
+\cite{Carter_Bennett_Zwaenepoel.Munin.1991}, which (to reiterate) introduces
+the concept of consistency ``protocol parameters'' to annotate shared memory
+access patterns, in order to reduce the amount of coherence communication
+necessary between nodes computing in distributed shared memory. For example, a
+DSM object (memory object accounted for by the DSM system) can be annotated
+with ``delayed operations'' to delay coherence operations beyond any
+write-access, or shared without a ``write'' annotation to disable write-access
+over shared nodes, thereby disabling all coherence operations with regard to
+this DSM object. Via programmer annotation of DSM objects, the Munin DSM system
+explicates the effect of weaker consistency in relation to the amount of
+synchronization overhead necessary among shared memory nodes. To our knowledge,
+no more recent DSM work has explored this interaction between
+consistency and coherence costs on DSM objects, though relatedly
+\textit{Resilient Distributed Dataset (RDD)} \cite{Zaharia_etal.RDD.2012} also
+highlights its performance and flexibility benefits in opting for an immutable
+data representation over disaggregated memory over network when compared to
+contemporary DSM approaches.

 \subsection{Coherence Protocol}
+Coherence protocols, then, become the means by which DSM systems implement
+their consistency model guarantees.

 \subsection{DMA and Cache Coherence}
+% Because this thesis specifically studies cache coherence in ARMv8, we

 \subsection{Cache Coherence in ARMv8}


+% Experiment: ...
+% Discussion: (1) Linux and DMA and RDMA (2) replacement and other ideas...
+


 (I need to read more into this. Most of the contribution comes from CPU caches,