diff --git a/tex/misc/background_draft.bib b/tex/misc/background_draft.bib index af0c4aa..2c548de 100644 --- a/tex/misc/background_draft.bib +++ b/tex/misc/background_draft.bib @@ -451,3 +451,63 @@ organization={IEEE} } +@inproceedings {Wang_etal.Concordia.2021, + author = {Qing Wang and Youyou Lu and Erci Xu and Junru Li and Youmin Chen and Jiwu Shu}, + title = {Concordia: Distributed Shared Memory with {In-Network} Cache Coherence}, + booktitle = {19th USENIX Conference on File and Storage Technologies (FAST 21)}, + year = {2021}, + isbn = {978-1-939133-20-5}, + pages = {277--292}, + url = {https://www.usenix.org/conference/fast21/presentation/wang}, + publisher = {USENIX Association}, + month = feb +} + +@INPROCEEDINGS{Kim_etal.DeX-upon-Linux.2020, + author={Kim, Sang-Hoon and Chuang, Ho-Ren and Lyerly, Robert and Olivier, Pierre and Min, Changwoo and Ravindran, Binoy}, + booktitle={2020 IEEE 40th International Conference on Distributed Computing Systems (ICDCS)}, + title={{DeX}: Scaling Applications Beyond Machine Boundaries}, + year={2020}, + volume={}, + number={}, + pages={864--876}, + keywords={Protocols;Instruction sets;Linux;Prototypes;Distributed databases;Programming;Kernel;Thread migration;distributed execution;distributed memory;RDMA}, + doi={10.1109/ICDCS47774.2020.00021} +} + +@inproceedings{Chaiken_Kubiatowicz_Agarwal.LimitLESS-with-Alewife.1991, + author = {Chaiken, David and Kubiatowicz, John and Agarwal, Anant}, + title = {{LimitLESS} directories: A scalable cache coherence scheme}, + year = {1991}, + isbn = {0897913809}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + url = {https://doi.org/10.1145/106972.106995}, + doi = {10.1145/106972.106995}, + booktitle = {Proceedings of the Fourth International Conference on Architectural Support for Programming Languages and Operating Systems}, + pages = {224--234}, + numpages = {11}, + venue = {Santa Clara, California, USA}, + series = {ASPLOS IV} +} + 
+@INPROCEEDINGS{Ding.vDSM.2018, + author={Ding, Zhuocheng}, + booktitle={2018 IEEE 9th International Conference on Software Engineering and Service Science (ICSESS)}, + title={{vDSM}: Distributed Shared Memory in Virtualized Environments}, + year={2018}, + volume={}, + number={}, + pages={1112--1115}, + keywords={Virtual machine monitors;Optimization;Protocols;Virtualization;Operating systems;Stress;Analytical models;component;distributed shared memory;virtualization;low-latency network}, + doi={10.1109/ICSESS.2018.8663720} +} + +@misc{ARM.ARMv8-A.v1.0.2015, + title={{ARM®} {Cortex®-A} Series Programmer's Guide for {ARMv8-A}}, + url={https://developer.arm.com/documentation/den0024/a}, + journal={Documentation - arm developer}, + publisher={ARM}, + author={{ARM}}, + year={2015} +} diff --git a/tex/misc/background_draft.pdf b/tex/misc/background_draft.pdf index 17782b7..4dcc3dc 100644 Binary files a/tex/misc/background_draft.pdf and b/tex/misc/background_draft.pdf differ diff --git a/tex/misc/background_draft.tex b/tex/misc/background_draft.tex index 892e6bd..73e1204 100644 --- a/tex/misc/background_draft.tex +++ b/tex/misc/background_draft.tex @@ -506,20 +506,73 @@ is a major part of many studies in DSM systems throughout history % and really just serves as a means to increase word count \subsection{Consistency Model in DSM} -While distributed shared memory systems with node-local caching naturally -implies the existence of a corresponding memory model, only a subset of DSM -studies (cites\dots) characterize their own system to one of the few well-known memory -models +Distributed shared memory systems with node-local caching naturally imply the +existence of the consistency problem with regard to contending read/write +accesses. 
Indeed, a significant subset of DSM studies explicitly characterize +themselves as adhering to one of the well-known consistency models to better +understand system behavior and to provide optimizations in coherence protocols +\cites{Amza_etal.Treadmarks.1996}{Hu_Shi_Tang.JIAJIA.1999} +{Carter_Bennett_Zwaenepoel.Munin.1991}{Endo_Sato_Taura.MENPS_DSM.2020} +{Wang_etal.Concordia.2021}{Cai_etal.Distributed_Memory_RDMA_Cached.2018} +{Kim_etal.DeX-upon-Linux.2020}, each adhering to a different consistency model +to balance between communication costs and ease of programming. -Notably, \dots % about Munin to Spark to access-pattern vs. consistency +In particular, we note that DSM studies tend to conform to either release +consistency \cites{Amza_etal.Treadmarks.1996}{Endo_Sato_Taura.MENPS_DSM.2020} +{Carter_Bennett_Zwaenepoel.Munin.1991} or weaker \cite{Hu_Shi_Tang.JIAJIA.1999}, +or sequential consistency +\cites{Chaiken_Kubiatowicz_Agarwal.LimitLESS-with-Alewife.1991} +{Wang_etal.Concordia.2021}{Kim_etal.DeX-upon-Linux.2020}{Ding.vDSM.2018}, with +few works \cite{Cai_etal.Distributed_Memory_RDMA_Cached.2018} pertaining to +moderately constrained consistency models in-between. While older works, as +well as works which center performance of their proposed DSM systems over +existing approaches \cites{Endo_Sato_Taura.MENPS_DSM.2020} +{Cai_etal.Distributed_Memory_RDMA_Cached.2018}, favor release consistency due +to its performance benefits (e.g., in terms of coherence costs +\cite{Endo_Sato_Taura.MENPS_DSM.2020}), newer works tend to adopt stricter +consistency models, sometimes due to improved productivity offered to +programmers \cite{Kim_etal.DeX-upon-Linux.2020}. + +We especially note the role of balancing productivity and performance in terms +of selecting the ideal consistency model for a system. 
It is common knowledge +that weaker consistency models are harder to program with, with the benefit of +fewer (implied) coherence communications, resulting in better throughput overall +-- provided that the programmer can guarantee correctness, a weaker +consistency model allows for less invalidation of node-local cache entries, +thereby allowing multiple nodes to compute in parallel on (likely) outdated +local copies of data such that the result of the computation remains semantically +correct with regard to the program. This point was made explicit in \textit{Munin} +\cite{Carter_Bennett_Zwaenepoel.Munin.1991}, which (to reiterate) introduces +the concept of consistency ``protocol parameters'' to annotate shared memory +access patterns, in order to reduce the amount of coherence communication +necessary between nodes computing in distributed shared memory. For example, a +DSM object (memory object accounted for by the DSM system) can be annotated +with ``delayed operations'' to delay coherence operations beyond any +write-access, or shared without ``write'' annotation to disable write-access +over shared nodes, thereby disabling all coherence operations with regard to +this DSM object. Via programmer annotation of DSM objects, the Munin DSM system +explicates the effect of weaker consistency in relation to the amount of +synchronization overhead necessary among shared memory nodes. To our knowledge, +no more recent DSM work has explored this interaction between +consistency and coherence costs on DSM objects, though relatedly +\textit{Resilient Distributed Dataset (RDD)} \cite{Zaharia_etal.RDD.2012} also +highlights its performance and flexibility benefits in opting for an immutable +data representation over disaggregated memory over the network when compared to +contemporary DSM approaches. \subsection{Coherence Protocol} +Coherence protocols, then, become the means by which DSM systems implement +their consistency model guarantees. 
\subsection{DMA and Cache Coherence} +% Because this thesis specifically studies cache coherence in ARMv8, we \subsection{Cache Coherence in ARMv8} +% Experiment: ... +% Discussion: (1) Linux and DMA and RDMA (2) replacement and other ideas... + (I need to read more into this. Most of the contribution comes from CPU caches,