Added comment

2024-02-05 14:39:00 +00:00 · 2024-02-05 14:39:00 +00:00 · 8e0e82f67a
commit 8e0e82f67a
parent 322d7f9ea5
2 changed files with 8 additions and 1 deletions
--- a/model/reverse_perspective.py
+++ b/model/reverse_perspective.py
@@ -24,7 +24,7 @@ class PerspectiveEstimator(nn.Module):
    Perspective estimator submodule of the wider reverse-perspective network.

    Input: Pre-processed, uniformly-sized image data
-    Output: Perspective factor
+    Output: Perspective factor :math:`\\in \\mathbb{R}`

    **Note**
    --------
--- a/model/transcrowd_gap.py
+++ b/model/transcrowd_gap.py
@@ -21,6 +21,13 @@ from timm.models.registry import register_model
 from timm.models.layers import trunc_normal_

 class VisionTransformer_GAP(VisionTransformer):
+    # [XXX] It might be a bad idea to use vision transformer for small datasets.
+    # ref: ViT paper -- "transformers lack some of the inductive biases inherent
+    # to CNNs, such as translation equivariance and locality".
+    # Convolution, by contrast, is translation-equivariant (it is a linear,
+    # shift-equivariant operation).
+    # tl;dr: CNNs might perform better for small datasets. Not sure about performance.
+
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        num_patches = self.patch_embed.num_patches