Added comment
This commit is contained in:
parent
322d7f9ea5
commit
8e0e82f67a
2 changed files with 8 additions and 1 deletions
|
|
@ -24,7 +24,7 @@ class PerspectiveEstimator(nn.Module):
|
|||
Perspective estimator submodule of the wider reverse-perspective network.
|
||||
|
||||
Input: Pre-processed, uniformly-sized image data
|
||||
Output: Perspective factor
|
||||
Output: Perspective factor :math:`\\in \\mathbb{R}`
|
||||
|
||||
**Note**
|
||||
--------
|
||||
|
|
|
|||
|
|
@ -21,6 +21,13 @@ from timm.models.registry import register_model
|
|||
from timm.models.layers import trunc_normal_
|
||||
|
||||
class VisionTransformer_GAP(VisionTransformer):
|
||||
# [XXX] It might be a bad idea to use vision transformer for small datasets.
|
||||
# ref: ViT paper -- "transformers lack some of the inductive biases inherent
|
||||
# to CNNs, such as translation equivariance and locality".
|
||||
# Convolution, by contrast, is equivariant to translation (it is a linear,
|
||||
# shift-equivariant operation).
|
||||
# tl;dr: CNNs might perform better for small datasets. Not sure about performance.
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
num_patches = self.patch_embed.num_patches
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue