Added comment
This commit is contained in:
parent
322d7f9ea5
commit
8e0e82f67a
2 changed files with 8 additions and 1 deletion
|
|
@ -24,7 +24,7 @@ class PerspectiveEstimator(nn.Module):
|
||||||
Perspective estimator submodule of the wider reverse-perspective network.
|
Perspective estimator submodule of the wider reverse-perspective network.
|
||||||
|
|
||||||
Input: Pre-processed, uniformly-sized image data
|
Input: Pre-processed, uniformly-sized image data
|
||||||
Output: Perspective factor
|
Output: Perspective factor :math:`\\in \\mathbb{R}`
|
||||||
|
|
||||||
**Note**
|
**Note**
|
||||||
--------
|
--------
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,13 @@ from timm.models.registry import register_model
|
||||||
from timm.models.layers import trunc_normal_
|
from timm.models.layers import trunc_normal_
|
||||||
|
|
||||||
class VisionTransformer_GAP(VisionTransformer):
|
class VisionTransformer_GAP(VisionTransformer):
|
||||||
|
# [XXX] It might be a bad idea to use vision transformer for small datasets.
|
||||||
|
# ref: ViT paper -- "transformers lack some of the inductive biases inherent
|
||||||
|
# to CNNs, such as translation equivariance and locality".
|
||||||
|
# convolution is equivariant in translation (linear and
|
||||||
|
# shift-equivariant).
|
||||||
|
# tl;dr: CNNs might perform better for small datasets. Not sure about performance.
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
num_patches = self.patch_embed.num_patches
|
num_patches = self.patch_embed.num_patches
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue