lemme cook alright?

2024-02-25 21:03:32 +00:00 · 2024-02-25 21:03:32 +00:00 · 62df7464e4
commit 62df7464e4
parent b6d2460060
9 changed files with 504 additions and 3 deletions
--- a/model/reverse_perspective.py
+++ b/model/reverse_perspective.py
@ -90,7 +90,7 @@ class PerspectiveEstimator(nn.Module):
                stride=conv_stride,
                dilation=conv_dilation,
            ), # (N, 1, H, W)
-            'revpers_avg_pooling0': nn.AdaptiveAvgPool2d(
+            'revpers_avg_pool0': nn.AdaptiveAvgPool2d(
                output_size=(pool_capacity, 1)
            ), # (N, 1, K, 1)
            # [?] Do we need to explicitly translate to (N, K) here?
@ -108,7 +108,7 @@ class PerspectiveEstimator(nn.Module):
            out = layer.forward(out)

        # Normalize in (0, 1]
-        F.relu_(out) # in-place
+        F.relu(out, inplace=True)
        out = torch.exp(-out) + self.epsilon

        return out
@ -116,4 +116,47 @@ class PerspectiveEstimator(nn.Module):
    # def unsupervised_loss(predictions, targets):

 # [TODO] We need a modified loss -- one that takes advantage of attention instead
-# of feature map. I feel like they should work likewise but who knows
+# of feature map. I feel like they should work likewise but who knows
+# [XXX] No, forget the above: we pre-train rev-perspective as described in the 2020 paper,
+# i.e., by using CSRNet.
+# Not sure from which part the feature map is derived. Maybe after the front-end?
+# In any case we can always just use the CSRNet output (inferred density map) as the
+# feature map, from which we compute, for each image:
+# criterion = Variance([output.sum(axis=W) * effective_pixel_per_row])
+# In other cases we sum over channels, i.e., over each feature map (each filter output).
+# Not sure what "channel" means in this case...
+def warped_output_loss(csrnet_pred):
+    """Placeholder for a loss computed from a CSRNet prediction (unfinished stub).
+
+    At present this only unpacks the shape of ``csrnet_pred`` and implicitly
+    returns ``None``; the criterion itself is not implemented yet.
+
+    Args:
+        csrnet_pred: Tensor whose shape unpacks into exactly three sizes,
+            named (N, H, W) here. Presumably the density map inferred by
+            CSRNet -- TODO confirm against the caller.
+
+    Raises:
+        ValueError: If ``csrnet_pred`` does not have exactly three dimensions
+            (raised by the tuple unpacking).
+    """
+    # `shape` is an attribute (a torch.Size), not a method; calling it as
+    # `shape()` raises "TypeError: 'torch.Size' object is not callable".
+    N, H, W = csrnet_pred.shape
+
+
+def transform_coordinates(
+        img: torch.Tensor,      # (C, W, H)
+        factor: float,
+        in_place: bool = True
+):
+    """Rescale ``img`` in place ahead of an elliptical transformation (unfinished).
+
+    Two min-max rescaling passes are applied to ``img`` using in-place tensor
+    ops: the first takes min/max along dim 1, the second along dim 2. The
+    elliptical transformation itself is not implemented yet, and the function
+    falls through and returns ``None``.
+
+    Args:
+        img: Tensor to rescale; it is modified in place. Annotated by the
+            author as (C, W, H). NOTE(review): the 3-element range tensors
+            below broadcast against the trailing dim, so that dim presumably
+            has to be of size 3 -- confirm the intended layout.
+        factor: Not used by the current body.
+        in_place: Not used by the current body; ``img`` is mutated whatever
+            its value. NOTE(review): confirm whether ``False`` should operate
+            on a copy.
+    """
+    dev_of_img = img.device
+
+    # Normalize X coords to [0, pi]
+    # Target lower/upper bounds, one entry per element of a 3-element trailing
+    # dim; only index 1 has a non-zero span (pi), so indices 0 and 2 are
+    # multiplied by zero below.
+    min_x = torch.Tensor([0., 0., 0.]).to(dev_of_img)
+    max_x = torch.Tensor([0., np.pi, 0.]).to(dev_of_img)
+    # torch.min/torch.max with `dim` return (values, indices); [0] keeps the values.
+    min_xdim = torch.min(img, dim=1, keepdim=True)[0]
+    max_xdim = torch.max(img, dim=1, keepdim=True)[0]
+    # In-place min-max rescale: shift by the min, divide by the observed range,
+    # then stretch to the target range.
+    # NOTE(review): where max equals min along dim 1 this divides by zero.
+    (img.sub_(min_xdim)
+        .div_(max_xdim - min_xdim)
+        .mul_(max_x - min_x)
+        .add_(min_x))
+
+    # Normalize Y coords to [0, 1]
+    # Same scheme as above, with min/max taken along dim 2 of the already
+    # rescaled tensor and a target span of 1 at index 1.
+    min_y = torch.Tensor([0., 0., 0.]).to(dev_of_img)
+    max_y = torch.Tensor([0., 1., 0.]).to(dev_of_img)
+    min_ydim = torch.min(img, dim=2, keepdim=True)[0]
+    max_ydim = torch.max(img, dim=2, keepdim=True)[0]
+    # NOTE(review): same division-by-zero caveat as the first pass.
+    (img.sub_(min_ydim)
+        .div_(max_ydim - min_ydim)
+        .mul_(max_y - min_y)
+        .add_(min_y))
+
+    # Do elliptical transformation
+    # Detached copy of the rescaled tensor; nothing reads it yet.
+    tmp = img.clone().detach()
+    
+    # [TODO] The transformation itself is still missing; nothing is returned.
+    pass