Crude impl of glue layer, not sure anything works

Zhengyi Chen 2024-03-02 23:16:47 +00:00
parent b69727b74f
commit f059924b75
3 changed files with 66 additions and 8 deletions

.gitignore

@@ -1,2 +1,3 @@
 baseline-experiments/
 synchronous/
+npydata/


@@ -10,6 +10,7 @@ import h5py
 CWD = os.getcwd()
 
 
 def pre_dataset_sh():
+    dataset_name = "ShanghaiTech"
     root = CWD + "/synchronous/dataset/" + dataset_name + "/"
     part_A_train = os.path.join(root, "part_A_final/train_data", "images")
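For reference, a minimal sketch (not part of the commit) of what these paths resolve to; the CWD value is an example:

    import os

    # Hypothetical check of the path construction above.
    CWD = "/home/user/project"
    dataset_name = "ShanghaiTech"
    root = CWD + "/synchronous/dataset/" + dataset_name + "/"
    part_A_train = os.path.join(root, "part_A_final/train_data", "images")
    print(part_A_train)
    # -> /home/user/project/synchronous/dataset/ShanghaiTech/part_A_final/train_data/images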
@@ -53,8 +54,7 @@ def pre_dataset_sh():
     gt_data[:, 0] = gt_data[:, 0] * rate_x
     gt_data[:, 1] = gt_data[:, 1] * rate_y
 
-    # Compute gt_count from density map (gt_data)
-    # XXX: what does it do exactly?
+    # Compute 0/1 counts from density map
     kpoint = np.zeros((img_data.shape[0], img_data.shape[1]))
     for i in range(len(gt_data)):
         if ( int(gt_data[i][1]) < img_data.shape[0]
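For context, a minimal standalone sketch (not part of the commit) of what this hunk computes, assuming gt_data is an (N, 2) array of (x, y) head annotations: kpoint is a binary map with a 1 at every annotated location that falls inside the image.

    import numpy as np

    # Hypothetical standalone version of the kpoint construction above.
    def make_kpoint(img_shape, gt_data):
        kpoint = np.zeros((img_shape[0], img_shape[1]))
        for x, y in gt_data:
            # Keep only annotations that land inside the image bounds.
            if 0 <= int(y) < img_shape[0] and 0 <= int(x) < img_shape[1]:
                kpoint[int(y), int(x)] = 1
        return kpoint

    # np.sum(kpoint) then recovers the (in-bounds) ground-truth head count.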
@@ -65,15 +65,14 @@ def pre_dataset_sh():
     root_path = img_path.split("IMG_")[0].replace("images", "images_crop")
     # Likewise, we do not crop to patched sequences here...
-    # Skip directly to saving fixed-size data & gt_count.
+    # Skip directly to saving fixed-size data & kpoint.
     img_path = img_path.replace("images", "images_crop")
     cv2.imwrite(img_path, img_data)
-    gt_count = np.sum(kpoint)
     with h5py.File(
         img_path.replace('.jpg', '.h5').replace('images', 'gt_density_map'),
-        'w'
+        mode='w'
     ) as hf:
-        hf["gt_count"] = gt_count
+        hf["kpoint"] = kpoint
 
 
 def make_npydata():
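A hedged sketch of how a loader might read the map back (not part of this commit; the example path mirrors the layout produced by the hunk above, and the summed gt_count is what the old code stored directly):

    import h5py
    import numpy as np

    # Hypothetical reader for the files written above; the path is an example.
    img_path = "synchronous/dataset/ShanghaiTech/part_A_final/train_data/images_crop/IMG_1.jpg"
    gt_path = img_path.replace(".jpg", ".h5").replace("images", "gt_density_map")
    with h5py.File(gt_path, mode="r") as hf:
        kpoint = np.asarray(hf["kpoint"])
    gt_count = int(np.sum(kpoint))  # previously saved as "gt_count"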


@@ -1,5 +1,7 @@
-# Glue layer for transforming whole pictures into 384x384 sequence for encoder
-# input
+# Glue layer for transforming whole pictures into 384x384 sequence for encoder input
+from dataclasses import dataclass
+from itertools import product
+
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -7,3 +9,59 @@ import torchvision
 from torchvision import transforms
 import numpy as np
+from torchvision.transforms import v2
+
+
+# The v2 way, apparently. [1]
+class SquareCropTransformLayer(nn.Module):
+    def __init__(self, crop_size: int):
+        super(SquareCropTransformLayer, self).__init__()
+        self.crop_size = crop_size
+
+    def forward(
+        self,
+        x_: torch.Tensor,
+        kpoints_: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        # Here, x_ & kpoints_ have already had the affine transform applied.
+        assert len(x_.shape) == 4
+        channels, height, width = x_.shape[1:]
+        h_split_count = height // self.crop_size
+        w_split_count = width // self.crop_size
+        # Perform identical splits -- note kpoints_ does not have a C dimension!
+        # Performance should be acceptable, but this looks dumb as hell --
+        # is there a better way?
+        ret_x = torch.cat(
+            torch.tensor_split(
+                torch.cat(
+                    torch.tensor_split(
+                        x_,
+                        h_split_count,
+                        dim=2
+                    )
+                ),
+                w_split_count,
+                dim=3
+            )
+        )
+        split_t = torch.cat(
+            torch.tensor_split(
+                torch.cat(
+                    torch.tensor_split(
+                        kpoints_,
+                        h_split_count,
+                        dim=1
+                    )
+                ),
+                w_split_count,
+                dim=2
+            )
+        )
+        # Sum each patch's 0/1 head map into a per-patch gt_count.
+        ret_gt_count = torch.sum(split_t.view(split_t.size(0), -1), dim=1)
+        return ret_x, ret_gt_count
"""
References:
[1] https://pytorch.org/vision/stable/auto_examples/transforms/plot_custom_transforms.html
"""