Crude impl of glue layer, not sure anything works

Zhengyi Chen 2024-03-02 23:16:47 +00:00
parent b69727b74f
commit f059924b75
3 changed files with 66 additions and 8 deletions

.gitignore

@@ -1,2 +1,3 @@
 baseline-experiments/
 synchronous/
+npydata/


@@ -10,6 +10,7 @@ import h5py
 CWD = os.getcwd()
 
 
 def pre_dataset_sh():
+    dataset_name = "ShanghaiTech"
     root = CWD + "/synchronous/dataset/" + dataset_name + "/"
     part_A_train = os.path.join(root, "part_A_final/train_data", "images")
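For reference, a minimal sketch (not part of the commit) of what these paths resolve to; the CWD value is an example:

    import os

    # Hypothetical check of the path construction above.
    CWD = "/home/user/project"
    dataset_name = "ShanghaiTech"
    root = CWD + "/synchronous/dataset/" + dataset_name + "/"
    part_A_train = os.path.join(root, "part_A_final/train_data", "images")
    print(part_A_train)
    # -> /home/user/project/synchronous/dataset/ShanghaiTech/part_A_final/train_data/images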
@@ -53,8 +54,7 @@ def pre_dataset_sh():
     gt_data[:, 0] = gt_data[:, 0] * rate_x
     gt_data[:, 1] = gt_data[:, 1] * rate_y
 
-    # Compute gt_count from density map (gt_data)
-    # XXX: what does it do exactly?
+    # Compute 0/1 counts from density map
     kpoint = np.zeros((img_data.shape[0], img_data.shape[1]))
     for i in range(len(gt_data)):
         if ( int(gt_data[i][1]) < img_data.shape[0]
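For context, a minimal standalone sketch (not part of the commit) of what this hunk computes, assuming gt_data is an (N, 2) array of (x, y) head annotations: kpoint is a binary map with a 1 at every annotated location that falls inside the image.

    import numpy as np

    # Hypothetical standalone version of the kpoint construction above.
    def make_kpoint(img_shape, gt_data):
        kpoint = np.zeros((img_shape[0], img_shape[1]))
        for x, y in gt_data:
            # Keep only annotations that land inside the image bounds.
            if 0 <= int(y) < img_shape[0] and 0 <= int(x) < img_shape[1]:
                kpoint[int(y), int(x)] = 1
        return kpoint

    # np.sum(kpoint) then recovers the (in-bounds) ground-truth head count.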
@@ -65,15 +65,14 @@ def pre_dataset_sh():
     root_path = img_path.split("IMG_")[0].replace("images", "images_crop")
     # Likewise, we do not crop to patched sequences here...
-    # Skip directly to saving fixed-size data & gt_count.
+    # Skip directly to saving fixed-size data & kpoint.
     img_path = img_path.replace("images", "images_crop")
     cv2.imwrite(img_path, img_data)
-    gt_count = np.sum(kpoint)
     with h5py.File(
         img_path.replace('.jpg', '.h5').replace('images', 'gt_density_map'),
-        'w'
+        mode='w'
     ) as hf:
-        hf["gt_count"] = gt_count
+        hf["kpoint"] = kpoint
 
 
 def make_npydata():
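A hedged sketch of how a loader might read the map back (not part of this commit; the example path mirrors the layout produced by the hunk above, and the summed gt_count is what the old code stored directly):

    import h5py
    import numpy as np

    # Hypothetical reader for the files written above; the path is an example.
    img_path = "synchronous/dataset/ShanghaiTech/part_A_final/train_data/images_crop/IMG_1.jpg"
    gt_path = img_path.replace(".jpg", ".h5").replace("images", "gt_density_map")
    with h5py.File(gt_path, mode="r") as hf:
        kpoint = np.asarray(hf["kpoint"])
    gt_count = int(np.sum(kpoint))  # previously saved as "gt_count"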


@@ -1,5 +1,7 @@
-# Glue layer for transforming whole pictures into 384x384 sequence for encoder
-# input
+# Glue layer for transforming whole pictures into 384x384 sequence for encoder input
+from dataclasses import dataclass
+from itertools import product
+
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -7,3 +9,59 @@ import torchvision
 from torchvision import transforms
 import numpy as np
+from torchvision.transforms import v2
+
+
+# The v2 way, apparently. [1]
+class SquareCropTransformLayer(nn.Module):
+    def __init__(self, crop_size: int):
+        super(SquareCropTransformLayer, self).__init__()
+        self.crop_size = crop_size
+
+    def forward(
+        self,
+        x_: torch.Tensor,
+        kpoints_: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        # Here, x_ & kpoints_ have already had the affine transform applied.
+        assert len(x_.shape) == 4
+        channels, height, width = x_.shape[1:]
+        h_split_count = height // self.crop_size
+        w_split_count = width // self.crop_size
+        # Perform identical splits -- note kpoints_ does not have a C dimension!
+        # Performance should be acceptable, but this looks dumb as hell --
+        # is there a better way?
+        ret_x = torch.cat(
+            torch.tensor_split(
+                torch.cat(
+                    torch.tensor_split(
+                        x_,
+                        h_split_count,
+                        dim=2
+                    )
+                ),
+                w_split_count,
+                dim=3
+            )
+        )
+        split_t = torch.cat(
+            torch.tensor_split(
+                torch.cat(
+                    torch.tensor_split(
+                        kpoints_,
+                        h_split_count,
+                        dim=1
+                    )
+                ),
+                w_split_count,
+                dim=2
+            )
+        )
+        # Sum each patch's 0/1 head map into a per-patch gt_count.
+        ret_gt_count = torch.sum(split_t.view(split_t.size(0), -1), dim=1)
+        return ret_x, ret_gt_count
"""
References:
[1] https://pytorch.org/vision/stable/auto_examples/transforms/plot_custom_transforms.html
"""