# extract_superoint_independent.py

import os
import h5py
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import cv2
import torch
import torch.nn.functional as F
import argparse
import sys
import yaml
from copy import deepcopy
torch.set_default_tensor_type(torch.FloatTensor)

def str2bool(v):
    """Parse a textual boolean, for use as an argparse ``type=`` callable.

    Args:
        v: the raw value. Strings are compared case-insensitively after
            stripping surrounding whitespace; real ``bool`` values are
            returned unchanged.

    Returns:
        True for 'yes', 'true', 't', 'y', '1'; False for 'no', 'false',
        'f', 'n', '0'.

    Raises:
        argparse.ArgumentTypeError: if the value is not one of the accepted
            spellings. Raising here (instead of silently returning None)
            lets argparse report the bad flag to the user.
    """
    if isinstance(v, bool):
        return v
    value = v.strip().lower()
    if value in ('yes', 'true', 't', 'y', '1'):
        return True
    if value in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(f'Boolean value expected, got {v!r}')

def save_h5(dict_to_save, filename):
    """Write each entry of a dictionary to an HDF5 file, one dataset per key.

    The file is opened in 'w' mode, so an existing file at ``filename`` is
    replaced.
    """
    with h5py.File(filename, 'w') as h5_file:
        for name, payload in dict_to_save.items():
            h5_file.create_dataset(name, data=payload)

# Put the bundled pytorch-superpoint checkout (resolved relative to the
# current working directory) at the front of the module search path.
# NOTE(review): the `utils.loader` and `Val_model_heatmap` imports below
# presumably resolve from that checkout, so the script has to be launched
# from the repository root -- confirm.
sys.path.insert(0, f'{os.getcwd()}/third_party/pytorch-superpoint')

import kornia as K
# Data loading helper and the model wrapper used as the detector front end.
from utils.loader import dataLoader_test as dataLoader
from Val_model_heatmap import Val_model_heatmap as model_wrapper


def convert_imc(kps, resps):
    """Arrange keypoints and responses as the per-image arrays stored by this script.

    Args:
        kps: array of keypoint coordinates, reshaped to (N, 2).
        resps: array of keypoint responses, reshaped to (N, 1).

    Returns:
        Tuple ``(keypoints, scales, angles, responses)`` where ``scales`` is
        an (N, 1) float32 array of ones and ``angles`` an (N, 1) float32
        array of zeros (no scale/orientation is estimated here).
    """
    xy = kps.reshape(-1, 2)
    count = len(xy)
    unit_scales = np.ones((count, 1), dtype=np.float32)
    zero_angles = np.zeros((count, 1), dtype=np.float32)
    return xy, unit_scales, zero_angles, resps.reshape(-1, 1)


def extract_features(img_fname, keypoint_net, device, MAX_KP, max_size, norm_desc, subpixel=False):
    """Detect keypoints and descriptors in a single image.

    Args:
        img_fname: path of the image, read with OpenCV.
        keypoint_net: detector wrapper exposing ``run``, ``heatmap_to_pts``,
            ``soft_argmax_points`` and ``desc_to_sparseDesc`` (the script
            passes its ``Val_model_heatmap`` instance).
        device: torch device the image tensor is moved to.
        MAX_KP: maximum number of keypoints kept, highest score first.
        max_size: target length of the longer image side, rounded down to a
            multiple of 16; a value <= 0 keeps the original resolution.
        norm_desc: if true, L2-normalise every descriptor row.
        subpixel: if true, refine the detections with
            ``keypoint_net.soft_argmax_points``.

    Returns:
        Tuple ``(kps, resps, descs)``: ``kps`` is an (N, 2) array of (x, y)
        coordinates rescaled to the original image size, ``resps`` the
        matching scores and ``descs`` the matching descriptor rows.

    Raises:
        OSError: if the image cannot be read.
    """
    bgr = cv2.imread(img_fname)
    if bgr is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise OSError(f'Could not read image: {img_fname}')
    img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    timg = K.image_to_tensor(img, False).float() / 255.
    timg = timg.to(device)
    H, W = timg.shape[2:]
    if max_size > 0:
        # Both output sides are rounded down to a multiple of 16.
        if max_size % 16 != 0:
            max_size = int(max_size - (max_size % 16))
        min_size = int(min(H, W) * max_size / float(max(H, W)))
        if min_size % 16 != 0:
            min_size = int(min_size - (min_size % 16))
        if H > W:
            out_size = (max_size, min_size)
        else:
            out_size = (min_size, max_size)
        with torch.no_grad():
            timg_res = K.geometry.resize(timg, out_size)
    else:
        timg_res = timg
    with torch.no_grad():
        H2, W2 = timg_res.shape[2:]
        # Factors that map coordinates in the resized image back to the original.
        coef_h = (H / float(H2))
        coef_w = (W / float(W2))
        # The return value is not needed here; the argument-less calls below
        # presumably read the state that run() stores on the wrapper.
        keypoint_net.run(K.color.rgb_to_grayscale(timg_res))
        # heatmap to pts
        pts = keypoint_net.heatmap_to_pts()
        if subpixel:
            pts = keypoint_net.soft_argmax_points(pts)[0]
        else:
            pts = pts[0]
        # heatmap, pts to desc
        coord_1 = pts.T
        score_1 = deepcopy(coord_1[:, 2])
        coord_1 = deepcopy(coord_1[:, :2])
        desc1 = keypoint_net.desc_to_sparseDesc()[0].T
        if norm_desc:
            desc1 = F.normalize(torch.from_numpy(desc1), dim=1, p=2).numpy()
        score_1 = score_1.reshape(-1)
        # Keep the MAX_KP best-scoring detections.
        _, indices = torch.sort(torch.from_numpy(score_1), descending=True)
        idxs = indices[:MAX_KP].numpy()
        resps = score_1[idxs]
        kps = coord_1[idxs]
        kps[:, 0] *= coef_w
        kps[:, 1] *= coef_h
        descs = desc1[idxs]
    return kps.reshape(-1, 2), resps, descs

if __name__ == '__main__':
    # Script entry point: parse options, load the SuperPoint wrapper, then
    # extract features for every sequence of the requested dataset(s) and
    # store them as one HDF5 file per quantity in each sequence folder.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--datasets_folder",
        default=os.path.join('..', 'imw-2020'),
        help="path to datasets folder",
        type=str)
    parser.add_argument(
        '--num_kp',
        type=int,
        default=2048,
        help='number of keypoints')
    parser.add_argument(
        '--resize_image_to',
        type=int,
        default=1024,
        help='Resize the largest image dimension to this value (default: 1024, '
        '0 does nothing).')
    parser.add_argument(
        '--trainset',
        type=str,
        default='coco',
        choices=["coco", "kitty"])
    parser.add_argument(
        '--subpix',
        type=str2bool,
        default=False)
    parser.add_argument(
        '--device',
        type=str,
        default='cpu',
        choices=["cpu", 'cuda', 'mps']
    )
    parser.add_argument(
        "--save_path",
        default=os.path.join('..', 'benchmark-features'),
        type=str,
        help='Path to store the features')
    parser.add_argument(
        "--method_name", default='superpoint_indep', type=str)
    parser.add_argument(
        "--dataset",
        default='all',
        type=str,
        choices=["all", "phototourism", "pragueparks"])
    parser.add_argument(
        "--norm_desc",
        default=False,
        type=str2bool,
        help='L2Norm of descriptors')
    opt, unparsed = parser.parse_known_args()
    device = torch.device(opt.device)
    print(opt)

    # Pick the config/checkpoint pair matching the requested training set.
    if opt.trainset == 'coco':
        conf_filename = 'third_party/pytorch-superpoint/logs/superpoint_coco_heat2_0/config.yml'
        weights_fname = 'third_party/pytorch-superpoint/logs/superpoint_coco_heat2_0/checkpoints/superPointNet_170000_checkpoint.pth.tar'
    elif opt.trainset == 'kitty':
        weights_fname = 'third_party/pytorch-superpoint/logs/superpoint_kitti_heat2_0/checkpoints/superPointNet_50000_checkpoint.pth.tar'
        conf_filename = 'third_party/pytorch-superpoint/logs/superpoint_kitti_heat2_0/config.yml'
    else:
        # Not reachable through argparse `choices`; fail loudly instead of
        # hitting an undefined `conf_filename` below.
        raise ValueError(f'Unsupported trainset: {opt.trainset}')
    with open(conf_filename, 'r') as f:
        # yaml.load() without an explicit Loader is rejected by PyYAML >= 6;
        # safe_load() parses plain YAML configs and works on all versions.
        config = yaml.safe_load(f)
    config['model']['pretrained'] = weights_fname
    config['model']['nn_thresh'] = 1.0
    config['model']['detection_threshold'] = 0.001
    # load frontend
    val_agent = model_wrapper(config['model'], device=device)
    val_agent.loadModel()
    val_agent.net.eval()
    val_agent.net = val_agent.net.to(device)

    INPUT_DIR = opt.datasets_folder
    # The output folder name encodes every option that affects the features.
    modelname = f'{opt.method_name}_{opt.trainset}'
    if opt.norm_desc:
        modelname += '_norm'
    if opt.resize_image_to > 0:
        modelname += f'_{opt.resize_image_to}'
    else:
        modelname += '_fullres'
    if opt.subpix:
        modelname += '_subpix'
    OUT_DIR = os.path.join(opt.save_path, modelname)
    os.makedirs(OUT_DIR, exist_ok=True)
    print (f"Will save to {OUT_DIR}")
    if opt.dataset == 'all':
        datasets = ['phototourism', 'pragueparks']
    else:
        datasets = [opt.dataset]
    for ds in datasets:
        ds_in_path = os.path.join(INPUT_DIR, ds)
        ds_out_path = os.path.join(OUT_DIR, ds)
        os.makedirs(ds_out_path, exist_ok=True)
        seqs = [x for x in os.listdir(ds_in_path) if os.path.isdir(os.path.join(ds_in_path, x))]
        for seq in seqs:
            print (seq)
            # Sequences that contain a `set_100` folder keep their images in
            # `set_100/images`; otherwise the sequence folder itself is used.
            if os.path.isdir(os.path.join(ds_in_path, seq, 'set_100')):
                seq_in_path = os.path.join(ds_in_path, seq, 'set_100', 'images')
            else:
                seq_in_path = os.path.join(ds_in_path, seq)
            seq_out_path = os.path.join(ds_out_path, seq)
            os.makedirs(seq_out_path, exist_ok=True)
            # os.listdir() order is arbitrary and may include sub-directories:
            # keep regular files only and process them in a stable order.
            img_fnames = sorted(
                x for x in os.listdir(seq_in_path)
                if os.path.isfile(os.path.join(seq_in_path, x)))
            num_kp = []
            with h5py.File(f'{seq_out_path}/keypoints.h5', mode='w') as f_kp, \
                 h5py.File(f'{seq_out_path}/descriptors.h5', mode='w') as f_desc, \
                 h5py.File(f'{seq_out_path}/scores.h5', mode='w') as f_score, \
                 h5py.File(f'{seq_out_path}/angles.h5', mode='w') as f_ang, \
                 h5py.File(f'{seq_out_path}/scales.h5', mode='w') as f_scale:
                for img_fname in tqdm(img_fnames):
                    img_fname_full = os.path.join(seq_in_path, img_fname)
                    # Datasets are keyed by the file name without extension.
                    key = os.path.splitext(os.path.basename(img_fname))[0]
                    kps, resps, descs = extract_features(img_fname_full, val_agent, device,
                                                         opt.num_kp,
                                                         opt.resize_image_to,
                                                         opt.norm_desc,
                                                         opt.subpix)
                    keypoints, scales, angles, responses = convert_imc(kps, resps)
                    f_desc[key] = descs.reshape(-1, 256)
                    f_score[key] = responses
                    f_ang[key] = angles
                    f_scale[key] = scales
                    f_kp[key] = keypoints
                    num_kp.append(len(keypoints))
                # Avoid a NaN (and a numpy warning) when the folder held no images.
                mean_kp = np.array(num_kp).mean() if num_kp else 0.0
                print(f'Finished processing "{ds}/{seq}" -> {mean_kp} features/image')
    print (f"Result is saved to {OUT_DIR}")