可视化 anchor-target 赋值¶

[1]:

import os
import random
from pathlib import Path
import numpy as np
import torch

# Enumerate GPUs in PCI bus order and expose only physical device 4 to this process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "4",
})

# All tensors created explicitly in this notebook are placed on the CPU.
device = torch.device("cpu")

[2]:

# Seed every random number generator used below (PyTorch, Python's `random`
# and NumPy) so that the sampled batch and the randomly picked image are repeatable.
for seed_fn, seed in ((torch.manual_seed, 24), (random.seed, 5), (np.random.seed, 95)):
    seed_fn(seed)

[3]:

import io
import contextlib
import cv2
from torchvision.ops import box_convert

[4]:

from yolort.data.data_module import COCODetectionDataModule
from yolort.models.transform import YOLOTransform
from yolort.utils.image_utils import (
    color_list,
    plot_one_box,
    cv2_imshow,
    load_names,
    parse_single_image,
    parse_images,
)

为测试设定 coco128 dataset 和 dataloader¶

[5]:

# COCO class names, indexed by the contiguous label id used when drawing boxes.
# Eight names per row; the trailing comment gives the index range of the row.
LABELS = (
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',  # 0-7
    'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',  # 8-15
    'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',  # 16-23
    'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',  # 24-31
    'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
    'tennis racket', 'bottle',  # 32-39
    'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',  # 40-47
    'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',  # 48-55
    'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',  # 56-63
    'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',  # 64-71
    'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',  # 72-79
)

# Colour palette from yolort's `color_list()`; the drawing cells below index it
# with `label % len(COLORS)`, so it may hold fewer colours than there are classes.
COLORS = color_list()

[8]:

# Acquire the images and labels from the coco128 dataset.
# The dataset root defaults to the location used when this notebook was authored;
# export YOLORT_DATA_PATH before starting the kernel to point it somewhere else
# instead of editing the notebook.
data_path = Path(os.environ.get('YOLORT_DATA_PATH', '/media/pc/data/4tb/lxw/tests/datasets'))
coco128_dirname = 'coco'
coco128_path = data_path / coco128_dirname
image_root = coco128_path / 'images' / 'train2017'
annotation_path = coco128_path / 'annotations'

batch_size = 8

# Building the dataset prints progress messages to stdout; capture them so the
# notebook output stays clean. Exceptions still propagate normally.
with contextlib.redirect_stdout(io.StringIO()):
    datamodule = COCODetectionDataModule(
        image_root,
        anno_path=annotation_path,
        skip_val_set=True,  # only the training split is used in this notebook
        batch_size=batch_size,
    )

---------------------------------------------------------------------------
JSONDecodeError                           Traceback (most recent call last)
/media/pc/data/4tb/lxw/books/yolov5-rt-stack/docs/source/notebooks/anchor-label-assignment-visualization.ipynb Cell 8' in <cell line: 10>()
      <a href='vscode-notebook-cell://ssh-remote%2Bxin/media/pc/data/4tb/lxw/books/yolov5-rt-stack/docs/source/notebooks/anchor-label-assignment-visualization.ipynb#ch0000007vscode-remote?line=7'>8</a> batch_size = 8
     <a href='vscode-notebook-cell://ssh-remote%2Bxin/media/pc/data/4tb/lxw/books/yolov5-rt-stack/docs/source/notebooks/anchor-label-assignment-visualization.ipynb#ch0000007vscode-remote?line=9'>10</a> with contextlib.redirect_stdout(io.StringIO()):
---> <a href='vscode-notebook-cell://ssh-remote%2Bxin/media/pc/data/4tb/lxw/books/yolov5-rt-stack/docs/source/notebooks/anchor-label-assignment-visualization.ipynb#ch0000007vscode-remote?line=10'>11</a>     datamodule = COCODetectionDataModule(
     <a href='vscode-notebook-cell://ssh-remote%2Bxin/media/pc/data/4tb/lxw/books/yolov5-rt-stack/docs/source/notebooks/anchor-label-assignment-visualization.ipynb#ch0000007vscode-remote?line=11'>12</a>         image_root,
     <a href='vscode-notebook-cell://ssh-remote%2Bxin/media/pc/data/4tb/lxw/books/yolov5-rt-stack/docs/source/notebooks/anchor-label-assignment-visualization.ipynb#ch0000007vscode-remote?line=12'>13</a>         anno_path=annotation_path,
     <a href='vscode-notebook-cell://ssh-remote%2Bxin/media/pc/data/4tb/lxw/books/yolov5-rt-stack/docs/source/notebooks/anchor-label-assignment-visualization.ipynb#ch0000007vscode-remote?line=13'>14</a>         skip_val_set=True,
     <a href='vscode-notebook-cell://ssh-remote%2Bxin/media/pc/data/4tb/lxw/books/yolov5-rt-stack/docs/source/notebooks/anchor-label-assignment-visualization.ipynb#ch0000007vscode-remote?line=14'>15</a>         batch_size=batch_size,
     <a href='vscode-notebook-cell://ssh-remote%2Bxin/media/pc/data/4tb/lxw/books/yolov5-rt-stack/docs/source/notebooks/anchor-label-assignment-visualization.ipynb#ch0000007vscode-remote?line=15'>16</a>     )

File /media/pc/data/4tb/lxw/anaconda3/envs/torch/lib/python3.10/site-packages/yolort/data/data_module.py:111, in COCODetectionDataModule.__init__(self, data_path, anno_path, num_classes, data_task, train_set, val_set, skip_train_set, skip_val_set, train_transform, val_transform, batch_size, num_workers, *args, **kwargs)
    107 train_ann_file = anno_path / f"{data_task}_{train_set}.json"
    108 val_ann_file = anno_path / f"{data_task}_{val_set}.json"
    110 train_dataset = (
--> 111     None if skip_train_set else COCODetection(data_path, train_ann_file, train_transform())
    112 )
    113 val_dataset = None if skip_val_set else COCODetection(data_path, val_ann_file, val_transform())
    115 super().__init__(
    116     train_dataset=train_dataset,
    117     val_dataset=val_dataset,
   (...)
    121     **kwargs,
    122 )

File /media/pc/data/4tb/lxw/anaconda3/envs/torch/lib/python3.10/site-packages/yolort/data/coco.py:16, in COCODetection.__init__(self, img_folder, ann_file, transforms, return_masks)
     15 def __init__(self, img_folder, ann_file, transforms, return_masks=False):
---> 16     super().__init__(img_folder, ann_file)
     17     self._transforms = transforms
     19     json_category_id_to_contiguous_id = {v: i for i, v in enumerate(self.coco.getCatIds())}

File /media/pc/data/4tb/lxw/anaconda3/envs/torch/lib/python3.10/site-packages/torchvision/datasets/coco.py:36, in CocoDetection.__init__(self, root, annFile, transform, target_transform, transforms)
     33 super().__init__(root, transforms, transform, target_transform)
     34 from pycocotools.coco import COCO
---> 36 self.coco = COCO(annFile)
     37 self.ids = list(sorted(self.coco.imgs.keys()))

File /media/pc/data/4tb/lxw/anaconda3/envs/torch/lib/python3.10/site-packages/pycocotools-2.0.4-py3.10-linux-x86_64.egg/pycocotools/coco.py:82, in COCO.__init__(self, annotation_file)
     80 tic = time.time()
     81 with open(annotation_file, 'r') as f:
---> 82     dataset = json.load(f)
     83 assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset))
     84 print('Done (t={:0.2f}s)'.format(time.time()- tic))

File /media/pc/data/4tb/lxw/anaconda3/envs/torch/lib/python3.10/json/__init__.py:293, in load(fp, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    274 def load(fp, *, cls=None, object_hook=None, parse_float=None,
    275         parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
    276     """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
    277     a JSON document) to a Python object.
    278
   (...)
    291     kwarg; otherwise ``JSONDecoder`` is used.
    292     """
--> 293     return loads(fp.read(),
    294         cls=cls, object_hook=object_hook,
    295         parse_float=parse_float, parse_int=parse_int,
    296         parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)

File /media/pc/data/4tb/lxw/anaconda3/envs/torch/lib/python3.10/json/__init__.py:346, in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    341     s = s.decode(detect_encoding(s), 'surrogatepass')
    343 if (cls is None and object_hook is None and
    344         parse_int is None and parse_float is None and
    345         parse_constant is None and object_pairs_hook is None and not kw):
--> 346     return _default_decoder.decode(s)
    347 if cls is None:
    348     cls = JSONDecoder

File /media/pc/data/4tb/lxw/anaconda3/envs/torch/lib/python3.10/json/decoder.py:337, in JSONDecoder.decode(self, s, _w)
    332 def decode(self, s, _w=WHITESPACE.match):
    333     """Return the Python representation of ``s`` (a ``str`` instance
    334     containing a JSON document).
    335
    336     """
--> 337     obj, end = self.raw_decode(s, idx=_w(s, 0).end())
    338     end = _w(s, end).end()
    339     if end != len(s):

File /media/pc/data/4tb/lxw/anaconda3/envs/torch/lib/python3.10/json/decoder.py:355, in JSONDecoder.raw_decode(self, s, idx)
    353     obj, end = self.scan_once(s, idx)
    354 except StopIteration as err:
--> 355     raise JSONDecodeError("Expecting value", s, err.value) from None
    356 return obj, end

JSONDecodeError: Expecting value: line 1 column 455147521 (char 455147520)

[ ]:

# Despite its name, this iterates over the *training* dataloader: the datamodule
# above was created with skip_val_set=True, so the training split is the only one built.
test_dataloader = iter(datamodule.train_dataloader())

采样图片和目标¶

[ ]:

# Pull the first batch. `annotations` is indexed per image and each entry is read
# through its 'boxes' and 'labels' keys in the cells below.
images, annotations = next(test_dataloader)

[ ]:

# Pick one image of the batch at random; `idx` is reused by the later cells.
idx = random.randrange(batch_size)
# The parsed image is converted RGB -> BGR for the OpenCV-based drawing helpers.
img_raw = cv2.cvtColor(parse_single_image(images[idx]), cv2.COLOR_RGB2BGR)

# Draw every ground-truth box of the chosen image together with its class name.
sample_boxes = annotations[idx]['boxes'].tolist()
sample_labels = annotations[idx]['labels'].tolist()
for box, label in zip(sample_boxes, sample_labels):
    box_color = COLORS[label % len(COLORS)]
    img_raw = plot_one_box(box, img_raw, color=box_color, label=LABELS[label])

cv2_imshow(img_raw, imshow_scale=0.5)

Training Batch in Pipeline¶

[ ]:

from yolort.models import yolov5s

# Build YOLOv5s with its default arguments and switch it to training mode via
# `.train()`; the result of that call is what the rest of the notebook uses as `model`.
model = yolov5s().train()

[ ]:

# Run the model's own transform on the sampled batch. Later cells read the batched
# images from `samples.tensors` and treat `targets` as a 2-D tensor whose columns
# are (image index, class, x, y, w, h).
samples, targets = model.transform(images, annotations)

[ ]:

# Convert the transformed batch back into displayable images, one per batch entry.
# NOTE(review): presumably RGB arrays, since each one is passed through
# cv2.COLOR_RGB2BGR before display below -- confirm against parse_images.
inputs = parse_images(samples.tensors)

[ ]:

# Rows of `targets` whose first column (the image index within the batch) equals `idx`.
attach_idx = torch.where(targets[:, 0].to(dtype=torch.int32) == idx)[0]

img_training = cv2.cvtColor(inputs[idx], cv2.COLOR_RGB2BGR)  # For visualization
img_h, img_w = img_training.shape[:2]
targets_training = targets[attach_idx]

# Columns 2: hold the box as (cx, cy, w, h) relative to the image size. After the
# conversion to (x1, y1, x2, y2) the x coordinates have to be scaled by the image
# *width* and the y coordinates by the image *height*; scaling x by the height
# only looks right when the transformed image happens to be square.
scale_xyxy = targets_training.new_tensor([img_w, img_h, img_w, img_h])
boxes_xyxy = box_convert(targets_training[:, 2:], in_fmt='cxcywh', out_fmt='xyxy') * scale_xyxy
# Column 1 holds the class id stored as a float; convert it to plain Python ints.
labels_training = targets_training[:, 1].long().tolist()

for box, label in zip(boxes_xyxy.tolist(), labels_training):
    img_training = plot_one_box(box, img_training, color=COLORS[label % len(COLORS)], label=LABELS[label])

cv2_imshow(img_training, imshow_scale=0.5)

提取中间特征¶

[ ]:

from yolort.utils import FeatureExtractor

[ ]:

# Wrap the inner model so that a forward pass also hands back the outputs of the
# 'backbone' and 'head' stages, keyed by those names.
yolo_features = FeatureExtractor(model.model, return_layers=['backbone', 'head'])
intermediate_features = yolo_features(samples.tensors, targets)

# Unpack the two requested stages into their own variables.
features, head_outputs = (intermediate_features[stage] for stage in ('backbone', 'head'))

获取 Anchors 和 Strides¶

[ ]:

# The number of detection layers is taken from the number of head outputs.
num_layers = len(head_outputs)

anchor_generator = model.model.anchor_generator
tensor_kwargs = dict(dtype=torch.float32, device=device)
strides = torch.as_tensor(anchor_generator.strides, **tensor_kwargs)
# Regroup the anchor values as (layer, anchor, wh) and divide each layer's anchors
# by that layer's stride.
anchors = (
    torch.as_tensor(anchor_generator.anchor_grids, **tensor_kwargs).view(num_layers, -1, 2)
    / strides.view(-1, 1, 1)
)

Assign Targets to Anchors¶

[ ]:

# Build targets for compute_loss(), input targets(image,class,x,y,w,h)
# Number of anchors *per layer*. The anchor index appended to every target below is
# later broadcast against, and used to index, `anchors[i]` (shape (num_anchors, 2)),
# so the count has to come from that axis of the (num_layers, num_anchors, 2)
# `anchors` tensor rather than from the length of the raw `anchor_grids` container.
num_anchors = anchors.shape[1]  # number of anchors
num_targets = len(targets)  # number of targets

# Per-layer accumulators filled by the assignment loop.
targets_cls, targets_box, anchors_encode = [], [], []
indices = []
grid_assigner = []  # Anchor Visualization
gain = torch.ones(7, device=device)  # normalized to gridspace gain
# same as .repeat_interleave(num_targets)
ai = torch.arange(num_anchors, device=device).float().view(num_anchors, 1).repeat(1, num_targets)
# Replicate every target once per anchor and append the anchor index as a 7th
# column, giving a (num_anchors, num_targets, 7) tensor.
targets_append = torch.cat((targets.repeat(num_anchors, 1, 1), ai[:, :, None]), 2)  # append anchor indices

g = 0.5  # bias
# Candidate cell offsets: the cell containing the target centre plus its four
# direct neighbours, each pre-multiplied by the bias `g`.
off = torch.tensor([[0, 0],
                    [1, 0], [0, 1], [-1, 0], [0, -1],  # j,k,l,m
                    # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
                    ], device=device).float() * g  # offsets

[ ]:

# Upper bound on the width/height ratio between a target and an anchor (taken in
# whichever direction is larger); pairs at or above it are dropped in the loop below.
anchor_threshold = 4.0

What’s actually going on is the image is subdivided into a grid of squares, and the coordinates in grid[] are the coordinates of the upper-left corner of that square.

The neural network provides $x$, $y$ coordinates in the range $(0, 1)$ (enforced by sigmoid), which covers the square and is centered at 0.5. Multiplying by two stretches this range to $(0, 2)$, so the detected $x$, $y$ coordinates can fall slightly outside the square – otherwise it’s difficult to detect objects centered at grid boundaries. Subtracting 0.5 then shifts the range to $(-0.5, 1.5)$, which keeps it centered on the $(0, 1)$ square.

[ ]:

# For every detection layer: scale the targets to that layer's grid, keep the
# (target, anchor) pairs whose size ratio is below `anchor_threshold`, replicate
# each kept target onto the neighbouring cells selected by `off`, and record the
# resulting indices, boxes, anchors and classes in the per-layer lists.
for i in range(num_layers):
    anchors_per_layer = anchors[i]
    # Integer grid size of this layer, read from dims 2 and 3 of the head output
    # (the same dims the gain below uses). Kept as Python ints for clamping.
    grid_h, grid_w = head_outputs[i].shape[2:4]
    gain[2:6] = torch.tensor(head_outputs[i].shape)[[3, 2, 3, 2]]  # xyxy gain

    # Match targets to anchors
    targets_with_gain = targets_append * gain
    if num_targets:
        # Matches
        ratios_wh = targets_with_gain[:, :, 4:6] / anchors_per_layer[:, None]  # wh ratio
        ratios_filtering = torch.max(ratios_wh, 1. / ratios_wh).max(2)[0]
        inds = torch.where(ratios_filtering < anchor_threshold)
        targets_with_gain = targets_with_gain[inds]  # filter

        # Offsets
        grid_xy = targets_with_gain[:, 2:4]  # grid xy
        grid_xy_inverse = gain[[2, 3]] - grid_xy  # inverse
        # A neighbour cell is selected when the centre lies in the half of its cell
        # facing that neighbour and is more than one cell away from the grid border.
        inds_jk = (grid_xy % 1. < g) & (grid_xy > 1.)
        inds_lm = (grid_xy_inverse % 1. < g) & (grid_xy_inverse > 1.)
        inds_ones = torch.ones_like(inds_jk[:, 0])[:, None]
        # Row 0 always selects the target's own cell; rows 1-4 follow the order of `off`.
        inds = torch.cat((inds_ones, inds_jk, inds_lm), dim=1).T
        targets_with_gain = targets_with_gain.repeat((5, 1, 1))[inds]
        offsets = (torch.zeros_like(grid_xy)[None] + off[:, None])[inds]
    else:
        targets_with_gain = targets_append[0]
        offsets = torch.tensor(0, device=device)

    # Define
    bc = targets_with_gain[:, :2].long().T  # image, class
    grid_xy = targets_with_gain[:, 2:4]  # grid xy
    grid_wh = targets_with_gain[:, 4:6]  # grid wh
    grid_ij = (grid_xy - offsets).long()

    # Append
    a = targets_with_gain[:, 6].long()  # anchor indices
    # image, anchor, grid indices
    # The bounds are plain ints: clamping the long `grid_ij` against a float tensor
    # such as `gain[3] - 1` raises "result type Float can't be cast to the desired
    # output type long int" on recent PyTorch. `clamp_` works in place on views of
    # `grid_ij`, so the clamped values are also the ones subtracted from `grid_xy`
    # on the next line.
    indices.append((bc[0], a, grid_ij[:, 1].clamp_(0, grid_h - 1),
                    grid_ij[:, 0].clamp_(0, grid_w - 1)))
    targets_box.append(torch.cat((grid_xy - grid_ij, grid_wh), 1))  # box
    grid_assigner.append(torch.cat((grid_xy, grid_wh), 1))
    anchors_encode.append(anchors_per_layer[a])  # anchors
    targets_cls.append(bc[1])  # class

Visualization of Anchors¶

[ ]:

from yolort.utils.image_utils import anchor_match_visualize

[ ]:

# Render the assignment computed above onto the batch images. The result is indexed
# per batch entry in the next cell.
images_with_anchor = anchor_match_visualize(samples.tensors, grid_assigner, indices, anchors_encode, head_outputs)

[ ]:

# Show the anchor assignment for the same randomly chosen image (`idx`) used in the
# earlier visualisations, at half scale.
cv2_imshow(images_with_anchor[idx], imshow_scale=0.5)

View this document as a notebook: https://github.com/zhiqwang/yolov5-rt-stack/blob/main/notebooks/anchor-label-assignment-visualization.ipynb