admin管理员组

文章数量:1186520

I'm working on training a custom Keypoint R-CNN model using PyTorch, and I’m encountering issues while implementing the custom dataset and dataloader for COCO-style annotations. My dataset includes images, keypoints, bounding boxes, and categories. However, I’m getting an IndexError: index 3 is out of bounds for dimension 0 with size 2, and I’m not sure how to fix it.

Here is my current code implementation:

import os
import torch
import cv2
import numpy as np
import torchvision
from pycocotools.coco import COCO
import albumentations as A
from albumentations.pytorch import ToTensorV2

class Cocokeypoint(torchvision.datasets.VisionDataset):
    def __init__(self, root=None, transform=None):
        super().__init__(root=root, transform=transform)
        self.transforms = transform
        self.root = root
        self.coco = COCO(os.path.join(root, "_annotations.coco.json"))
        self.ids = list(sorted(self.coco.imgs.keys()))
        self.ids = [id for id in self.ids if len(self._load_target(id)) > 0]

    def _load_image(self, id: int):
        path = self.coco.loadImgs(id)[0]['file_name']
        image = cv2.imread(os.path.join(self.root, path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return image

    def _load_target(self, id):
        annotations = self.coco.loadAnns(self.coco.getAnnIds(id))
        keypoint_list = [ann['keypoints'] for ann in annotations]  # Extract keypoints
        return annotations, keypoint_list

    def __getitem__(self, index):
        id = self.ids[index]
        images = self._load_image(id)
        annotations, keypoint_list = self._load_target(id)

        boxes = [t['bbox'] + [t['category_id']] for t in annotations]
        keypoint = np.array(keypoint_list) if keypoint_list else np.array([])

        transformed = self.transforms(image=images, bboxes=boxes, keypoints=keypoint)

        image = transformed['image']
        boxe = transformed['bboxes']
        keypoint = transformed['keypoints']

        new_box = []
        for box in boxe:
            xmin = box[0]
            xmax = xmin + box[2]
            ymin = box[1]
            ymax = ymin + box[3]
            new_box.append([xmin, ymin, xmax, ymax])

        bboxes = torch.as_tensor(new_box, dtype=torch.float32)
        keypoints = torch.tensor(keypoint, dtype=torch.float32)

        labels = torch.tensor([t['category_id'] for t in annotations], dtype=torch.int64)
        area = torch.tensor([t['area'] for t in annotations], dtype=torch.float32)
        id = torch.tensor([id])
        iscrowd = torch.tensor([t['iscrowd'] for t in annotations], dtype=torch.int64)

        args = {
            'boxes': bboxes,
            'labels': labels,
            'image_id': id,
            'area': area,
            'iscrowd': iscrowd,
            'keypoints': keypoints
        }

        return image / 255.0, args

    def __len__(self):
        return len(self.ids)

# Transformations
trans = A.Compose([
    A.PadIfNeeded(640, 640),
    A.Resize(640, 640),
    ToTensorV2(),
], bbox_params=A.BboxParams(format='coco'), keypoint_params=A.KeypointParams(format='xy'))

# Create Dataset Instance
train_data = Cocokeypoint(root='/content/drive/MyDrive/DATA/glue_hank.v1i.coco/train/', transform=trans)
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-251-b167053efd94> in <cell line: 0>()
      4 
      5 for epoch in tqdm(range(epochs)):
----> 6     best_model,last_model,model_score=train_one_loop(model=model,optimizer=optimizer,schedular=schedular,loader=train_dataloader,device=device,epoch=epoch)

8 frames
/usr/local/lib/python3.11/dist-packages/torchvision/models/detection/roi_heads.py in assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels)
    586                 clamped_matched_idxs_in_image = matched_idxs_in_image.clamp(min=0)
    587 
--> 588                 labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image]
    589                 labels_in_image = labels_in_image.to(dtype=torch.int64)
    590 

IndexError: index 3 is out of bounds for dimension 0 with size 2

本文标签: