Shritesh99
diff --git a/‎.DS_Store‎
0 Bytes b/‎.DS_Store‎
0 Bytes
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 1 deletion b/‎.gitignore‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎Computer-Vision/Object-Detection/data/__init__.py‎
Lines changed: 21 additions & 0 deletions b/‎Computer-Vision/Object-Detection/data/__init__.py‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎Computer-Vision/Object-Detection/data/config.py‎
Lines changed: 64 additions & 0 deletions b/‎Computer-Vision/Object-Detection/data/config.py‎
Lines changed: 64 additions & 0 deletions
diff --git a/‎Computer-Vision/Object-Detection/data/scripts/VOC2007.sh‎
Lines changed: 42 additions & 0 deletions b/‎Computer-Vision/Object-Detection/data/scripts/VOC2007.sh‎
Lines changed: 42 additions & 0 deletions
diff --git a/‎Computer-Vision/Object-Detection/data/scripts/VOC2012.sh‎
Lines changed: 38 additions & 0 deletions b/‎Computer-Vision/Object-Detection/data/scripts/VOC2012.sh‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎Computer-Vision/Object-Detection/data/voc0712.py‎
Lines changed: 206 additions & 0 deletions b/‎Computer-Vision/Object-Detection/data/voc0712.py‎
Lines changed: 206 additions & 0 deletions
diff --git a/‎Computer-Vision/Object-Detection/funny_dog.mp4‎
6.17 MB b/‎Computer-Vision/Object-Detection/funny_dog.mp4‎
6.17 MB
diff --git a/‎Computer-Vision/Object-Detection/layers/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎Computer-Vision/Object-Detection/layers/__init__.py‎
Lines changed: 2 additions & 0 deletions
@@ -6,4 +6,5 @@ Machine_Learning_A-Z.Rproj
 .DS_Store
 Deep-Learning/Convolutional-Neural-Networks(CNN)/dataset
 .gitignore
-*.pyc
+*.pyc
+*.pth
@@ -0,0 +1,21 @@
+from .voc0712 import VOCDetection, AnnotationTransform, detection_collate, VOC_CLASSES
+from .config import *
+import cv2
+import numpy as np
+
+
+def base_transform(image, size, mean):
+    x = cv2.resize(image, (size, size)).astype(np.float32)
+    # x = cv2.resize(np.array(image), (size, size)).astype(np.float32)
+    x -= mean
+    x = x.astype(np.float32)
+    return x
+
+
+class BaseTransform:
+    def __init__(self, size, mean):
+        self.size = size
+        self.mean = np.array(mean, dtype=np.float32)
+
+    def __call__(self, image, boxes=None, labels=None):
+        return base_transform(image, self.size, self.mean), boxes, labels
@@ -0,0 +1,64 @@
+# config.py
+import os.path
+
+# Home directory of the current user, resolved in a cross-platform way.
+home = os.path.expanduser("~")
+# Default dataset location: <home>/data/VOCdevkit/
+ddir = os.path.join(home,"data/VOCdevkit/")
+
+# note: if you used our download scripts, this should be right
+VOCroot = ddir # path to VOCdevkit root dir
+
+# Default batch size.
+BATCHES = 32
+# Whether the data is reshuffled at every epoch.
+SHUFFLE = True
+# Number of subprocesses to use for data loading.
+WORKERS = 4
+
+
+#SSD300 CONFIGS
+# newer version: use additional conv11_2 layer as last layer before multibox layers
+v2 = {
+    'feature_maps' : [38, 19, 10, 5, 3, 1],
+
+    'min_dim' : 300,
+
+    'steps' : [8, 16, 32, 64, 100, 300],
+
+    'min_sizes' : [30, 60, 111, 162, 213, 264],
+
+    'max_sizes' : [60, 111, 162, 213, 264, 315],
+
+    # 'aspect_ratios' : [[2, 1/2], [2, 1/2, 3, 1/3], [2, 1/2, 3, 1/3],
+    #                    [2, 1/2, 3, 1/3], [2, 1/2], [2, 1/2]],
+    'aspect_ratios' : [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
+
+    'variance' : [0.1, 0.2],
+
+    'clip' : True,
+
+    'name' : 'v2',
+}
+
+# use average pooling layer as last layer before multibox layers
+v1 = {
+    'feature_maps' : [38, 19, 10, 5, 3, 1],
+
+    'min_dim' : 300,
+
+    'steps' : [8, 16, 32, 64, 100, 300],
+
+    'min_sizes' : [30, 60, 114, 168, 222, 276],
+
+    'max_sizes' : [-1, 114, 168, 222, 276, 330],
+
+    # 'aspect_ratios' : [[2], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]],
+    'aspect_ratios' : [[1,1,2,1/2],[1,1,2,1/2,3,1/3],[1,1,2,1/2,3,1/3],
+                        [1,1,2,1/2,3,1/3],[1,1,2,1/2,3,1/3],[1,1,2,1/2,3,1/3]],
+
+    'variance' : [0.1, 0.2],
+
+    'clip' : True,
+
+    'name' : 'v1',
+}
@@ -0,0 +1,42 @@
+#!/bin/bash
+# Ellis Brown
+
+start=`date +%s`
+
+# handle optional download dir
+if [ -z "$1" ]
+  then
+    # navigate to ~/data
+    echo "navigating to ~/data/ ..." 
+    mkdir -p ~/data
+    cd ~/data/
+  else
+    # check if is valid directory
+    if [ ! -d $1 ]; then
+        echo $1 "is not a valid directory"
+        exit 0
+    fi
+    echo "navigating to" $1 "..."
+    cd $1
+fi
+
+echo "Downloading VOC2007 trainval ..."
+# Download the data.
+curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
+echo "Downloading VOC2007 test data ..."
+curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
+echo "Done downloading."
+
+# Extract data
+echo "Extracting trainval ..."
+tar -xvf VOCtrainval_06-Nov-2007.tar
+echo "Extracting test ..."
+tar -xvf VOCtest_06-Nov-2007.tar
+echo "removing tars ..."
+rm VOCtrainval_06-Nov-2007.tar
+rm VOCtest_06-Nov-2007.tar
+
+end=`date +%s`
+runtime=$((end-start))
+
+echo "Completed in" $runtime "seconds"
@@ -0,0 +1,38 @@
+#!/bin/bash
+# Ellis Brown
+
+start=`date +%s`
+
+# handle optional download dir
+if [ -z "$1" ]
+  then
+    # navigate to ~/data
+    echo "navigating to ~/data/ ..." 
+    mkdir -p ~/data
+    cd ~/data/
+  else
+    # check if is valid directory
+    if [ ! -d $1 ]; then
+        echo $1 "is not a valid directory"
+        exit 0
+    fi
+    echo "navigating to" $1 "..."
+    cd $1
+fi
+
+echo "Downloading VOC2012 trainval ..."
+# Download the data.
+curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
+echo "Done downloading."
+
+
+# Extract data
+echo "Extracting trainval ..."
+tar -xvf VOCtrainval_11-May-2012.tar
+echo "removing tar ..."
+rm VOCtrainval_11-May-2012.tar
+
+end=`date +%s`
+runtime=$((end-start))
+
+echo "Completed in" $runtime "seconds"
@@ -0,0 +1,206 @@
+"""VOC Dataset Classes
+
+Original author: Francisco Massa
+https://github.com/fmassa/vision/blob/voc_dataset/torchvision/datasets/voc.py
+
+Updated by: Ellis Brown, Max deGroot
+"""
+
+import os
+import os.path
+import sys
+import torch
+import torch.utils.data as data
+import torchvision.transforms as transforms
+from PIL import Image, ImageDraw, ImageFont
+import cv2
+import numpy as np
+if sys.version_info[0] == 2:
+    import xml.etree.cElementTree as ET
+else:
+    import xml.etree.ElementTree as ET
+
+# Names of the 20 VOC object classes. With AnnotationTransform's default
+# mapping, a name's position in this tuple is its label index
+# ('aeroplane' -> 0, ..., 'tvmonitor' -> 19).
+VOC_CLASSES = (
+    'aeroplane', 'bicycle', 'bird', 'boat',
+    'bottle', 'bus', 'car', 'cat', 'chair',
+    'cow', 'diningtable', 'dog', 'horse',
+    'motorbike', 'person', 'pottedplant',
+    'sheep', 'sofa', 'train', 'tvmonitor')
+
+# for making bounding boxes pretty
+# NOTE(review): six 4-tuples, presumably RGBA with alpha 128 -- confirm
+# against the drawing code that consumes them.
+COLORS = ((255, 0, 0, 128), (0, 255, 0, 128), (0, 0, 255, 128),
+          (0, 255, 255, 128), (255, 0, 255, 128), (255, 255, 0, 128))
+
+
+class AnnotationTransform(object):
+    """Transforms a VOC annotation into a Tensor of bbox coords and label index
+    Initilized with a dictionary lookup of classnames to indexes
+
+    Arguments:
+        class_to_ind (dict, optional): dictionary lookup of classnames -> indexes
+            (default: alphabetic indexing of VOC's 20 classes)
+        keep_difficult (bool, optional): keep difficult instances or not
+            (default: False)
+        height (int): height
+        width (int): width
+    """
+
+    def __init__(self, class_to_ind=None, keep_difficult=False):
+        self.class_to_ind = class_to_ind or dict(
+            zip(VOC_CLASSES, range(len(VOC_CLASSES))))
+        self.keep_difficult = keep_difficult
+
+    def __call__(self, target, width, height):
+        """
+        Arguments:
+            target (annotation) : the target annotation to be made usable
+                will be an ET.Element
+        Returns:
+            a list containing lists of bounding boxes  [bbox coords, class name]
+        """
+        res = []
+        for obj in target.iter('object'):
+            difficult = int(obj.find('difficult').text) == 1
+            if not self.keep_difficult and difficult:
+                continue
+            name = obj.find('name').text.lower().strip()
+            bbox = obj.find('bndbox')
+
+            pts = ['xmin', 'ymin', 'xmax', 'ymax']
+            bndbox = []
+            for i, pt in enumerate(pts):
+                cur_pt = int(bbox.find(pt).text) - 1
+                # scale height or width
+                cur_pt = cur_pt / width if i % 2 == 0 else cur_pt / height
+                bndbox.append(cur_pt)
+            label_idx = self.class_to_ind[name]
+            bndbox.append(label_idx)
+            res += [bndbox]  # [xmin, ymin, xmax, ymax, label_ind]
+            # img_id = target.find('filename').text[:-4]
+
+        return res  # [[xmin, ymin, xmax, ymax, label_ind], ... ]
+
+
+class VOCDetection(data.Dataset):
+    """VOC Detection Dataset Object
+
+    input is image, target is annotation
+
+    Arguments:
+        root (string): filepath to VOCdevkit folder.
+        image_set (string): imageset to use (eg. 'train', 'val', 'test')
+        transform (callable, optional): transformation to perform on the
+            input image
+        target_transform (callable, optional): transformation to perform on the
+            target `annotation`
+            (eg: take in caption string, return tensor of word indices)
+        dataset_name (string, optional): which dataset to load
+            (default: 'VOC2007')
+    """
+
+    def __init__(self, root, image_sets, transform=None, target_transform=None,
+                 dataset_name='VOC0712'):
+        self.root = root
+        self.image_set = image_sets
+        self.transform = transform
+        self.target_transform = target_transform
+        self.name = dataset_name
+        self._annopath = os.path.join('%s', 'Annotations', '%s.xml')
+        self._imgpath = os.path.join('%s', 'JPEGImages', '%s.jpg')
+        self.ids = list()
+        for (year, name) in image_sets:
+            rootpath = os.path.join(self.root, 'VOC' + year)
+            for line in open(os.path.join(rootpath, 'ImageSets', 'Main', name + '.txt')):
+                self.ids.append((rootpath, line.strip()))
+
+    def __getitem__(self, index):
+        im, gt, h, w = self.pull_item(index)
+
+        return im, gt
+
+    def __len__(self):
+        return len(self.ids)
+
+    def pull_item(self, index):
+        img_id = self.ids[index]
+
+        target = ET.parse(self._annopath % img_id).getroot()
+        img = cv2.imread(self._imgpath % img_id)
+        height, width, channels = img.shape
+
+        if self.target_transform is not None:
+            target = self.target_transform(target, width, height)
+
+        if self.transform is not None:
+            target = np.array(target)
+            img, boxes, labels = self.transform(img, target[:, :4], target[:, 4])
+            # to rgb
+            img = img[:, :, (2, 1, 0)]
+            # img = img.transpose(2, 0, 1)
+            target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
+        return torch.from_numpy(img).permute(2, 0, 1), target, height, width
+        # return torch.from_numpy(img), target, height, width
+
+    def pull_image(self, index):
+        '''Returns the original image object at index in PIL form
+
+        Note: not using self.__getitem__(), as any transformations passed in
+        could mess up this functionality.
+
+        Argument:
+            index (int): index of img to show
+        Return:
+            PIL img
+        '''
+        img_id = self.ids[index]
+        return cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR)
+
+    def pull_anno(self, index):
+        '''Returns the original annotation of image at index
+
+        Note: not using self.__getitem__(), as any transformations passed in
+        could mess up this functionality.
+
+        Argument:
+            index (int): index of img to get annotation of
+        Return:
+            list:  [img_id, [(label, bbox coords),...]]
+                eg: ('001718', [('dog', (96, 13, 438, 332))])
+        '''
+        img_id = self.ids[index]
+        anno = ET.parse(self._annopath % img_id).getroot()
+        gt = self.target_transform(anno, 1, 1)
+        return img_id[1], gt
+
+    def pull_tensor(self, index):
+        '''Returns the original image at an index in tensor form
+
+        Note: not using self.__getitem__(), as any transformations passed in
+        could mess up this functionality.
+
+        Argument:
+            index (int): index of img to show
+        Return:
+            tensorized version of img, squeezed
+        '''
+        return torch.Tensor(self.pull_image(index)).unsqueeze_(0)
+
+
+def detection_collate(batch):
+    """Custom collate fn for dealing with batches of images that have a different
+    number of associated object annotations (bounding boxes).
+
+    Arguments:
+        batch: (tuple) A tuple of tensor images and lists of annotations
+
+    Return:
+        A tuple containing:
+            1) (tensor) batch of images stacked on their 0 dim
+            2) (list of tensors) annotations for a given image are stacked on 0 dim
+    """
+    targets = []
+    imgs = []
+    for sample in batch:
+        imgs.append(sample[0])
+        targets.append(torch.FloatTensor(sample[1]))
+    return torch.stack(imgs, 0), targets
@@ -0,0 +1,2 @@
+from .functions import *
+from .modules import *
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+from .functions import *`
	`2`	`+from .modules import *`