-
Notifications
You must be signed in to change notification settings - Fork 0
/
shapes.py
238 lines (213 loc) · 9.37 KB
/
shapes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
import os
import sys
import re
import time
import cv2
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import math
import random
#root directory of the project
ROOT_DIR=os.path.abspath("../../")
#import mask rcnn
sys.path.append(ROOT_DIR)
from mrcnn.config import Config
from mrcnn import utils
from mrcnn.model import modellib
from mrcnn import visualize
from mrcnn.model import log
#directory to save logs and trained model
MODEL_DIR=os.path.join(ROOT_DIR,"logs")
#Local path to trained weights file
COCO_MODEL_PATH=os.path.join(ROOT_DIR,"mask_rcnn_coco.h5")
#download COCO trained weights from releases if needed
if not os.path.exists(COCO_MODEL_PATH):
utils.download_trained_weights(COCO_MODEL_PATH)
class ShapesConfig(Config):
#configuration for training on the toy shapes dataset
#give the config a name
NAME="shapes"
#no of gpu is 1 and images per gpu is 8
GPU_COUNT=1
IMAGES_PER_GPU=8
#give size to images
IMAGE_MAX_DIM=128
IMAGE_MIN_DIM=128
#setting the anchor scales
RPN_ANCHOR_SCALES=(8,16,32,64,128)
#train ROIS per image
TRAIN_ROIS_PER_IMAGE=32
#using a small epoch
STEPS_PER_EPOCH=100
#use small validation steps since the epoch is small
VALIDATION_STEPS=5
#making an object for config class
config=ShapesConfig()
#for displaying the configuration
config.display()
class ShapesDataset(utils.Dataset):
#regenerates the shapes synthetic dataset.the dataset consists of simple triangles, circles and rectangles placed randomly on a blank surface
def load_shapes(self,count,width,height):
# generate classes
self.add_class("shapes",1,"square")
self.add_class("shapes",2,"circle")
self.add_class("shapes",3,"triangle")
#taking random height and random width and random background color to create a circle/triangle/rectangle, you guessed it, RANDOMLY!
for i in range(count):
bg_color,shapes=self.random_image(height,width)
self.add_image("shapes",imageid=i,path=None.shapes=shapes,color=color,height=height,width=width)
def load_image(self,image_id)
# creates an image for shapes to be placed in
info=self.info[image_id]
bg_color=np.array(info['bg_color']).reshape([1,1,3])
image=np.ones([info['height'],info['width'],3],dtype=uint8)
image=image*bg_color.astype(np.uint8)
for shape,color,dims in info['shapes']:
# images are added to the blank image created by the given funtion
image=self.draw_shape(image,shape,dims,color)
return image # image is the final image (an image the shapes are embedded in)
def image_reference(self,image_id):
info=self.info[image_id]
if info["source"]=="shapes":
return info["shapes"]
else:
super(self._class_)image.reference(self,image_id)
def load_mask(self,image_id):
# creates the mask that is the correct one
info=self.image_info[image_id]
shapes = info['shapes']
count = len(shapes)
mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8)
for i, (shape, _, dims) in enumerate(info['shapes']):
mask[:, :, i:i+1] = self.draw_shape(mask[:, :, i:i+1].copy(),shape, dims, 1)
# Handle occlusions (So that the masks don't overlap)
occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)
for i in range(count-2, -1, -1):
mask[:, :, i] = mask[:, :, i] * occlusion
occlusion = np.logical_and(occlusion, np.logical_not(mask[:, :, i]))
# Map class names to class IDs.
class_ids = np.array([self.class_names.index(s[0]) for s in shapes])
return mask.astype(np.bool), class_ids.astype(np.int32)
def draw_shape(self,image,shape,dims,color):
#draw the shape from the given specifications
x,y,s=dims
if shape=='square'
cv2.rectangle(image,(x-s,y-s),(x+s,y+s),color,-1)
elif shape=="circle":
cv2.circle(image,(x,y),s,color.-1)
elif shape=="triangle":
points=np.array([[(x,y-s),(x-s/math.sin(math.radians(60)),y+s),(x+s/math.sin(math.radians(60)).y+s)]],dtype=np.uint.32)
cv2.fillPoly(image,points,color)
return image
def random_shape(self,height,width):
#generate random specification of the shapes that lie within the
#given height and width boundaries
#also determines the shape of the shape
shape=random.choice(["square","circle","triangle"])
color=tuple([random.randint(0,255) for _ in range(3)])
buffer=20
y=random.randint(buffer,height-buffer-1)
x=random.randint(buffer,width-buffer-1)
s=random.randint(buffer,height//4)
return color,shape,(x,y,s)
def random_image(self,height,width):
#creates random specifications of an image with multiple shapes
#returns the background color of the image
bg_color=np.array([random.randint(0,255) for _in range(3)])
shapes=[]
boxes=[]
N = random.randint(1, 4)
# decides the number of the shapes in the image (yes....randomly)
for _ in range(N):
shape, color, dims = self.random_shape(height, width)
shapes.append((shape, color, dims))
x, y, s = dims
boxes.append([y-s, x-s, y+s, x+s])\
# Apply non-max suppression with 0.3 threshold to avoid
# shapes covering each other (occlusion)
keep_ixs = utils.non_max_suppression(np.array(boxes), np.arange(N), 0.3)
shapes = [s for i, s in enumerate(shapes) if i in keep_ixs]
return bg_color, shapes
# Preparing Different Datasets
#Forming an object for the Dataset
dataset_train=ShapesDataset()
# forming dataset for the algo to train on
dataset_train.load_shapes(500,config,IMAGE_SHAPE[0],config.IMAGE_SHAPE[1])
dataset_train.prepare()
# Preparing dataset for validation as well, just like above (we are not here to spoonfeed you)
validation_train=ShapesDataset()
validation_train.load_shapes(500,config,IMAGE_SHAPE[0],config.IMAGE_SHAPE[1])
validation_train.prepare()
# Displaying the images with the masks
image_ids=np.random.choice(dataset_train.image_ids,4)
for image_id in image_ids:
image=dataset_train.load_image(image_id)
mask,class_ids=dataset_train.load_mask(image_id)
visualize.display_top_masks(image,mask,class_ids,dataset_train.class_names)
# Create model in training mode
model=modellib.MaskRCNN(mode="training",config=config,model_dir=MODEL_DIR)
#which weights to start with
init_with = "coco" # imagenet, coco, or last
if init_with == "imagenet":
model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
# Load weights trained on MS COCO, but skip layers that
# are different due to the different number of classes
# See README for instructions to download the COCO weights
model.load_weights(COCO_MODEL_PATH, by_name=True,
exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
"mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
# Load the last model you trained and continue training
model.load_weights(model.find_last(), by_name=True)
#Training the model
model.train(dataset_train,dataset_val,learning_rate=config.LEARNING_RATE,epochs=1,layers='heads')
# Fine Tuning the model
model.train(dataset_train, dataset_val,learning_rate=config.LEARNING_RATE / 10,epochs=2,layers="all")
class InferenceConfig(ShapesConfig):
GPU_COUNT=1 # We change the Infrastucture now because we are no longer training hence the needs are different
IMAGES_PER_GPU=1
inference_config=InferenceConfig()
model=modellib.MaskRCNN(mode="inference",config=inference_config,model_dir=MODEL_DIR)
#get path to saved weights
model_path=model.find_last()
#load trained weights
print("Loading weights from",model_path)
model.load_weights(model_path,by_name=True)
#test on a random image
image_id=random.choice(dataset.val_image_ids)
original_image, image_meta, gt_class_id, gt_bbox, gt_mask =\
modellib.load_image_gt(dataset_val, inference_config,image_id, use_mini_mask=False)
# Save The logs
log("original_image", original_image)
log("image_meta", image_meta)
log("gt_class_id", gt_class_id)
log("gt_bbox", gt_bbox)
log("gt_mask", gt_mask)
visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id,
dataset_train.class_names, figsize=(8, 8))
#testing with output giving also the class_ids
results=model.detect([original_image],verbose=1)
r=results[0]
visualize.display_instances(original_image,r['rois'],r['masks'],r['cass_ids'],
dataset_val.class_names,r['scores'],ax=get_ax())
# Compute VOC-Style mAP @ IoU=0.5
# Running on 10 images. Increase for better accuracy.
image_ids = np.random.choice(dataset_val.image_ids, 10)
APs = []
for image_id in image_ids:
# Load image and ground truth data
image, image_meta, gt_class_id, gt_bbox, gt_mask =\
modellib.load_image_gt(dataset_val, inference_config,
image_id, use_mini_mask=False)
molded_images = np.expand_dims(modellib.mold_image(image, inference_config), 0)
# Run object detection
results = model.detect([image], verbose=0)
r = results[0]
# Compute AP
AP, precisions, recalls, overlaps =\
utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
r["rois"], r["class_ids"], r["scores"], r['masks'])
APs.append(AP)
print("mAP: ", np.mean(APs))