Psst.. new poll here.
Psst.. new forums here.
Microsoft is blocking us again (TY IP Reputation!) so just use oauth login instead. :)
Paste
Pasted as Python by registered user JackLee ( 6 years ago )
import json
from sklearn.model_selection import KFold
from PIL import Image
import pandas as pd
def df_to_coco(samples, img_root=None):
    """Convert a dataframe of per-frame samples into a COCO-format dictionary.

    Args:
        samples (pd.DataFrame): one row per frame. Each row must provide
            ``id`` and ``frame_name`` (joined onto ``img_root`` to locate
            the image) and ``obstacles``, an iterable of dicts that each
            carry ``label_id`` and ``bbox`` (unpacked as x1, y1, x2, y2).
            The dataframe index is used as the COCO image id.
        img_root (str): path to the image root. Defaults to
            ``../data/amap_traffic_final_train_data/``.

    Returns:
        dict: COCO-format data with ``images``, ``type``, ``annotations``
        and ``categories`` entries. Only obstacles whose ``label_id`` is
        at most 2 are exported.
    """
    # Local import: the file-level imports never bring ``osp`` into scope,
    # so the original body failed with NameError on the first row.
    import os.path as osp

    def clip(value, upper):
        # Clamp into [0, upper] and truncate to int. Replaces pd.np.clip,
        # which relies on the ``pandas.np`` alias removed in pandas 2.0.
        return int(min(max(value, 0), upper))

    coco_data = dict(
        images=[
            # license, url, file_name, height, width, id
        ],
        type='instances',
        annotations=[
            # segmentation, area, iscrowd, image_id, bbox, category_id, id
        ],
        categories=[
            # supercategory, id, name
            dict(supercategory=None, id=1, name='barrier'),
            dict(supercategory=None, id=2, name='barricade'),
        ],
    )
    img_root = img_root or '../data/amap_traffic_final_train_data/'
    annotations = coco_data['annotations']

    for image_id, sample in samples.iterrows():
        # file_name is stored relative to img_root; the full path is only
        # needed to read the image dimensions.
        file_name = osp.join(sample['id'], sample['frame_name'])
        im_name = osp.join(img_root, file_name)
        # Context manager closes the file handle once the size is read.
        with Image.open(im_name) as im:
            w, h = im.size
        coco_data['images'].append(dict(
            file_name=file_name,
            height=h,
            width=w,
            id=image_id))

        for ann in sample['obstacles']:
            if ann['label_id'] > 2:
                continue  # only categories 1 (barrier) and 2 (barricade)
            x1, y1, x2, y2 = ann['bbox']
            # Keep every corner inside the image.
            x1 = clip(x1, w - 1)
            x2 = clip(x2, w - 1)
            y1 = clip(y1, h - 1)
            y2 = clip(y2, h - 1)
            box_w = x2 - x1
            box_h = y2 - y1
            # NOTE(review): annotation ids start at 0; some COCO evaluation
            # tooling treats id 0 as "no match" - confirm this is acceptable.
            annotations.append(
                dict(
                    segmentation=[[x1, y1, x2, y1, x2, y2, x1, y2]],
                    area=box_w * box_h,
                    iscrowd=0,
                    image_id=image_id,
                    bbox=[x1, y1, box_w, box_h],
                    category_id=ann['label_id'],
                    id=len(annotations)))
    return coco_data
# Load the raw annotation file.
# NOTE(review): this path goes three levels up, while df_to_coco defaults to
# ../data/ and the outputs below go to ../user_data/ - confirm the intended
# working directory.
with open('../../../data/amap_traffic_final_train_0906.json') as f:
    ds = json.load(f)

# Flatten to one record per frame (image-level annotations), keeping only
# frames that carry an 'obstacles' entry and tagging each with its parent id.
samples = []
for ann in ds['annotations']:
    for frame in ann['frames']:
        if 'obstacles' in frame:
            samples.append(dict(id=ann['id'], **frame))
samples = pd.DataFrame(samples)

# Keep only frames with at least one obstacle of label_id 1 or 2.
samples = samples.loc[samples['obstacles'].apply(
    lambda obstacles: any(o['label_id'] <= 2 for o in obstacles))]
ids = list(samples.groupby(by='id').groups.keys())

# Folds are built over the parent ids rather than over frames, so all frames
# sharing an id land on the same side of the train/val split.
k = 5  # k-fold
for i, (train_inds, val_inds) in enumerate(KFold(k, shuffle=True, random_state=666).split(ids)):
    # Plain indexing replaces pd.np.array(ids)[...]: the pandas.np alias was
    # removed in pandas 2.0.
    train_ids = [ids[j] for j in train_inds]
    val_ids = [ids[j] for j in val_inds]
    # isin() replaces a per-row linear scan of the id lists.
    train_samples = samples.loc[samples['id'].isin(train_ids)]
    val_samples = samples.loc[samples['id'].isin(val_ids)]
    print(f'{k}-fold: {i+1}')
    print(f'len(train_samples): {len(train_samples)}, len(val_samples): {len(val_samples)}')
    print(f'len(train_ids): {len(train_ids)}, len(val_ids): {len(val_ids)}')
    train_data = df_to_coco(train_samples)
    val_data = df_to_coco(val_samples)
    with open(f'../user_data/barrier_barricade_coco_train_{i+1}.json', 'w') as f1, \
            open(f'../user_data/barrier_barricade_coco_val_{i+1}.json', 'w') as f2:
        json.dump(train_data, f1)
        json.dump(val_data, f2)
Revise this Paste
Parent: 110312
Children: 110314