Getting Started with Semantic Segmentation using IceVision

Install

Install from PyPI...

# Torch - Torchvision - IceVision - IceData - MMDetection - YOLOv5 - EfficientDet Installation
# Step 1: download the IceVision installer script from the project's master branch.
!wget https://raw.githubusercontent.com/airctic/icevision/master/icevision_install.sh

# Choose your installation target: cuda11 or cuda10 or cpu
# Step 2: run the script; the target is passed as its first argument.
!bash icevision_install.sh cuda11

... or from the IceVision master branch

# # Torch - Torchvision - IceVision - IceData - MMDetection - YOLOv5 - EfficientDet Installation
# !wget https://raw.githubusercontent.com/airctic/icevision/master/icevision_install.sh

# # Choose your installation target: cuda11 or cuda10 or cpu
# !bash icevision_install.sh cuda11 master

Imports

from icevision.all import *

INFO     - The mmdet config folder already exists. No need to downloaded it. Path : /home/ubuntu/.icevision/mmdetection_configs/mmdetection_configs-2.16.0/configs | icevision.models.mmdet.download_configs:download_mmdet_configs:17

Getting and parsing the data

# Fetch the CamVid-tiny archive through icedata; the dataset root is the
# 'camvid_tiny' sub-folder of whatever `load_data` returns.
data_url = 'https://s3.amazonaws.com/fast-ai-sample/camvid_tiny.tgz'
data_dir = icedata.load_data(data_url, 'camvid_tiny') / 'camvid_tiny'

# Input images and their label masks sit in separate sub-folders of the root.
images_dir, labels_dir = data_dir / 'images', data_dir / 'labels'
image_files = get_image_files(images_dir)

# codes.txt is loaded as an array of strings and wrapped in the ClassMap
# that the parsing loop attaches to every record.
codes = np.loadtxt(data_dir / 'codes.txt', dtype=str)
class_map = ClassMap(list(codes))

# Collection of SemanticSegmentationRecord objects, addressed by record id.
records = RecordCollection(SemanticSegmentationRecord)

for image_file in pbar(image_files):
    # The image file stem serves as the record id.
    # NOTE(review): get_by_record_id presumably creates the record on first
    # access (hence the `is_new` check below) — confirm against IceVision docs.
    record = records.get_by_record_id(image_file.stem)

    # Image-level fields are only filled in when the record is reported as new.
    if record.is_new:
        record.set_filepath(image_file)
        record.set_img_size(get_img_size(image_file))
        record.segmentation.set_class_map(class_map)

    # The mask of image `<stem>` is the file `<stem>_P.png` in the labels folder.
    mask_file = SemanticMaskFile(labels_dir / f'{image_file.stem}_P.png')
    record.segmentation.set_mask(mask_file)

# NOTE(review): autofix presumably validates/repairs the parsed records —
# confirm the exact behaviour in the IceVision docs.
records = records.autofix()
# Random split into two groups using the proportions 0.8 / 0.2 (train / validation).
train_records, valid_records = records.make_splits(RandomSplitter([0.8, 0.2]))

# Visual sanity check on three records. random.choices samples WITH replacement,
# so the same record can be drawn (and shown) more than once.
sample_records = random.choices(records, k=3)
show_records(sample_records, ncols=3)

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

png

Transforms and datasets

# Two working resolutions; in each, the second value is 0.75 x the first:
#   presize -> ImgSize(512, 384), handed to aug_tfms as `presize`
#   size    -> ImgSize(384, 288), handed to aug_tfms as `size` and reused for
#              the validation resize.
presize = ImgSize(512, int(512 * .75))
size = ImgSize(384, int(384 * .75))

aug_tfms = tfms.A.aug_tfms(
    presize=presize,
    size=size,
    pad=None,
    crop_fn=partial(tfms.A.RandomCrop, p=0.5),
    shift_scale_rotate=tfms.A.ShiftScaleRotate(rotate_limit=2),
)

# Training pipeline: the augmentations above followed by normalisation.
train_tfms = tfms.A.Adapter([*aug_tfms, tfms.A.Normalize()])
# Validation pipeline: resize to `size` and normalise, without the augmentations.
valid_tfms = tfms.A.Adapter([tfms.A.resize(size), tfms.A.Normalize()])

# Pair each split with its transform pipeline.
train_ds = Dataset(train_records, train_tfms)
valid_ds = Dataset(valid_records, valid_tfms)

# The same item (index 0) is fetched three times.
# NOTE(review): presumably each access re-applies the random training
# transforms, so the three samples show different augmentations — confirm.
ds_samples = [train_ds[0] for _ in range(3)]
show_samples(ds_samples, ncols=3)

png

UNET model and dataloaders

# Every model-specific helper used below (dataloaders, backbones, model factory,
# learner, prediction) is looked up on this module.
model_type = models.fastai.unet

# Both dataloaders use batches of 8 and 4 workers; only the training one shuffles.
train_dl = model_type.train_dl(train_ds, batch_size=8, num_workers=4, shuffle=True)
valid_dl = model_type.valid_dl(valid_ds, batch_size=8, num_workers=4, shuffle=False)

# ResNet-34 backbone; the number of classes comes from the class map and the
# model receives the same `size` that the transforms resize/crop to.
backbone = model_type.backbones.resnet34()
model = model_type.model(backbone=backbone, num_classes=class_map.num_classes, img_size=size)

Defining and training the `fastai` learner

def accuracy_camvid(pred, target):
    """Return the fraction of non-'Void' pixels whose predicted class matches the target.

    Args:
        pred: Model output; the predicted class is taken as the argmax over
            dim 1 (presumably the class dimension — confirm against the model).
        target: Ground-truth class ids, indexable by the same boolean mask as
            the argmax of ``pred``.

    Returns:
        The mean of the element-wise matches over the kept pixels, as a float tensor.
    """
    # Pixels labelled 'Void' are excluded from the metric.
    void_id = class_map.get_by_name('Void')
    labelled = target != void_id

    predicted_ids = pred.argmax(dim=1)
    matches = predicted_ids[labelled] == target[labelled]
    return matches.float().mean()

# Wrap the dataloaders and the model in a fastai learner that reports
# accuracy_camvid as a metric.
learn = model_type.fastai.learner(dls=[train_dl, valid_dl], model=model, metrics=[accuracy_camvid])

# Run the learning-rate finder before choosing the rate used for training.
learn.lr_find()

SuggestedLRs(valley=9.120108734350652e-05)

png

# Fine-tune for 10 epochs with a learning rate of 1e-4, close to the value suggested
# by lr_find. The logged output shows one extra initial epoch before the 10 epochs
# (presumably fastai's frozen warm-up epoch — see the fastai fine_tune docs).
learn.fine_tune(10, 1e-4)

epoch	train_loss	valid_loss	accuracy_camvid	time
0	3.392160	2.630378	0.255401	00:09

epoch	train_loss	valid_loss	accuracy_camvid	time
0	2.602331	2.325062	0.440709	00:07
1	2.381318	1.799831	0.530444	00:07
2	2.142349	1.332237	0.668531	00:07
3	1.917440	1.123698	0.693745	00:07
4	1.747154	0.993772	0.751023	00:07
5	1.598631	0.996451	0.762109	00:07
6	1.491493	0.948187	0.774137	00:07
7	1.395746	0.869413	0.793335	00:07
8	1.311806	0.876654	0.794053	00:07
9	1.256938	0.868124	0.796351	00:07

# Display the model's results for 2 samples taken from the validation dataset.
model_type.show_results(model, valid_ds, num_samples=2)

png

Inference

# Predict directly on the whole validation dataset, then display the first three predictions.
preds = model_type.predict(model, valid_ds)
show_preds(preds=preds[:3])

png

# Alternative: batched inference through a dataloader, here built from a single
# validation item. Note that this rebinds `preds`.
infer_dl = model_type.infer_dl([valid_ds[0]], batch_size=4, shuffle=False)
# NOTE(review): keep_images=True presumably keeps the input image with each
# prediction so it can be displayed below — confirm in the IceVision docs.
preds = model_type.predict_from_dl(model, infer_dl, keep_images=True)

show_sample(preds[0].pred)

  0%|          | 0/1 [00:00<?, ?it/s]

png

Getting Started with Semantic Segmentation using IceVision

Install

Imports

Getting and parsing the data

Transforms and datasets

UNET model and dataloaders

Defining and training the fastai learner

Inference

Defining and training the `fastai` learner