Skip to content

Open In Colab

Getting Started with Semantic Segmentation using IceVision

Install

Install from PyPI...

# Download the IceVision install script, which handles the installation of
# Torch, Torchvision, IceVision, IceData, MMDetection, YOLOv5 and EfficientDet.
!wget https://raw.githubusercontent.com/airctic/icevision/master/icevision_install.sh

# Run the script with the chosen installation target: cuda11, cuda10 or cpu.
!bash icevision_install.sh cuda11

... or from the IceVision master branch

# # Torch - Torchvision - IceVision - IceData - MMDetection - YOLOv5 - EfficientDet Installation
# !wget https://raw.githubusercontent.com/airctic/icevision/master/icevision_install.sh

# # Choose your installation target: cuda11 or cuda10 or cpu
# !bash icevision_install.sh cuda11 master

Imports

from icevision.all import *
INFO     - The mmdet config folder already exists. No need to downloaded it. Path : /home/ubuntu/.icevision/mmdetection_configs/mmdetection_configs-2.16.0/configs | icevision.models.mmdet.download_configs:download_mmdet_configs:17

Getting and parsing the data

# Fetch the camvid_tiny archive through icedata and point at its inner folder.
data_url = 'https://s3.amazonaws.com/fast-ai-sample/camvid_tiny.tgz'
data_dir = icedata.load_data(data_url, 'camvid_tiny') / 'camvid_tiny'
images_dir, labels_dir = data_dir/'images', data_dir/'labels'

# Class names are read as strings from codes.txt and wrapped in a ClassMap.
# NOTE(review): depending on ClassMap's defaults a background entry may be
# placed ahead of these codes -- confirm that the resulting ids line up with
# the pixel values stored in the label masks.
codes = np.loadtxt(data_dir/'codes.txt', dtype=str)
class_map = ClassMap(list(codes))

# One SemanticSegmentationRecord per image file, keyed by the file stem.
image_files = get_image_files(images_dir)
records = RecordCollection(SemanticSegmentationRecord)

for image_file in pbar(image_files):
    record = records.get_by_record_id(image_file.stem)

    # File path, image size and class map are only set the first time a
    # record id is seen.
    if record.is_new:
        record.set_filepath(image_file)
        record.set_img_size(get_img_size(image_file))
        record.segmentation.set_class_map(class_map)

    # The mask is attached on every iteration; it is looked up in the labels
    # folder under the image stem followed by '_P.png'.
    mask_file = SemanticMaskFile(labels_dir / f'{image_file.stem}_P.png')
    record.segmentation.set_mask(mask_file)

# Keep the collection returned by autofix(), then split it with a
# RandomSplitter using the proportions [0.8, 0.2].
records = records.autofix()
train_records, valid_records = records.make_splits(RandomSplitter([0.8, 0.2]))

# Preview three records. random.choices samples with replacement, so the same
# record can appear more than once.
sample_records = random.choices(records, k=3)
show_records(sample_records, ncols=3)
  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

png

Transforms and datasets

# Both image sizes use a second dimension that is 0.75x the first:
# presize -> ImgSize(512, 384), size -> ImgSize(384, 288).
aspect = .75
presize = ImgSize(512, int(512*aspect))
size = ImgSize(384, int(384*aspect))

# Training augmentations: no padding, a RandomCrop applied with p=0.5 and a
# ShiftScaleRotate whose rotation limit is 2.
random_crop = partial(tfms.A.RandomCrop, p=0.5)
small_rotation = tfms.A.ShiftScaleRotate(rotate_limit=2)
aug_tfms = tfms.A.aug_tfms(
    presize=presize,
    size=size,
    pad=None,
    crop_fn=random_crop,
    shift_scale_rotate=small_rotation,
)

# Training uses the augmentations followed by Normalize; validation only
# resizes to `size` and normalizes.
train_tfms = tfms.A.Adapter([*aug_tfms, tfms.A.Normalize()])
valid_tfms = tfms.A.Adapter([tfms.A.resize(size), tfms.A.Normalize()])

train_ds = Dataset(train_records, train_tfms)
valid_ds = Dataset(valid_records, valid_tfms)

# The same training item (index 0) is fetched three times; presumably each
# fetch re-applies train_tfms, so the three samples can differ.
ds_samples = [train_ds[0] for _ in range(3)]
show_samples(ds_samples, ncols=3)

png

UNET model and dataloaders

model_type = models.fastai.unet

# Both dataloaders share batch size and worker count; only the training
# loader shuffles.
loader_kwargs = dict(batch_size=8, num_workers=4)
train_dl = model_type.train_dl(train_ds, shuffle=True, **loader_kwargs)
valid_dl = model_type.valid_dl(valid_ds, shuffle=False, **loader_kwargs)

# Model built on a resnet34 backbone, with the class count taken from
# class_map and the image size set to `size`.
backbone = model_type.backbones.resnet34()
model = model_type.model(
    backbone=backbone,
    num_classes=class_map.num_classes,
    img_size=size,
)

Defining and training the fastai learner

def accuracy_camvid(pred, target):
    """Return the mean accuracy over positions whose target is not 'Void'.

    Args:
        pred: Per-class scores; the predicted class is taken as the argmax
            over dimension 1.
        target: Ground-truth class ids, indexable by the same boolean mask as
            ``pred.argmax(dim=1)``.

    Returns:
        The mean of the element-wise matches (cast to float) between predicted
        and target ids, restricted to positions where ``target`` differs from
        the id that the module-level ``class_map`` assigns to 'Void'.
    """
    void_id = class_map.get_by_name('Void')
    labelled = target != void_id

    predicted_ids = pred.argmax(dim=1)
    matches = predicted_ids[labelled] == target[labelled]

    return matches.float().mean()
# Wrap the model and both dataloaders in a fastai learner that reports
# accuracy_camvid as its metric.
learn = model_type.fastai.learner(
    dls=[train_dl, valid_dl],
    model=model,
    metrics=[accuracy_camvid],
)
# Run the learning-rate finder; its suggestion is shown as the cell output.
learn.lr_find()
SuggestedLRs(valley=9.120108734350652e-05)

png

# Fine-tune with the positional arguments 10 and 1e-4 (in fastai's
# `fine_tune` these are the epoch count and the base learning rate). The log
# below shows one initial epoch followed by ten more.
learn.fine_tune(10, 1e-4)
epoch train_loss valid_loss accuracy_camvid time
0 3.392160 2.630378 0.255401 00:09
epoch train_loss valid_loss accuracy_camvid time
0 2.602331 2.325062 0.440709 00:07
1 2.381318 1.799831 0.530444 00:07
2 2.142349 1.332237 0.668531 00:07
3 1.917440 1.123698 0.693745 00:07
4 1.747154 0.993772 0.751023 00:07
5 1.598631 0.996451 0.762109 00:07
6 1.491493 0.948187 0.774137 00:07
7 1.395746 0.869413 0.793335 00:07
8 1.311806 0.876654 0.794053 00:07
9 1.256938 0.868124 0.796351 00:07
# Show the model's results for 2 samples drawn from the validation dataset.
model_type.show_results(model, valid_ds, num_samples=2)

png

Inference

# Predict over the validation dataset and display the first three predictions.
preds = model_type.predict(model, valid_ds)
preview = preds[:3]
show_preds(preds=preview)

png

# Build an inference dataloader holding only the first validation item, run
# prediction through it while keeping the images, and show the first result.
single_item = [valid_ds[0]]
infer_dl = model_type.infer_dl(single_item, batch_size=4, shuffle=False)
preds = model_type.predict_from_dl(model, infer_dl, keep_images=True)
first_pred = preds[0]
show_sample(first_pred.pred)
  0%|          | 0/1 [00:00<?, ?it/s]

png