# Source code for datadings.sets.Vaihingen_write

"""Create Vaihingen data set files.

This tool will look for the ISPRS_semantic_labeling_Vaihingen directory in the
input directory.

See also:
    http://www2.isprs.org/commissions/comm3/wg4/2d-sem-label-vaihingen.html

Note:
    Requests to download the dataset can be made by filling out the form on
    the website.

Warning:
    Images, labels, and masks are numpy arrays, not images!

"""
import numpy as np
import os.path as pt

from ..writer import FileWriter
from . import MaskedImageSegmentationData
from .Vaihingen import CROP_SIZE
from .Vaihingen import COLOR_TO_CLASS_MAP
from ..tools import split_array
from ..tools import tiff_to_nd_array
from ..tools import document_keys


# Append the text returned by document_keys(MaskedImageSegmentationData) to
# the module docstring (presumably a description of the keys each sample
# contains -- confirm against document_keys). main() passes __doc__ on to
# make_parser, so this text also reaches the argument parser.
__doc__ += document_keys(MaskedImageSegmentationData)


def write(writer, img, labels, mask, filename):
    """Build one sample from the given pieces and hand it to ``writer``.

    Parameters:
        writer: object whose ``write`` method receives the sample.
        img: image data of the sample.
        labels: label data of the sample.
        mask: mask data of the sample.
        filename: value passed as the first argument to
            ``MaskedImageSegmentationData`` (ahead of image, labels, mask).
    """
    sample = MaskedImageSegmentationData(filename, img, labels, mask)
    writer.write(sample)


def map_color_values_to_class_indices(img):
    """Translate a color-coded label image into a 2D array of class indices.

    Every pixel whose three values along axis 2 equal a color key of
    ``COLOR_TO_CLASS_MAP`` receives the index stored for that color.
    Pixels that match none of the colors keep the initial value 0.

    Parameters:
        img: array indexed as (row, column, channel) with 3 channels.

    Returns:
        ``uint8`` array of shape ``(img.shape[0], img.shape[1])``.
    """
    rows, cols = img.shape[0], img.shape[1]
    class_indices = np.zeros((rows, cols), dtype=np.uint8)
    for color, class_index in COLOR_TO_CLASS_MAP.items():
        # Shape the color so it broadcasts against every pixel of the image.
        reference = np.array(color).reshape((1, 1, 3))
        # A pixel matches only if all of its channel values are equal.
        matches = np.all(img == reference, axis=2)
        class_indices[matches] = class_index
    return class_indices


def images_label_dsm_iter(indir, image_ids):
    """Yield image, label, and DSM arrays for the requested areas.

    Files are looked up in the ``top``, ``gts_for_participants``, and ``dsm``
    sub-directories of ``ISPRS_semantic_labeling_Vaihingen`` inside ``indir``.

    Parameters:
        indir: directory containing the data set directory.
        image_ids: iterable of area identifiers that are inserted into the
            file name ``top_mosaic_09cm_area<id>.tif``.

    Yields:
        Tuples ``(file name, image, labels, dsm)``: the image cast to
        ``uint8``, the labels as ``int64`` class indices, and the first
        entry along axis 0 of the DSM array (presumably its only band --
        confirm against ``tiff_to_nd_array``).
    """
    root = pt.join(indir, 'ISPRS_semantic_labeling_Vaihingen')
    top_root = pt.join(root, 'top')
    dsm_root = pt.join(root, 'dsm')
    gt_root = pt.join(root, 'gts_for_participants')

    for area in image_ids:
        name = "top_mosaic_09cm_area%s.tif" % (area,)
        dsm_name = name.replace("top_", "dsm_")

        image = tiff_to_nd_array(pt.join(top_root, name)).astype(np.uint8)

        # The label file is color coded; move axis 0 to the end so that the
        # color lookup can compare along axis 2.
        colors = tiff_to_nd_array(pt.join(gt_root, name), type=np.uint8)
        class_ids = map_color_values_to_class_indices(
            colors.transpose(1, 2, 0)
        )

        heights = tiff_to_nd_array(pt.join(dsm_root, dsm_name), type=np.int32)

        yield name, image, class_ids.astype(np.int64), heights[0]


def write_sets(indir, outdir, args, crop_size=(CROP_SIZE, CROP_SIZE)):
    """Write the Vaihingen training and validation set files.

    The training file ``Vaihingen_train.msgpack`` receives one sample per
    whole image. The validation file ``Vaihingen_val.msgpack`` receives one
    sample per crop produced by ``split_array``, keyed
    ``<file name>_<crop index>``.

    Both writers are opened with ``overwrite=args.no_confirm`` so that the
    flag applies to the two output files alike.

    Parameters:
        indir: directory handed to ``images_label_dsm_iter`` to locate the
            source images.
        outdir: directory in which the two output files are created.
        args: object providing a ``no_confirm`` attribute (the parsed
            command-line arguments when called from ``main``).
        crop_size: pair of values unpacked as the positional arguments that
            follow the array in each ``split_array`` call.
    """
    # Training split -> whole images
    train_file = pt.join(outdir, 'Vaihingen_train.msgpack')
    train_ids = [1, 3, 5, 7, 13, 17, 21, 23, 26, 32, 37]
    try:
        with FileWriter(train_file, overwrite=args.no_confirm) as writer:
            for fn, train_img, train_labels, train_dsm in \
                    images_label_dsm_iter(indir, train_ids):
                write(writer, train_img, train_labels, train_dsm, fn)
    except FileExistsError:
        # Deliberate best effort: leave the existing file alone and carry on
        # with the validation split (presumably raised by FileWriter when the
        # file exists and is not to be overwritten -- confirm).
        pass

    # Validation split -> images split into crops
    val_file = pt.join(outdir, 'Vaihingen_val.msgpack')
    val_ids = [11, 15, 28, 30, 34]
    try:
        with FileWriter(val_file, overwrite=args.no_confirm) as writer:
            for fn, train_img, train_labels, train_dsm in \
                    images_label_dsm_iter(indir, val_ids):
                # Labels and DSM get a leading axis before splitting; it is
                # removed again from every crop below via index 0.
                train_labels = np.expand_dims(train_labels, axis=0)
                train_dsm = np.expand_dims(train_dsm, axis=0)
                crops = enumerate(zip(
                    split_array(train_img, *crop_size),
                    split_array(train_labels, *crop_size),
                    split_array(train_dsm, *crop_size),
                ))
                for idx, (sub_img, sub_label, sub_dsm) in crops:
                    sub_img = np.array(sub_img).astype(np.uint8)
                    sub_label = np.array(sub_label[0]).astype(np.int64)
                    sub_dsm = np.array(sub_dsm[0])
                    write(writer, sub_img, sub_label, sub_dsm,
                          "%s_%s" % (fn, idx))
    except FileExistsError:
        # Same best-effort handling as for the training file.
        pass


def main():
    """Parse the command-line arguments and write the data set files.

    The parser is built from the module docstring. When no output directory
    is given, the input directory is used instead.
    """
    from ..tools.argparse import make_parser

    args = make_parser(
        __doc__,
        skip_verification=False,
        shuffle=False,
    ).parse_args()
    target_dir = args.outdir if args.outdir else args.indir
    write_sets(args.indir, target_dir, args)


# Script entry point: only runs when the module is executed directly.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Interrupting with Ctrl-C ends the program without a traceback.
        pass
    finally:
        # Always print a trailing newline, whether main() finished, was
        # interrupted, or raised another exception.
        print()