# Source code for datadings.sets.MIT1003_write

"""Create MIT1003 data set files.

This tool will look for the following files in the input directory
and download them if necessary:

- ALLSTIMULI.zip
- DATA.zip

See also:
    http://people.csail.mit.edu/tjudd/WherePeopleLook/index.html
"""
import io
import os
import os.path as pt
from zipfile import ZipFile
import random
from collections import defaultdict

import numpy as np

from ..writer import FileWriter
from ..tools.matlab import loadmat
from . import SaliencyData
from . import SaliencyExperiment
from ..tools import document_keys


# Extend the module docstring with text generated by document_keys.
# The inner call (for SaliencyExperiment) is passed as the ``postfix`` of the
# outer call (for SaliencyData), so its output is embedded in the outer
# result rather than appended separately.
# NOTE(review): presumably document_keys renders the keys of each sample
# type as documentation text - confirm against ..tools.document_keys.
__doc__ += document_keys(
    SaliencyData,
    postfix=document_keys(
        SaliencyExperiment,
        block='',
        prefix='Each experiment has the following keys:'
    )
)


# Remote location that both archives are downloaded from.
BASE_URL = 'http://people.csail.mit.edu/tjudd/WherePeopleLook/'

# Description of the required input archives, keyed by role. Each entry
# holds the local file name ('path'), the download location ('url') and an
# MD5 hex digest ('md5').
# NOTE(review): the digest is presumably checked by prepare_indir after
# download - confirm there.
FILES = {
    role: {
        'path': archive,
        'url': BASE_URL + archive,
        'md5': digest,
    }
    for role, archive, digest in (
        ('stimuli', 'ALLSTIMULI.zip', '0d7df8b954ecba69b6796e77b9afe4b6'),
        ('data', 'DATA.zip', 'ea19d74ad0a0144428c53e9d75c2d71c'),
    )
}


def __iter_fixpoints(datazip, mat_files, stimuluspath):
    """Yield one float32 array per experiment recorded for a stimulus.

    Parameters:
        datazip: Open archive object whose ``read(name)`` returns the raw
            bytes of a ``.mat`` member.
        mat_files: Mapping from stimulus file name to the list of ``.mat``
            member names belonging to it.
        stimuluspath: Member path of the stimulus inside the stimuli
            archive; its second path component is used as the lookup key
            into ``mat_files``.

    Yields:
        The selected entry of each ``.mat`` file, converted to
        ``np.float32``.

    Raises:
        IndexError: If ``stimuluspath`` has fewer than two components, or
            a ``.mat`` file contains no non-private variable or matches
            neither expected layout.
    """
    # Member names inside a ZIP archive always use forward slashes, no
    # matter the host platform, so splitting on os.sep breaks on Windows.
    stimulus = stimuluspath.split('/')[1]
    for exp in mat_files[stimulus]:
        buf = io.BytesIO(datazip.read(exp))
        # Take the first variable that is not loader metadata (metadata
        # keys start with a double underscore).
        mat = [
            v for k, v in loadmat(buf).items()
            if not k.startswith('__')
        ][0]
        # Two nestings are tried: field index 4 first, then field index 0.
        # NOTE(review): presumably the struct layout differs between
        # recordings - confirm against the data set.
        try:
            fixpoints = mat[0][0][4][0][0][2]
        except IndexError:
            fixpoints = mat[0][0][0][0][0][2]
        # Yield outside the try block so that only the indexing above can
        # trigger the fallback.
        yield fixpoints.astype(np.float32)

def write_image(imagezip, datazip, mat_files, stimuluspath, writer):
    """Write one stimulus and all of its experiments as a single item.

    Parameters:
        imagezip: Open archive object the stimulus bytes are read from.
        datazip: Open archive object holding the ``.mat`` experiment files.
        mat_files: Mapping from stimulus file name to the list of ``.mat``
            member names belonging to it.
        stimuluspath: Member path of the stimulus inside ``imagezip``.
        writer: Object whose ``write(item)`` method receives the assembled
            ``SaliencyData`` item.
    """
    stimulusdata = imagezip.read(stimuluspath)
    experiments = [
        SaliencyExperiment(fixpoints, None)
        for fixpoints in __iter_fixpoints(datazip, mat_files, stimuluspath)
    ]
    # Keep only the last two path components as the item's file name.
    # ZIP member names always use forward slashes, so split and join on
    # '/' rather than os.sep to get the same name on every platform.
    filename = '/'.join(stimuluspath.split('/')[-2:])
    item = SaliencyData(
        filename,
        stimulusdata,
        experiments,
    )
    writer.write(item)
def __find_all_experiments(datazip):
    """Group the ``.mat`` members of the data archive by stimulus name.

    Only members whose path has exactly three components are considered.
    The part of the last component before its first ``'.'`` plus
    ``'.jpeg'`` becomes the key.

    Parameters:
        datazip: Open archive object providing ``namelist()``.

    Returns:
        A ``defaultdict(list)`` mapping stimulus file name to the list of
        matching ``.mat`` member names, in archive order. Unknown keys
        yield an empty list.
    """
    mapping = defaultdict(list)
    for member in datazip.namelist():
        if not member.endswith('.mat'):
            continue
        # ZIP member names always use forward slashes; splitting on os.sep
        # would never produce three parts on Windows.
        parts = member.split('/')
        if len(parts) == 3:
            stem = parts[2].split('.')[0]
            mapping[stem + '.jpeg'].append(member)
    return mapping
def write_sets(files, outdir, args):
    """Write every ``.jpeg`` stimulus with its experiments to one file.

    Parameters:
        files: Mapping with ``'stimuli'`` and ``'data'`` entries, each
            providing the archive location under ``'path'``.
        outdir: Directory in which ``MIT1003.msgpack`` is created.
        args: Parsed arguments; ``no_confirm`` is forwarded to the writer
            as ``overwrite`` and ``shuffle`` enables random item order.
    """
    with ZipFile(files['stimuli']['path']) as imagezip:
        with ZipFile(files['data']['path']) as datazip:
            experiments = __find_all_experiments(datazip)
            stimuli = [
                member for member in imagezip.namelist()
                if member.endswith('.jpeg')
            ]
            target = pt.join(outdir, 'MIT1003.msgpack')
            with FileWriter(
                target,
                total=len(stimuli),
                overwrite=args.no_confirm,
            ) as writer:
                if args.shuffle:
                    random.shuffle(stimuli)
                for stimuluspath in stimuli:
                    write_image(
                        imagezip, datazip, experiments, stimuluspath, writer
                    )
def main():
    """Parse the command line, prepare the input files and write the set.

    The output directory falls back to the input directory when no
    explicit output directory is given.
    """
    from ..tools.argparse import make_parser
    from ..tools import prepare_indir

    args = make_parser(__doc__).parse_args()
    outdir = args.outdir if args.outdir else args.indir

    files = prepare_indir(FILES, args)

    try:
        write_sets(files, outdir, args)
    except FileExistsError:
        # Deliberately ignored.
        # NOTE(review): presumably raised when the output file already
        # exists and must not be overwritten - confirm in FileWriter.
        pass
# Script entry point.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Leave quietly on Ctrl-C instead of showing a traceback.
        pass
    finally:
        # Always finish with a newline on stdout.
        print()