# Source code for pywsi.cli

# -*- coding: utf-8 -*-
"""Console script for pywsi."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from pywsi.io.operations import get_annotation_bounding_boxes
from pywsi.io.operations import get_annotation_polygons
from pywsi.io.operations import path_leaf
from pywsi.io.operations import read_as_rgb
from pywsi.io.operations import WSIReader
from pywsi.io.tiling import get_all_patches_from_slide

from pywsi.morphology.patch_extractor import TissuePatch
from pywsi.morphology.mask import get_common_interior_polygons
from tqdm import tqdm
import warnings
from multiprocessing import Pool
from pywsi.segmentation import label_nuclei, summarize_region_properties

from collections import defaultdict
import os
import numpy as np
from six import iteritems

import click
from shapely.geometry import Polygon as shapelyPolygon
from click_help_colors import HelpColorsGroup
import glob
from PIL import Image
# The flag name suggests this silences click's warning about
# ``unicode_literals`` (imported from __future__ near the top of this
# module) -- TODO confirm against the installed click version.
click.disable_unicode_literals_warning = True
# Shared by every sub-command so that both ``-h`` and ``--help`` are
# registered as help option names.
CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
import pandas as pd
# NOTE: installs an 'ignore' filter for *all* warnings, which also covers
# the ``warnings.warn`` calls made elsewhere in this module.
warnings.filterwarnings('ignore')


# Root command group: every sub-command in this module registers itself
# through ``@cli.command``.  No ``help=`` is passed here, so the docstring
# below is what click shows as the group's help text.
@click.group(cls=HelpColorsGroup,
             help_headers_color='yellow',
             help_options_color='green')
def cli():
    """pywsi: tool for processing WSIs"""


@cli.command('create-tissue-masks',
             context_settings=CONTEXT_SETTINGS,
             help='Extract tissue masks')
@click.option('--indir',
              help='Root directory with all tumor WSIs',
              required=True)
@click.option('--level',
              type=int,
              help='Level at which to extract patches',
              required=True)
@click.option('--savedir',
              help='Root directory to save extract images to',
              required=True)
def extract_tissue_masks_cmd(indir, level, savedir):
    """Store one tissue mask per ``*.tif`` slide found directly in ``indir``.

    For each slide a ``TissuePatch`` is built at ``level`` and its
    ``otsu_thresholded`` attribute is written with ``np.save`` to
    ``<savedir>/level_<level>/<uid>_TissuePatch.npy``.
    """
    slide_pattern = os.path.join(indir, '*.tif')
    level_tag = 'level_{}'.format(level)
    for slide_path in tqdm(glob.glob(slide_pattern, recursive=False)):
        reader = WSIReader(slide_path, 40)
        tissue = TissuePatch(reader, level=level)
        slide_id = reader.uid.replace('.tif', '')
        target = os.path.join(savedir, level_tag,
                              slide_id + '_TissuePatch.npy')
        # The output directory is created lazily, i.e. only once there is
        # at least one slide to write.
        os.makedirs(os.path.dirname(target), exist_ok=True)
        np.save(target, tissue.otsu_thresholded)


@cli.command('create-annotation-masks',
             context_settings=CONTEXT_SETTINGS,
             help='Extract annotation masks')
@click.option('--indir',
              help='Root directory with all tumor WSIs',
              required=True)
@click.option('--jsondir', help='Root directory with all jsons', required=True)
@click.option('--level',
              type=int,
              help='Level at which to extract patches',
              required=True)
@click.option('--savedir',
              help='Root directory to save extract images to',
              required=True)
def extract_annotation_masks_cmd(indir, jsondir, level, savedir):
    """Create annotation masks for every slide that has an annotation json.

    Expected layout:

        raw data (indir): tumor_wsis/tumor001.tif
        json data (jsondir): tumor_jsons/tumor001.json

    Slides without a matching ``<uid>.json`` in ``jsondir`` are reported
    and skipped.  The actual mask generation is delegated to
    ``WSIReader.annotation_masked``, which is told to save under
    ``<savedir>/level_<level>``.
    """
    destination = os.path.join(savedir, 'level_{}'.format(level))
    slide_paths = glob.glob(os.path.join(indir, '*.tif'), recursive=False)
    for slide_path in tqdm(slide_paths):
        reader = WSIReader(slide_path, 40)
        slide_id = reader.uid.replace('.tif', '')
        annotation_json = os.path.join(jsondir, slide_id + '.json')
        if os.path.exists(annotation_json):
            reader.annotation_masked(
                json_filepath=annotation_json, level=level,
                savedir=destination)
        else:
            print('Skipping {} as annotation json not found'.format(slide_id))


@cli.command(
    'extract-tumor-patches',
    context_settings=CONTEXT_SETTINGS,
    help='Extract tumor patches from tumor WSIs')
@click.option(
    '--indir', help='Root directory with all tumor WSIs', required=True)
@click.option(
    '--annmaskdir',
    help='Root directory with all annotation mask WSIs',
    required=True)
@click.option(
    '--tismaskdir',
    help='Root directory with all tissue masks',
    required=True)
@click.option(
    '--level',
    type=int,
    help='Level at which to extract patches',
    required=True)
@click.option(
    '--patchsize',
    type=int,
    default=128,
    help='Patch size which to extract patches')
@click.option(
    '--stride', type=int, default=128, help='Stride to generate next patch')
@click.option(
    '--savedir',
    help='Root directory to save extract images to',
    required=True)
@click.option(
    '--threshold',
    help='Threshold for a cell to be called tumor',
    default=0,
    type=int)
def extract_tumor_patches_cmd(indir, annmaskdir, tismaskdir, level, patchsize,
                              stride, savedir, threshold):
    """Extract tumor only patches from tumor WSIs.

    We assume the masks have already been generated at level say x.
    We also assume the files are arranged in the following hierarchy:

        raw data (indir): tumor_wsis/tumor001.tif
        annotation masks (annmaskdir):
            level_x/tumor001_AnnotationTumorMask.npy
            level_x/tumor001_AnnotationNormalMask.npy
            level_x/tumor001_AnnotationColored.npy
        tissue masks (tismaskdir): level_x/tumor001_TissuePatch.npy

    Output:
        <savedir>/level_x/tumor001_<xcenter>_<ycenter>_<patchsize>.png

    Strategy:

        1. Load tumor annotated masks
        2. Load normal annotated masks
        3. Keep pixels that are tumor AND NOT normal AND tissue.

        Truth table:

            tumor_mask  normal_mask  tumour_for_sure
                1           0            1
                1           1            0
                0           0            0
                0           1            0

    Parameters
    ----------
    threshold: int
               Minimum number of positive mask pixels a window must exceed
               to be saved.  Values <= 0 select the default of half the
               patch area.
    stride: int
            Accepted for CLI compatibility; the value is overridden below.
    """
    tumor_wsis = glob.glob(os.path.join(indir, '*.tif'), recursive=False)

    # Assume that we want to generate these patches at level 0
    # So in order to ensure stride at a lower level
    # this needs to be discounted.
    # NOTE: this deliberately replaces whatever was passed via --stride.
    stride = min(int(patchsize / (2**level)), 4)

    # Resolve the default threshold once instead of on every pixel.
    if threshold <= 0:
        threshold = 0.5 * (patchsize * patchsize)

    ann_level_dir = os.path.join(annmaskdir, 'level_{}'.format(level))
    tis_level_dir = os.path.join(tismaskdir, 'level_{}'.format(level))

    for tumor_wsi in tqdm(tumor_wsis):
        last_used_x = None
        last_used_y = None
        wsi = WSIReader(tumor_wsi, 40)
        uid = wsi.uid.replace('.tif', '')
        colored_path = os.path.join(ann_level_dir,
                                    uid + '_AnnotationColored.npy')
        if not os.path.exists(colored_path):
            print('Skipping {} as mask not found'.format(uid))
            continue
        normal_mask = np.load(
            os.path.join(ann_level_dir, uid + '_AnnotationNormalMask.npy'))
        tumor_mask = np.load(
            os.path.join(ann_level_dir, uid + '_AnnotationTumorMask.npy'))
        tissue_mask = np.load(
            os.path.join(tis_level_dir, uid + '_TissuePatch.npy'))
        colored_patch = np.load(colored_path)

        # tumor AND NOT normal AND tissue.  Boolean logic is used instead
        # of an integer subtraction so that unsigned mask dtypes cannot
        # wrap around (0 - 1 == 255 for uint8).
        tumor_only = np.logical_and(tumor_mask, np.logical_not(normal_mask))
        subtracted_mask = np.logical_and(tumor_only, tissue_mask)

        x_ids, y_ids = np.where(subtracted_mask)
        for x_center, y_center in zip(x_ids, y_ids):
            x_topleft = int(x_center - patchsize / 2)
            y_topleft = int(y_center - patchsize / 2)
            if x_topleft < 0 or y_topleft < 0:
                # A negative start index makes NumPy slice from the far end
                # of the array instead of clipping at the border, so windows
                # that stick out on the top/left are skipped explicitly.
                continue
            x_topright = x_topleft + patchsize
            y_bottomright = y_topleft + patchsize
            mask = subtracted_mask[x_topleft:x_topright, y_topleft:
                                   y_bottomright]
            # Only keep windows with enough positive pixels.
            if np.sum(mask) <= threshold:
                continue

            if last_used_x is None:
                # First qualifying window of this slide: always accept it.
                last_used_x = x_center
                last_used_y = y_center
                diff_x = stride
                diff_y = stride
            else:
                diff_x = np.abs(x_center - last_used_x)
                diff_y = np.abs(y_center - last_used_y)

            if diff_x >= stride and diff_y >= stride:
                out_file = '{}/level_{}/{}_{}_{}_{}.png'.format(
                    savedir, level, uid, x_center, y_center, patchsize)
                patch = colored_patch[x_topleft:x_topright, y_topleft:
                                      y_bottomright, :]
                os.makedirs(os.path.dirname(out_file), exist_ok=True)
                img = Image.fromarray(patch)
                img.save(out_file)
                last_used_x = x_center
                last_used_y = y_center


@cli.command(
    'extract-normal-patches',
    context_settings=CONTEXT_SETTINGS,
    help='Extract normal patches from tumor WSIs')
@click.option(
    '--indir', help='Root directory with all tumor WSIs', required=True)
@click.option(
    '--annmaskdir',
    help='Root directory with all annotation mask WSIs',
    required=False)
@click.option(
    '--tismaskdir',
    help='Root directory with all tissue masks',
    required=True)
@click.option(
    '--level',
    type=int,
    help='Level at which to extract patches',
    required=True)
@click.option(
    '--patchsize',
    type=int,
    default=128,
    help='Patch size which to extract patches')
@click.option(
    '--stride', type=int, default=128, help='Stride to generate next patch')
@click.option(
    '--savedir',
    help='Root directory to save extract images to',
    required=True)
def extract_normal_patches_cmd(indir, annmaskdir, tismaskdir, level, patchsize,
                               stride, savedir):
    """Extract normal only patches from normal, tumor and test WSIs.

    We assume the masks have already been generated at level say x.
    We also assume the files are arranged in the following hierarchy:

        raw data (indir): wsis/tumor001.tif, wsis/normal001.tif, ...
        annotation masks (annmaskdir, tumor/test slides only):
            level_x/tumor001_AnnotationTumorMask.npy
            level_x/tumor001_AnnotationNormalMask.npy
            level_x/tumor001_AnnotationColored.npy
        tissue masks (tismaskdir): level_x/tumor001_TissuePatch.npy

    Output:
        <savedir>/level_x/<uid>_<xcenter>_<ycenter>_<patchsize>.png

    Strategy:

        * uid contains 'normal': every tissue pixel is a candidate and the
          image comes from ``wsi.get_patch_by_level(0, 0, level)``.
        * uid contains 'tumor' or 'test': candidates are pixels that are
          normal AND NOT tumor AND tissue; the image is the saved
          ``_AnnotationColored.npy`` array.  Slides without a
          ``_AnnotationNormalMask.npy`` are skipped.
        * anything else is reported and skipped.

        Truth table (tumor/test slides):

            normal_mask  tumor_mask  normal_for_sure
                1           0            1
                1           1            0
                0           0            0
                0           1            0

    ``stride`` is accepted for CLI compatibility; its value is overridden
    below.

    Raises
    ------
    click.UsageError
        If a tumor/test slide is encountered but ``--annmaskdir`` was not
        given.
    """
    all_wsis = glob.glob(os.path.join(indir, '*.tif'), recursive=True)

    # Assume that we want to generate these patches at level 0
    # So in order to ensure stride at a lower level
    # this needs to be discounted.
    # NOTE: this deliberately replaces whatever was passed via --stride.
    stride = min(int(patchsize / (2**level)), 4)
    min_positive = 0.5 * (patchsize * patchsize)
    level_tag = 'level_{}'.format(level)

    for wsi_path in tqdm(all_wsis):
        last_used_x = None
        last_used_y = None
        wsi = WSIReader(wsi_path, 40)
        uid = wsi.uid.replace('.tif', '')

        is_normal = 'normal' in uid
        is_annotated = 'tumor' in uid or 'test' in uid
        if not (is_normal or is_annotated):
            # Without this guard the loop below would either hit a NameError
            # or silently reuse the masks of the previous slide.
            print('Skipping {} as it is not a normal, tumor or test slide'.
                  format(uid))
            continue

        tissue_mask = np.load(
            os.path.join(tismaskdir, level_tag, uid + '_TissuePatch.npy'))
        if is_normal:
            # Just extract based on tissue patches
            subtracted_mask = tissue_mask
            colored_patch = wsi.get_patch_by_level(0, 0, level)
        else:
            if annmaskdir is None:
                raise click.UsageError(
                    '--annmaskdir is required to process {}'.format(uid))
            ann_level_dir = os.path.join(annmaskdir, level_tag)
            normal_mask_path = os.path.join(
                ann_level_dir, uid + '_AnnotationNormalMask.npy')
            if not os.path.isfile(normal_mask_path):
                print('Skipping {}'.format(uid))
                continue
            normal_mask = np.load(normal_mask_path)
            tumor_mask = np.load(
                os.path.join(ann_level_dir, uid + '_AnnotationTumorMask.npy'))
            colored_patch = np.load(
                os.path.join(ann_level_dir, uid + '_AnnotationColored.npy'))

            # normal AND NOT tumor AND tissue.  Boolean logic avoids the
            # wraparound an integer subtraction would suffer on unsigned
            # mask dtypes.
            normal_only = np.logical_and(normal_mask,
                                         np.logical_not(tumor_mask))
            subtracted_mask = np.logical_and(normal_only, tissue_mask)

        x_ids, y_ids = np.where(subtracted_mask)
        for x_center, y_center in zip(x_ids, y_ids):
            x_topleft = int(x_center - patchsize / 2)
            y_topleft = int(y_center - patchsize / 2)
            if x_topleft < 0 or y_topleft < 0:
                # A negative start index makes NumPy slice from the far end
                # of the array instead of clipping at the border.
                continue
            x_topright = x_topleft + patchsize
            y_bottomright = y_topleft + patchsize
            mask = subtracted_mask[x_topleft:x_topright, y_topleft:
                                   y_bottomright]
            # Feed only if more than 50% of the window is positive
            if np.sum(mask) <= min_positive:
                continue

            if last_used_x is None:
                # First qualifying window of this slide: always accept it.
                last_used_x = x_center
                last_used_y = y_center
                diff_x = stride
                diff_y = stride
            else:
                diff_x = np.abs(x_center - last_used_x)
                diff_y = np.abs(y_center - last_used_y)

            if diff_x >= stride and diff_y >= stride:
                out_file = '{}/level_{}/{}_{}_{}_{}.png'.format(
                    savedir, level, uid, x_center, y_center, patchsize)
                patch = colored_patch[x_topleft:x_topright, y_topleft:
                                      y_bottomright, :]
                os.makedirs(os.path.dirname(out_file), exist_ok=True)
                img = Image.fromarray(patch)
                img.save(out_file)
                last_used_x = x_center
                last_used_y = y_center


@cli.command(
    'patches-from-coords',
    context_settings=CONTEXT_SETTINGS,
    help='Extract patches from coordinates file')
@click.option('--indir', help='Root directory with all WSIs', required=True)
@click.option('--csv', help='Path to csv with coordinates', required=True)
@click.option(
    '--level',
    type=int,
    help='Level at which to extract patches',
    required=True)
@click.option(
    '--patchsize',
    type=int,
    default=128,
    help='Patch size which to extract patches')
@click.option(
    '--savedir',
    help='Root directory to save extract images to',
    required=True)
def extract_patches_from_coords_cmd(indir, csv, level, patchsize, savedir):
    """Extract patches from coordinates file at a particular level.

    Assumption: Coordinates are assumed to be provided at level 0.

    Each non-blank line of ``csv`` must be in one of these forms:

        filename,x0,y0
        filename_x0_y0_<anything>            (e.g. normal001_10_20_128)
        prefix_id_x0_y0_<anything>           (e.g. test_001_10_20_128)

    Slides are looked up as ``<indir>/normal/<name>.tif``,
    ``<indir>/tumor/<name>.tif`` or ``<indir>/<name>.tif`` depending on
    whether the lower-cased name contains 'normal', 'tumor' or 'test'.
    Patches are written to
    ``<savedir>/level_<level>/<uid>_<x0>_<y0>_<patchsize>.png``.

    Raises
    ------
    RuntimeError
        If a line matches none of the formats above, or a filename contains
        none of 'normal', 'tumor', 'test'.
    """
    patches_to_extract = defaultdict(list)
    with open(csv) as fh:
        for raw_line in fh:
            line = raw_line.strip()
            if not line:
                # Tolerate blank/trailing empty lines.
                continue
            fields = line.split(',')
            if len(fields) == 3:
                filename, x0, y0 = fields
            else:
                splitted = line.split('_')
                if len(splitted) == 5:
                    # test files have name like test_001
                    fileprefix, fileid, x0, y0, _ = splitted
                    filename = '{}_{}'.format(fileprefix, fileid)
                elif len(splitted) == 4:
                    # other files have name like normal001
                    filename, x0, y0, _ = splitted
                else:
                    raise RuntimeError(
                        'Unable to find parsable format. '
                        'Must be filename,x0,y0: {!r}'.format(line))

            patches_to_extract[filename.lower()].append((int(x0), int(y0)))

    for filename, coordinates in tqdm(patches_to_extract.items()):
        if 'normal' in filename:
            filepath = os.path.join(indir, 'normal', filename + '.tif')
        elif 'tumor' in filename:
            filepath = os.path.join(indir, 'tumor', filename + '.tif')
        elif 'test' in filename:
            filepath = os.path.join(indir, filename + '.tif')
        else:
            raise RuntimeError('Malformed filename?: {}'.format(filename))
        wsi = WSIReader(filepath, 40)
        uid = wsi.uid.replace('.tif', '')
        for x0, y0 in coordinates:
            patch = wsi.get_patch_by_level(x0, y0, level, patchsize)
            out_file = '{}/level_{}/{}_{}_{}_{}.png'.format(
                savedir, level, uid, x0, y0, patchsize)
            os.makedirs(os.path.dirname(out_file), exist_ok=True)
            img = Image.fromarray(patch)
            img.save(out_file)


@cli.command(
    'extract-test-patches',
    context_settings=CONTEXT_SETTINGS,
    help='Extract patches from  testing dataset')
@click.option('--indir', help='Root directory with all WSIs', required=True)
@click.option(
    '--tismaskdir',
    help='Root directory with all tissue masks',
    required=True)
@click.option(
    '--level',
    type=int,
    help='Level at which to extract patches',
    required=True)
@click.option(
    '--patchsize',
    type=int,
    default=128,
    help='Patch size which to extract patches')
@click.option(
    '--stride',
    type=int,
    default=64,
    help='Slide windows by this much to get the next patch',
    required=True)
@click.option(
    '--savedir',
    help='Root directory to save extract images to',
    required=True)
def extract_test_patches_cmd(indir, tismaskdir, level, patchsize, stride,
                             savedir):
    """Extract tissue patches from every ``*.tif`` slide in ``indir``.

    For each slide the tissue mask
    ``<tismaskdir>/level_<level>/<uid>_TissuePatch.npy`` is loaded and every
    positive mask pixel is treated as a candidate patch center.  A window of
    ``patchsize`` x ``patchsize`` is saved when more than half of it is
    tissue and it is at least ``stride`` away (in x OR y) from the last
    saved center.

    Output:
        <savedir>/level_<level>/<uid>_<xcenter>_<ycenter>_<patchsize>.png
    """
    wsis = glob.glob(os.path.join(indir, '*.tif'), recursive=False)
    min_positive = 0.5 * (patchsize * patchsize)
    for wsi_path in tqdm(wsis):
        last_used_y = None
        last_used_x = None
        wsi = WSIReader(wsi_path, 40)
        uid = wsi.uid.replace('.tif', '')
        tissue_mask = np.load(
            os.path.join(tismaskdir, 'level_{}'.format(level),
                         uid + '_TissuePatch.npy'))
        # The level image is read lazily and at most once per slide; the
        # call does not depend on the patch position, so re-reading it for
        # every saved patch would only repeat the same work.
        colored_patch = None

        x_ids, y_ids = np.where(tissue_mask)
        for x_center, y_center in zip(x_ids, y_ids):
            x_topleft = int(x_center - patchsize / 2)
            y_topleft = int(y_center - patchsize / 2)
            if x_topleft < 0 or y_topleft < 0:
                # A negative start index makes NumPy slice from the far end
                # of the array instead of clipping at the border.
                continue
            x_topright = x_topleft + patchsize
            y_bottomright = y_topleft + patchsize
            mask = tissue_mask[x_topleft:x_topright, y_topleft:y_bottomright]
            if np.sum(mask) <= min_positive:
                continue

            if last_used_x is None:
                # First qualifying window of this slide: always accept it.
                last_used_x = x_center
                last_used_y = y_center
                diff_x = stride
                diff_y = stride
            else:
                diff_x = np.abs(x_center - last_used_x)
                diff_y = np.abs(y_center - last_used_y)

            # NOTE(review): the sibling tumor/normal extractors require the
            # stride in x AND y; this command uses OR.  Left unchanged.
            if diff_x >= stride or diff_y >= stride:
                if colored_patch is None:
                    colored_patch = wsi.get_patch_by_level(0, 0, level)
                out_file = '{}/level_{}/{}_{}_{}_{}.png'.format(
                    savedir, level, uid, x_center, y_center, patchsize)
                patch = colored_patch[x_topleft:x_topright, y_topleft:
                                      y_bottomright, :]
                os.makedirs(os.path.dirname(out_file), exist_ok=True)
                img = Image.fromarray(patch)
                img.save(out_file)
                last_used_x = x_center
                last_used_y = y_center


@cli.command(
    'estimate-patches',
    context_settings=CONTEXT_SETTINGS,
    help='Estimate number of extractable tumor patches from tumor WSIs')
@click.option(
    '--indir', help='Root directory with all tumor WSIs', required=True)
@click.option('--jsondir', help='Root directory with all jsons', required=True)
@click.option(
    '--level',
    type=int,
    help='Level at which to extract patches',
    required=True)
@click.option(
    '--patchsize',
    type=int,
    default=128,
    help='Patch size which to extract patches')
@click.option(
    '--stride', type=int, default=128, help='Stride to generate next patch')
@click.option(
    '--savedir',
    help='Root directory to save extract images to',
    required=True)
def estimate_patches_cmd(indir, jsondir, level, patchsize, stride, savedir):
    """List candidate patch centers that fit inside annotated polygons.

    For every slide with an annotation json, candidate centers are sampled
    on a ``patchsize``-spaced grid inside each annotation's bounding box.
    A center is kept when both the top-left and bottom-right corners of the
    ``patchsize`` window around it lie inside the annotation polygon.

    Kept centers are written, one ``<uid>_<x>_<y>_<patchsize>`` line each,
    to ``<savedir>/level_<level>/normal.txt`` and
    ``<savedir>/level_<level>/tumor.txt``.  Both files are truncated at the
    start of a run and accumulate the results of all slides.

    ``stride`` is accepted for CLI compatibility but is not used; the grid
    spacing is ``patchsize``.
    """
    all_wsis = glob.glob(os.path.join(indir, '*.tif'), recursive=False)
    out_dir = os.path.join(savedir, 'level_{}'.format(level))
    os.makedirs(out_dir, exist_ok=True)

    normal_path = os.path.join(out_dir, '{}.txt'.format('normal'))
    tumor_path = os.path.join(out_dir, '{}.txt'.format('tumor'))
    # Open each output once for the whole run: re-opening with 'w' for every
    # slide would discard the lines of all previous slides.
    with open(normal_path, 'w') as normal_fh, \
            open(tumor_path, 'w') as tumor_fh:
        handles = {'normal': normal_fh, 'tumor': tumor_fh}
        for wsi_path in tqdm(all_wsis):
            wsi = WSIReader(wsi_path, 40)
            uid = wsi.uid.replace('.tif', '')
            json_filepath = os.path.join(jsondir, uid + '.json')
            if not os.path.exists(json_filepath):
                print('Skipping {} as annotation json not found'.format(uid))
                continue
            bounding_boxes = get_annotation_bounding_boxes(json_filepath)
            polygons = get_annotation_polygons(json_filepath)

            for polygon_key in ('normal', 'tumor'):
                fh = handles[polygon_key]
                for rectangle, polygon in zip(bounding_boxes[polygon_key],
                                              polygons[polygon_key]):
                    # Sample points from rectangle. We will assume we are
                    # sampling the centers of our patch. So if we sample
                    # x_center, y_center from this rectangle, we need to
                    # ensure (x_center +/- patchsize/2,
                    # y_center +/- patchsize/2) lie inside the polygon.
                    x_first, y_first = rectangle['top_left']
                    x_second, y_second = rectangle['bottom_right']
                    # Sort the bounds so the grid is non-empty regardless
                    # of whether the y axis of the boxes points up or down.
                    xmin, xmax = sorted((x_first, x_second))
                    ymin, ymax = sorted((y_first, y_second))
                    path = polygon.get_path()
                    for x_center in np.arange(xmin, xmax, patchsize):
                        for y_center in np.arange(ymin, ymax, patchsize):
                            x_topleft = int(x_center - patchsize / 2)
                            y_topleft = int(y_center - patchsize / 2)
                            x_bottomright = x_topleft + patchsize
                            y_bottomright = y_topleft + patchsize

                            if path.contains_points([(x_topleft, y_topleft),
                                                     (x_bottomright,
                                                      y_bottomright)]).all():
                                fh.write('{}_{}_{}_{}\n'.format(
                                    uid, x_center, y_center, patchsize))


def process_wsi(data):
    """Walk the annotated regions of one slide and emit patches/coordinates.

    Designed to be mapped over a ``multiprocessing.Pool``, hence the single
    tuple argument.

    Parameters
    ----------
    data: tuple
          (wsi_path, jsondir, patchsize, stride, level, dirs, write_image)
          where ``dirs`` maps an annotation key (e.g. 'normal', 'tumor') to
          its output directory and ``write_image`` selects between saving
          PNG patches and appending coordinates to ``<key>.txt``.

    Returns
    -------
    None
        Also returns early (doing nothing) when ``<jsondir>/<uid>.json``
        does not exist.
    """
    wsi, jsondir, patchsize, stride, level, dirs, write_image = data
    wsi = WSIReader(wsi, 40)
    uid = wsi.uid.replace('.tif', '')
    scale_factor = wsi.get_level_scale_factor(level)
    json_filepath = os.path.join(jsondir, uid + '.json')
    if not os.path.isfile(json_filepath):
        return
    boxes = get_annotation_bounding_boxes(json_filepath)
    polygons = get_annotation_polygons(json_filepath)

    # NOTE(review): ``polygons_to_exclude`` is populated but never read
    # afterwards; kept because the helper calls may matter to callers.
    polygons_to_exclude = {'tumor': [], 'normal': []}

    for polygon in polygons['tumor']:
        # Does this have any of the normal polygons inside it?
        polygons_to_exclude['tumor'].append(
            get_common_interior_polygons(polygon, polygons['normal']))

    for polygon in polygons['normal']:
        # Does this have any of the tumor polygons inside it?
        polygons_to_exclude['normal'].append(
            get_common_interior_polygons(polygon, polygons['tumor']))

    for polygon_key in polygons.keys():
        last_used_x = None
        last_used_y = None
        annotated_polygons = polygons[polygon_key]
        annotated_boxes = boxes[polygon_key]

        # Iterate through coordinates in the bounding rectangle of every
        # annotation and emit a patch (or its coordinates) there.
        for annotation_index, (annotated_polygon, annotated_box) in enumerate(
                zip(annotated_polygons, annotated_boxes), start=1):
            minx, miny = annotated_box['top_left']
            maxx, miny = annotated_box['top_right']
            maxx, maxy = annotated_box['bottom_right']
            minx, maxy = annotated_box['bottom_left']

            # Should scale?
            # No. Do not scale here as the patch is always
            # fetched from things at level0
            minx = int(minx)
            miny = int(miny)
            maxx = int(maxx)
            maxy = int(maxy)

            annotated_polygon = np.array(annotated_polygon.get_xy())
            annotated_polygon = annotated_polygon * scale_factor

            # buffer ensures the resulting polygon is clean
            # http://toblerity.org/shapely/manual.html#object.buffer
            try:
                annotated_polygon_scaled = shapelyPolygon(
                    np.round(annotated_polygon).astype(int)).buffer(0)
            except Exception:
                # Best effort: an annotation that cannot be turned into a
                # polygon is skipped, but Ctrl-C/SystemExit still propagate.
                warnings.warn(
                    'Skipping creating annotation index {} for {}'.format(
                        annotation_index, uid))
                continue

            assert annotated_polygon_scaled.is_valid, \
                'Found invalid annotated polygon: {} {}'.format(
                    uid, shapelyPolygon(annotated_polygon).is_valid)

            for x_left in np.arange(minx, maxx, 1):
                for y_top in np.arange(miny, maxy, 1):
                    x_right = x_left + patchsize
                    y_bottom = y_top + patchsize
                    if last_used_x is None:
                        last_used_x = x_left
                        last_used_y = y_top
                        diff_x = stride
                        diff_y = stride
                    else:
                        diff_x = np.abs(x_left - last_used_x)
                        diff_y = np.abs(y_top - last_used_y)

                    # Only positions strictly more than ``stride`` away from
                    # the last used one (in both axes) are considered.
                    if diff_x <= stride or diff_y <= stride:
                        continue
                    last_used_x = x_left
                    last_used_y = y_top

                    patch_polygon = shapelyPolygon(
                        [(x_left, y_top), (x_right, y_top),
                         (x_right, y_bottom), (x_left, y_bottom)]).buffer(0)
                    assert patch_polygon.is_valid, \
                        'Found invalid polygon: {}_{}_{}'.format(
                            uid, x_left, y_top)
                    try:
                        # NOTE(review): the result is not consulted below, so
                        # patches are emitted whether or not they lie inside
                        # the annotation -- confirm whether a
                        # ``if not is_inside: continue`` is intended.
                        is_inside = annotated_polygon_scaled.contains(
                            patch_polygon)
                    except Exception:
                        # Might raise an exception when the two polygons
                        # are the same
                        warnings.warn(
                            'Skipping: {}_{}_{}_{}.png | Equals: {} | '
                            'Almost equals: {}'.format(
                                uid, x_left, y_top, patchsize,
                                annotated_polygon_scaled.equals(
                                    patch_polygon),
                                annotated_polygon_scaled.almost_equals(
                                    patch_polygon)))
                        continue

                    if write_image:
                        out_file = os.path.join(
                            dirs[polygon_key], '{}_{}_{}_{}.png'.format(
                                uid, x_left, y_top, patchsize))
                        patch = wsi.get_patch_by_level(x_left, y_top, level,
                                                       patchsize)
                        os.makedirs(os.path.dirname(out_file), exist_ok=True)
                        img = Image.fromarray(patch)
                        img.save(out_file)
                    else:
                        # Just write the coordinates
                        to_write = '{}_{}_{}_{}\n'.format(
                            uid, x_left, y_top, patchsize)
                        out_file = os.path.join(dirs[polygon_key],
                                                '{}.txt'.format(polygon_key))
                        with open(out_file, 'a') as fh:
                            fh.write(to_write)
@cli.command('extract-test-both-patches',
             context_settings=CONTEXT_SETTINGS,
             help='Extract both normal and tumor patches from tissue masks')
@click.option('--indir',
              help='Root directory with all test WSIs',
              required=True)
@click.option('--patchsize',
              type=int,
              default=128,
              help='Patch size which to extract patches')
@click.option('--stride',
              type=int,
              default=128,
              help='Stride to generate next patch')
@click.option('--jsondir', help='Root directory with all jsons', required=True)
@click.option('--level',
              type=int,
              help='Level at which to extract patches',
              required=True)
@click.option('--savedir',
              help='Root directory to save extract images to',
              required=True)
@click.option('--write_image', help='Should output images', is_flag=True)
def extract_test_both_cmd(indir, patchsize, stride, jsondir, level, savedir,
                          write_image):
    """Fan ``process_wsi`` out over every ``*.tif`` slide in ``indir``.

    Creates ``<savedir>/level_<level>/{normal,tumor}`` and hands each slide,
    together with the CLI settings, to a 16-process pool.
    """
    slide_paths = glob.glob(os.path.join(indir, '*.tif'), recursive=False)

    level_dir = os.path.join(savedir, 'level_{}'.format(level))
    dirs = {
        'normal': os.path.join(level_dir, 'normal'),
        'tumor': os.path.join(level_dir, 'tumor'),
    }
    for directory in (level_dir, dirs['normal'], dirs['tumor']):
        os.makedirs(directory, exist_ok=True)

    jobs = []
    for slide_path in slide_paths:
        jobs.append((slide_path, jsondir, patchsize, stride, level, dirs,
                     write_image))

    with tqdm(total=len(slide_paths)) as pbar, Pool(processes=16) as pool:
        # Results are discarded; iterating only drives the progress bar.
        for _ in pool.imap_unordered(process_wsi, jobs):
            pbar.update()
def process_segmentation(data):
    """Segment one PNG patch and write its summary as a one-row TSV.

    Parameters
    ----------
    data: tuple
          (png_location, tsv_outpath)
    """
    png_location, tsv_outpath = data
    rgb = read_as_rgb(png_location)
    # ``label_nuclei`` returns a pair; only its first element is needed.
    region_properties = label_nuclei(rgb, draw=False)[0]
    row = summarize_region_properties(region_properties, rgb)
    pd.DataFrame([row]).to_csv(
        tsv_outpath, index=False, header=True, sep='\t')
@cli.command(
    'segment',
    context_settings=CONTEXT_SETTINGS,
    help='Performs segmentation and extract-features')
@click.option('--indir', help='Root directory with all pngs', required=True)
@click.option('--outdir', help='Output directory to out tsv', required=True)
def segementation_cmd(indir, outdir):
    """Perform segmentation and store the tsvs.

    Every ``<indir>/*.png`` is mapped to ``<outdir>/<name>.tsv``.  A PNG is
    (re)processed when its tsv is missing or empty; the work is spread over
    a 16-process pool running ``process_segmentation``.
    """
    print(indir)
    list_of_pngs = glob.glob(indir + '/*.png')
    data = []
    for png in list_of_pngs:
        # Build the target from the basename; a textual replace of the
        # directory name would also rewrite matching parts of the filename.
        stem = os.path.splitext(os.path.basename(png))[0]
        tsv = os.path.join(outdir, stem + '.tsv')
        if not os.path.isfile(tsv) or os.stat(tsv).st_size == 0:
            data.append((png, tsv))
    os.makedirs(outdir, exist_ok=True)
    with tqdm(total=len(data)) as pbar:
        with Pool(processes=16) as p:
            for _ in p.imap_unordered(process_segmentation, data):
                pbar.update()


def _process_patches_df(data):
    """Pool worker: unpack one job tuple and build the slide's patch table.

    Parameters
    ----------
    data: tuple
          (slide_path, json_filepath, patch_size, saveto)

    Returns
    -------
    Whatever ``get_all_patches_from_slide`` returns (used by the caller as
    a pandas DataFrame).
    """
    slide_path, json_filepath, patch_size, saveto = data
    df = get_all_patches_from_slide(
        slide_path,
        json_filepath=json_filepath,
        filter_non_tissue=True,
        patch_size=patch_size,
        saveto=saveto)
    return df


@cli.command(
    'patches-df',
    context_settings=CONTEXT_SETTINGS,
    help='Extract all patches summarized as dataframes')
@click.option(
    '--indir', help='Root directory with all tumor WSIs', required=True)
@click.option('--jsondir', help='Root directory with all jsons')
@click.option(
    '--patchsize',
    type=int,
    default=256,
    help='Patch size which to extract patches')
@click.option(
    '--savedir',
    help='Root directory to save extract images to',
    required=True)
def extract_mask_df_cmd(indir, jsondir, patchsize, savedir):
    """Summarize the patches of every ``*.tif`` slide as dataframes.

    Each slide is processed by ``_process_patches_df`` in a 16-process pool
    with ``saveto=<savedir>/<basename>.tsv``.  The per-slide results are
    concatenated, sorted by ``uid`` (and ``is_tumor`` when present) and
    written to ``<savedir>/master_df.tsv``.

    ``jsondir`` is optional: when it is not given, or a slide has no
    ``<basename>.json`` in it, ``json_filepath=None`` is passed on.
    """
    wsis = glob.glob(os.path.join(indir, '*.tif'), recursive=False)
    data = []
    for wsi in wsis:
        basename = path_leaf(wsi).replace('.tif', '')
        json_filepath = None
        if jsondir:
            candidate = os.path.join(jsondir, basename + '.json')
            # Only probe the filesystem when there is a path to probe;
            # os.path.isfile(None) raises TypeError.
            if os.path.isfile(candidate):
                json_filepath = candidate
        saveto = os.path.join(savedir, basename + '.tsv')
        data.append((wsi, json_filepath, patchsize, saveto))
    os.makedirs(savedir, exist_ok=True)

    # Collect first and concatenate once: concatenating inside the loop
    # copies the accumulated frame on every iteration.
    frames = []
    with tqdm(total=len(wsis)) as pbar:
        with Pool(processes=16) as p:
            for temp_df in p.imap_unordered(_process_patches_df, data):
                if temp_df is not None:
                    frames.append(temp_df)
                pbar.update()
    df = pd.concat(frames) if frames else pd.DataFrame()

    if 'is_tumor' in df.columns:
        df = df.sort_values(by=['uid', 'is_tumor'])
    elif 'uid' in df.columns:
        # Guarded so that an empty result still yields an (empty) master
        # file instead of a KeyError.
        df = df.sort_values(by=['uid'])
    df.to_csv(
        os.path.join(savedir, 'master_df.tsv'),
        sep='\t',
        index=False,
        header=True)