Source code for cntk.io.transforms

# Copyright (c) Microsoft. All rights reserved.

# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================

from .. import cntk_py
from cntk.internal import sanitize_2d_number, sanitize_range

[docs]def crop(crop_type='center', crop_size=0, side_ratio=0.0, area_ratio=0.0, aspect_ratio=1.0, jitter_type='none'): ''' Crop transform that can be used to pass to `map_features` Args: crop_type (str, default 'center'): 'center', 'randomside', 'randomarea', or 'multiview10'. 'randomside' and 'randomarea' are usually used during training. If either 'randomside' and 'randomarea' are set, Horizontal flipping will be enabled and the image will be randomly flipped in horizontal direction. Horizontal flipping is another popular data augmentation technique and should be used if images exhibit vertical symmetry, for example, like many real-world objects. 'center' and 'multiview10' are usually used during testing. Random cropping is a popular data augmentation technique used to improve generalization of the DNN. crop_size (`int`, default 0): crop size in pixels. Ignored if set to 0. When crop_size is non-zero, for example, crop_size=256, it means a cropping window of size 256x256 pixels will be taken. If one want to crop with non-square shapes, specify crop_size=(256,224) will crop 256x224 (width x height) pixels. `When crop_size is specified, side_ratio, area_ratio and aspect_ratio will be ignored.` side_ratio (`float`, default 0.0): It specifies the ratio of final image side (width or height) with respect to the original image. Ignored if set to 0.0. Otherwise, must be set within `(0,1]`. For example, with an input image size of 640x480, side_ratio of 0.5 means we crop a square region (if aspect_ratio is 1.0) of the input image, whose width and height are equal to 0.5*min(640, 480) = 240. To enable scale jitter (a popular data augmentation technique), use tuple like side_ratio=(0.5,0.75), which means the crop will have size between 240 (0.5*min(640, 480)) and 360 (0.75*min(640, 480)). area_ratio (`float`, default 0.0): It specifies the area ratio of final image with respect to the original image. Ignored if set to 0.0. Otherwise, must be set within `(0,1]`. For example, for an input image size of 200x150 pixels, the area is 30,000. If area_ratio is 0.3333, we crop a square region (if aspect_ratio is 1.0) with width and height equal to sqrt(30,000*0.3333)=100. To enable scale jitter, use tuple such as area_ratio=(0.3333,0.8), which means the crop will have size between 100 (sqrt(30,000*0.3333)) and 155 (sqrt(30,000*0.8)). aspect_ratio (`float`, default 1.0): It specifies the aspect ratio (width/height or height/width) of the crop window. It is recommended to set it within `(0,1]`, although a value greater than 1 is also allowed. In practice, values of 1.333(4/3) or 0.75(3/4) should cause the same aspect deformation effect. For example, if due to size_ratio the crop size is 240x240, an aspect_ratio of 0.64 will change the window size to non-square: 192x300 or 300x192, each having 50% chance. Note the area of the crop window does not change. To enable aspect ratio jitter, use tuple such as aspect_ratio=(0.64,1.0), which means the crop will have size between 192x300 (or equally likely 300x192) and 240x240. One can also use aspect_ratio=(0.64,1.5625), which will create rectangles in the same aspect ratio range, although there is a subtle difference due to uniratio sampling between the boundary of the specified ratio range. jitter_type (str, default 'none'): crop scale jitter type, possible values are 'none' and 'uniratio'. 'uniratio' means uniform distributed jitter scale between the minimum and maximum ratio values. Returns: A dictionary-like object describing the crop transform ''' crop_size = sanitize_2d_number(crop_size) side_ratio = sanitize_range(side_ratio) area_ratio = sanitize_range(area_ratio) aspect_ratio = sanitize_range(aspect_ratio) return cntk_py.reader_crop(crop_type, crop_size, side_ratio, area_ratio, aspect_ratio, jitter_type)
[docs]def scale(width, height, channels, interpolations='linear', scale_mode="fill", pad_value=-1): ''' Scale transform that can be used to pass to `map_features` for data augmentation. Args: width (int): width of the image in pixels height (int): height of the image in pixels channels (int): channels of the image interpolations (str, default 'linear'): possible values are 'nearest', 'linear', 'cubic', and 'lanczos' scale_mode (str, default 'fill'): 'fill', 'crop' or 'pad'. 'fill' - warp the image to the given target size. 'crop' - resize the image's shorter side to the given target size and crop the overlap. 'pad' - resize the image's larger side to the given target size, center it and pad the rest pad_value (int, default -1): -1 or int value. The pad value used for the 'pad' mode. If set to -1 then the border will be replicated. Returns: A dictionary-like object describing the scale transform ''' return cntk_py.reader_scale(width, height, channels, interpolations, scale_mode, pad_value)
[docs]def mean(filename): ''' Mean transform that can be used to pass to `map_features` for data augmentation. Args: filename (str): file that stores the mean values for each pixel in OpenCV matrix XML format Returns: dict: A dictionary-like object describing the mean transform ''' return cntk_py.reader_mean(filename)
[docs]def color(brightness_radius=0.0, contrast_radius=0.0, saturation_radius=0.0): ''' Color transform that can be used to pass to `map_features` for data augmentation. Args: brightness_radius (float, default 0.0): Radius for brightness change. Must be set within [0.0, 1.0]. For example, assume brightness_radius = 0.2, a random number `x` is uniformly drawn from [-0.2, 0.2], and every pixel's value is added by `x*meanVal`, where meanVal is the mean of the image pixel intensity combining all color channels. contrast_radius (float, default 0.0): Radius for contrast change. Must be set within [0.0, 1.0]. For example, assume contrast_radius = 0.2, a random number `x` is uniformly drawn from [-0.2, 0.2], and every pixel's value is multiplied by `1+x`. saturation_radius (float, default 0.0): Radius for saturation change. Only for color images and must be set within [0.0, 1.0]. For example, assume saturation_radius = 0.2, a random number `x` is uniformly drawn from [-0.2, 0.2], and every pixel's saturation is multiplied by `1+x`. Returns: A dictionary-like object describing the mean transform ''' return cntk_py.reader_color(brightness_radius, contrast_radius, saturation_radius)
#@staticmethod #def intensity(intensity_stddev, intensity_file): # ''' # Intensity transform that can be used to pass to `map_features` for data augmentation. # Intensity jittering based on PCA transform as described in original `AlexNet paper # <http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf>`_ # Currently uses precomputed values from # https://github.com/facebook/fb.resnet.torch/blob/master/datasets/imagenet.lua # Args: # intensity_stddev (float): intensity standard deviation. # intensity_file (str): intensity file. # Returns: # dict describing the mean transform ''' # return dict(type='Intensity', intensityStdDev=intensity_stddev, intensityFile=intensity_file)