# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
from .. import cntk_py
from cntk.internal import sanitize_2d_number, sanitize_range
def crop(crop_type='center', crop_size=0, side_ratio=0.0, area_ratio=0.0, aspect_ratio=1.0, jitter_type='none'):
    '''
    Crop transform that can be used to pass to `map_features`.

    Args:
        crop_type (str, default 'center'): 'center', 'randomside', 'randomarea',
          or 'multiview10'. 'randomside' and 'randomarea' are usually used
          during training, whereas 'center' and 'multiview10' are usually used
          during testing. Random cropping is a popular data augmentation
          technique used to improve generalization of the DNN.
          If either 'randomside' or 'randomarea' is set, horizontal flipping
          will be enabled and the image will be randomly flipped in horizontal
          direction. Horizontal flipping is another popular data augmentation
          technique and should be used if images exhibit vertical symmetry,
          for example, like many real-world objects.
        crop_size (`int` or pair of `int`, default 0): crop size in pixels.
          Ignored if set to 0. A non-zero scalar, for example crop_size=256,
          means a cropping window of 256x256 pixels will be taken. To crop a
          non-square window, pass a pair: crop_size=(256,224) crops 256x224
          (width x height) pixels. `When crop_size is specified, side_ratio,
          area_ratio and aspect_ratio will be ignored.`
        side_ratio (`float` or pair of `float`, default 0.0): ratio of the
          final image side (width or height) with respect to the original
          image. Ignored if set to 0.0. Otherwise, must be set within `(0,1]`.
          For example, with an input image size of 640x480, side_ratio of 0.5
          means we crop a square region (if aspect_ratio is 1.0) of the input
          image, whose width and height are equal to 0.5*min(640, 480) = 240.
          To enable scale jitter (a popular data augmentation technique), use
          a tuple like side_ratio=(0.5,0.75), which means the crop will have
          size between 240 (0.5*min(640, 480)) and 360 (0.75*min(640, 480)).
        area_ratio (`float` or pair of `float`, default 0.0): area ratio of
          the final image with respect to the original image. Ignored if set
          to 0.0. Otherwise, must be set within `(0,1]`. For example, for an
          input image size of 200x150 pixels, the area is 30,000. If
          area_ratio is 0.3333, we crop a square region (if aspect_ratio is
          1.0) with width and height equal to sqrt(30,000*0.3333)=100.
          To enable scale jitter, use a tuple such as area_ratio=(0.3333,0.8),
          which means the crop will have size between 100
          (sqrt(30,000*0.3333)) and 155 (sqrt(30,000*0.8)).
        aspect_ratio (`float` or pair of `float`, default 1.0): aspect ratio
          (width/height or height/width) of the crop window. It is
          recommended to set it within `(0,1]`, although a value greater than
          1 is also allowed. In practice, values of 1.333(4/3) or 0.75(3/4)
          should cause the same aspect deformation effect. For example, if
          due to side_ratio the crop size is 240x240, an aspect_ratio of 0.64
          will change the window size to non-square: 192x300 or 300x192, each
          having 50% chance. Note the area of the crop window does not change.
          To enable aspect ratio jitter, use a tuple such as
          aspect_ratio=(0.64,1.0), which means the crop will have size between
          192x300 (or equally likely 300x192) and 240x240. One can also use
          aspect_ratio=(0.64,1.5625), which will create rectangles in the same
          aspect ratio range, although there is a subtle difference due to
          uniratio sampling between the boundary of the specified ratio range.
        jitter_type (str, default 'none'): crop scale jitter type, possible
          values are 'none' and 'uniratio'. 'uniratio' means uniform
          distributed jitter scale between the minimum and maximum ratio
          values.

    Returns:
        A dictionary-like object describing the crop transform
    '''
    # Normalize the scalar-or-tuple arguments with the cntk.internal helpers
    # before handing them to the native reader.
    size_2d = sanitize_2d_number(crop_size)
    side_range = sanitize_range(side_ratio)
    area_range = sanitize_range(area_ratio)
    aspect_range = sanitize_range(aspect_ratio)

    crop_transform = cntk_py.reader_crop(
        crop_type, size_2d, side_range, area_range, aspect_range, jitter_type)
    return crop_transform
def scale(width, height, channels, interpolations='linear', scale_mode="fill", pad_value=-1):
    '''
    Scale transform that can be used to pass to `map_features` for data augmentation.

    Args:
        width (int): width of the image in pixels
        height (int): height of the image in pixels
        channels (int): channels of the image
        interpolations (str, default 'linear'): possible values are
          'nearest', 'linear', 'cubic', and 'lanczos'
        scale_mode (str, default 'fill'): 'fill', 'crop' or 'pad'.

          * 'fill' - warp the image to the given target size.
          * 'crop' - resize the image's shorter side to the given target size
            and crop the overlap.
          * 'pad' - resize the image's larger side to the given target size,
            center it and pad the rest.
        pad_value (int, default -1): -1 or int value. The pad value used for
          the 'pad' mode. If set to -1 then the border will be replicated.

    Returns:
        A dictionary-like object describing the scale transform
    '''
    # All arguments are forwarded unchanged, in positional order.
    scale_transform = cntk_py.reader_scale(
        width, height, channels, interpolations, scale_mode, pad_value)
    return scale_transform
def mean(filename):
    '''
    Mean transform that can be used to pass to `map_features` for data augmentation.

    Args:
        filename (str): file that stores the mean values for each pixel
          in OpenCV matrix XML format

    Returns:
        dict:
        A dictionary-like object describing the mean transform
    '''
    # The file name is passed straight through to the native reader.
    mean_transform = cntk_py.reader_mean(filename)
    return mean_transform
def color(brightness_radius=0.0, contrast_radius=0.0, saturation_radius=0.0):
    '''
    Color transform that can be used to pass to `map_features` for data augmentation.

    Args:
        brightness_radius (float, default 0.0): Radius for brightness change.
          Must be set within [0.0, 1.0]. For example, assume
          brightness_radius = 0.2, a random number `x` is uniformly drawn from
          [-0.2, 0.2], and every pixel's value is added by `x*meanVal`, where
          meanVal is the mean of the image pixel intensity combining all color
          channels.
        contrast_radius (float, default 0.0): Radius for contrast change.
          Must be set within [0.0, 1.0]. For example, assume
          contrast_radius = 0.2, a random number `x` is uniformly drawn from
          [-0.2, 0.2], and every pixel's value is multiplied by `1+x`.
        saturation_radius (float, default 0.0): Radius for saturation change.
          Only for color images and must be set within [0.0, 1.0]. For
          example, assume saturation_radius = 0.2, a random number `x` is
          uniformly drawn from [-0.2, 0.2], and every pixel's saturation is
          multiplied by `1+x`.

    Returns:
        A dictionary-like object describing the color transform
    '''
    # The three radii are forwarded unchanged, in positional order.
    color_transform = cntk_py.reader_color(
        brightness_radius, contrast_radius, saturation_radius)
    return color_transform
#@staticmethod
#def intensity(intensity_stddev, intensity_file):
# '''
# Intensity transform that can be used to pass to `map_features` for data augmentation.
# Intensity jittering based on PCA transform as described in original `AlexNet paper
# <http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf>`_
# Currently uses precomputed values from
# https://github.com/facebook/fb.resnet.torch/blob/master/datasets/imagenet.lua
# Args:
# intensity_stddev (float): intensity standard deviation.
# intensity_file (str): intensity file.
# Returns:
# dict describing the intensity transform '''
# return dict(type='Intensity', intensityStdDev=intensity_stddev, intensityFile=intensity_file)