Source code for cntk.contrib.deeprl.agent.shared.customized_models

# Copyright (c) Microsoft. All rights reserved.

# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
"""Customized Q function or (unnormalized) log of policy function.

If models from cntk.contrib.deeprl.agent.shared.models are not adequate, write
your own model as a function, which takes two required arguments
'shape_of_inputs', 'number_of_outputs', and two optional arguments
'loss_function', 'use_placeholder_for_input', and outputs a dictionary
containing 'inputs', 'outputs', 'f' and 'loss'. In the config file, set
QRepresentation or PolicyRepresentation to path (module_name.function_name) of
the function. QLearning/PolicyGradient will then automatically search for it.
"""

import cntk as C
import numpy as np


[docs]def conv_dqn(shape_of_inputs, number_of_outputs, loss_function=None, use_placeholder_for_input=False): """Example convolutional neural network for approximating the Q value function. This is the model used in the original DQN paper https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf. Args: shape_of_inputs: tuple of array (input) dimensions. number_of_outputs: dimension of output, equals the number of possible actions. loss_function: if not specified, use squared loss by default. use_placeholder_for_input: if true, inputs have to be replaced later with actual input_variable. Returns: a Python dictionary with string-valued keys including 'inputs', 'outputs', 'loss' and 'f'. """ # input/output inputs = C.ops.placeholder(shape=shape_of_inputs) \ if use_placeholder_for_input \ else C.ops.input_variable(shape=shape_of_inputs, dtype=np.float32) outputs = C.ops.input_variable( shape=(number_of_outputs,), dtype=np.float32) # network structure centered_inputs = inputs - 128 scaled_inputs = centered_inputs / 256 with C.layers.default_options(activation=C.ops.relu): q = C.layers.Sequential([ C.layers.Convolution((8, 8), 32, strides=4), C.layers.Convolution((4, 4), 64, strides=2), C.layers.Convolution((3, 3), 64, strides=2), C.layers.Dense((512,)), C.layers.Dense(number_of_outputs, activation=None) ])(scaled_inputs) if loss_function is None: loss = C.losses.squared_error(q, outputs) else: loss = loss_function(q, outputs) return { 'inputs': inputs, 'outputs': outputs, 'f': q, 'loss': loss }