Source code for cntk.debugging.debug

# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================

import sys
from collections import defaultdict

from cntk import cntk_py, user_function, output_variable, CloneMethod

from cntk.ops.functions import UserFunction
from cntk.internal import map_if_possible

DEBUG_USAGE = '''\
    Commands:
        n - execute the next node
        n <number> - execute the next <number> nodes

        u f - execute until forward pass (like 'n' when already in forward pass)
        u b - execute until backward pass (like 'n' when already in backward pass)
        u name - execute until a node with that name is hit
        u <lambda> - execute until the lambda expression is True. Examples:
                     Until a Times node is hit:
                         lambda arg, node: node.op_name == 'Times'
                     Until a node is hit that has 3 dimensions:
                         lambda arg, node: len(node.shape) == 3
                     Until the variance of the input exceeds 1 (np = numpy):
                         lambda arg, node: np.var(arg) > 1

        c - execute until end
        p - print input (forward) or root gradients (backward)
        d - drop into a pdb shell
        q - quit\
'''

__doc__ = '''\
In order to debug a graph one simply needs to wrap the root node as follows::

    # ... setting up the model in z
    from cntk.debugging import debug_model
    z = debug_model(z)

Then, when ``z`` is evaluated or trained (i.e. when either
:meth:`~cntk.ops.functions.Function.forward` or
:meth:`~cntk.ops.functions.Function.backward` is called, you will see the
following command-line interface::

    =================================== forward  ===================================
    Parameter node with uid='Parameter28' shape=[](2,)
    [CNTK forward] >>> help
    %s

    [CNTK backward] >>> n

    Times node with uid='Times29' shape=[*,*](2,)
    [CNTK forward] >>> n
    =================================== backward ===================================
    Times node with uid='Times29' shape=[*,*](2,)
    [CNTK backward] >>> p
    State: None
    Root gradients:
    [[[-0.79412955  0.79412955]]
     [[-0.79412955  0.79412955]]
     [[ 0.20587046 -0.20587045]]
     [[ 0.20587046 -0.20587045]]
     [[ 0.20587046 -0.20587045]]
     [[ 0.20587046 -0.20587045]]
     [[-0.79412955  0.79412955]]
     [[ 0.20587046 -0.20587045]]
     [[ 0.20587039 -0.20587039]]
     [[-0.79412961  0.79412961]]]

At every stop the following information is given:
 * Forward or backward pass
 * Node type (e.g. 'Times')
 * Name if given, otherwise it is omitted
 * uid, which is a unique reference within the graph
 * shape having the format [dynamic axis](static axes). E.g. ``[*,*](2,)``
   means that the node's output has two dynamic axes (batch and sequence) and
   one static axis (2 dimensions)
''' % DEBUG_USAGE


[docs]def save_as_legacy_model(root_op, filename):
    '''
    Save the network of ``root_op`` in ``filename``.
    For debugging purposes only, very likely to be deprecated in the future.

    Args:
        root_op (:class:`~cntk.ops.functions.Function`): op of the graph to save
        filename (str): filename to store the model in.
    '''
    cntk_py.save_as_legacy_model(root_op, filename)


class _DebugState(object):

    def __init__(self, all_nodes):
        self.commands = []
        self.last_pass = '<start>'
        self.all_nodes = all_nodes
        self.name_to_node = defaultdict(lambda: [])
        for n in self.all_nodes:
            self.name_to_node[n.name].append(n)


[docs]def set_checked_mode(enable):
    '''
    Checked mode enables additional runtime verification such as:
        - Tracking NaN occurrences in sequence gaps.
        - Function graph verification after binding of free static axes to actual values at runtime

    Enabling checked mode incurs additional runtime costs and is meant to be used as a debugging aid.

    Args:
        enable (bool): whether to enable checked mode (with performance impact)
    '''
    cntk_py.set_checked_mode(enable)


[docs]def set_computation_network_trace_level(level):
    '''
    Set trace level to the computation network. Currently supported values:

       0
         turn off trace
       1
         output nodes' dimensions and some other static info
       1000
         output each node's abs sum of elements in its value matrix for every forward/backward
       1000000
         output each node's full matrix for every forward/backward

    Args:
        level (int): trace level
    '''
    cntk_py.set_computation_network_trace_level(level)

[docs]def set_node_timing(enable):
    '''
    Node-timing records per-node average execution time per-minibatch.
    Enabling checked mode incurs a little runtime costs and is meant to be used as a debugging aid.

    Args:
        enable (bool): whether to enable per-node timing
    '''
    cntk_py.enable_node_timing() if enable else cntk_py.disable_node_timing()

class _DebugNode(UserFunction):
    '''
    A user function node that exposes a command line interface. With that one can
    step through the graph and investigate data, shapes, etc.

    In order to use it, call :func:`debug_model` on the model.

    Args:
       arg (graph node): the node in the graph after which this Debug Node is to
        be inserted
      debug_state (:class:`_DebugState`): state that is shared among all debug
       nodes
      in_stream (object behaving like sys.stdin): `readline()` will be called on it
       to obtain user input
      out_stream (object behaving like sys.stdout): `write()` and `flush()` will
       be called on it to output debug info to the user
      exit_func (callable): callable that takes an exit code and is called,
       when the user exits the debugging process
      name (str): name of the node
    '''
    _commands = []

    PROMPT_FORWARD = '[CNTK forward] >>> '
    PROMPT_BACKWARD = '[CNTK backward] >>> '

    def __init__(self, arg, debug_state,
                 in_stream=sys.stdin, out_stream=sys.stdout,
                 exit_func=sys.exit,
                 name='D'):
        if hasattr(arg, 'is_composite') and arg.is_composite:
            arg = arg.root_function

        # Shorten the name a bit
        arg_uid_parts = arg.uid.split('_')
        if len(arg_uid_parts)>2 and arg_uid_parts[-2] == 'Output':
            del arg_uid_parts[-2]
        name += '_%s' % '_'.join(arg_uid_parts)

        super(_DebugNode, self).__init__([arg], as_numpy=True, name=name)
        self.after = arg
        self.debug_state = debug_state

        self._in, self._out = in_stream, out_stream
        self._exit = exit_func

    def clone(self, cloned_inputs):
        arg = cloned_inputs[0]
        map_if_possible(arg)
        return _DebugNode(arg, self.debug_state, self._in,self._out, self._exit)

    # TODO:
    # Breakopint handling
    # u h - until here
    def _wait_for_input(self, prompt):
        understood = False
        while not understood:
            self._out.write(prompt)
            self._out.flush()
            new_input = self._in.readline().strip()
            if not new_input:
                continue

            if len(new_input) == 1 and new_input in 'bcdfp':
                understood = [new_input]
            elif new_input[0] == 'n':
                if len(new_input) > 1:
                    remainder = new_input[1:]
                    try:
                        number = int(remainder)
                        understood = ['n'] * number
                    except ValueError:
                        pass
                else:
                    understood = ['n']

            elif new_input[0] == 'u':
                try:
                    what = new_input[1:].strip()
                    if what.startswith('lambda'):
                        code = eval(what)
                        understood = [code]
                    else:
                        if what in self.debug_state.name_to_node:
                            def code(arg, n):
                                return n.name == what
                            understood = [code]
                        elif not understood:
                            if "backward".startswith(what):
                                understood = ['ub']
                            elif "forward".startswith(what):
                                understood = ['uf']
                        else:
                            self._out.write('Your model does not contain a '
                                            'node with name "%s"\n' % what)
                            self._out.flush()

                except SyntaxError:
                    understood = False

            elif new_input == 'q':
                self._exit(0)

            if not understood:
                self._out.write(DEBUG_USAGE + '\n')
                self._out.flush()

        return understood

    def _print_status(self, current_pass):
        if current_pass != self.debug_state.last_pass:
            if current_pass == 'f':
                self._out.write('\n')
                self._out.write('=' * 35 + ' forward  ' + '=' * 35 + '\n')
            else:
                self._out.write('=' * 35 + ' backward ' + '=' * 35 + '\n')
            self._out.flush()

        after = self.after.owner if self.after.is_output else self.after
        self._out.write("\n%s with uid '%s'\n" % (str(after), after.uid))
        self._out.flush()

    def forward(self, argument, device=None, outputs_to_retain=None):
        self._print_status('f')

        done = False
        while not done:
            if not self.debug_state.commands:
                self.debug_state.commands = self._wait_for_input(
                    _DebugNode.PROMPT_FORWARD)

            commands = self.debug_state.commands

            next_command = commands[-1]
            if next_command == 'c':
                done = True

            elif isinstance(next_command, str) and next_command.startswith('n'):
                if len(next_command) == 1:
                    commands.pop()
                    done = True

            elif isinstance(next_command, str) and next_command.startswith('u'):
                if next_command == "uf":
                    commands.pop()
                    if self.debug_state.last_pass == 'b':
                        self.debug_state.commands = self._wait_for_input(
                            _DebugNode.PROMPT_FORWARD)
                        done = False
                    else:
                        done = True
                elif next_command == "ub":
                    done = True

            elif next_command == 'p':
                self._out.write('Input with shape %s: \n' % str(argument.shape))
                self._out.write(str(argument))
                self._out.write('\n')
                self._out.flush()
                commands.pop()

            elif next_command == 'd':
                commands.pop()
                import pdb
                pdb.set_trace()
                done = True

            elif callable(next_command):
                if next_command(argument, self.after):
                    commands.pop()
                else:
                    done = True

        self.debug_state.last_pass = 'f'

        return None, argument

    def backward(self, state, root_gradients):
        self._print_status('b')

        done = False
        while not done:
            if not self.debug_state.commands:
                self.debug_state.commands = self._wait_for_input(
                    _DebugNode.PROMPT_BACKWARD)

            commands = self.debug_state.commands

            next_command = commands[-1]
            if next_command == 'c':
                done = True

            elif isinstance(next_command, str) and next_command.startswith('n'):
                if len(next_command) == 1:
                    commands.pop()
                    done = True

            elif isinstance(next_command, str) and next_command.startswith('u'):
                if next_command == "uf":
                    done = True
                elif next_command == "ub":
                    commands.pop()
                    if self.debug_state.last_pass == 'f':
                        self.debug_state.commands = self._wait_for_input(
                            _DebugNode.PROMPT_FORWARD)
                        done = False
                    else:
                        done = True

            elif next_command == 'p':
                if state is not None:
                    self._out.write('State: %s\n' % str(state))
                self._out.write('Root gradients with shape %s: \n' %
                                str(root_gradients.shape))
                self._out.write(str(root_gradients))
                self._out.write('\n')
                self._out.flush()
                commands.pop()

            elif next_command == 'd':
                import pdb
                pdb.set_trace()
                done = True

            elif callable(next_command):
                if next_command(root_gradients, self.after):
                    commands.pop()
                else:
                    done = True

        self.debug_state.last_pass = 'b'

        return root_gradients

    def infer_outputs(self):
        return [output_variable(self.inputs[0].shape, self.inputs[0].dtype,
                                self.inputs[0].dynamic_axes)]

    def __str__(self):
        return "_DebugNode(after=%s)" % str(self.after)


def _nodes_to_debug(model):
    from cntk.logging.graph import depth_first_search

    def node_filter(x):
        if hasattr(x, 'op_name') and x.op_name in ['NoOp']:
            return False
        else:
            return True

    nodes = set(depth_first_search(model, lambda x: True))

    uf_nodes = [n for n in nodes if hasattr(n, 'op_name')
                and n.op_name == 'UserFunction']

    already_covered = [n.inputs[0].owner if n.inputs[0].is_output else
                       n.inputs[0] for n in uf_nodes]
    to_remove = [n.uid for n in (already_covered + uf_nodes)]

    return [n for n in nodes if n.uid not in to_remove]


[docs]def debug_model(model, in_stream=sys.stdin, out_stream=sys.stdout,
                exit_func=sys.exit):
    '''
    debug_model(model, in_stream=sys.stdin, out_stream=sys.stdout, exit_func=sys.exit)

    Returns a cloned model that has debug nodes inserted everywhere. When the
    graph is evaluated or trained, those nodes will allow to inspect the graph.

    Args:
      model (root node): root node until which the nodes are to be debugged
      in_stream (object behaving like sys.stdin, default stdin): `readline()`
       will be called on it to obtain user input
      out_stream (object behaving like sys.stdout, default stdout): `write()`
       and `flush()` will be called on it to output debug info to the user
      exit_func (callable, default sys.exit): callable that takes an exit code and is called,
       when the user exits the debugging process

    Returns:
      a clone of the model that has debugging enabled
    '''
    nodes = _nodes_to_debug(model)
    dbg_state = _DebugState(nodes)

    orig_node_count = len(nodes)
    mod_counter = 0

    # We cannot add the DebugNodes in one clone because the replacements will
    # hide parent nodes.
    while len(nodes) > 0:
        modifications = {n: user_function(_DebugNode(n, dbg_state,
                                                     in_stream, out_stream,
                                                     exit_func))
                         for n in nodes}

        model = model.clone(CloneMethod.share, modifications)

        mod_counter += 1

        if mod_counter > orig_node_count:
            raise ValueError('cannot debug this graph')

        nodes = _nodes_to_debug(model)

    return model