# Copyright 2016 Hewlett Packard Enterprise Development LP
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for
# the specific language governing permissions and limitations under the License.
import ctypes
import string
import re
import six
import numpy as np
from . import language_pb2 as lang
class DType(object):
"""
Data type object which indicates a low level type.
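:Example:
Construct a DType from a numpy type, or use one of the predefined module-level DTypes (a minimal illustration)::
dt = DType(np.float32)
assert dt == float32
assert dt.as_cstr() == 'float'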
"""
_np_lookup = {
np.float16: lang.FLOAT16,
np.float32: lang.FLOAT32,
np.float64: lang.FLOAT64,
np.int8: lang.INT8,
np.int16: lang.INT16,
np.int32: lang.INT32,
np.int64: lang.INT64,
np.uint8: lang.UINT8,
np.uint16: lang.UINT16,
np.uint32: lang.UINT32,
np.uint64: lang.UINT64}
_ctypes_lookup = {
lang.FLOAT32: ctypes.c_float,
lang.FLOAT64: ctypes.c_double,
lang.INT8: ctypes.c_int8,
lang.INT16: ctypes.c_int16,
lang.INT32: ctypes.c_int32,
lang.INT64: ctypes.c_int64,
lang.UINT8: ctypes.c_uint8,
lang.UINT16: ctypes.c_uint16,
lang.UINT32: ctypes.c_uint32,
lang.UINT64: ctypes.c_uint64
}
_cstr_lookup = {
lang.FLOAT32: 'float',
lang.FLOAT64: 'double',
lang.INT8: 'int8_t',
lang.INT16: 'int16_t',
lang.INT32: 'int32_t',
lang.INT64: 'int64_t',
lang.UINT8: 'uint8_t',
lang.UINT16: 'uint16_t',
lang.UINT32: 'uint32_t',
lang.UINT64: 'uint64_t'
}
_tensorflow_lookup = {
lang.FLOAT32: 'float',
lang.FLOAT64: 'double',
lang.INT8: 'int8',
lang.INT16: 'int16',
lang.INT32: 'int32',
lang.INT64: 'int64',
lang.UINT8: 'uint8',
lang.UINT16: 'uint16'
}
def __init__(self, dtype):
if type(dtype) is DType:
self.proto_dtype = dtype.proto_dtype
elif dtype in list(DType._np_lookup.keys()):
# find index by equality, rather than equivalency
index = list(DType._np_lookup.keys()).index(dtype)
self.proto_dtype = list(DType._np_lookup.values())[index]
elif isinstance(dtype, six.integer_types):
if dtype not in list(lang.DType.values()):
raise ValueError('dtype ' + str(dtype) + ' is not valid.')
else:
self.proto_dtype = dtype
else:
raise TypeError('dtype ' + str(dtype) + ' is not valid.')
def __eq__(self, other):
if type(other) is not DType:
return False
else:
return self.proto_dtype == other.proto_dtype
def __hash__(self):
return hash(self.proto_dtype)
def __ne__(self, other):
return not self == other
def __str__(self):
return lang.DType.Name(self.proto_dtype)
def as_numpy(self):
return list(DType._np_lookup.keys())[list(DType._np_lookup.values()).index(self.proto_dtype)]
def as_ctypes(self):
return DType._ctypes_lookup[self.proto_dtype]
def as_cstr(self):
return DType._cstr_lookup[self.proto_dtype]
def as_tensorflow(self):
return DType._tensorflow_lookup[self.proto_dtype]
def as_proto(self):
return self.proto_dtype
#: The half-precision floating point DType
float16 = DType(lang.FLOAT16)
#: The single precision floating point DType
float32 = DType(lang.FLOAT32)
#: The double precision floating point DType
float64 = DType(lang.FLOAT64)
#: The 8 bit signed integer DType
int8 = DType(lang.INT8)
#: The 16 bit signed integer DType
int16 = DType(lang.INT16)
#: The 32 bit signed integer DType
int32 = DType(lang.INT32)
#: The 64 bit signed integer DType
int64 = DType(lang.INT64)
#: The 8 bit unsigned integer DType
uint8 = DType(lang.UINT8)
#: The 16 bit unsigned integer DType
uint16 = DType(lang.UINT16)
#: The 32 bit unsigned integer DType
uint32 = DType(lang.UINT32)
#: The 64 bit unsigned integer DType
uint64 = DType(lang.UINT64)
supported_types = [float16, float32, float64, int8, int16, int32, int64, uint8, uint16, uint32, uint64]
class TensorType(object):
"""
A tensor is defined by its data type and its shape.
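:Example:
Describe a 4x5 tensor of single precision floats (a minimal illustration)::
tt = TensorType([4, 5], float32)
assert tt.rank == 2
assert tt.size == 20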
"""
def __init__(self, shape, dtype):
"""
:param shape: The tensor shape
:param dtype: The data type
:return: A tensor type object
"""
self.dtype = DType(dtype)
if isinstance(shape, six.integer_types):
self.shape = [shape]
else:
self.shape = []
for elem in shape:
if elem is None:
raise TypeError('All dimensions must be defined.')
elif not isinstance(elem, six.integer_types):
raise TypeError('Shape must be an iterable of ints.')
else:
self.shape.append(elem)
for elem in self.shape:
if elem <= 0:
raise ValueError('All tensor dimensions must be positive, but got: ' + str(self.shape))
self.size = 1
for elem in self.shape:
self.size *= elem
self.rank = len(self.shape)
self._proto_tensor_type = lang.TensorType()
self._proto_tensor_type.dtype = self.dtype.proto_dtype
self._proto_tensor_type.shape.extend(self.shape)
def __eq__(self, other):
if not isinstance(other, TensorType):
return False
else:
return self._proto_tensor_type == other.as_proto()
def __ne__(self, other):
if type(other) is not TensorType:
return True
else:
return self._proto_tensor_type != other.as_proto()
def __str__(self):
return str(self._proto_tensor_type)
@staticmethod
def like(other):
"""
Resolve the TensorType of the argument
:param other: The input object
:return: A TensorType like the input object
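:Example:
Mirror the type of a numpy array (a minimal illustration)::
arr = np.zeros((3, 4), dtype=np.float64)
tt = TensorType.like(arr)
assert tt == TensorType([3, 4], float64)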
"""
try:
other_shape = other.shape
except AttributeError:
other_shape = other.get_shape().as_list()
try:
other_dtype = other.dtype.as_numpy_dtype().dtype
except AttributeError:
other_dtype = other.dtype
return TensorType(other_shape, other_dtype)
@staticmethod
def from_proto(proto):
"""
Recover TensorType object from protocol buffer serialization
:param proto: the protobuf
:return: A TensorType object
"""
return TensorType(proto.shape, proto.dtype)
def as_proto(self):
"""
Serialize this object as a protocol buffer
:return: A protobuf
"""
tt = lang.TensorType()
tt.CopyFrom(self._proto_tensor_type)
return tt
def _list_to_str(x):
out = ''
for i, cur in enumerate(x):
out += str(cur)
if i < len(x) - 1:
out += ', '
return out
class ExpressionDAG(object):
"""
Singleton object for keeping track of expressions in the order in which they are defined. Expressions must register
themselves with the ExpressionDAG via the append() method upon construction.
"""
exprs = []
expr_ids = []
workgroup_shape = None
num_outputs = 0
num_inputs = 0
@staticmethod
def io_types():
input_types = ExpressionDAG.num_inputs*[None]
output_types = ExpressionDAG.num_outputs*[None]
found_count = 0
for expr in ExpressionDAG.exprs:
if expr.proto_expr.code == lang.INPUT:
input_types[expr.proto_expr.io_index] = TensorType.from_proto(expr.proto_expr.tensor_type)
found_count += 1
elif expr.proto_expr.code == lang.OUTPUT:
output_types[expr.proto_expr.io_index] = TensorType.from_proto(expr.proto_expr.tensor_type)
found_count += 1
if found_count == ExpressionDAG.num_inputs + ExpressionDAG.num_outputs:
break
return input_types, output_types
@staticmethod
def clear():
"""
Clear all currently tracked expressions.
:return:
"""
ExpressionDAG.exprs = []
ExpressionDAG.expr_ids = []
ExpressionDAG.workgroup_shape = None
ExpressionDAG.num_outputs = 0
ExpressionDAG.num_inputs = 0
@staticmethod
def append(item):
"""
Append an item to the expression list.
:param item: The expression to append.
:return: None
"""
if not issubclass(item.__class__, _Expression):
raise TypeError('Can only append expressions.')
if type(item) is PositionTensor:
if ExpressionDAG.workgroup_shape is None:
ExpressionDAG.workgroup_shape = item.proto_expr.uint32_data
else:
raise ValueError('Already defined the position tensor.')
if type(item) is OutputTensor:
if item.proto_expr.io_index != ExpressionDAG.num_outputs:
raise ValueError('Trying to add outputs to expression dag out of order')
ExpressionDAG.num_outputs += 1
if type(item) is InputTensor:
if item.proto_expr.io_index != ExpressionDAG.num_inputs:
raise ValueError('Trying to add inputs to expression dag out of order')
ExpressionDAG.num_inputs += 1
# Assign names to each expression as they get appended
if item.name is None:
if type(item) is InputTensor:
item.name = '(*in'+str(item.proto_expr.io_index)+')'
elif type(item) is OutputTensor:
item.name = '(*out'+str(item.proto_expr.io_index)+')'
elif type(item) is PositionTensor:
item.name = 'position'
elif type(item) is _ConstScalar:
item.name = str(item.value())
elif type(item) is _ConstTensor:
item.name = '{'+_list_to_str(item.to_array().tolist())+'}'
else:
item.name = 'e'+str(len(ExpressionDAG.exprs))
ExpressionDAG.exprs.append(item)
ExpressionDAG.expr_ids.append(id(item))
@staticmethod
def remove_endif():
"""
find and remove the most recent _EndIf expression, used for continuing if blocks
:return: None
"""
found_endif = False
for i in range(len(ExpressionDAG.exprs)):
if type(ExpressionDAG.exprs[-i-1]) is _EndIf:
found_endif = True
del(ExpressionDAG.exprs[-i-1])
del(ExpressionDAG.expr_ids[-i-1])
break
if found_endif is False:
raise SyntaxError('Could not find prior if block')
@staticmethod
def as_proto():
"""
Serialize the current ExpressionDAG as a protocol buffer
:return: the protobuf
"""
if ExpressionDAG.workgroup_shape is None:
raise ValueError('Workgroup shape must be defined with "position_in" function.')
proto = lang.ExpressionDAG()
proto.workgroup_shape.extend(ExpressionDAG.workgroup_shape)
for i, expr in enumerate(ExpressionDAG.exprs):
proto.expressions.add().CopyFrom(expr.proto_expr)
operand_indices = []
for input_expr in expr.input_exprs:
operand_indices.append(ExpressionDAG.expr_index(input_expr))
proto.references.add().operand_indices.extend(operand_indices)
# Reorder op dag to make sure that all elseif conditionals are positioned
# before entering the if block
if_block_start = []
needs_reordering = []
for i, expr in enumerate(proto.expressions):
if expr.code == lang.IF:
if_block_start.append(i)
needs_reordering.append([])
elif expr.code == lang.ELSEIF:
# recursively find all conditional dependencies that need to be reordered
def find_reorders(x):
for ref in proto.references[x].operand_indices:
if ref > if_block_start[-1]:
find_reorders(ref)
needs_reordering[-1].append(x)
conditional_index = proto.references[i].operand_indices[0]
find_reorders(conditional_index)
elif expr.code == lang.ENDIF:
new_to_old_index = {}
num_reorders = len(needs_reordering[-1])
if num_reorders > 0:
reorder_count = 0
for cur_index in range(if_block_start[-1], needs_reordering[-1][-1] + 1):
if cur_index in needs_reordering[-1]:
new_to_old_index[if_block_start[-1] + reorder_count] = cur_index
reorder_count += 1
else:
new_to_old_index[cur_index + num_reorders - reorder_count] = cur_index
def new_to_old(x):
if x in list(new_to_old_index.keys()):
return new_to_old_index[x]
else:
return x
old_to_new_index = dict((v, k) for k, v in new_to_old_index.items())
def old_to_new(x):
if x in list(old_to_new_index.keys()):
return old_to_new_index[x]
else:
return x
# perform the reordering
new_dag = lang.ExpressionDAG()
num_expressions = len(proto.expressions)
for cur_index in range(num_expressions):
# copy expressions from old spot to new spot
cur_expr = proto.expressions[new_to_old(cur_index)]
head_expr = new_dag.expressions.add()
head_expr.CopyFrom(cur_expr)
# copy and update references from old spot to new spot
cur_refs = []
for ref in proto.references[new_to_old(cur_index)].operand_indices:
cur_refs.append(old_to_new(ref))
head_reference = new_dag.references.add()
head_reference.operand_indices.extend(cur_refs)
proto = new_dag
# finished reordering conditionals, get rid of reordering info
if_block_start = if_block_start[:-1]
needs_reordering = needs_reordering[:-1]
return proto
@staticmethod
def from_proto(expression_dag):
"""
Clear the current ExpressionDAG and build up a fresh one from a serialized protocol buffer.
:param expression_dag: the serialized protobuf
:return: None
"""
code_to_class = {
lang.INPUT: InputTensor,
lang.OUTPUT: OutputTensor,
lang.CONST_SCALAR: _ConstScalar,
lang.CONST_TENSOR: _ConstTensor,
lang.POSITION: PositionTensor,
lang.VARIABLE: Variable,
lang.CAST: _Cast,
lang.TENSOR: LocalTensor,
lang.ASSIGN_VARIABLE: _AssignVariable,
lang.ASSIGN_TENSOR: _AssignTensor,
lang.READ_TENSOR: _ReadTensor,
lang.RANGE: _Range,
lang.ENDRANGE: _EndRange,
lang.IF: _If,
lang.ELSEIF: _ElseIf,
lang.ELSE: _Else,
lang.ENDIF: _EndIf,
lang.ACOS: _UnaryMath,
lang.ASIN: _UnaryMath,
lang.ATAN: _UnaryMath,
lang.COS: _UnaryMath,
lang.COSH: _UnaryMath,
lang.SIN: _UnaryMath,
lang.SINH: _UnaryMath,
lang.TAN: _UnaryMath,
lang.TANH: _UnaryMath,
lang.EXP: _UnaryMath,
lang.LOG: _UnaryMath,
lang.LOG10: _UnaryMath,
lang.SQRT: _UnaryMath,
lang.CEIL: _UnaryMath,
lang.FLOOR: _UnaryMath,
lang.ABS: _UnaryMath,
lang.NEGATE: _UnaryMath,
lang.NOT: _UnaryMath,
lang.ADD: _BinaryMath,
lang.SUBTRACT: _BinaryMath,
lang.MULTIPLY: _BinaryMath,
lang.DIVIDE: _BinaryMath,
lang.MODULO: _BinaryMath,
lang.AND: _BinaryMath,
lang.OR: _BinaryMath,
lang.EQUAL: _BinaryMath,
lang.NOTEQUAL: _BinaryMath,
lang.LESS: _BinaryMath,
lang.LESS_EQ: _BinaryMath,
lang.GREATER: _BinaryMath,
lang.GREATER_EQ: _BinaryMath,
lang.MIN: _BinaryMath,
lang.MAX: _BinaryMath,
lang.POW: _BinaryMath,
lang.ATAN2: _BinaryMath,
lang.ISINF: _UnaryMath,
lang.ISFINITE: _UnaryMath,
lang.ISNAN: _UnaryMath,
lang.MIN_VALUE: _Limits,
lang.MAX_VALUE: _Limits,
lang.EPSILON: _Limits
}
ExpressionDAG.clear()
# iterate through each proto expression and build up the graph
for i, expr in enumerate(expression_dag.expressions):
cur_refs = expression_dag.references[i].operand_indices
input_exprs = []
for cur_ref in cur_refs:
input_exprs.append(ExpressionDAG.exprs[cur_ref])
code_to_class[expr.code].from_proto(expr, input_exprs)
@staticmethod
def generate(expression_dag, function_name):
"""
Generate C and CUDA code for evaluating the operation defined in the supplied serialized expression dag
protocol buffer.
:param expression_dag: The protobuf
:param function_name: The name of the function to use
:return: a tuple containing the source for the generic C++ interface and the generic CUDA interface
"""
def _strip_margin(s):
return re.sub(r'\n[ \t]*\|', '\n', s)
ExpressionDAG.from_proto(expression_dag)
inputs = list()
outputs = list()
position = None
for expr in ExpressionDAG.exprs:
if type(expr) is InputTensor:
inputs.append(expr)
elif type(expr) is OutputTensor:
outputs.append(expr)
elif type(expr) is PositionTensor:
position = expr
num_inputs = len(inputs)
num_outputs = len(outputs)
args = list()
for arg in inputs + outputs:
args.append(arg.gen_ptr())
args_str = _list_to_str(args)
workgroup_shape = position.proto_expr.uint32_data
workgroup_block_size = [1]
num_workers = 1
for cur_dim in workgroup_shape:
num_workers *= cur_dim
expression_src = ''
for expr in ExpressionDAG.exprs:
try:
cur_c = expr.gen_c()
except NotImplementedError as e:
raise NotImplementedError(str(expr))
if cur_c != '':
expression_src += ' ' + cur_c
# generate c function
c_src = """
|//Generated Code
|#include <stdint.h>
|#include <stdlib.h>
|#include <math.h>
|
|//aliases for integer absolute values for ints < 32 bits in size
|//aliases are used because abs() does not work for
|// 8 and 16 bit ints in CUDA. We use an alias here so that we can share
|// code generation infrastructure.
|#define abs_8(x) abs(x);
|#define abs_16(x) abs(x);
|
|void ${function_name}(${args_str},
| uint32_t block_size, uint32_t thread_index){
| uint32_t start = thread_index * block_size;
| uint32_t end = start + block_size;
| if (end > ${num_workers}) end = ${num_workers};
| for(uint32_t worker_index=start; worker_index < end; worker_index++){
|${expression_src}
| }
|}
|"""
c_src = string.Template(c_src).substitute(locals())
c_src = _strip_margin(c_src)
cuda_function = _strip_margin(string.Template("""
|//Generated Code
|
|//define integer absolute value function
|inline __device__ int8_t abs_8(const int8_t & x){ return ( x<0 ) ? -x : x;}
|inline __device__ int16_t abs_16(const int16_t & x){ return ( x<0 ) ? -x : x;}
|
|extern \"C\" __global__
|void ${function_name}(${args_str}){
| uint32_t worker_index = blockIdx.x * blockDim.x + threadIdx.x;
| if (worker_index < ${num_workers}) {
|${expression_src}
| }
|}
""").substitute(locals()))
# Generate the c generic parameter interface for unpacking polymorphic io parameters
generic_args = []
io_ptrs = ''
for inp in inputs:
cur_index = inp.proto_expr.io_index
cur_name = 'in'+str(cur_index)
generic_args.append(cur_name + '.p_fixed_len')
elements = inp.size
tipe = inp.dtype.as_cstr()
io_ptrs += string.Template("""
| if(inputs[${cur_index}]->length() != ${elements}) { *err = 1; return; }
| union u_in${cur_index}{
| const ${tipe} *p_arb_len;
| const ${tipe} (*p_fixed_len)[${elements}];
| };
| union u_in${cur_index} in${cur_index};
| in${cur_index}.p_arb_len = inputs[${cur_index}]->get<${tipe}>();
|""").substitute(locals())
for outp in outputs:
cur_index = outp.proto_expr.io_index
cur_name = 'out'+str(cur_index)
generic_args.append(cur_name + '.p_fixed_len')
elements = outp.size
tipe = outp.dtype.as_cstr()
io_ptrs += string.Template("""
| if(outputs[${cur_index}]->length() != ${elements}) { *err = 1; return; }
| union u_out${cur_index}{
| ${tipe} *p_arb_len;
| ${tipe} (*p_fixed_len)[${elements}];
| };
| union u_out${cur_index} out${cur_index};
| out${cur_index}.p_arb_len = outputs[${cur_index}]->get<${tipe}>();
|""").substitute(locals())
args = _list_to_str(generic_args)
c_generic = """
|#include "dynamiclibop.h"
|#include <vector>
|#include <memory>
|#include <cfloat>
|
|${c_src}
|
|extern "C"
|void ${function_name}_generic_cpp(std::vector<std::shared_ptr<const InputParameter>> inputs,
| std::vector<std::shared_ptr<OutputParameter>> outputs,
| uint32_t num_threads, uint32_t thread_index, uint16_t* err){
| //check that the number of inputs and outputs is correct
| if(inputs.size() != ${num_inputs}){ *err = 1; return; }
| if(outputs.size() != ${num_outputs}){ *err = 1; return; }
|
| //check that the size of inputs and outputs is correct, and cast them as pointers to arrays
${io_ptrs}
| uint32_t block_size = ${num_workers} / num_threads;
| if(${num_workers} % num_threads > 0) block_size += 1;
| return ${function_name}(${args}, block_size, thread_index);
|}
|"""
c_generic = string.Template(c_generic).substitute(locals())
c_generic = _strip_margin(c_generic)
# Generate the cuda generic parameter interface for unpacking polymorphic io parameters
cuda_generic = """
|#include "dynamiclibop.h"
|#include <vector>
|#include <string>
|#include <memory>
|#include <cfloat>
|#include <cuda.h>
|
|${cuda_function}
|
|extern "C"
|void ${function_name}_generic_cuda(std::vector<std::shared_ptr<const InputParameter>> inputs,
| std::vector<std::shared_ptr<OutputParameter>> outputs,
| CUstream stream, uint16_t threads_per_block, uint16_t* err){
| //check that the number of inputs and outputs is correct
| if(inputs.size() != ${num_inputs}){ *err = 1; return; }
| if(outputs.size() != ${num_outputs}){ *err = 1; return; }
|
| //check that the size of inputs and outputs is correct, and cast them as pointers to arrays
${io_ptrs}
| //enqueue function on stream
| uint32_t num_blocks = ${num_workers} / threads_per_block;
| if(${num_workers} % threads_per_block > 0) num_blocks += 1;
| ${function_name}<<<num_blocks, threads_per_block, 0, stream>>>(${args});
|}
"""
cuda_generic = string.Template(cuda_generic).substitute(locals())
cuda_generic = _strip_margin(cuda_generic)
return c_generic, cuda_generic
@staticmethod
def expr_index(expr):
"""
Resolve the index of a particular expression in the current DAG
:param expr: the expression
:return: its index
"""
return ExpressionDAG.expr_ids.index(id(expr))
class _Expression(object):
"""
The abstract class that defines the behavior of expressions.
"""
def __init__(self, expression_code):
# assign a protocol buffers member corresponding to this python object
if expression_code not in list(lang.ExpressionCode.values()):
raise ValueError('Expression code ' + str(expression_code) + ' is not valid.')
self.proto_expr = lang.Expression()
self.proto_expr.code = expression_code
self.input_exprs = []
self.name = None
def _register(self):
ExpressionDAG.append(self)
def __str__(self):
return str(self.proto_expr)
def gen_c(self):
raise NotImplementedError('Abstract Class')
def __ilshift__(self, other):
raise SyntaxError('Can only use assignment operator <<= on a variable.')
@staticmethod
def from_proto(proto, input_exprs):
raise NotImplementedError('Abstract Class')
def __bool__(self):
return self.__nonzero__()
def __nonzero__(self):
raise SyntaxError('Attempting to interpret the truth of an expression. This typically happens when trying to '
'use a python native "if", "min", or "max" statement to create a data-dependent conditional '
'inside of an operator, which is not supported. To do so you must use the corresponding '
'"with if_(...)", "minimum", and "maximum" functions.')
class Scalar(_Expression):
"""
An expression that refers to a single data value which has a data type
"""
def __init__(self, expr_code, dtype):
if not isinstance(dtype, DType):
raise TypeError('Scalar expressions must be initialized with a DType')
super(Scalar, self).__init__(expr_code)
self.dtype = dtype
self.proto_expr.dtype = dtype.as_proto()
def __add__(self, other):
return _BinaryMath(self, other, lang.ADD)
def __radd__(self, other):
return _BinaryMath(other, self, lang.ADD)
def __sub__(self, other):
return _BinaryMath(self, other, lang.SUBTRACT)
def __rsub__(self, other):
return _BinaryMath(other, self, lang.SUBTRACT)
def __mul__(self, other):
return _BinaryMath(self, other, lang.MULTIPLY)
def __rmul__(self, other):
return _BinaryMath(other, self, lang.MULTIPLY)
# python 2
def __div__(self, other):
return _BinaryMath(self, other, lang.DIVIDE)
def __rdiv__(self, other):
return _BinaryMath(other, self, lang.DIVIDE)
# python 3
def __truediv__(self, other):
return _BinaryMath(self, other, lang.DIVIDE)
def __rtruediv__(self, other):
return _BinaryMath(other, self, lang.DIVIDE)
def __mod__(self, other):
return _BinaryMath(self, other, lang.MODULO)
def __rmod__(self, other):
return _BinaryMath(other, self, lang.MODULO)
def __eq__(self, other):
return _BinaryMath(self, other, lang.EQUAL)
def __ne__(self, other):
return _BinaryMath(self, other, lang.NOTEQUAL)
def __lt__(self, other):
return _BinaryMath(self, other, lang.LESS)
def __le__(self, other):
return _BinaryMath(self, other, lang.LESS_EQ)
def __gt__(self, other):
return _BinaryMath(self, other, lang.GREATER)
def __ge__(self, other):
return _BinaryMath(self, other, lang.GREATER_EQ)
def __neg__(self):
return _UnaryMath(self, lang.NEGATE)
@staticmethod
def from_proto(proto, input_exprs):
raise NotImplementedError('Abstract Class')
def gen_c(self):
raise NotImplementedError('Abstract Class')
class _TensorExpression(_Expression):
"""
An expression that refers to a tensor of data which has a TensorType
"""
def __init__(self, expr_code, tensor_type):
super(_TensorExpression, self).__init__(expr_code)
self.dtype = tensor_type.dtype
self.shape = tensor_type.shape
self.size = tensor_type.size
self.rank = tensor_type.rank
self.tensor_type = tensor_type
self.proto_expr.tensor_type.CopyFrom(tensor_type.as_proto())
def gen_ptr(self):
raise NotImplementedError('Abstract Class')
@staticmethod
def from_proto(proto, input_exprs):
raise NotImplementedError('Abstract Class')
def gen_c(self):
raise NotImplementedError('Abstract Class')
def _to_scalar_index(target_shape, index):
"""
Helper function for indexing tensors. All tensors are stored as C-style flattened arrays in memory, but are indexed
from the API with an index for each dimension. This function resolves the scalar index of the tensor memory from
the input index. The length of the input index must always be the same as the rank of the target tensor. The index
can be a tensor, or a mixed iterable of constants, scalar expressions, and 0-D tensor expressions.
:param target_shape: The shape of the tensor to be indexed
:param index: The index tensor or iterable
:return: a scalar expression containing the index of the flattened target tensor memory
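:Example:
For a C-ordered tensor of shape [4, 5] the per-dimension block sizes are [5, 1], so the index [2, 3] resolves to
the scalar offset 2*5 + 3*1 = 13.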
"""
target_rank = len(target_shape)
block_size = [1]
for cur_dim in range(len(target_shape)-1, 0, -1):
block_size.append(block_size[-1]*target_shape[cur_dim])
block_size.reverse()
# try to wrap index as a const tensor
try:
index_expr = _ConstTensor(index)
except TypeError:
index_expr = index
# wrap scalar expressions as lists
if issubclass(index_expr.__class__, Scalar):
index_expr = [index_expr]
# try to wrap index as an explicit array tensor
# (e.g. img[row, column] where both row and column are scalar variables of the same type)
# allow for constants to be mixed with expressions (e.g. img[row, 5])
if type(index_expr) is list or type(index_expr) is tuple:
explicit_len = len(index_expr)
if target_rank != explicit_len:
raise IndexError('length of index list (' + str(explicit_len) +
') must match indexed tensor rank (' + str(target_rank) + ')')
exprs = []
for value in index_expr:
if issubclass(value.__class__, _Expression):
if not issubclass(value.__class__, Scalar):
if issubclass(value.__class__, _TensorExpression) and value.size == 1:
# enable indexing with size == 1 tensor
# This typically arises when the workgroup shape is 1D and the position
# tensor is a single value.
value = value[0]
else:
raise TypeError('Must index tensors with an int or a scalar expression. Instead got:\n' +
str(value))
exprs.append(value)
else:
# this dimension is constant, perform static bounds checking
cur_dim = len(exprs)
cur_shape = target_shape[cur_dim]
cur_value = int(np.floor(value))
if cur_value >= cur_shape or cur_value < 0:
raise IndexError('Expected index to be in range [0, ' + str(cur_shape) +
'), but received index value ' + str(cur_value))
exprs.append(cur_value)
index = None
for i, expr in enumerate(exprs):
if not isinstance(expr, six.integer_types):
# todo: optionally dynamically constrain each non-constant dimensional index to within shape bounds
# bound_expr = cast(minimum(maximum(expr, 0), target_shape[i]-1), uint64)
bound_expr = cast(expr, uint64)
else:
bound_expr = expr
if index is None:
index = bound_expr*block_size[i]
else:
index = index + bound_expr*block_size[i]
return index
elif type(index_expr) is _ConstTensor:
# indexing with a constant, perform static bounds checking
if len(index_expr.shape) != 1:
raise IndexError('Index must be one dimensional')
if index_expr.shape[0] != target_rank:
raise IndexError('length of index tensor (' + str(index_expr.shape[0]) +
') must match indexed tensor rank (' + str(target_rank) + ')')
data = np.floor(index_expr.to_array())
index = 0
for i, elem in enumerate(data):
cur_shape = target_shape[i]
cur_value = int(elem)
if cur_value >= cur_shape or cur_value < 0:
raise IndexError('Expected index to be in range [0, ' + str(cur_shape) +
'), but received index value ' + str(cur_value))
index += int(elem)*block_size[i]
return index
elif type(index_expr) in [LocalTensor, PositionTensor, InputTensor]:
if len(index_expr.shape) != 1:
raise IndexError('Index must be one dimensional')
if index_expr.shape[0] != target_rank:
raise IndexError('length of index tensor (' + str(index_expr.shape[0]) +
') must match indexed tensor rank (' + str(target_rank) + ')')
index = None
for i in range(target_rank):
cur_shape = target_shape[i]
cur_index = _ReadTensor(index_expr, i)
# todo: optionally dynamically constrain each dimensional index to within shape bounds
# bound_index = minimum(maximum(cast(cur_index, uint64), 0), cur_shape-1)
bound_index = cast(cur_index, uint64)
if index is None:
index = bound_index*block_size[i]
else:
index = index + bound_index*block_size[i]
return index
else:
raise TypeError('Cannot index a tensor with a ' + str(type(index_expr)))
class _Readable(object):
"""
A trait for tensors which enables them to be read
"""
def __getitem__(self, item):
return _ReadTensor(self, _to_scalar_index(self.shape, item))
class _Writable(object):
"""
A trait for tensors which enables them to be written
"""
def __setitem__(self, key, value):
_AssignTensor(self, _to_scalar_index(self.shape, key), value)
def _tensor_type_polymorphic(*args):
"""
A helper function for resolving polymorphic inputs into a TensorType
:param args: args that define a TensorType, can be either a TensorType or a shape and a DType
:return: the resolved TensorType
"""
err_msg = 'Expected a TensorType or a shape and a dtype as arguments'
if len(args) == 1:
if type(args[0]) is not TensorType:
raise TypeError(err_msg)
tensor_type = args[0]
elif len(args) == 2:
tensor_type = TensorType(args[0], args[1])
else:
raise TypeError(err_msg)
return tensor_type
def input(*args):
"""
Create a new input
:param args: args that define a TensorType, can be either a TensorType or a shape and a DType
:return: the input expression
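:Example:
Declare a 10 element single precision input tensor (a minimal sketch)::
in0 = input([10], float32)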
"""
tensor_type = _tensor_type_polymorphic(*args)
return InputTensor(tensor_type, ExpressionDAG.num_inputs)
def output(*args):
"""
Define a new output
:param args: args that define a TensorType, can be either a TensorType or a shape and a DType
:return: a tensor expression which refers to the newly defined output tensor
:Example:
Create a new output tensor ``out`` based on the ``TensorType`` of input tensor ``in0``::
out = output(in0.tensor_type)
:Example:
Create a new output tensor ``out`` based on the ``shape`` of input tensor ``in0`` and the ``DType`` of input tensor
``in1``::
out = output(in0.shape, in1.dtype)
"""
tensor_type = _tensor_type_polymorphic(*args)
return OutputTensor(tensor_type, ExpressionDAG.num_outputs)
def output_like(other):
"""
Define a new output with the same TensorType as another tensor
:param other: another tensor
:return: a tensor expression which refers to the newly defined output tensor
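:Example:
Create an output with the same TensorType as an existing tensor, assuming ``in0`` was previously defined with the
``input`` function::
out = output_like(in0)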
"""
return output(TensorType.like(other))
class OutputTensor(_TensorExpression, _Writable):
"""
A write-only output expression
"""
def __init__(self, tensor_type, io_index):
if not isinstance(tensor_type, TensorType):
raise TypeError
if not isinstance(io_index, six.integer_types):
raise TypeError
super(self.__class__, self).__init__(lang.OUTPUT, tensor_type)
if io_index < 0 or io_index > 2**32-1:
raise ValueError
self.proto_expr.io_index = io_index
super(self.__class__, self)._register()
def gen_ptr(self):
tipe = self.dtype.as_cstr()
name = self.name
elems = self.size
p = string.Template('${tipe} ${name}[${elems}]').substitute(locals())
return p
@staticmethod
def from_proto(proto, input_exprs):
tt = TensorType.from_proto(proto.tensor_type)
return OutputTensor(tt, proto.io_index)
def gen_c(self):
return ''
class _ConstScalar(Scalar):
"""
A constant expression
"""
def __init__(self, value):
if type(value) is float:
super(self.__class__, self).__init__(lang.CONST_SCALAR, float64)
self.proto_expr.double_data.append(value)
elif isinstance(value, six.integer_types):
super(self.__class__, self).__init__(lang.CONST_SCALAR, int64)
self.proto_expr.sint64_data.append(value)
else:
tipe = str(type(value))
raise TypeError('Tried to wrap a '+tipe+' as a ConstScalar. Can only wrap an int or float')
super(self.__class__, self)._register()
def value(self):
if self.proto_expr.dtype == lang.FLOAT64:
return float(self.proto_expr.double_data[0])
elif self.proto_expr.dtype == lang.INT64:
return int(self.proto_expr.sint64_data[0])
else:
raise ValueError('Can only get a value from float64 or int64 constants.')
@staticmethod
def from_proto(proto, input_exprs):
if proto.dtype == lang.FLOAT64:
return _ConstScalar(float(proto.double_data[0]))
elif proto.dtype == lang.FLOAT32:
return _ConstScalar(float(proto.float_data[0]))
elif proto.dtype == lang.INT64:
return _ConstScalar(int(proto.sint64_data[0]))
else:
raise ValueError('Cannot recover constant scalar protobuf.')
def gen_c(self):
# return 'const ' + self.dtype.as_cstr() + ' ' + self.name + ' = ' + str(self.value()) + ';\n'
return ''
class _ConstTensor(_TensorExpression, _Readable):
"""
A constant tensor expression
"""
# translation table between dtypes and retrieval function for the data container to use
proto_data_lut = {
float16: lambda x: x.float_data,
float32: lambda x: x.float_data,
float64: lambda x: x.double_data,
int8: lambda x: x.sint32_data,
int16: lambda x: x.sint32_data,
int32: lambda x: x.sint32_data,
int64: lambda x: x.sint64_data,
uint8: lambda x: x.uint32_data,
uint16: lambda x: x.uint32_data,
uint32: lambda x: x.uint32_data,
uint64: lambda x: x.uint64_data
}
def __init__(self, value):
# use numpy functionality to convert lists and tuples to arrays
if type(value) is list:
array = np.array(value)
elif type(value) is tuple:
array = np.array(value)
elif type(value) is np.ndarray:
array = value
elif isinstance(value, six.integer_types) or type(value) is float:
array = np.array([value])
else:
raise TypeError('ConstTensors can wrap lists, tuples, and numpy arrays')
super(self.__class__, self).__init__(lang.CONST_TENSOR, TensorType.like(array))
# build up protobuf representation
flat_data = array.flatten(order='C').tolist()
vals = list(_ConstTensor.proto_data_lut.values())
keys = list(_ConstTensor.proto_data_lut.keys())
proto_data_retrieval = vals[keys.index(self.tensor_type.dtype)]
proto_data = proto_data_retrieval(self.proto_expr)
proto_data.extend(flat_data)
super(self.__class__, self)._register()
def to_array(self):
vals = list(_ConstTensor.proto_data_lut.values())
keys = list(_ConstTensor.proto_data_lut.keys())
proto_data_retrieval = vals[keys.index(self.tensor_type.dtype)]
proto_data = proto_data_retrieval(self.proto_expr)
data = np.array(proto_data, dtype=self.dtype.as_numpy())
return data
@staticmethod
def from_proto(proto, input_exprs):
dtype = DType(proto.tensor_type.dtype)
vals = list(_ConstTensor.proto_data_lut.values())
keys = list(_ConstTensor.proto_data_lut.keys())
proto_data_retrieval = vals[keys.index(dtype)]
proto_data = proto_data_retrieval(proto)
data = np.array(proto_data, dtype=dtype.as_numpy())
return _ConstTensor(data)
def gen_ptr(self):
tipe = self.dtype.as_cstr()
name = self.name
elems = self.size
return string.Template('const ${tipe} ${name}[${elems}]').substitute(locals())
def gen_c(self):
return ''
def position_in(workgroup_shape):
"""
Define the workgroup shape and retrieve a tensor expression that refers to the current position in that
workgroup shape.
:param workgroup_shape: An iterable of ints defining the shape of the workgroup
:return: a tensor expression which references the current workgroup position
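:Example:
Launch one worker per element of a rank-1 tensor, assuming ``in0`` is a 10 element input tensor and ``out0`` is an
output tensor with the same TensorType::
pos = position_in([10])
out0[pos[0]] = in0[pos[0]]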
"""
return PositionTensor(workgroup_shape)
class PositionTensor(_TensorExpression, _Readable):
"""
The position expression which refers to the current position within the workgroup shape
"""
def __init__(self, workgroup_shape):
if isinstance(workgroup_shape, six.integer_types):
self.workgroup_shape = [workgroup_shape]
else:
try:
for elem in workgroup_shape:
if not isinstance(elem, six.integer_types):
raise TypeError
except TypeError:
raise TypeError('workgroup_shape must be an int or an iterable of ints')
self.workgroup_shape = workgroup_shape
workgroup_dims = len(self.workgroup_shape)
tensor_type = TensorType([workgroup_dims], uint32)
super(self.__class__, self).__init__(lang.POSITION, tensor_type)
self.proto_expr.uint32_data.extend(self.workgroup_shape)
super(self.__class__, self)._register()
@staticmethod
def from_proto(proto, input_exprs):
return PositionTensor(proto.uint32_data)
def gen_ptr(self):
tipe = self.dtype.as_cstr()
name = self.name
elems = self.size
p = string.Template('${tipe} ${name}[${elems}]').substitute(locals())
return p
def gen_c(self):
workgroup_block_size = [1]
for cur_dim in range(self.size-1, 0, -1):
workgroup_block_size.append(workgroup_block_size[-1]*self.workgroup_shape[cur_dim])
workgroup_block_size.reverse()
position_vals = []
remainder = 'worker_index'
for cur_block in workgroup_block_size:
cur_index = '('+remainder+')/'+str(cur_block)
position_vals.append(cur_index)
remainder = remainder + ' % ' + str(cur_block)
return 'const uint32_t position['+str(self.size)+'] = {' + _list_to_str(position_vals) + '};\n'
def variable(initial_value, dtype):
"""
Function for declaring a new variable
:param initial_value: The initial value of the variable
:param dtype: The DType of the variable
:return: The variable expression
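:Example:
Declare a double precision accumulator and update it with the assignment operator (a minimal sketch)::
accum = variable(0.0, float64)
accum <<= accum + 1.0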
"""
if isinstance(initial_value, six.integer_types) or isinstance(initial_value, float):
return Variable(dtype, _ConstScalar(initial_value))
elif issubclass(initial_value.__class__, Scalar):
var = Variable(dtype, _ConstScalar(0))
var <<= initial_value
return var
else:
raise TypeError('Must initialize a variable with a numeric constant or a scalar expression.')
class Variable(Scalar):
"""
A variable expression
"""
def __init__(self, dtype, initial_const):
if not isinstance(initial_const, _ConstScalar):
raise TypeError('Variables must be initialized with a constant scalar')
if not isinstance(dtype, DType):
raise TypeError('dtype must be a DType')
super(self.__class__, self).__init__(lang.VARIABLE, dtype)
self.input_exprs = [initial_const]
super(self.__class__, self)._register()
def __ilshift__(self, other):
_AssignVariable(self, other)
return self
@staticmethod
def from_proto(proto, input_exprs):
return Variable(DType(proto.dtype), input_exprs[0])
def gen_c(self):
return self.dtype.as_cstr() + ' ' + self.name + ' = ' + self.input_exprs[0].name + ';\n'
def cast(value, dtype):
"""
Cast a scalar expression as a new data type
:param value: The scalar expression
:param dtype: The new data type
:return: The scalar expression cast to the new data type
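:Example:
Promote a value to double precision before accumulating, assuming ``x`` is a float32 scalar expression::
x64 = cast(x, float64)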
"""
return _Cast(dtype, value)
class _Cast(Scalar):
"""
The casting expression
"""
def __init__(self, dtype, target):
if not isinstance(dtype, DType):
raise TypeError('dtype must be a DType')
if not issubclass(target.__class__, Scalar):
raise TypeError('Can only cast scalar expressions. Received ' + str(type(target)) + ': ' +
str(target))
super(self.__class__, self).__init__(lang.CAST, dtype)
self.input_exprs = [target]
super(self.__class__, self)._register()
@staticmethod
def from_proto(proto, input_exprs):
return _Cast(DType(proto.dtype), input_exprs[0])
def gen_c(self):
return self.dtype.as_cstr() + ' ' + self.name + ' = ' + self.input_exprs[0].name + ';\n'
class _AssignVariable(_Expression):
"""
The variable assignment expression
"""
def __init__(self, scalar_expr, value_expr):
if not isinstance(scalar_expr, Variable):
raise TypeError('Can only assign to a variable')
if issubclass(value_expr.__class__, Scalar):
value = value_expr
else:
value = _ConstScalar(value_expr)
value = cast(value, scalar_expr.dtype)
super(self.__class__, self).__init__(lang.ASSIGN_VARIABLE)
t1 = scalar_expr.proto_expr.dtype
t2 = value.proto_expr.dtype
if not t1 == t2:
t1_str = lang.DType.Name(t1)
t2_str = lang.DType.Name(t2)
raise TypeError('cannot assign ' + t2_str + ' to ' + t1_str + ' scalar')
self.input_exprs = [scalar_expr, value]
super(self.__class__, self)._register()
@staticmethod
def from_proto(proto, input_exprs):
return _AssignVariable(input_exprs[0], input_exprs[1])
def gen_c(self):
return self.input_exprs[0].name + ' = ' + self.input_exprs[1].name + ';\n'
class _UnaryMath(Scalar):
"""
Unary expressions which transform a single scalar expression
"""
code_map = {
lang.ACOS: {lang.FLOAT32: 'acosf', lang.FLOAT64: 'acos'},
lang.ASIN: {lang.FLOAT32: 'asinf', lang.FLOAT64: 'asin'},
lang.ATAN: {lang.FLOAT32: 'atanf', lang.FLOAT64: 'atan'},
lang.COS: {lang.FLOAT32: 'cosf', lang.FLOAT64: 'cos'},
lang.COSH: {lang.FLOAT32: 'coshf', lang.FLOAT64: 'cosh'},
lang.SIN: {lang.FLOAT32: 'sinf', lang.FLOAT64: 'sin'},
lang.SINH: {lang.FLOAT32: 'sinhf', lang.FLOAT64: 'sinh'},
lang.TAN: {lang.FLOAT32: 'tanf', lang.FLOAT64: 'tan'},
lang.TANH: {lang.FLOAT32: 'tanhf', lang.FLOAT64: 'tanh'},
lang.EXP: {lang.FLOAT32: 'expf', lang.FLOAT64: 'exp'},
lang.LOG: {lang.FLOAT32: 'logf', lang.FLOAT64: 'log'},
lang.LOG10: {lang.FLOAT32: 'log10f', lang.FLOAT64: 'log10'},
lang.SQRT: {lang.FLOAT32: 'sqrtf', lang.FLOAT64: 'sqrt'},
lang.CEIL: {lang.FLOAT32: 'ceilf', lang.FLOAT64: 'ceil'},
lang.FLOOR: {lang.FLOAT32: 'floorf', lang.FLOAT64: 'floor'},
lang.ABS: {lang.FLOAT32: 'fabsf', lang.FLOAT64: 'fabs', lang.INT8: 'abs_8',
lang.INT16: 'abs_16', lang.INT32: 'abs', lang.INT64: 'labs'},
lang.NEGATE: {lang.FLOAT32: '-', lang.FLOAT64: '-',
lang.INT8: '-', lang.INT16: '-', lang.INT32: '-', lang.INT64: '-'},
lang.NOT: {lang.FLOAT32: '!', lang.FLOAT64: '!',
lang.INT8: '!', lang.INT16: '!', lang.INT32: '!', lang.INT64: '!',
lang.UINT8: '!', lang.UINT16: '!', lang.UINT32: '!', lang.UINT64: '!'},
lang.ISINF: {lang.FLOAT32: 'isinf', lang.FLOAT64: 'isinf'},
lang.ISFINITE: {lang.FLOAT32: 'isfinite', lang.FLOAT64: 'isfinite'},
lang.ISNAN: {lang.FLOAT32: 'isnan', lang.FLOAT64: 'isnan'}
}
def __init__(self, arg, expr_code):
if expr_code not in list(_UnaryMath.code_map.keys()):
raise ValueError(lang.ExpressionCode.Name(expr_code) + ' is an invalid unary math code.')
if arg.dtype.proto_dtype not in list(_UnaryMath.code_map[expr_code].keys()):
raise ValueError(lang.DType.Name(arg.dtype.proto_dtype) +
' arguments not supported for unary math function ' +
lang.ExpressionCode.Name(expr_code))
if not issubclass(arg.__class__, Scalar):
raise TypeError('Must apply math functions to scalar expressions. Received: ' + str(arg))
super(self.__class__, self).__init__(expr_code, arg.dtype)
self.input_exprs = [arg]
super(self.__class__, self)._register()
@staticmethod
def from_proto(proto, input_exprs):
return _UnaryMath(input_exprs[0], proto.code)
def gen_c(self):
func_string = _UnaryMath.code_map[self.proto_expr.code][self.proto_expr.dtype]
return self.dtype.as_cstr() + ' ' + self.name + ' = ' + func_string + '(' + self.input_exprs[0].name + ');\n'
def arccos(x):
return _UnaryMath(x, lang.ACOS)
def arcsin(x):
return _UnaryMath(x, lang.ASIN)
def arctan(x):
return _UnaryMath(x, lang.ATAN)
def cos(x):
return _UnaryMath(x, lang.COS)
def cosh(x):
return _UnaryMath(x, lang.COSH)
def sin(x):
return _UnaryMath(x, lang.SIN)
def sinh(x):
return _UnaryMath(x, lang.SINH)
def tan(x):
return _UnaryMath(x, lang.TAN)
def tanh(x):
return _UnaryMath(x, lang.TANH)
def exp(x):
return _UnaryMath(x, lang.EXP)
def log(x):
return _UnaryMath(x, lang.LOG)
def log10(x):
return _UnaryMath(x, lang.LOG10)
def sqrt(x):
return _UnaryMath(x, lang.SQRT)
def ceil(x):
return _UnaryMath(x, lang.CEIL)
def floor(x):
return _UnaryMath(x, lang.FLOOR)
def absolute(x):
return _UnaryMath(x, lang.ABS)
def logical_not(x):
return _UnaryMath(x, lang.NOT)
def isinf(x):
return _UnaryMath(x, lang.ISINF)
def isfinite(x):
return _UnaryMath(x, lang.ISFINITE)
def isnan(x):
return _UnaryMath(x, lang.ISNAN)
class _Limits(Scalar):
"""
A limit expression for floating point types
"""
code_map = {
lang.MIN_VALUE: {lang.FLOAT32: 'FLT_MIN', lang.FLOAT64: 'DBL_MIN'},
lang.MAX_VALUE: {lang.FLOAT32: 'FLT_MAX', lang.FLOAT64: 'DBL_MAX'},
lang.EPSILON: {lang.FLOAT32: 'FLT_EPSILON', lang.FLOAT64: 'DBL_EPSILON'},
}
def __init__(self, expr_code, t):
if expr_code not in list(_Limits.code_map.keys()):
raise ValueError(lang.ExpressionCode.Name(expr_code) + ' is an invalid limits code.')
if not issubclass(t.__class__, DType):
raise TypeError('Must apply limits functions to dtypes. Received: ' + str(t))
if t.as_proto() not in list(_Limits.code_map[expr_code].keys()):
raise ValueError(str(t) +
' arguments not supported for limits function ' +
lang.ExpressionCode.Name(expr_code))
super(self.__class__, self).__init__(expr_code, t)
self.name = _Limits.code_map[expr_code][t.as_proto()]
super(self.__class__, self)._register()
@staticmethod
def from_proto(proto, input_exprs):
return _Limits(proto.code, DType(proto.dtype))
def gen_c(self):
return ''
def min_value(dtype):
"""
Function for getting the minimum normalized positive value of
floating point types
:param dtype: The DType of the variable
:return: minimum value for dtype
"""
return _Limits(lang.MIN_VALUE, dtype)
def max_value(dtype):
"""
Function for getting the maximum value of floating point types
:param dtype: The DType of the variable
:return: maximum value for dtype
"""
return _Limits(lang.MAX_VALUE, dtype)
def epsilon(dtype):
"""
Function for getting difference between 1.0 and the next representable
value for floating point types
:param dtype: The DType of the variable
:return: epsilon value for dtype
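:Example:
Guard a division against very small denominators, assuming ``x`` and ``d`` are float32 scalar expressions::
safe = x / maximum(d, epsilon(float32))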
"""
return _Limits(lang.EPSILON, dtype)
class _BinaryMath(Scalar):
"""
Binary expressions which transform two scalars into another
"""
code_map = {
lang.ADD: {},
lang.SUBTRACT: {},
lang.MULTIPLY: {},
lang.DIVIDE: {},
lang.MODULO: {},
lang.EQUAL: {},
lang.NOTEQUAL: {},
lang.LESS: {},
lang.LESS_EQ: {},
lang.GREATER: {},
lang.GREATER_EQ: {},
lang.MIN: {},
lang.MAX: {},
lang.AND: {},
lang.OR: {},
lang.POW: {lang.FLOAT32: lambda x, y: 'powf('+x+','+y+')',
lang.FLOAT64: lambda x, y: 'pow('+x+','+y+')'},
lang.ATAN2: {lang.FLOAT32: lambda x, y: 'atan2f('+x+','+y+')',
lang.FLOAT64: lambda x, y: 'atan2('+x+','+y+')'},
}
for cur_type in supported_types:
code_map[lang.MIN][cur_type.proto_dtype] = lambda x, y: '((('+x+')<('+y+'))?('+x+'):('+y+'))'
code_map[lang.MAX][cur_type.proto_dtype] = lambda x, y: '((('+x+')>('+y+'))?('+x+'):('+y+'))'
code_map[lang.ADD][cur_type.proto_dtype] = lambda x, y: '(' + x + ' + ' + y + ')'
code_map[lang.SUBTRACT][cur_type.proto_dtype] = lambda x, y: '(' + x + ' - ' + y + ')'
code_map[lang.MULTIPLY][cur_type.proto_dtype] = lambda x, y: '(' + x + ' * ' + y + ')'
code_map[lang.DIVIDE][cur_type.proto_dtype] = lambda x, y: '(' + x + ' / ' + y + ')'
code_map[lang.MODULO][cur_type.proto_dtype] = lambda x, y: '(' + x + ' % ' + y + ')'
code_map[lang.EQUAL][cur_type.proto_dtype] = lambda x, y: '(' + x + ' == ' + y + ')'
code_map[lang.NOTEQUAL][cur_type.proto_dtype] = lambda x, y: '(' + x + ' != ' + y + ')'
code_map[lang.LESS][cur_type.proto_dtype] = lambda x, y: '(' + x + ' < ' + y + ')'
code_map[lang.LESS_EQ][cur_type.proto_dtype] = lambda x, y: '(' + x + ' <= ' + y + ')'
code_map[lang.GREATER][cur_type.proto_dtype] = lambda x, y: '(' + x + ' > ' + y + ')'
code_map[lang.GREATER_EQ][cur_type.proto_dtype] = lambda x, y: '(' + x + ' >= ' + y + ')'
code_map[lang.AND][cur_type.proto_dtype] = lambda x, y: '(' + x + ' && ' + y + ')'
code_map[lang.OR][cur_type.proto_dtype] = lambda x, y: '(' + x + ' || ' + y + ')'
code_map[lang.MODULO][float32.proto_dtype] = lambda x, y: 'fmodf('+x+','+y+')'
code_map[lang.MODULO][float64.proto_dtype] = lambda x, y: 'fmod('+x+','+y+')'
def __init__(self, arg1, arg2, expr_code):
if expr_code not in list(_BinaryMath.code_map.keys()):
raise ValueError('Invalid binary math code')
code_str = lang.ExpressionCode.Name(expr_code)
# first try to wrap args as constants
try:
arg1_wrapped = _ConstScalar(arg1)
except TypeError:
arg1_wrapped = arg1
try:
arg2_wrapped = _ConstScalar(arg2)
except TypeError:
arg2_wrapped = arg2
# throw error if received a non-expression that could not be wrapped as constant
if not issubclass(arg1_wrapped.__class__, _Expression):
raise TypeError('Cannot apply ' + code_str + ' to first non-expression argument:\n' + str(arg1_wrapped))
if not issubclass(arg2_wrapped.__class__, _Expression):
raise TypeError('Cannot apply ' + code_str + ' to second non-expression argument:\n' + str(arg2_wrapped))
# throw error if received a non-scalar expression
if not issubclass(arg1_wrapped.__class__, Scalar):
raise TypeError('First argument to ' + code_str + ' must be a scalar expression, got:\n' + str(arg1_wrapped))
if not issubclass(arg2_wrapped.__class__, Scalar):
raise TypeError('Second argument to ' + code_str + ' must be a scalar expression, got:\n' + str(arg2_wrapped))
# cast constants according to the type of the other input
arg1_is_constant = type(arg1_wrapped) == _ConstScalar
arg2_is_constant = type(arg2_wrapped) == _ConstScalar
if not arg1_is_constant and not arg2_is_constant:
arg1_expr = arg1_wrapped
arg2_expr = arg2_wrapped
elif not arg1_is_constant and arg2_is_constant:
arg1_expr = arg1_wrapped
arg2_expr = cast(arg2_wrapped, arg1_wrapped.dtype)
elif arg1_is_constant and not arg2_is_constant:
arg1_expr = cast(arg1_wrapped, arg2_wrapped.dtype)
arg2_expr = arg2_wrapped
else:
raise TypeError('Cannot apply binary operator to two constants.')
t1 = arg1_expr.proto_expr.dtype
t2 = arg2_expr.proto_expr.dtype
if not t1 == t2:
t1_str = lang.DType.Name(t1)
t2_str = lang.DType.Name(t2)
raise TypeError('arg1 type (' + t1_str + ') must be the same as arg2 type (' + t2_str + ')')
if arg1_expr.dtype.proto_dtype not in list(_BinaryMath.code_map[expr_code].keys()):
raise ValueError(lang.DType.Name(arg1_expr.dtype.proto_dtype) +
' arguments not supported for binary math function ' +
lang.ExpressionCode.Name(expr_code))
super(self.__class__, self).__init__(expr_code, arg1_expr.dtype)
self.input_exprs = [arg1_expr, arg2_expr]
super(self.__class__, self)._register()
@staticmethod
def from_proto(proto, input_exprs):
return _BinaryMath(input_exprs[0], input_exprs[1], proto.code)
def gen_c(self):
func = _BinaryMath.code_map[self.proto_expr.code][self.dtype.proto_dtype]
func_str = func(self.input_exprs[0].name, self.input_exprs[1].name)
return self.dtype.as_cstr() + ' ' + self.name + ' = ' + func_str + ';\n'
def minimum(x, y):
return _BinaryMath(x, y, lang.MIN)
def maximum(x, y):
return _BinaryMath(x, y, lang.MAX)
def power(x, y):
return _BinaryMath(x, y, lang.POW)
def arctan2(x, y):
return _BinaryMath(x, y, lang.ATAN2)
def logical_and(x, y):
return _BinaryMath(x, y, lang.AND)
def logical_or(x, y):
return _BinaryMath(x, y, lang.OR)
class LocalTensor(_TensorExpression, _Readable, _Writable):
"""
Expression which references a worker-local tensor
"""
def __init__(self, initial_value):
if type(initial_value) is not _ConstTensor:
raise TypeError('Tensors must be initialized by ConstTensors')
super(self.__class__, self).__init__(lang.TENSOR, initial_value.tensor_type)
self.input_exprs = [initial_value]
super(self.__class__, self)._register()
@staticmethod
def from_proto(proto, input_exprs):
return LocalTensor(input_exprs[0])
def gen_ptr(self):
tipe = self.dtype.as_cstr()
name = self.name
elems = self.size
return string.Template('${tipe} ${name}[${elems}]').substitute(locals())
def gen_c(self):
return self.gen_ptr() + ' = ' + self.input_exprs[0].name + ';\n'
def zeros(shape, dtype):
"""
Declare a new worker-local tensor with all elements initialized to zero.
:param shape: the tensor shape
:param dtype: the tensor data type
:return: the tensor expression
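:Example:
Declare a 5 element single precision local tensor and write its first element (a minimal sketch)::
accum = zeros([5], float32)
accum[0] = 1.0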
"""
np_dtype = DType(dtype).as_numpy()
init = _ConstTensor(np.zeros(shape, dtype=np_dtype))
return LocalTensor(init)
def ones(shape, dtype):
"""
Declare a new worker-local tensor with all elements initialized to one.
:param shape: the tensor shape
:param dtype: the tensor data type
:return: the tensor expression
"""
np_dtype = DType(dtype).as_numpy()
init = _ConstTensor(np.ones(shape, dtype=np_dtype))
return LocalTensor(init)
def _check_index(target_expr, index_expr):
"""
helper function for making sure that an index is valid
:param target_expr: the target tensor
:param index_expr: the index
:return: the index, wrapped as an expression if necessary
"""
if issubclass(index_expr.__class__, _Expression):
index = index_expr
else:
index = _ConstScalar(index_expr)
if index.proto_expr.dtype == lang.UNDEFINED_TYPE:
raise TypeError('Can only index with a scalar.')
if type(index) is _ConstScalar:
if target_expr.size <= index.value() or index.value() < 0:
raise IndexError('Index out of bounds.')
return index
class _AssignTensor(_Expression):
"""
Expression for assigning to tensors
"""
def __init__(self, tensor_expr, index_expr, value_expr):
super(self.__class__, self).__init__(lang.ASSIGN_TENSOR)
if not issubclass(tensor_expr.__class__, _Writable):
raise TypeError('Can only assign to writable tensors.')
index = _check_index(tensor_expr, index_expr)
# try to wrap value as an expression if it's not
if issubclass(value_expr.__class__, _Expression):
value = value_expr
else:
value = _ConstScalar(value_expr)
value = cast(value, tensor_expr.dtype)
# make sure that value is same type as tensor
t1 = tensor_expr.proto_expr.tensor_type.dtype
t2 = value.proto_expr.dtype
if not t1 == t2:
t1_str = lang.DType.Name(t1)
t2_str = lang.DType.Name(t2)
raise TypeError('cannot assign ' + t2_str + ' to ' + t1_str + ' tensor')
self.input_exprs = [tensor_expr, index, value]
super(self.__class__, self)._register()
@staticmethod
def from_proto(proto, input_exprs):
return _AssignTensor(input_exprs[0], input_exprs[1], input_exprs[2])
def gen_c(self):
return self.input_exprs[0].name + '[' + self.input_exprs[1].name + '] = ' + self.input_exprs[2].name + ';\n'
class _ReadTensor(Scalar):
"""
Expression for reading from tensors
"""
def __init__(self, tensor_expr, index_expr):
if not issubclass(tensor_expr.__class__, _Readable):
raise TypeError('Can only index a readable tensor.')
index = _check_index(tensor_expr, index_expr)
super(self.__class__, self).__init__(lang.READ_TENSOR, tensor_expr.dtype)
self.input_exprs = [tensor_expr, index]
super(self.__class__, self)._register()
@staticmethod
def from_proto(proto, input_exprs):
return _ReadTensor(input_exprs[0], input_exprs[1])
def gen_c(self):
return self.dtype.as_cstr() + ' ' + self.name + ' = ' + self.input_exprs[0].name + '['+self.input_exprs[1].name+'];\n'
def arange(start, stop=None, step=None):
"""
Create an iterator to iterate over a range
:param start: The starting point in the iterator
:param stop: The stopping point in the iterator
:param step: The iterator step size
:return: an iterator over the range
:Example:
usage for accumulating a variable to 10::
accum = variable(0, uint64)
for i in arange(10):
    accum <<= accum + 1
"""
if stop is None:
start_inferred = 0
stop_inferred = start
else:
start_inferred = start
stop_inferred = stop
if step is None:
step_inferred = 1
else:
step_inferred = step
# try to cast all non-expressions as constants
input_exprs = []
first_type = None
for val in [start_inferred, stop_inferred, step_inferred]:
if issubclass(val.__class__, _Expression):
input_exprs.append(val)
if first_type is None:
first_type = val.dtype
else:
input_exprs.append(_ConstScalar(val))
if first_type is None:
first_type = _ConstScalar(start).dtype
# cast all constants as the first dtype
cast_exprs = []
for expr in input_exprs:
if type(expr) is _ConstScalar:
cast_exprs.append(cast(expr, first_type))
else:
cast_exprs.append(expr)
index = variable(0, first_type)
return _Range(index, cast_exprs[0], cast_exprs[1], cast_exprs[2])
class _Range(_Expression, six.Iterator):
"""
A range expression
"""
def __init__(self, index, start, stop, step):
self.block_done = False
first_type = index.dtype
for expr in [start, stop, step]:
if expr.dtype != first_type:
raise TypeError('All input expressions must have the same type.')
super(self.__class__, self).__init__(lang.RANGE)
self.input_exprs = [index, start, stop, step]
super(self.__class__, self)._register()
def __iter__(self):
return self
def __next__(self):
if not self.block_done:
self.block_done = True
return self.input_exprs[0]
else:
_EndRange()
raise StopIteration
@staticmethod
def from_proto(proto, input_exprs):
return _Range(*input_exprs)
def gen_c(self):
index_name = self.input_exprs[0].name
start_name = self.input_exprs[1].name
stop_name = self.input_exprs[2].name
step_name = self.input_exprs[3].name
for_string = 'for(${index_name} = ${start_name}; ' \
'((${index_name} < ${stop_name})&&(${step_name}>0)) || ' \
'((${index_name} > ${stop_name})&&(${step_name}<0)); ' \
'${index_name}+=${step_name}){\n'
return string.Template(for_string).substitute(locals())
class _EndRange(_Expression):
"""
The end range expression
"""
def __init__(self):
super(self.__class__, self).__init__(lang.ENDRANGE)
super(self.__class__, self)._register()
@staticmethod
def from_proto(proto, input_exprs):
return _EndRange()
def gen_c(self):
return '}\n'
def if_(condition):
"""
conditional execution, must be used as part of a ``with`` block
:param condition: The condition under which to execute the body of the with block
:Example:
Clip ``input_tensor`` to a maximum value of 1::
y = variable(0, input_tensor.dtype)
y <<= input_tensor[some_index]
with if_(y > 1):
    y <<= 1
output_tensor[some_index] = y
"""
return _If(condition)
class _If(_Expression):
"""
The if expression
"""
def __init__(self, condition):
if not issubclass(condition.__class__, Scalar):
if isinstance(condition, bool):
raise TypeError('Attempting to use a constant boolean, %s, with the operator if_ expression. Use the '
'python if instead since this can be interpreted at operator '
'definition time.' % condition)
raise TypeError('Condition must be a scalar expression, instead got: ' + str(condition))
super(self.__class__, self).__init__(lang.IF)
self.input_exprs = [condition]
super(self.__class__, self)._register()
@staticmethod
def from_proto(proto, input_exprs):
return _If(input_exprs[0])
def gen_c(self):
return 'if('+self.input_exprs[0].name+'){\n'
def __enter__(self):
pass
def __exit__(self, exc_type, exc_val, exc_tb):
_EndIf()
def elif_(condition):
"""
else if conditional execution, must be used as part of a ``with`` block and must come directly after
another if or else if block.
:param condition: The condition under which to execute the body of the with block
:Example:
Clip ``input_tensor`` to a maximum value of 1 and a minimum value of -1::
y = variable(0, input_tensor.dtype)
y <<= input_tensor[some_index]
with if_(y > 1):
    y <<= 1
with elif_(y < -1):
    y <<= -1
output_tensor[some_index] = y
:return: None
"""
return _ElseIf(condition)
class _ElseIf(_Expression):
"""
The elif expression
"""
def __init__(self, condition):
if not issubclass(condition.__class__, Scalar):
raise TypeError('Condition must be a scalar expression')
super(self.__class__, self).__init__(lang.ELSEIF)
self.input_exprs = [condition]
super(self.__class__, self)._register()
@staticmethod
def from_proto(proto, input_exprs):
return _ElseIf(input_exprs[0])
def gen_c(self):
return '}\nelse if('+self.input_exprs[0].name+'){\n'
def __enter__(self):
ExpressionDAG.remove_endif()
def __exit__(self, exc_type, exc_val, exc_tb):
_EndIf()
def else_():
"""
else conditional execution, must be used as part of a ``with`` block and must come directly after
another if or else if block.
:Example:
Clip ``input_tensor`` to a maximum value of 1 and a minimum value of -1, and zero it
out if it is within that range::
y = variable(0, input_tensor.dtype)
y <<= input_tensor[some_index]
with if_(y > 1):
    y <<= 1
with elif_(y < -1):
    y <<= -1
with else_():
    y <<= 0
output_tensor[some_index] = y
"""
return _Else()
class _Else(_Expression):
"""
The else expression
"""
def __init__(self):
super(self.__class__, self).__init__(lang.ELSE)
super(self.__class__, self)._register()
@staticmethod
def from_proto(proto, input_exprs):
return _Else()
def gen_c(self):
return '}\nelse{\n'
def __enter__(self):
ExpressionDAG.remove_endif()
def __exit__(self, exc_type, exc_val, exc_tb):
_EndIf()
class _EndIf(_Expression):
"""
The endif expression
"""
def __init__(self):
super(self.__class__, self).__init__(lang.ENDIF)
super(self.__class__, self)._register()
@staticmethod
def from_proto(proto, input_exprs):
return _EndIf()
def gen_c(self):
return '}\n'