Source code for examples.test_graph

# Copyright 2016 Hewlett Packard Enterprise Development LP
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for
# the specific language governing permissions and limitations under the License.


import unittest
import numpy as np
import opveclib as ovl


@ovl.operator()
def triangles_op(startEdge, fromVertex, toVertex):
    """Counts the triangles in an undirected graph.

    Notice that this method assumes that the graph is given as an adjacency list where all lists with vertex neighbors
    are sorted.

    The parallel algorithm uses the following strategy. We map one thread per edge, This is also called the edge-based
    iterator strategy.

    The idea behind the algorithm is:
        1. Go over all edges (u, v).
        2. The neighboring indices for vertex u are N(u) and for vertex v are N(v).
        3. Increment the triangle counter by | N(u) /\ N(v) | where /\ is the set intersection operator.

    We enforce an order on the vertices that avoids counting the same triangle three times, instead each triangle is
    counted once.

    Attributes: None.

    The array toVertex is a flattened list of lists structure, where startEdge encodes the start indices of the
    separate lists.

    :param startEdge: Indices into toVertex where edges start.
    :type startEdge: list.
    :param fromVertex: The from-vertex of each edge.
    :type fromVertex: list.
    :param toVertex: The to-vertex of each edge.
    :type toVertex: list.
    :return: Counts of triangles per edge.
    """
    iEdge           = ovl.position_in(toVertex.shape)[0]
    count           = ovl.output(toVertex.shape, ovl.uint64)
    nTriangle       = ovl.variable(0, ovl.uint64)

    iFromVertex     = ovl.variable(fromVertex[iEdge], fromVertex.dtype)
    iFromEdge       = ovl.variable(startEdge[iFromVertex], startEdge.dtype)
    iFromEdgeEnd    = ovl.variable(startEdge[iFromVertex + 1], startEdge.dtype)
    iiFromVertex    = ovl.variable(toVertex[iFromEdge], toVertex.dtype)

    iToVertex       = ovl.variable(toVertex[iEdge], toVertex.dtype)
    iToEdge         = ovl.variable(startEdge[iToVertex], startEdge.dtype)
    iToEdgeEnd      = ovl.variable(startEdge[iToVertex + 1], startEdge.dtype)
    iiToVertex      = ovl.variable(toVertex[iToEdge], toVertex.dtype)

    nMerge          = iToEdgeEnd-iToEdge + iFromEdgeEnd-iFromEdge # Maximum number of merges.

    # This construction is a work-around for simulating the function of a while loop.
    #TODO(raudies@hpe.com): Replace this construct by a while loop once it is available in ovl.
    for iMerge in ovl.arange(nMerge):
        doMerge = ovl.logical_and(iFromEdge < iFromEdgeEnd, iToEdge < iToEdgeEnd)
        doMerge = ovl.logical_and(doMerge, iiFromVertex < iToVertex)

        with ovl.if_(doMerge):

            with ovl.if_(iiFromVertex < iiToVertex):
                iFromEdge <<= iFromEdge+1
                iiFromVertex <<= toVertex[iFromEdge]

            with ovl.elif_(iiFromVertex > iiToVertex):
                iToEdge <<= iToEdge+1
                iiToVertex <<= toVertex[iToEdge]

            with ovl.else_():
                nTriangle <<= nTriangle+1
                iFromEdge <<= iFromEdge+1
                iToEdge <<= iToEdge+1
                iiFromVertex <<= toVertex[iFromEdge]
                iiToVertex <<= toVertex[iToEdge]


    #TODO(raudies@hpe.com): Use a reduction function that computes a partial or complete sum.
    count[iEdge] = nTriangle # Save the triangles for each edge.

    return count


[docs]def triangles(startEdge, fromVertex, toVertex, target_language='cpp'): """Count the triangles on the GPU. The array toVertex is a flattened list of lists structure, where startEdge encodes the start indices of the lists. :param startEdge: Indices into toVertex where edges start. :type startEdge: list. :param fromVertex: The from-vertex of each edge. :type fromVertex: list. :param toVertex: The to-vertex of each edge. :type toVertex: list. :return: Triangle count of graph. :Examples: .. doctest:: >>> from opveclib.examples.test_graph import load_graph_from_text_file, write_example_graph_to_text_file, triangles >>> tmpName = "/tmp/v7e20.txt" >>> write_example_graph_to_text_file(tmpName) >>> startEdge, fromVertex, toVertex = load_graph_from_text_file(tmpName) >>> triangles(startEdge, fromVertex, toVertex) 3 """ if not ovl.local.cuda_enabled and target_language == 'cuda': ovl.logger.debug("triangles tried to use cuda which is not enabled.") return 0 count = ovl.evaluate(triangles_op(startEdge, fromVertex, toVertex), target_language=target_language) return np.sum(count, axis=0, dtype=np.uint64)
def reference(startEdge, fromVertex, toVertex): """Count the triangles using python. This is a reference implementation of the operator function in python. The array toVertex is a flattened list of lists structure, where startEdge encodes the start indices of the lists. :param startEdge: Indices into toVertex where edges start. :type startEdge: list. :param fromVertex: The from-vertex of each edge. :type fromVertex: list. :param toVertex: The to-vertex of each edge. :type toVertex: list. :return: Triangle count of graph. """ assert len(fromVertex)==len(toVertex), "From vertex has %d entries that must match %d entries in to vertex!" \ % (len(fromVertex), len(toVertex)) nEdge = len(fromVertex) nTriangle = 0 for iEdge in range(nEdge): iFromVertex = int(fromVertex[iEdge]) iFromEdge = int(startEdge[iFromVertex]) iFromEdgeEnd = int(startEdge[iFromVertex+1]) iiFromVertex = int(toVertex[iFromEdge]) iToVertex = int(toVertex[iEdge]) iToEdge = int(startEdge[iToVertex]) iToEdgeEnd = int(startEdge[iToVertex+1]) iiToVertex = int(toVertex[iToEdge]) while ((iFromEdge < iFromEdgeEnd) & (iToEdge < iToEdgeEnd) & (iiFromVertex < iToVertex)): if (iiFromVertex < iiToVertex): iFromEdge += 1 iiFromVertex = int(toVertex[iFromEdge]) elif (iiFromVertex > iiToVertex): iToEdge += 1 iiToVertex = int(toVertex[iToEdge]) else: iFromEdge += 1 iToEdge += 1 nTriangle += 1 iiFromVertex = int(toVertex[iFromEdge]) iiToVertex = int(toVertex[iToEdge]) return nTriangle def write_example_graph_to_text_file(fileName): """"Writes an example graph to file. :param fileName: The path + name of the ascii text file to hold the edge list of the graph. :type fileName: String. """ # The edge list of the graph with 7 vertices and 20 edges. edgeList = [[0,1], [1,2], [2,3], [3,4], [4,5], [5,6], [0,6], [1,3], [3,5], [1,6], [1,0], [2,1], [3,2], [4,3], [5,4], [6,5], [6,0], [3,1], [5,3], [6,1]] # Write the graph to a text file. with open(fileName, "w") as file: nEdge = len(edgeList) for iEdge in range(nEdge): file.write("%d\t%d\n" % (edgeList[iEdge][0], edgeList[iEdge][1])) def load_graph_from_text_file(fileName): """Load a graph from a text file where each line has a pair of numbers representing (from,to) of an edge. :param fileName: The path + name of the ascii text file to read. :type fileName: str. :return: Three lists startEdge, fromVertex, toVertex -- where startEdge contains the start/end indices of single lists within fromVertex and toVertex. The list fromVertex contains the from vertices and the list toVertex contains the list toVertex. The memory footprint of this adjacency list representation is (|V| + 1 + 2|E|), where |V| is the number of vertices and |E| is the number of edges. For an example of such a list representation, assume that vertices are given by v0...vn. Then the simplest representation is a list of lists representation such as index | list entries v0 | v1, v3, v7, v11, v99 // 5 entries v1 | v8, v9, v15, v32, v20, v34 // 6 entries v2 | v0, v1 // 2 entries ... | ... vn-1 | v3, v7, v87, v55 // 4 entries We use a first array (ARRAY I) of length |V|+1 to represent the start indices for neighbor indices of a second array. This second array (ARRAY II) contains all the to vertex indices and is |E| long. Our above example has the following arrays: ARRAY I: for the start indices: ------------------------------------------------------- | index | 0 | 1 | 2 | 3 | ... | nVertex-1 | nVertex | | start | 0 | 5 | 11 | 13 | ... | nEdge-4 | nEdge | ------------------------------------------------------- ARRAY II: for the TO vertices. ------------------------------------------------------------------------------- | index | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | ... | | to vertex | 1 | 3 | 7 | 11 | 99 | 8 | 9 | 15 | 32 | 20 | 34 | 0 | 1 | ... | ------------------------------------------------------------------------------- """ # ****************************************************************************************************************** # Estimate the maximum number of edges based on the number of lines in the file. # ****************************************************************************************************************** blockSize = 65536 nEdge = 0 with open(fileName, "rb") as fHandle: block = fHandle.read(blockSize) while block: nEdge += block.count(b'\n') block = fHandle.read(blockSize) # ****************************************************************************************************************** # Read the edges from the file. # ****************************************************************************************************************** edges = np.zeros((nEdge, 2), dtype=np.uint64) nEdge = 0 nVertex = 0 for line in open(fileName, 'r'): if len(line)==0 or line.startswith('#'): continue tokens = line.split('\t') assert len(tokens)==2, "Found %d tokens in line %s but expected %d tokens." % (len(tokens), line, 2) iFrom = int(tokens[0]) iTo = int(tokens[1]) edges[nEdge,0] = iFrom edges[nEdge,1] = iTo nEdge += 1 nVertex = max(nVertex, iFrom) nVertex = max(nVertex, iTo) nVertex += 1 # Count 0th vertex index edges = edges[0:nEdge,:] # Re-index idToIndex = dict() iVertex = 0 for iEdge in range(nEdge): iFrom = edges[iEdge,0] iTo = edges[iEdge,1] if iFrom in idToIndex: iFrom = idToIndex[iFrom] else: idToIndex[iFrom] = iVertex iFrom = iVertex iVertex += 1 if iTo in idToIndex: iTo = idToIndex[iTo] else: idToIndex[iTo] = iVertex iTo = iVertex iVertex += 1 edges[iEdge,0] = iFrom edges[iEdge,1] = iTo # ****************************************************************************************************************** # Remove all double-linked edges. # ****************************************************************************************************************** edgeExists = set() # set of tuples nSelf = 0 nDouble = 0 iiEdge = 0 for iEdge in range(0, nEdge): iFrom = edges[iEdge,0] iTo = edges[iEdge,1] if iFrom==iTo: nSelf += 1 continue if iFrom > iTo: exists = (iFrom, iTo) in edgeExists if not exists: edgeExists.add((iFrom,iTo)) edges[iiEdge,0] = iFrom edges[iiEdge,1] = iTo iiEdge += 1 else: nDouble += 1 else: exists = (iTo, iFrom) in edgeExists if not exists: edgeExists.add((iTo,iFrom)) edges[iiEdge,0] = iTo edges[iiEdge,1] = iFrom iiEdge += 1 else: nDouble += 1 nEdge = iiEdge edges = edges[0:nEdge,:] # ****************************************************************************************************************** # Build up the adjacency list. # ****************************************************************************************************************** index = np.argsort(edges[:,0], axis=0) startEdge = np.zeros(nVertex+1, dtype=np.uint64) fromVertex = np.zeros(nEdge, dtype=np.uint64) toVertex = np.zeros(nEdge, dtype=np.uint64) iFromLast = 0 iVertex = 1 for iEdge in range(nEdge): iFrom = edges[index[iEdge],0] iTo = edges[index[iEdge],1] toVertex[iEdge] = iTo fromVertex[iEdge] = iFrom if iFrom > iFromLast: for iJump in range(int(iFrom-iFromLast)): startEdge[iVertex] = iEdge iVertex += 1 iFromLast = iFrom while iVertex <= nVertex: startEdge[iVertex] = nEdge iVertex += 1 # ****************************************************************************************************************** # Sort the neighbor lists. # ****************************************************************************************************************** iOffset = 0 for iVertex in range(nVertex): nNeighbor = int(startEdge[iVertex+1] - startEdge[iVertex]) toVertex[iOffset:iOffset+nNeighbor] = np.sort(toVertex[iOffset:iOffset+nNeighbor]) iOffset += nNeighbor return startEdge, fromVertex, toVertex class TestGraphTriangleCountOp(unittest.TestCase): """ Test cases for the triangle counting operator. """ def test(self): """ This test cases compares the numpy reference implementation and the opveclib implementation with the ground-truth count. """ # Specify the graph data. tmpName = "/tmp/v7e20.txt" nTriangle = 3 write_example_graph_to_text_file(tmpName) ovl.logger.info('Testing graph %s.' % tmpName) startEdge, fromVertex, toVertex = load_graph_from_text_file(tmpName) assert nTriangle == triangles(startEdge, fromVertex, toVertex) assert nTriangle == reference(startEdge, fromVertex, toVertex) if ovl.local.cuda_enabled: assert nTriangle == triangles(startEdge, fromVertex, toVertex, target_language='cuda') if __name__ == '__main__': ovl.clear_op_cache() unittest.main()