# Copyright 2016 Hewlett Packard Enterprise Development LP
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for
# the specific language governing permissions and limitations under the License.
import unittest
import numpy as np
import opveclib as ovl
@ovl.operator()
def triangles_op(startEdge, fromVertex, toVertex):
"""Counts the triangles in an undirected graph.
Notice that this method assumes that the graph is given as an adjacency list where all lists with vertex neighbors
are sorted.
The parallel algorithm uses the following strategy. We map one thread per edge, This is also called the edge-based
iterator strategy.
The idea behind the algorithm is:
1. Go over all edges (u, v).
2. The neighboring indices for vertex u are N(u) and for vertex v are N(v).
3. Increment the triangle counter by | N(u) /\ N(v) | where /\ is the set intersection operator.
We enforce an order on the vertices that avoids counting the same triangle three times, instead each triangle is
counted once.
Attributes: None.
The array toVertex is a flattened list of lists structure, where startEdge encodes the start indices of the
separate lists.
:param startEdge: Indices into toVertex where edges start.
:type startEdge: list.
:param fromVertex: The from-vertex of each edge.
:type fromVertex: list.
:param toVertex: The to-vertex of each edge.
:type toVertex: list.
:return: Counts of triangles per edge.
"""
iEdge = ovl.position_in(toVertex.shape)[0]
count = ovl.output(toVertex.shape, ovl.uint64)
nTriangle = ovl.variable(0, ovl.uint64)
iFromVertex = ovl.variable(fromVertex[iEdge], fromVertex.dtype)
iFromEdge = ovl.variable(startEdge[iFromVertex], startEdge.dtype)
iFromEdgeEnd = ovl.variable(startEdge[iFromVertex + 1], startEdge.dtype)
iiFromVertex = ovl.variable(toVertex[iFromEdge], toVertex.dtype)
iToVertex = ovl.variable(toVertex[iEdge], toVertex.dtype)
iToEdge = ovl.variable(startEdge[iToVertex], startEdge.dtype)
iToEdgeEnd = ovl.variable(startEdge[iToVertex + 1], startEdge.dtype)
iiToVertex = ovl.variable(toVertex[iToEdge], toVertex.dtype)
nMerge = iToEdgeEnd-iToEdge + iFromEdgeEnd-iFromEdge # Maximum number of merges.
# This construction is a work-around for simulating the function of a while loop.
#TODO(raudies@hpe.com): Replace this construct by a while loop once it is available in ovl.
for iMerge in ovl.arange(nMerge):
doMerge = ovl.logical_and(iFromEdge < iFromEdgeEnd, iToEdge < iToEdgeEnd)
doMerge = ovl.logical_and(doMerge, iiFromVertex < iToVertex)
with ovl.if_(doMerge):
with ovl.if_(iiFromVertex < iiToVertex):
iFromEdge <<= iFromEdge+1
iiFromVertex <<= toVertex[iFromEdge]
with ovl.elif_(iiFromVertex > iiToVertex):
iToEdge <<= iToEdge+1
iiToVertex <<= toVertex[iToEdge]
with ovl.else_():
nTriangle <<= nTriangle+1
iFromEdge <<= iFromEdge+1
iToEdge <<= iToEdge+1
iiFromVertex <<= toVertex[iFromEdge]
iiToVertex <<= toVertex[iToEdge]
#TODO(raudies@hpe.com): Use a reduction function that computes a partial or complete sum.
count[iEdge] = nTriangle # Save the triangles for each edge.
return count
[docs]def triangles(startEdge, fromVertex, toVertex, target_language='cpp'):
"""Count the triangles on the GPU.
The array toVertex is a flattened list of lists structure, where startEdge encodes the start indices of the lists.
:param startEdge: Indices into toVertex where edges start.
:type startEdge: list.
:param fromVertex: The from-vertex of each edge.
:type fromVertex: list.
:param toVertex: The to-vertex of each edge.
:type toVertex: list.
:return: Triangle count of graph.
:Examples:
.. doctest::
>>> from opveclib.examples.test_graph import load_graph_from_text_file, write_example_graph_to_text_file, triangles
>>> tmpName = "/tmp/v7e20.txt"
>>> write_example_graph_to_text_file(tmpName)
>>> startEdge, fromVertex, toVertex = load_graph_from_text_file(tmpName)
>>> triangles(startEdge, fromVertex, toVertex)
3
"""
if not ovl.local.cuda_enabled and target_language == 'cuda':
ovl.logger.debug("triangles tried to use cuda which is not enabled.")
return 0
count = ovl.evaluate(triangles_op(startEdge, fromVertex, toVertex), target_language=target_language)
return np.sum(count, axis=0, dtype=np.uint64)
def reference(startEdge, fromVertex, toVertex):
"""Count the triangles using python.
This is a reference implementation of the operator function in python.
The array toVertex is a flattened list of lists structure, where startEdge encodes the start indices of the lists.
:param startEdge: Indices into toVertex where edges start.
:type startEdge: list.
:param fromVertex: The from-vertex of each edge.
:type fromVertex: list.
:param toVertex: The to-vertex of each edge.
:type toVertex: list.
:return: Triangle count of graph.
"""
assert len(fromVertex)==len(toVertex), "From vertex has %d entries that must match %d entries in to vertex!" \
% (len(fromVertex), len(toVertex))
nEdge = len(fromVertex)
nTriangle = 0
for iEdge in range(nEdge):
iFromVertex = int(fromVertex[iEdge])
iFromEdge = int(startEdge[iFromVertex])
iFromEdgeEnd = int(startEdge[iFromVertex+1])
iiFromVertex = int(toVertex[iFromEdge])
iToVertex = int(toVertex[iEdge])
iToEdge = int(startEdge[iToVertex])
iToEdgeEnd = int(startEdge[iToVertex+1])
iiToVertex = int(toVertex[iToEdge])
while ((iFromEdge < iFromEdgeEnd) & (iToEdge < iToEdgeEnd) & (iiFromVertex < iToVertex)):
if (iiFromVertex < iiToVertex):
iFromEdge += 1
iiFromVertex = int(toVertex[iFromEdge])
elif (iiFromVertex > iiToVertex):
iToEdge += 1
iiToVertex = int(toVertex[iToEdge])
else:
iFromEdge += 1
iToEdge += 1
nTriangle += 1
iiFromVertex = int(toVertex[iFromEdge])
iiToVertex = int(toVertex[iToEdge])
return nTriangle
def write_example_graph_to_text_file(fileName):
""""Writes an example graph to file.
:param fileName: The path + name of the ascii text file to hold the edge list of the graph.
:type fileName: String.
"""
# The edge list of the graph with 7 vertices and 20 edges.
edgeList = [[0,1],
[1,2],
[2,3],
[3,4],
[4,5],
[5,6],
[0,6],
[1,3],
[3,5],
[1,6],
[1,0],
[2,1],
[3,2],
[4,3],
[5,4],
[6,5],
[6,0],
[3,1],
[5,3],
[6,1]]
# Write the graph to a text file.
with open(fileName, "w") as file:
nEdge = len(edgeList)
for iEdge in range(nEdge):
file.write("%d\t%d\n" % (edgeList[iEdge][0], edgeList[iEdge][1]))
def load_graph_from_text_file(fileName):
"""Load a graph from a text file where each line has a pair of numbers representing (from,to) of an edge.
:param fileName: The path + name of the ascii text file to read.
:type fileName: str.
:return: Three lists startEdge, fromVertex, toVertex -- where startEdge contains the start/end indices of single
lists within fromVertex and toVertex. The list fromVertex contains the from vertices and the list toVertex
contains the list toVertex.
The memory footprint of this adjacency list representation is (|V| + 1 + 2|E|), where |V| is the number of vertices
and |E| is the number of edges.
For an example of such a list representation, assume that vertices are given by v0...vn. Then the simplest
representation is a list of lists representation such as
index | list entries
v0 | v1, v3, v7, v11, v99 // 5 entries
v1 | v8, v9, v15, v32, v20, v34 // 6 entries
v2 | v0, v1 // 2 entries
... | ...
vn-1 | v3, v7, v87, v55 // 4 entries
We use a first array (ARRAY I) of length |V|+1 to represent the start indices for neighbor indices of a second array.
This second array (ARRAY II) contains all the to vertex indices and is |E| long.
Our above example has the following arrays:
ARRAY I: for the start indices:
-------------------------------------------------------
| index | 0 | 1 | 2 | 3 | ... | nVertex-1 | nVertex |
| start | 0 | 5 | 11 | 13 | ... | nEdge-4 | nEdge |
-------------------------------------------------------
ARRAY II: for the TO vertices.
-------------------------------------------------------------------------------
| index | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | ... |
| to vertex | 1 | 3 | 7 | 11 | 99 | 8 | 9 | 15 | 32 | 20 | 34 | 0 | 1 | ... |
-------------------------------------------------------------------------------
"""
# ******************************************************************************************************************
# Estimate the maximum number of edges based on the number of lines in the file.
# ******************************************************************************************************************
blockSize = 65536
nEdge = 0
with open(fileName, "rb") as fHandle:
block = fHandle.read(blockSize)
while block:
nEdge += block.count(b'\n')
block = fHandle.read(blockSize)
# ******************************************************************************************************************
# Read the edges from the file.
# ******************************************************************************************************************
edges = np.zeros((nEdge, 2), dtype=np.uint64)
nEdge = 0
nVertex = 0
for line in open(fileName, 'r'):
if len(line)==0 or line.startswith('#'):
continue
tokens = line.split('\t')
assert len(tokens)==2, "Found %d tokens in line %s but expected %d tokens." % (len(tokens), line, 2)
iFrom = int(tokens[0])
iTo = int(tokens[1])
edges[nEdge,0] = iFrom
edges[nEdge,1] = iTo
nEdge += 1
nVertex = max(nVertex, iFrom)
nVertex = max(nVertex, iTo)
nVertex += 1 # Count 0th vertex index
edges = edges[0:nEdge,:]
# Re-index
idToIndex = dict()
iVertex = 0
for iEdge in range(nEdge):
iFrom = edges[iEdge,0]
iTo = edges[iEdge,1]
if iFrom in idToIndex:
iFrom = idToIndex[iFrom]
else:
idToIndex[iFrom] = iVertex
iFrom = iVertex
iVertex += 1
if iTo in idToIndex:
iTo = idToIndex[iTo]
else:
idToIndex[iTo] = iVertex
iTo = iVertex
iVertex += 1
edges[iEdge,0] = iFrom
edges[iEdge,1] = iTo
# ******************************************************************************************************************
# Remove all double-linked edges.
# ******************************************************************************************************************
edgeExists = set() # set of tuples
nSelf = 0
nDouble = 0
iiEdge = 0
for iEdge in range(0, nEdge):
iFrom = edges[iEdge,0]
iTo = edges[iEdge,1]
if iFrom==iTo:
nSelf += 1
continue
if iFrom > iTo:
exists = (iFrom, iTo) in edgeExists
if not exists:
edgeExists.add((iFrom,iTo))
edges[iiEdge,0] = iFrom
edges[iiEdge,1] = iTo
iiEdge += 1
else:
nDouble += 1
else:
exists = (iTo, iFrom) in edgeExists
if not exists:
edgeExists.add((iTo,iFrom))
edges[iiEdge,0] = iTo
edges[iiEdge,1] = iFrom
iiEdge += 1
else:
nDouble += 1
nEdge = iiEdge
edges = edges[0:nEdge,:]
# ******************************************************************************************************************
# Build up the adjacency list.
# ******************************************************************************************************************
index = np.argsort(edges[:,0], axis=0)
startEdge = np.zeros(nVertex+1, dtype=np.uint64)
fromVertex = np.zeros(nEdge, dtype=np.uint64)
toVertex = np.zeros(nEdge, dtype=np.uint64)
iFromLast = 0
iVertex = 1
for iEdge in range(nEdge):
iFrom = edges[index[iEdge],0]
iTo = edges[index[iEdge],1]
toVertex[iEdge] = iTo
fromVertex[iEdge] = iFrom
if iFrom > iFromLast:
for iJump in range(int(iFrom-iFromLast)):
startEdge[iVertex] = iEdge
iVertex += 1
iFromLast = iFrom
while iVertex <= nVertex:
startEdge[iVertex] = nEdge
iVertex += 1
# ******************************************************************************************************************
# Sort the neighbor lists.
# ******************************************************************************************************************
iOffset = 0
for iVertex in range(nVertex):
nNeighbor = int(startEdge[iVertex+1] - startEdge[iVertex])
toVertex[iOffset:iOffset+nNeighbor] = np.sort(toVertex[iOffset:iOffset+nNeighbor])
iOffset += nNeighbor
return startEdge, fromVertex, toVertex
class TestGraphTriangleCountOp(unittest.TestCase):
"""
Test cases for the triangle counting operator.
"""
def test(self):
"""
This test cases compares the numpy reference implementation and the opveclib implementation with the
ground-truth count.
"""
# Specify the graph data.
tmpName = "/tmp/v7e20.txt"
nTriangle = 3
write_example_graph_to_text_file(tmpName)
ovl.logger.info('Testing graph %s.' % tmpName)
startEdge, fromVertex, toVertex = load_graph_from_text_file(tmpName)
assert nTriangle == triangles(startEdge, fromVertex, toVertex)
assert nTriangle == reference(startEdge, fromVertex, toVertex)
if ovl.local.cuda_enabled:
assert nTriangle == triangles(startEdge, fromVertex, toVertex, target_language='cuda')
if __name__ == '__main__':
ovl.clear_op_cache()
unittest.main()