Source code for pyamg.graph

"""Algorithms related to graphs."""

from warnings import warn
import numpy as np
from scipy import sparse

from . import amg_core



[docs]
def asgraph(G):
    """Return (square) array as sparse.

    Parameters
    ----------
    G : sparray
        Sparse matrix.

    Returns
    -------
    csr_array or csc_array
        Converted array.

    """
    if not sparse.issparse(G) or G.format not in ('csc', 'csr'):
        G = sparse.csr_array(G)

    if G.shape[0] != G.shape[1]:
        raise ValueError('expected square matrix')

    return G




[docs]
def maximal_independent_set(G, algo='serial', k=None):
    """Compute a maximal independent vertex set for a graph.

    Parameters
    ----------
    G : sparray
        Symmetric matrix, preferably in sparse CSR or CSC format.
        The nonzeros of G represent the edges of an undirected graph.
    algo : {'serial', 'parallel'}
        Algorithm used to compute the MIS
            * serial   : greedy serial algorithm
            * parallel : variant of Luby's parallel MIS algorithm
    k : int
        Minimum separation between MIS vertices.

    Returns
    -------
    array
        - ``S[i] = 1`` if vertex i is in the MIS.
        - ``S[i] = 0`` otherwise.

    Notes
    -----
    Diagonal entries in the G (self loops) will be ignored.

    Luby's algorithm is significantly more expensive than the
    greedy serial algorithm.

    """
    G = asgraph(G)
    N = G.shape[0]

    mis = np.empty(N, dtype='intc')
    mis[:] = -1

    if k is None:
        if algo == 'serial':
            fn = amg_core.maximal_independent_set_serial
            fn(N, G.indptr, G.indices, -1, 1, 0, mis)
        elif algo == 'parallel':
            fn = amg_core.maximal_independent_set_parallel
            fn(N, G.indptr, G.indices, -1, 1, 0, mis, np.random.rand(N), -1)
        else:
            raise ValueError(f'Unknown algorithm ({algo})')
    else:
        fn = amg_core.maximal_independent_set_k_parallel
        fn(N, G.indptr, G.indices, k, mis, np.random.rand(N), -1)

    return mis




[docs]
def vertex_coloring(G, method='MIS'):
    """Compute a vertex coloring of a graph.

    Parameters
    ----------
    G : sparray
        Symmetric matrix, preferably in sparse CSR or CSC format
        The nonzeros of G represent the edges of an undirected graph.
    method : str
        Algorithm used to compute the vertex coloring:

            * 'MIS' - Maximal Independent Set
            * 'JP'  - Jones-Plassmann (parallel)
            * 'LDF' - Largest-Degree-First (parallel)

    Returns
    -------
    array
        An array of vertex colors (integers beginning at 0).

    Notes
    -----
    Diagonal entries in the G (self loops) will be ignored.

    """
    G = asgraph(G)
    N = G.shape[0]

    coloring = np.empty(N, dtype='intc')

    if method == 'MIS':
        fn = amg_core.vertex_coloring_mis
        fn(N, G.indptr, G.indices, coloring)
    elif method == 'JP':
        fn = amg_core.vertex_coloring_jones_plassmann
        fn(N, G.indptr, G.indices, coloring, np.random.rand(N))
    elif method == 'LDF':
        fn = amg_core.vertex_coloring_LDF
        fn(N, G.indptr, G.indices, coloring, np.random.rand(N))
    else:
        raise ValueError(f'Unknown method ({method})')

    return coloring




[docs]
def bellman_ford(G, centers, method='standard', tiebreaking=True):
    """Bellman-Ford iteration.

    Parameters
    ----------
    G : sparray
        Directed graph with positive weights.
    centers : list
        Starting centers or source nodes.
    method : string
        - 'standard': base implementation of Bellman-Ford.
        - 'balanced': a balanced version of Bellman-Ford.
    tiebreaking : bool
        Tie break flag if ``method='balanced'``.

    Returns
    -------
    array
        Distance of each point to the nearest center.
    array
        Index of the nearest center.
    array
        Predecessors in the array.

    See Also
    --------
    pyamg.amg_core.bellman_ford
    scipy.sparse.csgraph.bellman_ford

    Notes
    -----
    This should be viewed as the transpose of Bellman-Ford in
    scipy.sparse.csgraph. Here, bellman_ford is used to find the shortest path
    from any point *to* the seeds. In csgraph, bellman_ford is used to find
    "the shortest distance from point i to point j".  So csgraph.bellman_ford
    could be run `for seed in seeds`.  Also note that ``test_graph.py`` tests
    against ``csgraph.bellman_ford(G.T)``.

    """
    G = asgraph(G)
    n = G.shape[0]

    if G.nnz > 0:
        if G.data.min() < 0:
            raise ValueError('Bellman-Ford is defined only for positive weights.')
    if G.dtype == complex:
        raise ValueError('Bellman-Ford is defined only for real weights.')

    centers = np.asarray(centers, dtype=np.int32)
    num_clusters = len(centers)

    # allocate space for returns and working arrays
    distances = np.full(n, np.inf, dtype=G.dtype)  # distance to cluster center (inf)
    nearest = np.full(n, -1, dtype=np.int32)       # nearest center, cluster membership (-1)
    predecessors = np.full(n, -1, dtype=np.int32)  # predecessor on the shortest path (-1)
    distances[centers] = 0                         # distance = 0 at centers
    nearest[centers] = np.arange(num_clusters)     # number the membership

    if method == 'standard':
        amg_core.bellman_ford(n, G.indptr, G.indices, G.data, centers,  # IN
                              distances, nearest, predecessors)         # OUT
    elif method == 'balanced':
        predecessors_count = np.full(n, 0, dtype=np.int32)
        cluster_size = np.full(num_clusters, 1, dtype=np.int32)
        amg_core.bellman_ford_balanced(n, G.indptr, G.indices, G.data, centers,  # IN
                                       distances, nearest, predecessors,         # OUT
                                       predecessors_count, cluster_size,         # OUT
                                       tiebreaking)
    else:
        raise ValueError(f'Method {method} is not supported in Bellman-Ford')

    return distances, nearest, predecessors




[docs]
def lloyd_cluster(G, centers, maxiter=5):
    """Perform Lloyd clustering on graph with weighted edges.

    Parameters
    ----------
    G : csr_array, csc_array
        A sparse matrix of size (n,n) where each nonzero entry G[i,j] is the distance
        between nodes i and j.
    centers : int array
        If centers is an integer, then its value determines the number of
        clusters.  Otherwise, centers is an array of unique integers between 0
        and n-1 that will be used as the initial centers for clustering.
    maxiter : int
        Maximum number of iterations.

    Returns
    -------
    int array
        Id of each cluster of points.
    int array
        Index of each seed.

    Notes
    -----
    If G has complex values, abs(G) is used instead.

    Only positive edge weights may be used

    """
    G = asgraph(G)
    n = G.shape[0]

    # complex dtype
    if G.dtype.kind == 'c':
        G = np.abs(G)

    if G.nnz > 0:
        if G.data.min() < 0:
            raise ValueError('Lloyd Clustering is defined only for positive weights.')

    if np.isscalar(centers):
        centers = np.random.permutation(n)[:centers]
        centers = np.int32(centers)
    else:
        centers = np.asarray(centers, dtype=np.int32)

    num_clusters = len(centers)

    if num_clusters < 1:
        raise ValueError('at least one center is required')

    if centers.min() < 0:
        raise ValueError(f'invalid center index {centers.min()}')

    if centers.max() >= n:
        raise ValueError(f'invalid center index {centers.max()}')

    distances = np.full(n, np.inf, dtype=G.dtype)
    clusters = np.full(n, -1, dtype=np.int32)
    predecessors = np.full(n, -1, dtype=np.int32)
    distances[centers] = 0
    clusters[centers] = np.arange(num_clusters)
    changed = True
    it = 0

    _dist, _near, _pred = bellman_ford(G, centers, method='standard')

    while changed and it < maxiter:
        if it > 0:
            distances.fill(np.inf)
            clusters.fill(-1)
            predecessors.fill(-1)
            distances[centers] = 0
            clusters[centers] = np.arange(num_clusters)

        amg_core.bellman_ford(n, G.indptr, G.indices, G.data, centers,  # IN
                              distances, clusters, predecessors)        # OUT

        changed = amg_core.most_interior_nodes(n, G.indptr, G.indices, G.data, centers,
                                               distances, clusters, predecessors)

        it += 1

    return clusters, centers




[docs]
def balanced_lloyd_cluster(G, centers, maxiter=5, rebalance_iters=5, tiebreaking=True):
    """Perform Lloyd clustering on graph with weighted edges.

    Parameters
    ----------
    G : csr_array, csc_array
        A sparse nxn matrix where each nonzero entry G[i,j] is the distance
        between nodes i and j.
    centers : int array
        If centers is an integer, then its value determines the number of
        clusters.  Otherwise, centers is an array of unique integers between 0
        and n-1 that will be used as the initial centers for clustering.
    maxiter : int
        Number of bellman_ford_balanced->center_nodes iterations to run within
        the clustering.
    rebalance_iters : int
        Number of post-Lloyd rebalancing iterations to run.
    tiebreaking : bool, default True
        Flag for triggering tiebreaking.

    Returns
    -------
    clusters : int array
        id of each cluster of points
    centers : int array
        index of each center

    Notes
    -----
    - If G has complex values, abs(G) is used instead.
    - Only positive edge weights may be used
    - This version computes improved cluster centers with Floyd-Warshall and
      also uses a balanced version of Bellman-Ford to try and find
      nearly-equal-sized clusters.
    - Repeated calls to bellman_ford_balanced() in the rebalance loop can result
      in different centers.  This is due to the tie-breaker based on aggregate
      size in bellman_ford_balanced().  Alternatively, the graph can be seeded
      with a small random number to make the edge lengths (and distances) unique.

    """
    G = asgraph(G)
    n = G.shape[0]

    # complex dtype
    if G.dtype.kind == 'c':
        G = np.abs(G)

    if G.nnz > 0:
        if G.data.min() < 0:
            raise ValueError('Lloyd Clustering is defined only for positive weights.')

    if np.isscalar(centers):
        centers = np.random.permutation(n)[:centers]  # same as np.random.choice()
        centers = np.int32(centers)
    else:
        centers = np.asarray(centers, dtype=np.int32)

    num_clusters = len(centers)

    if num_clusters < 1:
        raise ValueError('at least one center is required')

    if centers.min() < 0:
        raise ValueError(f'invalid center index {centers.min()}')

    if centers.max() >= n:
        raise ValueError(f'invalid center index {centers.max()}')

    # create work arrays for C++
    # empty() values are initialized in the kernel
    maxsize = int(12*np.ceil(n / num_clusters))

    d = np.full(n, np.inf, dtype=G.dtype)         # distance to cluster center (inf)
    m = np.full(n, -1, dtype=np.int32)            # cluster membership or index (-1)
    p = np.full(n, -1, dtype=np.int32)            # predecessor on the shortest path (-1)
    pc = np.full(n, 0, dtype=np.int32)            # predecessor count (0)
    s = np.full(num_clusters, 1, dtype=np.int32)  # cluster size (1)
    d[centers] = 0                                # distance = 0 at centers
    m[centers] = np.arange(num_clusters)          # number the membership

    Cptr = np.empty(num_clusters, dtype=np.int32)    # ptr to start in C for each cluster
    CC = np.empty(n, dtype=np.int32)                 # FW global index for current cluster

    D = np.empty(maxsize*maxsize, dtype=G.dtype)     # FW distance array
    P = np.empty(maxsize*maxsize, dtype=np.int32)    # FW predecessor array
    L = np.empty(n, dtype=np.int32)                  # FW local index for current cluster
    q = np.empty(maxsize, dtype=G.dtype)             # FW work array for d**2

    # global work array for distances
    dist_all = np.empty((num_clusters, maxsize*maxsize), dtype=G.dtype, order='C')

    for riter in range(rebalance_iters+1):

        # lloyd cluster balanced
        it = 0
        changed1 = True
        changed2 = True
        # reinitialize
        d.fill(np.inf)
        m.fill(-1)
        p.fill(-1)
        p[centers] = centers
        pc.fill(0)
        pc[centers] = 1
        s.fill(1)
        d[centers] = 0
        m[centers] = np.arange(num_clusters)
        while (changed1 or changed2) and (it < maxiter):
            changed1 = amg_core.bellman_ford_balanced(n, G.indptr, G.indices, G.data,
                                                      centers, d, m, p, pc, s, tiebreaking)

            if s.max() > maxsize:
                raise ValueError('maxsize (maximum cluster size) is too small')

            if m.min() < 0 or d.min() < 0:
                raise ValueError('Encountered a disconnected nodes from m or d:  '
                                 f'{m.min()=} {d.min()=})')

            changed2 = amg_core.center_nodes(n, G.indptr, G.indices, G.data,
                                             Cptr, D, P, CC, L, q,
                                             centers, d, m, p, pc, s)

            it += 1

        # slow list version: [len(np.where(p==i)[0]) for i in range(len(p))]
        truepc = np.bincount(p[p > -1], minlength=len(p))
        if np.count_nonzero(truepc - pc):
            raise ValueError('Predecessor count is incorrect.')

        # don't rebalance on last pass
        if riter == rebalance_iters:
            break

        # don't rebalance a single cluster
        if num_clusters < 2:
            break

        # calculate distances
        dist_all.fill(np.inf)
        for a in range(num_clusters):
            N = s[a]  # cluster size
            Nloc = Cptr[a]+N
            if Nloc >= G.shape[0]:
                Nloc = None
            P.fill(-1)
            amg_core.floyd_warshall(G.shape[0], G.indptr, G.indices, G.data,
                                    dist_all[a, :].ravel(), P, CC[Cptr[a]:Nloc], L,
                                    m, a, N)
        # rebalance
        centers, rebalance_change = _rebalance(G, centers, m, d, dist_all, num_clusters)

        # rebalance did nothing
        if not rebalance_change:
            break

    return m, centers




[docs]
def _rebalance(G, c, m, d, dist_all, num_clusters):
    """Rebalance clusters.

    Parameters
    ----------
    G : sparray
        Sparse graph.
    c : array
        List of centers.
    m : array
        Cluster membership.
    d : array
        Distance to cluster center.
    dist_all : array
        Node-to-node distance for every node in each cluster.
    num_clusters : int
        Number of clusters (= number centers).

    Return
    ------
    array
        List of new centers.
    bool
        Indicate whether centers has changed.

    """
    newc = c.copy()

    # aggregate-to-aggregate neighbors
    AggOp = sparse.coo_array((np.ones(len(m)), (np.arange(len(m)), m))).tocsr()
    Agg2Agg = AggOp.T @ G @ AggOp
    Agg2Agg = Agg2Agg.tocsr()

    # calculate elimination and split measures
    E = _elimination_penalty(G, m, d, dist_all, num_clusters)
    S, c1, c2 = _split_improvement(m, d, dist_all, num_clusters)

    # sort both ascending
    M = np.ones(num_clusters, dtype=bool)
    Esortidx = np.argsort(E)
    Ssortidx = np.argsort(S)

    i_e = 0               # elimination index
    i_s = num_clusters-1  # splitting index

    rebalance_change = False
    # 0, 1, ..., num_clusters-1
    while i_e <= (num_clusters-1) and i_s >= 0:
        a_e = Esortidx[i_e]
        a_s = Ssortidx[i_s]

        if not M[a_e] or a_e == a_s:  # is cluster a_e modifiable and distinct from a_s?
            i_e += 1
            continue

        if not M[a_s]:                # is cluster a_s modifiable?
            i_s -= 1
            continue

        if E[a_e] > S[a_s]:
            break

        M[Agg2Agg[[a_e], :].indices] = False  # neighbors of a_e
        M[Agg2Agg[[a_s], :].indices] = False  # neighbors of a_s
        newc[a_e] = c1[a_s]   # redefine centers
        newc[a_s] = c2[a_s]   # redefine centers
        rebalance_change = True

    return newc, rebalance_change




[docs]
def _elimination_penalty(A, m, d, dist_all, num_clusters):
    """Calculate elimination penalty.

    see _rebalance()
    """
    Acol = A.tocsc()

    # pylint: disable=too-many-nested-blocks
    E = np.inf * np.ones(num_clusters)
    for a in range(num_clusters):
        E[a] = 0
        Va = np.int32(np.where(m == a)[0])
        N = len(Va)

        for iloc, _ in enumerate(Va):
            dmin = np.inf
            for jloc, j in enumerate(Va):
                for k in Acol[:, [j]].indices:
                    if m[k] != m[j]:
                        jiloc = jloc * N + iloc
                        dmin = min(d[k] + A[k, j] + dist_all[a, jiloc], dmin)
            E[a] += dmin**2
        E[a] -= np.sum(d[Va]**2)
    return E




[docs]
def _split_improvement(m, d, dist_all, num_clusters):
    """Calculate split improvement.

    see _rebalance()
    """
    S = np.inf * np.ones(num_clusters)
    I = -1 * np.ones(num_clusters, dtype=np.int32)  # better cluster centers if split
    J = -1 * np.ones(num_clusters, dtype=np.int32)  # better cluster centers if split
    for a in range(num_clusters):
        S[a] = np.inf
        Va = np.int32(np.where(m == a)[0])
        N = len(Va)

        for iloc, i in enumerate(Va):
            for jloc, j in enumerate(Va):
                Snew = 0
                for kloc, _ in enumerate(Va):
                    ikloc = iloc * N + kloc
                    jkloc = jloc * N + kloc
                    if dist_all[a, ikloc] < dist_all[a, jkloc]:
                        Snew = Snew + dist_all[a, ikloc]**2
                    else:
                        Snew = Snew + dist_all[a, jkloc]**2
                if Snew < S[a]:
                    S[a] = Snew
                    I[a] = i
                    J[a] = j
        S[a] = np.sum(d[Va]**2) - S[a]
    return S, I, J




[docs]
def _choice(p):
    """Random selection based on a distribution.

    Parameters
    ----------
    p : array
        Probabilities [0,1], with sum(p) == 1.

    Return
    ------
    int
        Index to a selected integer based on the distribution of p.

    Notes
    -----
    For efficiency, there are no checks.

    """
    a = p / np.max(p)
    i = -1
    while True:
        i = np.random.randint(len(a))
        if np.random.rand() < a[i]:
            break
    return i




[docs]
def kmeanspp_seed(G, nseeds):
    """K-means++ seed.

    Parameters
    ----------
    G : sparray
        Sparse graph on which to seed.

    nseeds : int
        Number of seeds.

    Return
    ------
    array
        List of seeds.

    Notes
    -----
    This is a reference algorithms, at O(n^3).

    TODO - needs testing

    """
    warn('kmeanspp_seed is O(n^3) -- use only for testing')

    n = G.shape[0]
    C = np.random.choice(n, 1, replace=False)
    for _ in range(nseeds-1):
        d = sparse.csgraph.bellman_ford(G, directed=False, indices=C)
        d = d.min(axis=0)   # shortest path from a seed
        d = d**2            # distance squared
        p = d / np.sum(d)   # probability
        # newC = np.random.choice(n, 1, p=p, replace=False) # <- does not work properly
        newC = _choice(p)
        C = np.append(C, newC)
    return C




[docs]
def breadth_first_search(G, seed):
    """Breadth First search of a graph.

    Parameters
    ----------
    G : csr_array, csc_array
        A sparse NxN matrix where each nonzero entry G[i,j] is the distance
        between nodes i and j.
    seed : int
        Index of the seed location.

    Returns
    -------
    order : int array
        Breadth first order.
    level : int array
        Final levels.

    Examples
    --------
    >>> # 0---2
    >>> # |  /
    >>> # | /
    >>> # 1---4---7---8---9
    >>> # |  /|  /
    >>> # | / | /
    >>> # 3/  6/
    >>> # |
    >>> # |
    >>> # 5
    >>> import numpy as np
    >>> import pyamg
    >>> import scipy.sparse as sparse
    >>> edges = np.array([[0,1],[0,2],[1,2],[1,3],[1,4],[3,4],[3,5],
    ...                   [4,6], [4,7], [6,7], [7,8], [8,9]], dtype=np.int32)
    >>> N = np.max(edges.ravel())+1
    >>> data = np.ones((edges.shape[0],))
    >>> A = sparse.coo_array((data, (edges[:,0], edges[:,1])), shape=(N,N))
    >>> c, l = pyamg.graph.breadth_first_search(A, 0)
    >>> print(l)
    [0 1 1 2 2 3 3 3 4 5]
    >>> print(c)
    [0 1 2 3 4 5 6 7 8 9]

    """
    G = asgraph(G)
    N = G.shape[0]

    order = np.empty(N, G.indptr.dtype)
    level = np.empty(N, G.indptr.dtype)
    level[:] = -1

    BFS = amg_core.breadth_first_search
    BFS(G.indptr, G.indices, int(seed), order, level)

    return order, level




[docs]
def connected_components(G):
    """Compute the connected components of a graph.

    The connected components of a graph G, which is represented by a
    symmetric sparse matrix, are labeled with the integers 0,1,..(K-1) where
    K is the number of components.

    Parameters
    ----------
    G : symmetric matrix, preferably in sparse CSR or CSC format
        The nonzeros of G represent the edges of an undirected graph.

    Returns
    -------
    ndarray
        An array of component labels for each vertex of the graph.

    Notes
    -----
    If the nonzero structure of G is not symmetric, then the
    result is undefined.

    Examples
    --------
    >>> from pyamg.graph import connected_components
    >>> print(connected_components( [[0,1,0],[1,0,1],[0,1,0]] ))
    [0 0 0]
    >>> print(connected_components( [[0,1,0],[1,0,0],[0,0,0]] ))
    [0 0 1]
    >>> print(connected_components( [[0,0,0],[0,0,0],[0,0,0]] ))
    [0 1 2]
    >>> print(connected_components( [[0,1,0,0],[1,0,0,0],[0,0,0,1],[0,0,1,0]] ))
    [0 0 1 1]

    """
    G = asgraph(G)
    N = G.shape[0]

    components = np.empty(N, G.indptr.dtype)

    fn = amg_core.connected_components
    fn(N, G.indptr, G.indices, components)

    return components




[docs]
def symmetric_rcm(A):
    """Symmetric Reverse Cutthill-McKee.

    Parameters
    ----------
    A : sparray
        Sparse matrix.

    Returns
    -------
    sparray
        Permuted matrix with reordering.

    See Also
    --------
    pseudo_peripheral_node

    Notes
    -----
    Get a pseudo-peripheral node, then call BFS.

    Examples
    --------
    >>> from pyamg import gallery
    >>> from pyamg.graph import symmetric_rcm
    >>> n = 200
    >>> density = 1.0/n
    >>> A = gallery.sprand(n, n, density, format='csr')
    >>> S = A + A.T
    >>> # try the visualizations
    >>> # import matplotlib.pyplot as plt
    >>> # plt.figure()
    >>> # plt.subplot(121)
    >>> # plt.spy(S,marker='.')
    >>> # plt.subplot(122)
    >>> # plt.spy(symmetric_rcm(S),marker='.')

    """
    _dummy_root, order, _dummy_level = pseudo_peripheral_node(A)

    p = order[::-1]

    return A[p, :][:, p]




[docs]
def pseudo_peripheral_node(A):
    """Find a pseudo peripheral node.

    Parameters
    ----------
    A : sparray
        Sparse matrix.

    Returns
    -------
    x : int
        Location of the node.
    order : array
        BFS ordering.
    level : array
        BFS levels.

    Notes
    -----
    Algorithm in Saad.

    """
    n = A.shape[0]

    valence = np.diff(A.indptr)

    # select an initial node x, set delta = 0
    x = int(np.random.rand() * n)
    delta = 0

    while True:
        # do a level-set traversal from x
        order, level = breadth_first_search(A, x)

        # select a node y in the last level with min degree
        maxlevel = level.max()
        lastnodes = np.where(level == maxlevel)[0]
        lastnodesvalence = valence[lastnodes]
        minlastnodesvalence = lastnodesvalence.min()
        y = np.where(lastnodesvalence == minlastnodesvalence)[0][0]
        y = lastnodes[y]

        # if d(x,y)>delta, set, and go to bfs above
        if level[y] > delta:
            x = y
            delta = level[y]
        else:
            return x, order, level




[docs]
def metis_partition(G, nparts=5, seed=None):
    """Perform partitioning of graph with weighted edges using METIS.

    Parameters
    ----------
    G : sparray
        A sparse n x n matrix where each nonzero entry G[i,j] is the distance
        between nodes i and j.  G[i,j] is required to be integer.
    nparts : int
        Number of parts in the resulting partition.
    seed : int
        Random seed for METIS.

    Returns
    -------
    array
        Array of n x 1 indices from 0 ... nparts-1.

    """
    G = sparse.csr_array(G)

    if G.dtype.kind != 'i':
        raise ValueError('METIS partitioning requires integer weights')

    if G.nnz > 0:
        if G.data.min() < 0:
            raise ValueError('METIS partitioning requires positive integer weights.')

    if not isinstance(nparts, int) or nparts < 1:
        raise ValueError('nparts should be a positive integer')

    try:
        import pymetis  # noqa: PLC0415
    except ImportError as expt:
        raise ImportError('pymetis required for METIS partitioning') from expt

    # set diagonal to zero and force reallocation
    G = G.tocoo()
    G.setdiag(0)
    G = G.tocsr()

    # metis options
    opt = pymetis.Options()
    opt.contig = 1
    if seed:
        opt.seed = seed
    _, parts = pymetis.part_graph(nparts, xadj=G.indptr, adjncy=G.indices, eweights=G.data,
                                  options=opt)

    return np.array(parts)