Module `xswap.prior`

Source code

from typing import List, Tuple

import numpy
import pandas
import scipy.sparse

import xswap.network_formats


def compute_xswap_occurrence_matrix(edge_list: List[Tuple[int, int]],
                                    n_permutations: int,
                                    shape: Tuple[int, int],
                                    allow_self_loops: bool = False,
                                    allow_antiparallel: bool = False,
                                    sparse: bool = True,
                                    swap_multiplier: float = 10,
                                    initial_seed: int = 0,
                                    max_malloc: int = 4000000000):
    """
    Compute the XSwap prior probability for every node pair in a network. The
    XSwap prior is the probability of a node pair having an edge between them in
    degree-preserving permutations of a network. The prior value for a node
    pair can be considered as the probability of an edge existing between two
    nodes given only the network's degree sequence.

    Parameters
    ----------
    edge_list : List[Tuple[int, int]]
        Edge list representing the graph whose XSwap edge priors are to be
        computed. Tuples contain integer values representing nodes. No value
        should be greater than C++'s `INT_MAX`, in this case 2_147_483_647.
        An adjacency matrix will be created assuming that a node's value is its
        index in the matrix. If not, map edges (identifiers can be string or
        otherwise) using `xswap.preprocessing.map_str_edges`.
    n_permutations : int
        The number of permuted networks used to compute the empirical XSwap prior
    shape : Tuple[int, int]
        The shape of the matrix to be returned. In other words, a tuple of the
        number of source and target nodes.
    allow_self_loops : bool
        Whether to allow edges like (0, 0). In the case of bipartite graphs,
        such an edge represents a connection between two distinct nodes, while
        in other graphs it may represent an edge from a node to itself, in which
        case an edge may or may not be meaningful depending on context.
    allow_antiparallel : bool
        Whether to allow simultaneous edges like (0, 1) and (1, 0). In the case
        of bipartite graphs, these edges represent two connections between four
        distinct nodes, while for other graphs, these may be connections between
        the same two nodes.
    sparse : bool
        Whether to use a sparse matrix when adding up edge occurrences across
        permutations. If large changes in sparsity are expected, a dense
        array may be preferable.
    swap_multiplier : float
        The number of edge swap attempts is determined by the product of the
        number of existing edges and multiplier. For example, if five edges are
        passed and multiplier is set to 10, 50 swaps will be attempted. Non-integer
        products will be rounded down to the nearest integer.
    initial_seed : int
        Random seed that will be passed to the C++ Mersenne Twister 19937 random
        number generator. `initial_seed` will be used for the first permutation,
        and the seed used for each subsequent permutation will be incremented by
        one. For example, if `initial_seed` is 0 and `n_permutations` is 2, then
        the two permutations will pass seeds 0 and 1, respectively.
    max_malloc : int (`unsigned long long int` in C)
        The maximum amount of memory to be allocated using `malloc` when making
        a bitset to hold edges. An uncompressed bitset is implemented for
        holding edges that is significantly faster than alternatives. However,
        it is memory-inefficient and will not be used if more memory is required
        than `max_malloc`. Above the threshold, a Roaring bitset will be used.

    Returns
    -------
    edge_counter : scipy.sparse.csc_matrix
        Adjacency matrix with entries equal to the number of permutations in
        which a given edge appeared
    """
    import xswap._xswap_backend
    if len(edge_list) != len(set(edge_list)):
        raise ValueError("Edge list contained duplicate edges. "
                         "XSwap does not support multigraphs.")

    num_swaps = int(swap_multiplier * len(edge_list))

    max_id = max(map(max, edge_list))

    if sparse:
        edge_counter = scipy.sparse.csc_matrix(shape, dtype=int)
    else:
        edge_counter = numpy.zeros(shape, dtype=int)

    for i in range(n_permutations):
        permuted_edges, stats = xswap._xswap_backend._xswap(
            edge_list, [], max_id, allow_self_loops, allow_antiparallel,
            num_swaps, initial_seed + i, max_malloc)
        permuted_matrix = xswap.network_formats.edges_to_matrix(
            permuted_edges, add_reverse_edges=(not allow_antiparallel),
            shape=shape, dtype=int, sparse=sparse)
        edge_counter += permuted_matrix

    return edge_counter


def compute_xswap_priors(edge_list: List[Tuple[int, int]], n_permutations: int,
                         shape: Tuple[int, int], allow_self_loops: bool = False,
                         allow_antiparallel: bool = False, sparse: bool = True,
                         swap_multiplier: int = 10, initial_seed: int = 0,
                         max_malloc: int = 4000000000,
                         dtypes = {'id': numpy.uint16, 'degree': numpy.uint16,
                                   'edge': bool, 'xswap_prior': float},
                        ):
    """
    Compute the XSwap prior for every potential edge in the network. Uses
    degree-grouping to maximize the effective number of permutations for each
    node pair. That is, node pairs with the same source and target degrees can
    be grouped when computing the XSwap prior, allowing there to be more
    permutations for some node pairs than `n_permutations`.

    Note that the mechanics of this function are separated to minimize memory use.

    Parameters
    ----------
    edge_list : List[Tuple[int, int]]
        Edge list representing the graph whose XSwap edge priors are to be
        computed. Tuples contain integer values representing nodes. No value
        should be greater than C++'s `INT_MAX`, in this case 2_147_483_647.
        An adjacency matrix will be created assuming that a node's value is its
        index in the matrix. If not, map edges (identifiers can be string or
        otherwise) using `xswap.preprocessing.map_str_edges`.
    n_permutations : int
        The number of permuted networks used to compute the empirical XSwap prior
    shape : Tuple[int, int]
        The shape of the matrix to be returned. In other words, a tuple of the
        number of source and target nodes.
    allow_self_loops : bool
        Whether to allow edges like (0, 0). In the case of bipartite graphs,
        such an edge represents a connection between two distinct nodes, while
        in other graphs it may represent an edge from a node to itself, in which
        case an edge may or may not be meaningful depending on context.
    allow_antiparallel : bool
        Whether to allow simultaneous edges like (0, 1) and (1, 0). In the case
        of bipartite graphs, these edges represent two connections between four
        distinct nodes, while for other graphs, these may be connections between
        the same two nodes.
    sparse : bool
        Whether to use a sparse matrix when adding up edge occurrences across
        permutations. If large changes in sparsity are expected, a dense
        array may be preferable.
    swap_multiplier : float
        The number of edge swap attempts is determined by the product of the
        number of existing edges and multiplier. For example, if five edges are
        passed and multiplier is set to 10, 50 swaps will be attempted. Non-integer
        products will be rounded down to the nearest integer.
    initial_seed : int
        Random seed that will be passed to the C++ Mersenne Twister 19937 random
        number generator. `initial_seed` will be used for the first permutation,
        and the seed used for each subsequent permutation will be incremented by
        one. For example, if `initial_seed` is 0 and `n_permutations` is 2, then
        the two permutations will pass seeds 0 and 1, respectively.
    max_malloc : int (`unsigned long long int` in C)
        The maximum amount of memory to be allocated using `malloc` when making
        a bitset to hold edges. An uncompressed bitset is implemented for
        holding edges that is significantly faster than alternatives. However,
        it is memory-inefficient and will not be used if more memory is required
        than `max_malloc`. Above the threshold, a Roaring bitset will be used.
    dtypes : dict
        Dictionary mapping returned column types to dtypes. Keys should be
        `'id'`, `'degree'`, `'edge'`, and `'xswap_prior'`. `dtype` need only
        be changed from its defaults if the values of `id` or `degree` are
        greater than the maxima in the default dtypes, or in cases where greater
        precision is desired. (`numpy.uint16` has a maximum value of 65535.)

    Returns
    -------
    prior_df : pandas.DataFrame
        Columns are the following:
        [source_id, target_id, edge, source_degree, target_degree, xswap_prior]
    """
    # Compute the adjacency matrix of the original (unpermuted) network
    original_edges = xswap.network_formats.edges_to_matrix(
        edge_list, add_reverse_edges=(not allow_antiparallel), shape=shape,
        dtype=dtypes['edge'], sparse=True)

    # Setup DataFrame for recording prior data
    prior_df = pandas.DataFrame({
        'source_id': numpy.repeat(numpy.arange(shape[0], dtype=dtypes['id']), shape[1]),
        'target_id': numpy.tile(numpy.arange(shape[1], dtype=dtypes['id']), shape[0]),
        'edge': original_edges.toarray().flatten(),
    })
    del original_edges

    prior_df['source_degree'] = (prior_df
                                 .groupby('source_id')
                                 .transform(sum)['edge']
                                 .astype(dtypes['degree']))
    del prior_df['source_id']

    prior_df['target_degree'] = (prior_df
                                 .groupby('target_id')
                                 .transform(sum)['edge']
                                 .astype(dtypes['degree']))
    del prior_df['target_id']

    # Compute the number of occurrences of each edge across permutations
    edge_counter = compute_xswap_occurrence_matrix(
        edge_list=edge_list, n_permutations=n_permutations, shape=shape,
        allow_self_loops=allow_self_loops, allow_antiparallel=allow_antiparallel,
        sparse=sparse, swap_multiplier=swap_multiplier, initial_seed=initial_seed,
        max_malloc=max_malloc)

    prior_df['num_permuted_edges'] = edge_counter.toarray().flatten()
    del edge_counter

    # The number of edges that occurred across all node pairs with the same
    # `source_degree` and `target_degree`
    dgp_edge_count = (
        prior_df
        .groupby(['source_degree', 'target_degree'])
        .transform(sum)['num_permuted_edges']
        .values
        .astype(dtypes['degree'])
    )
    del prior_df['num_permuted_edges']

    # The effective number of permutations for every node pair, incorporating
    # degree-grouping
    num_dgp = (
        n_permutations * prior_df.groupby(['source_degree', 'target_degree'])
                                 .transform(len)['edge']
                                 .values
    )
    xswap_prior = (dgp_edge_count / num_dgp).astype(dtypes['xswap_prior'])
    del dgp_edge_count, num_dgp

    prior_df['xswap_prior'] = xswap_prior
    del xswap_prior

    prior_df = (
        prior_df
        .assign(
            source_id=numpy.repeat(numpy.arange(shape[0], dtype=dtypes['id']), shape[1]),
            target_id=numpy.tile(numpy.arange(shape[1], dtype=dtypes['id']), shape[0]),
        )
        .filter(items=['source_id', 'target_id', 'edge', 'source_degree',
                       'target_degree', 'xswap_prior'])
    )
    return prior_df


def approximate_xswap_prior(source_degree, target_degree, num_edges):
    """
    Approximate the XSwap prior by assuming that the XSwap Markov Chain is stationary.
    While this is not the case in reality, some networks' priors can be estimated
    very well using this equation.

    Parameters
    ----------
    source_degree : int, float, numpy.array, or pandas.Series
        The source degree for a single node pair or a number of source degrees.
        The type of object passed should match `target_degree`.
    target_degree : int, float, numpy.array, or pandas.Series
        The target degree for a single node pair or a number of target degrees.
        The type of object passed should match `source_degree`.
    num_edges : int or float
        The total number of edges in the network

    Returns
    -------
    approximate_prior : float, numpy.array, or pandas.Series
        Output type matches the types of `source_degree` and `target_degree`.
    """
    return source_degree * target_degree / (
        (source_degree * target_degree) ** 2
        + (num_edges - source_degree - target_degree + 1) ** 2
    ) ** 0.5

Functions

def approximate_xswap_prior(source_degree, target_degree, num_edges)

Approximate the XSwap prior by assuming that the XSwap Markov Chain is stationary. While this is not the case in reality, some networks' priors can be estimated very well using this equation.

Parameters

source_degree : int, float, numpy.array, or pandas.Series: The source degree for a single node pair or a number of source degrees. The type of object passed should match target_degree.
target_degree : int, float, numpy.array, or pandas.Series: The target degree for a single node pair or a number of target degrees. The type of object passed should match source_degree.
num_edges : int or float: The total number of edges in the network

Returns

approximate_prior : float, numpy.array, or pandas.Series: Output type matches the types of source_degree and target_degree.

Source code

def approximate_xswap_prior(source_degree, target_degree, num_edges):
    """
    Approximate the XSwap prior by assuming that the XSwap Markov Chain is stationary.
    While this is not the case in reality, some networks' priors can be estimated
    very well using this equation.

    Parameters
    ----------
    source_degree : int, float, numpy.array, or pandas.Series
        The source degree for a single node pair or a number of source degrees.
        The type of object passed should match `target_degree`.
    target_degree : int, float, numpy.array, or pandas.Series
        The target degree for a single node pair or a number of target degrees.
        The type of object passed should match `source_degree`.
    num_edges : int or float
        The total number of edges in the network

    Returns
    -------
    approximate_prior : float, numpy.array, or pandas.Series
        Output type matches the types of `source_degree` and `target_degree`.
    """
    return source_degree * target_degree / (
        (source_degree * target_degree) ** 2
        + (num_edges - source_degree - target_degree + 1) ** 2
    ) ** 0.5

def compute_xswap_occurrence_matrix(edge_list, n_permutations, shape, allow_self_loops=False, allow_antiparallel=False, sparse=True, swap_multiplier=10, initial_seed=0, max_malloc=4000000000)

Compute the XSwap prior probability for every node pair in a network. The XSwap prior is the probability of a node pair having an edge between them in degree-preserving permutations of a network. The prior value for a node pair can be considered as the probability of an edge existing between two nodes given only the network's degree sequence.

Parameters

edge_list : List[Tuple[int, int]]: Edge list representing the graph whose XSwap edge priors are to be computed. Tuples contain integer values representing nodes. No value should be greater than C++'s INT_MAX, in this case 2_147_483_647. An adjacency matrix will be created assuming that a node's value is its index in the matrix. If not, map edges (identifiers can be string or otherwise) using map_str_edges().
n_permutations : int: The number of permuted networks used to compute the empirical XSwap prior
shape : Tuple[int, int]: The shape of the matrix to be returned. In other words, a tuple of the number of source and target nodes.
allow_self_loops : bool: Whether to allow edges like (0, 0). In the case of bipartite graphs, such an edge represents a connection between two distinct nodes, while in other graphs it may represent an edge from a node to itself, in which case an edge may or may not be meaningful depending on context.
allow_antiparallel : bool: Whether to allow simultaneous edges like (0, 1) and (1, 0). In the case of bipartite graphs, these edges represent two connections between four distinct nodes, while for other graphs, these may be connections between the same two nodes.
sparse : bool: Whether to use a sparse matrix when adding up edge occurrences across permutations. If large changes in sparsity are expected, a dense array may be preferable.
swap_multiplier : float: The number of edge swap attempts is determined by the product of the number of existing edges and multiplier. For example, if five edges are passed and multiplier is set to 10, 50 swaps will be attempted. Non-integer products will be rounded down to the nearest integer.
initial_seed : int: Random seed that will be passed to the C++ Mersenne Twister 19937 random number generator. initial_seed will be used for the first permutation, and the seed used for each subsequent permutation will be incremented by one. For example, if initial_seed is 0 and n_permutations is 2, then the two permutations will pass seeds 0 and 1, respectively.
max_malloc : int (unsigned` `long` `long` `int in C): The maximum amount of memory to be allocated using malloc when making a bitset to hold edges. An uncompressed bitset is implemented for holding edges that is significantly faster than alternatives. However, it is memory-inefficient and will not be used if more memory is required than max_malloc. Above the threshold, a Roaring bitset will be used.

Returns

edge_counter : scipy.sparse.csc_matrix: Adjacency matrix with entries equal to the number of permutations in which a given edge appeared

Source code

def compute_xswap_occurrence_matrix(edge_list: List[Tuple[int, int]],
                                    n_permutations: int,
                                    shape: Tuple[int, int],
                                    allow_self_loops: bool = False,
                                    allow_antiparallel: bool = False,
                                    sparse: bool = True,
                                    swap_multiplier: float = 10,
                                    initial_seed: int = 0,
                                    max_malloc: int = 4000000000):
    """
    Compute the XSwap prior probability for every node pair in a network. The
    XSwap prior is the probability of a node pair having an edge between them in
    degree-preserving permutations of a network. The prior value for a node
    pair can be considered as the probability of an edge existing between two
    nodes given only the network's degree sequence.

    Parameters
    ----------
    edge_list : List[Tuple[int, int]]
        Edge list representing the graph whose XSwap edge priors are to be
        computed. Tuples contain integer values representing nodes. No value
        should be greater than C++'s `INT_MAX`, in this case 2_147_483_647.
        An adjacency matrix will be created assuming that a node's value is its
        index in the matrix. If not, map edges (identifiers can be string or
        otherwise) using `xswap.preprocessing.map_str_edges`.
    n_permutations : int
        The number of permuted networks used to compute the empirical XSwap prior
    shape : Tuple[int, int]
        The shape of the matrix to be returned. In other words, a tuple of the
        number of source and target nodes.
    allow_self_loops : bool
        Whether to allow edges like (0, 0). In the case of bipartite graphs,
        such an edge represents a connection between two distinct nodes, while
        in other graphs it may represent an edge from a node to itself, in which
        case an edge may or may not be meaningful depending on context.
    allow_antiparallel : bool
        Whether to allow simultaneous edges like (0, 1) and (1, 0). In the case
        of bipartite graphs, these edges represent two connections between four
        distinct nodes, while for other graphs, these may be connections between
        the same two nodes.
    sparse : bool
        Whether to use a sparse matrix when adding up edge occurrences across
        permutations. If large changes in sparsity are expected, a dense
        array may be preferable.
    swap_multiplier : float
        The number of edge swap attempts is determined by the product of the
        number of existing edges and multiplier. For example, if five edges are
        passed and multiplier is set to 10, 50 swaps will be attempted. Non-integer
        products will be rounded down to the nearest integer.
    initial_seed : int
        Random seed that will be passed to the C++ Mersenne Twister 19937 random
        number generator. `initial_seed` will be used for the first permutation,
        and the seed used for each subsequent permutation will be incremented by
        one. For example, if `initial_seed` is 0 and `n_permutations` is 2, then
        the two permutations will pass seeds 0 and 1, respectively.
    max_malloc : int (`unsigned long long int` in C)
        The maximum amount of memory to be allocated using `malloc` when making
        a bitset to hold edges. An uncompressed bitset is implemented for
        holding edges that is significantly faster than alternatives. However,
        it is memory-inefficient and will not be used if more memory is required
        than `max_malloc`. Above the threshold, a Roaring bitset will be used.

    Returns
    -------
    edge_counter : scipy.sparse.csc_matrix
        Adjacency matrix with entries equal to the number of permutations in
        which a given edge appeared
    """
    import xswap._xswap_backend
    if len(edge_list) != len(set(edge_list)):
        raise ValueError("Edge list contained duplicate edges. "
                         "XSwap does not support multigraphs.")

    num_swaps = int(swap_multiplier * len(edge_list))

    max_id = max(map(max, edge_list))

    if sparse:
        edge_counter = scipy.sparse.csc_matrix(shape, dtype=int)
    else:
        edge_counter = numpy.zeros(shape, dtype=int)

    for i in range(n_permutations):
        permuted_edges, stats = xswap._xswap_backend._xswap(
            edge_list, [], max_id, allow_self_loops, allow_antiparallel,
            num_swaps, initial_seed + i, max_malloc)
        permuted_matrix = xswap.network_formats.edges_to_matrix(
            permuted_edges, add_reverse_edges=(not allow_antiparallel),
            shape=shape, dtype=int, sparse=sparse)
        edge_counter += permuted_matrix

    return edge_counter

def compute_xswap_priors(edge_list, n_permutations, shape, allow_self_loops=False, allow_antiparallel=False, sparse=True, swap_multiplier=10, initial_seed=0, max_malloc=4000000000, dtypes={'id': , 'degree': , 'edge': , 'xswap_prior': })

Compute the XSwap prior for every potential edge in the network. Uses degree-grouping to maximize the effective number of permutations for each node pair. That is, node pairs with the same source and target degrees can be grouped when computing the XSwap prior, allowing there to be more permutations for some node pairs than n_permutations.

Note that the mechanics of this function are separated to minimize memory use.

Parameters

edge_list : List[Tuple[int, int]]: Edge list representing the graph whose XSwap edge priors are to be computed. Tuples contain integer values representing nodes. No value should be greater than C++'s INT_MAX, in this case 2_147_483_647. An adjacency matrix will be created assuming that a node's value is its index in the matrix. If not, map edges (identifiers can be string or otherwise) using map_str_edges().
n_permutations : int: The number of permuted networks used to compute the empirical XSwap prior
shape : Tuple[int, int]: The shape of the matrix to be returned. In other words, a tuple of the number of source and target nodes.
allow_self_loops : bool: Whether to allow edges like (0, 0). In the case of bipartite graphs, such an edge represents a connection between two distinct nodes, while in other graphs it may represent an edge from a node to itself, in which case an edge may or may not be meaningful depending on context.
allow_antiparallel : bool: Whether to allow simultaneous edges like (0, 1) and (1, 0). In the case of bipartite graphs, these edges represent two connections between four distinct nodes, while for other graphs, these may be connections between the same two nodes.
sparse : bool: Whether to use a sparse matrix when adding up edge occurrences across permutations. If large changes in sparsity are expected, a dense array may be preferable.
swap_multiplier : float: The number of edge swap attempts is determined by the product of the number of existing edges and multiplier. For example, if five edges are passed and multiplier is set to 10, 50 swaps will be attempted. Non-integer products will be rounded down to the nearest integer.
initial_seed : int: Random seed that will be passed to the C++ Mersenne Twister 19937 random number generator. initial_seed will be used for the first permutation, and the seed used for each subsequent permutation will be incremented by one. For example, if initial_seed is 0 and n_permutations is 2, then the two permutations will pass seeds 0 and 1, respectively.
max_malloc : int (unsigned` `long` `long` `int in C): The maximum amount of memory to be allocated using malloc when making a bitset to hold edges. An uncompressed bitset is implemented for holding edges that is significantly faster than alternatives. However, it is memory-inefficient and will not be used if more memory is required than max_malloc. Above the threshold, a Roaring bitset will be used.
dtypes : dict: Dictionary mapping returned column types to dtypes. Keys should be 'id', 'degree', 'edge', and 'xswap_prior'. dtype need only be changed from its defaults if the values of id or degree are greater than the maxima in the default dtypes, or in cases where greater precision is desired. (numpy.uint16 has a maximum value of 65535.)

Returns

prior_df : pandas.DataFrame: Columns are the following: [source_id, target_id, edge, source_degree, target_degree, xswap_prior]

Source code

def compute_xswap_priors(edge_list: List[Tuple[int, int]], n_permutations: int,
                         shape: Tuple[int, int], allow_self_loops: bool = False,
                         allow_antiparallel: bool = False, sparse: bool = True,
                         swap_multiplier: int = 10, initial_seed: int = 0,
                         max_malloc: int = 4000000000,
                         dtypes = {'id': numpy.uint16, 'degree': numpy.uint16,
                                   'edge': bool, 'xswap_prior': float},
                        ):
    """
    Compute the XSwap prior for every potential edge in the network. Uses
    degree-grouping to maximize the effective number of permutations for each
    node pair. That is, node pairs with the same source and target degrees can
    be grouped when computing the XSwap prior, allowing there to be more
    permutations for some node pairs than `n_permutations`.

    Note that the mechanics of this function are separated to minimize memory use.

    Parameters
    ----------
    edge_list : List[Tuple[int, int]]
        Edge list representing the graph whose XSwap edge priors are to be
        computed. Tuples contain integer values representing nodes. No value
        should be greater than C++'s `INT_MAX`, in this case 2_147_483_647.
        An adjacency matrix will be created assuming that a node's value is its
        index in the matrix. If not, map edges (identifiers can be string or
        otherwise) using `xswap.preprocessing.map_str_edges`.
    n_permutations : int
        The number of permuted networks used to compute the empirical XSwap prior
    shape : Tuple[int, int]
        The shape of the matrix to be returned. In other words, a tuple of the
        number of source and target nodes.
    allow_self_loops : bool
        Whether to allow edges like (0, 0). In the case of bipartite graphs,
        such an edge represents a connection between two distinct nodes, while
        in other graphs it may represent an edge from a node to itself, in which
        case an edge may or may not be meaningful depending on context.
    allow_antiparallel : bool
        Whether to allow simultaneous edges like (0, 1) and (1, 0). In the case
        of bipartite graphs, these edges represent two connections between four
        distinct nodes, while for other graphs, these may be connections between
        the same two nodes.
    sparse : bool
        Whether to use a sparse matrix when adding up edge occurrences across
        permutations. If large changes in sparsity are expected, a dense
        array may be preferable.
    swap_multiplier : float
        The number of edge swap attempts is determined by the product of the
        number of existing edges and multiplier. For example, if five edges are
        passed and multiplier is set to 10, 50 swaps will be attempted. Non-integer
        products will be rounded down to the nearest integer.
    initial_seed : int
        Random seed that will be passed to the C++ Mersenne Twister 19937 random
        number generator. `initial_seed` will be used for the first permutation,
        and the seed used for each subsequent permutation will be incremented by
        one. For example, if `initial_seed` is 0 and `n_permutations` is 2, then
        the two permutations will pass seeds 0 and 1, respectively.
    max_malloc : int (`unsigned long long int` in C)
        The maximum amount of memory to be allocated using `malloc` when making
        a bitset to hold edges. An uncompressed bitset is implemented for
        holding edges that is significantly faster than alternatives. However,
        it is memory-inefficient and will not be used if more memory is required
        than `max_malloc`. Above the threshold, a Roaring bitset will be used.
    dtypes : dict
        Dictionary mapping returned column types to dtypes. Keys should be
        `'id'`, `'degree'`, `'edge'`, and `'xswap_prior'`. `dtype` need only
        be changed from its defaults if the values of `id` or `degree` are
        greater than the maxima in the default dtypes, or in cases where greater
        precision is desired. (`numpy.uint16` has a maximum value of 65535.)

    Returns
    -------
    prior_df : pandas.DataFrame
        Columns are the following:
        [source_id, target_id, edge, source_degree, target_degree, xswap_prior]
    """
    # Compute the adjacency matrix of the original (unpermuted) network
    original_edges = xswap.network_formats.edges_to_matrix(
        edge_list, add_reverse_edges=(not allow_antiparallel), shape=shape,
        dtype=dtypes['edge'], sparse=True)

    # Setup DataFrame for recording prior data
    prior_df = pandas.DataFrame({
        'source_id': numpy.repeat(numpy.arange(shape[0], dtype=dtypes['id']), shape[1]),
        'target_id': numpy.tile(numpy.arange(shape[1], dtype=dtypes['id']), shape[0]),
        'edge': original_edges.toarray().flatten(),
    })
    del original_edges

    prior_df['source_degree'] = (prior_df
                                 .groupby('source_id')
                                 .transform(sum)['edge']
                                 .astype(dtypes['degree']))
    del prior_df['source_id']

    prior_df['target_degree'] = (prior_df
                                 .groupby('target_id')
                                 .transform(sum)['edge']
                                 .astype(dtypes['degree']))
    del prior_df['target_id']

    # Compute the number of occurrences of each edge across permutations
    edge_counter = compute_xswap_occurrence_matrix(
        edge_list=edge_list, n_permutations=n_permutations, shape=shape,
        allow_self_loops=allow_self_loops, allow_antiparallel=allow_antiparallel,
        sparse=sparse, swap_multiplier=swap_multiplier, initial_seed=initial_seed,
        max_malloc=max_malloc)

    prior_df['num_permuted_edges'] = edge_counter.toarray().flatten()
    del edge_counter

    # The number of edges that occurred across all node pairs with the same
    # `source_degree` and `target_degree`
    dgp_edge_count = (
        prior_df
        .groupby(['source_degree', 'target_degree'])
        .transform(sum)['num_permuted_edges']
        .values
        .astype(dtypes['degree'])
    )
    del prior_df['num_permuted_edges']

    # The effective number of permutations for every node pair, incorporating
    # degree-grouping
    num_dgp = (
        n_permutations * prior_df.groupby(['source_degree', 'target_degree'])
                                 .transform(len)['edge']
                                 .values
    )
    xswap_prior = (dgp_edge_count / num_dgp).astype(dtypes['xswap_prior'])
    del dgp_edge_count, num_dgp

    prior_df['xswap_prior'] = xswap_prior
    del xswap_prior

    prior_df = (
        prior_df
        .assign(
            source_id=numpy.repeat(numpy.arange(shape[0], dtype=dtypes['id']), shape[1]),
            target_id=numpy.tile(numpy.arange(shape[1], dtype=dtypes['id']), shape[0]),
        )
        .filter(items=['source_id', 'target_id', 'edge', 'source_degree',
                       'target_degree', 'xswap_prior'])
    )
    return prior_df