Source code for magine.networks.databases.hmdb

import logging
import os

import networkx as nx

from magine.data.storage import network_data_dir
from magine.logging import get_logger

logger = get_logger(__name__, log_level=logging.INFO)


def load_hmdb_network(fresh_download=False):
    """
    Create HMDB network containing all metabolite-protein interactions.

    The graph is cached as a gzip-compressed pickle named
    ``hmdb_graph.p.gz`` inside ``network_data_dir``. When that file exists
    it is loaded and returned; otherwise the graph is rebuilt from the
    ``ChemicalMapper`` tables and the cache file is (over)written.

    Parameters
    ----------
    fresh_download : bool
        If True, ignore any cached graph, rebuild it from
        ``ChemicalMapper`` and overwrite the cache file.

    Returns
    -------
    nx.DiGraph
        Directed graph with edges pointing from HMDB compounds to genes.
        Nodes carry ``databaseSource`` and ``speciesType`` attributes
        (plus ``chemName`` for compounds with a known name); edges carry
        ``interactionType`` and ``databaseSource``.
    """
    # Local imports keep this block self-contained. nx.read_gpickle and
    # nx.write_gpickle were removed in NetworkX 3.0; gzip + pickle reads and
    # writes the same on-disk format and works on every NetworkX version.
    import gzip
    import pickle

    out_name = os.path.join(network_data_dir, 'hmdb_graph.p.gz')

    if not fresh_download and os.path.exists(out_name):
        # NOTE: unpickling executes arbitrary code; the cache file must only
        # ever be one that this function wrote itself.
        with gzip.open(out_name, 'rb') as handle:
            tmp_graph = pickle.load(handle)
    else:
        # Imported lazily so the mapper is only loaded when a rebuild is
        # actually required.
        from magine.mappings.chemical_mapper import ChemicalMapper

        cm = ChemicalMapper()
        tmp_graph = nx.DiGraph()

        def _add_node(node, node_type):
            """Add `node` to the graph with its HMDB annotations."""
            attrs = {'databaseSource': 'HMDB', 'speciesType': node_type}
            if node_type == 'compound' and node in cm.hmdb_to_chem_name:
                # Several names may map to one id; take the alphabetically
                # first so the choice is deterministic.
                attrs['chemName'] = sorted(cm.hmdb_to_chem_name[node])[0]
            tmp_graph.add_node(node, **attrs)

        for source, genes in cm.hmdb_main_to_protein.items():
            # Skip blank identifiers on either side of the interaction.
            if source == '':
                continue
            _add_node(source, 'compound')
            for target in genes:
                if target == '':
                    continue
                _add_node(target, 'gene')
                tmp_graph.add_edge(source, target,
                                   interactionType='chemical',
                                   databaseSource='HMDB')

        with gzip.open(out_name, 'wb') as handle:
            pickle.dump(tmp_graph, handle, pickle.HIGHEST_PROTOCOL)

    logger.info("HMDB : {} nodes and {} edges".format(
        len(tmp_graph.nodes), len(tmp_graph.edges))
    )
    return tmp_graph
if __name__ == '__main__':
    # load_hmdb_network accepts a single parameter; passing a second
    # positional argument raised TypeError. Force a rebuild of the cache.
    load_hmdb_network(fresh_download=True)