Source code for magine.networks.databases.signor

import logging
import os

import networkx as nx
import pandas as pd

from magine.data.storage import network_data_dir
from magine.networks.standards import edge_standards

_p_name = os.path.join(network_data_dir, 'signor.p.gz')
from magine.logging import get_logger

logger = get_logger(__name__, log_level=logging.INFO)


[docs]def download_signor(): logger.info("Downloading SIGNOR") col_names = [ 'ENTITYA', 'TYPEA', 'IDA', 'DATABASEA', 'ENTITYB', 'TYPEB', 'IDB', 'DATABASEB', 'EFFECT', 'MECHANISM', 'RESIDUE', 'SEQUENCE', 'TAX_ID', 'CELL_DATA', 'TISSUE_DATA', 'MODULATOR_COMPLEX', 'TARGET_COMPLEX', 'MODIFICATIONA', 'MODASEQ', 'MODIFICATIONB', 'MODBSEQ', 'PMID', 'DIRECT', 'SENTENCE', 'SIGNOR_ID', 'NA1', 'NA2', 'NA3'] table = pd.read_csv('https://signor.uniroma2.it/getData.php?organism=9606', names=col_names, delimiter='\t', index_col=None, error_bad_lines=False, encoding='utf-8' ) # filter out non direct table = table.loc[table['DIRECT'] == 't'] # Filter out non descriptive table = table.loc[~table['MECHANISM'].isnull()] # Drop SIGNOR edges, these are generally complexes table = table[~(table['DATABASEA'] == 'SIGNOR')] table = table[~(table['DATABASEB'] == 'SIGNOR')] # Not sure what they mean, so will remove. Ideally other DBs have this info table = table[~(table['MECHANISM'] == 'post transcriptional regulation')] col_a = ['ENTITYA', 'TYPEA', 'IDA', 'DATABASEA'] col_b = ['ENTITYB', 'TYPEB', 'IDB', 'DATABASEB'] cols = ['name', 'species_type', 'id', 'db'] species_a = table[col_a].copy() species_b = table[col_b].copy() species_a.rename(columns={i: j for i, j in zip(col_a, cols)}, inplace=True) species_b.rename(columns={i: j for i, j in zip(col_b, cols)}, inplace=True) species_a.drop_duplicates(inplace=True) species_b.drop_duplicates(inplace=True) all_species = pd.concat([species_a, species_b]) all_species.drop_duplicates(inplace=True) def map_to_activate_inhibit(row): effect = '' mechanism = row['MECHANISM'] if 'down-regulates' in row['EFFECT']: effect = 'inhibit' elif 'up-regulates' in row['EFFECT']: effect = 'activate' if mechanism in edge_standards: mechanism = edge_standards[mechanism] elif mechanism == 'transcriptional regulation': if effect == 'inhibit': mechanism = 'repression' elif effect == 'activate': mechanism = 'expression' if effect == '': return mechanism else: return "|".join([effect, mechanism]) # relabel edge types table['interactionType'] = table.apply(map_to_activate_inhibit, axis=1) table['databaseSource'] = 'SIGNOR' table['pmid'] = table['PMID'] table['source'] = table['ENTITYA'] table['target'] = table['ENTITYB'] protein_graph = nx.from_pandas_edgelist( table, 'source', 'target', edge_attr=['interactionType', 'databaseSource'], create_using=nx.DiGraph() ) # add names to graph for row in all_species.values: name, species_type, id_name, db = row if species_type != 'protein': species_type = 'compound' if species_type == 'protein': species_type = 'gene' protein_graph.add_node(name, databaseSource='SIGNOR', speciesType=species_type) nx.write_gpickle(protein_graph, _p_name) logger.info("Done downloading SIGNOR")
[docs]def load_signor(fresh_download=False): """ Load reactome functional interaction network Parameters ---------- fresh_download: bool Download fresh network verbose : bool Returns ------- nx.DiGraph """ if not os.path.exists(_p_name) or fresh_download: print("Downloading Signor network!") download_signor() if not os.path.exists(_p_name): raise FileNotFoundError("Error downloading reactome FI. ") tmp_graph = nx.read_gpickle(_p_name) logger.info("SIGNOR : {} nodes and {} edges".format(len(tmp_graph.nodes), len(tmp_graph.edges))) return tmp_graph
if __name__ == '__main__': download_signor()