Source code for rnaglib.dataset.rna

import os
import tempfile

from Bio.PDB.PDBList import PDBList

from rnaglib.prepare_data import build_graph_from_cif
from rnaglib.utils import get_default_download_dir, dump_json, load_graph


def rna_from_pdbid(
    pdbid,
    version="2.0.0",
    annotated=False,
    chop=False,
    redundancy="nr",
    download_dir=None,
    verbose=True,
):
    """Fetch the graph for a PDB id, from the local downloads or from the PDB.

    The graph is first looked up as ``<pdbid>.json`` (lower-cased id) inside
    the default download directory, in the dataset folder selected by
    ``redundancy``, ``version``, ``chop`` and ``annotated``. When that file
    does not exist, the mmCIF file is retrieved with Biopython's ``PDBList``
    and a graph is built from it with ``build_graph_from_cif``.

    :param pdbid: PDB id to fetch
    :param version: database version, part of the local dataset folder name
    :param annotated: if True, look in the ``-annotated`` dataset folder
    :param chop: if True, look in the ``-chop`` dataset folder
    :param redundancy: redundancy tag, part of the local dataset folder name
    :param download_dir: directory receiving the downloaded mmCIF file; when
        None, a temporary directory is used and removed afterwards
    :param verbose: if True, report when the graph is loaded from local files
    :return: ``{"rna": graph}`` when the graph is found locally, otherwise the
        value returned by ``build_graph_from_cif``.
    """
    # NOTE(review): the two branches return different shapes (a dict wrapping
    # the graph vs. the raw result of build_graph_from_cif) -- confirm what
    # build_graph_from_cif returns and whether callers expect a dict.
    tag = f"rnaglib-{redundancy}-{version}{'-chop' if chop else ''}{'-' + 'annotated' if annotated else ''}"
    graph_path = os.path.join(
        get_default_download_dir(), "datasets", tag, "graphs", f"{pdbid.lower()}.json"
    )

    if os.path.exists(graph_path):
        if verbose:
            print("Loading graph from local database...")
        return {"rna": load_graph(graph_path)}

    # NOTE(review): this message mentions a `graph_dir` argument, but the
    # function has no such parameter -- the hint should be revisited.
    print(
        """The required pdb was not found in existing default downloads. Fetching PDB and annotating... If you want to use a local graph, pass a path to the `graph_dir` argument. """
    )
    pdb_list = PDBList()
    with tempfile.TemporaryDirectory() as tmpdir:
        # `pdir` used to be bound only when download_dir was None, so passing
        # a download_dir raised UnboundLocalError on the next line.
        pdir = tmpdir if download_dir is None else download_dir
        cif_path = pdb_list.retrieve_pdb_file(pdbid, pdir=pdir, file_format="mmCif")
        # Build the graph while the temporary directory still exists.
        graph = build_graph_from_cif(cif_path, None)
    return graph


class RNA:
    """Expose the entries of an RNA dictionary as instance attributes.

    An instance is built from exactly one source: an in-memory dictionary
    (``rna_dict``), a PDB id (``pdbid``) or a file path (``path``).
    """

    def __init__(self, rna_dict: dict = None, pdbid: str = None, path: str = None, multigraph: bool = False):
        """Populate the instance from one of ``rna_dict``, ``pdbid`` or ``path``.

        :param rna_dict: dictionary whose entries become attributes
        :param pdbid: PDB id passed to ``rna_from_pdbid``
        :param path: file path passed to ``load_graph``
        :param multigraph: forwarded to ``load_graph`` when loading from ``path``
        :raises ValueError: if zero or more than one source is provided
        """
        self.multigraph = multigraph
        # Exactly one of the three sources must be provided.
        if sum([rna_dict is not None, pdbid is not None, path is not None]) != 1:
            raise ValueError("Only one of rna_dict, pdbid, or path must be provided")

        if rna_dict is not None:
            self.from_dict(rna_dict)
        elif pdbid is not None:
            self.from_pdbid(pdbid)
        else:
            # The check above guarantees that `path` is the remaining source.
            self.from_path(path)

    def from_dict(self, rna_dict: dict):
        """Copy every entry of ``rna_dict`` onto the instance.

        The value stored under ``"rna"`` must have a ``graph`` mapping; its
        entries are exposed as attributes as well.

        :param rna_dict: dictionary whose entries become attributes
        """
        # Keep the source dictionary so to_dict() and save() work whichever
        # way the instance was built.
        self.rna_dict = rna_dict
        for key, value in rna_dict.items():
            if key == "rna":
                # The loop used to ignore `attr`/`val` and re-assign the same
                # attribute, so graph metadata was never exposed and `rna`
                # was left unset when the metadata was empty.
                for attr, val in value.graph.items():
                    setattr(self, attr, val)
            setattr(self, key, value)

    def from_pdbid(self, pdbid: str):
        """Populate the instance from the result of ``rna_from_pdbid``.

        :param pdbid: PDB id to fetch
        """
        # `rna_from_pdbid` takes no `multigraph` argument; passing one raised
        # TypeError on every call, so the flag is not forwarded here.
        rna_dict = rna_from_pdbid(pdbid)
        self.from_dict(rna_dict)

    def to_dict(self):
        """Return the dictionary the instance was populated from."""
        return self.rna_dict

    def save(self, path: str):
        """Write the stored dictionary to ``path`` with ``dump_json``.

        :param path: destination file path
        """
        dump_json(path, self.rna_dict)

    def from_path(self, path: str):
        """Populate the instance from the object loaded from ``path``.

        :param path: file path passed to ``load_graph``
        """
        # NOTE(review): from_dict() needs a mapping with ``.items()``, while
        # rna_from_pdbid wraps the result of load_graph as {"rna": ...} --
        # confirm what load_graph returns for this call.
        self.from_dict(load_graph(path, multigraph=self.multigraph))