Source code for freqsap.dbsnp

"""Module for interacting with the dbSNP database API."""

from __future__ import annotations
import re
import requests
from freqsap.allele import Allele
from freqsap.interfaces import VariantFrequencyAPI
from freqsap.report import ReferenceSNPReport
from freqsap.study import Study
from freqsap.variation import Variation


class DBSNP(VariantFrequencyAPI):
    """Interface to interact with the dbSNP database to obtain frequency information for specific variants."""

    def __init__(self, timeout: int = 10) -> None:
        """Initialize the DBSNP API interface.

        Sets up connection parameters and data parsing requirements.

        Args:
            timeout (int): Timeout handed to ``requests.get`` for every download. Defaults to 10.
        """
        self._timeout = timeout
        # A usable download splits into two parts around the "#Frequency Data Table" marker.
        self._num_required_sections = 2
        # Columns read from each table row: source, population, group, size, ref allele, alt alleles.
        self._num_required_columns = 6

    def get(self, variation: Variation) -> ReferenceSNPReport | None:
        """Get the ReferenceSNPReport for the given single amino-acid polymorphism.

        Downloads the frequency file for ``variation`` and splits it at the
        ``#Frequency Data Table`` marker into a metadata part and a table part.
        The HTTP status code is not inspected; a response without the marker is
        treated as "not found". Exceptions raised by ``requests.get`` propagate
        to the caller.

        Args:
            variation (Variation): Variation for which to get the report.

        Returns:
            ReferenceSNPReport | None: Report if it is found on dbSNP, otherwise None.
            None is also returned when a table row has too few columns or
            carries an allele entry that is not of the form ``allele=frequency``.
        """
        freq_url = f"https://www.ncbi.nlm.nih.gov/snp/{variation}/download/frequency"
        response = requests.get(freq_url, headers={"Accept": "application/json"}, timeout=self._timeout)

        sections = [re.split(r"\n+", part.strip()) for part in re.split(r"#Frequency Data Table", response.text)]
        if len(sections) < self._num_required_sections:
            return None

        # The last line before the marker is dropped, exactly as before.
        metadata = self._parse_metadata(sections[0][:-1])

        # The first two lines after the marker (a separator line and the column
        # header, judging by how they are discarded) carry no study data.
        # Slicing instead of popping keeps a truncated table from raising
        # IndexError; it simply yields no studies.
        studies = self._parse_studies(sections[1][2:])
        if studies is None:
            return None

        return ReferenceSNPReport(variation, metadata, studies)

    @staticmethod
    def _parse_metadata(lines: list[str]) -> dict[str, str]:
        """Convert ``#key<TAB>value`` lines into a dictionary, skipping lines without a tab."""
        metadata: dict[str, str] = {}
        for line in lines:
            key, separator, value = line.strip("#").partition("\t")
            if not separator:
                # Separator or blank lines hold no key/value pair.
                continue
            metadata[key] = value
        return metadata

    def _parse_studies(self, rows: list[str]) -> list[Study] | None:
        """Build one Study per tab-separated table row; return None if any row is malformed."""
        studies: list[Study] = []
        for row in rows:
            tokens = row.split("\t")
            if len(tokens) < self._num_required_columns:
                return None
            # Columns beyond the first six are not used.
            source, population, group, size, ref, alts, *_ = tokens

            reference = self._parse_allele(ref)
            if reference is None:
                return None

            alternatives: list[Allele] = []
            for token in alts.split(","):
                alternative = self._parse_allele(token)
                if alternative is None:
                    return None
                alternatives.append(alternative)

            studies.append(Study(source, population, group, size, reference, alternatives))
        return studies

    @staticmethod
    def _parse_allele(token: str) -> Allele | None:
        """Parse an ``allele=frequency`` token; return None unless it contains exactly one ``=``."""
        try:
            nucleotide, frequency = token.split("=")
        except ValueError:
            return None
        return Allele(nucleotide, frequency)

    def available(self) -> bool:
        """Check whether the service is available.

        Returns:
            bool: True if the service is available, False otherwise.
            Currently always True; no request is made.
        """
        # Placeholder implementation
        return True