Source code for freqsap.__main__

"""Command-line interface for the freqsap package.

This module provides the main entry point for querying protein variants
and their population frequencies from various databases.
"""

from __future__ import annotations
import argparse
import csv
import sys
from pathlib import Path
from openpyxl import Workbook
from freqsap import __version__
from freqsap.accession import Accession
from freqsap.dbsnp import DBSNP
from freqsap.ebi import EBI
from freqsap.interfaces import ProteinVariantAPI
from freqsap.interfaces import VariantFrequencyAPI
from freqsap.report import PopulationFilter
from freqsap.report import ReferenceSNPReport


def parse_args() -> argparse.Namespace:
    """Parse command line arguments.

    Returns:
        argparse.Namespace: Parsed command line arguments containing:
            - accession: Protein accession identifier
            - regions: Comma-separated list of regions (still one string)
            - delimiter: Output delimiter with escape sequences resolved
              (default: tab); the value 'xlsx' selects Excel output
            - output_file: Name of the output file
            - timeout: Timeout parameter for the REST APIs (default: 30)

    Raises:
        SystemExit: Raised by argparse when a required argument is missing
            or invalid, when the delimiter contains a malformed escape
            sequence, or after printing the help/version text.
    """
    parser = argparse.ArgumentParser(
        description="Query protein variants and their frequencies from various databases.",
    )
    parser.add_argument(
        "-a",
        "--accession",
        type=str,
        required=True,
        help="Protein accession number.",
    )
    parser.add_argument(
        "-r",
        "--regions",
        type=str,
        required=True,
        help="Comma-separated list of regions.",
    )
    parser.add_argument(
        "-d",
        "--delimiter",
        type=str,
        default="\t",
        help="Delimiter for output file (default: tab). Use 'xlsx' to output Excel format. Supports escape sequences like \\t, \\n, etc.",
    )
    # Required: every writer needs a target path, so fail here rather than
    # after all remote queries have already been performed.
    parser.add_argument(
        "-o",
        "--output-file",
        type=str,
        required=True,
        help="Output file name.",
    )
    parser.add_argument(
        "-t",
        "--timeout",
        type=int,
        default=30,
        help="Timeout parameter for the REST APIs.",
    )
    parser.add_argument(
        "-v",
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
        help="Show CLI version and exit.",
    )

    args = parser.parse_args()

    # Resolve escape sequences such as "\\t" typed on the command line.
    # Encoding as latin-1 with backslashreplace keeps non-ASCII delimiters
    # intact; a plain UTF-8 round trip through unicode_escape would turn
    # them into mojibake.
    try:
        args.delimiter = args.delimiter.encode("latin-1", "backslashreplace").decode("unicode_escape")
    except UnicodeDecodeError:
        parser.error(f"Invalid escape sequence in delimiter: {args.delimiter!r}")
    return args
def write_reports(reports: list[ReferenceSNPReport], regions: list[str], output_path: str, delimiter: str) -> None:
    r"""Write all reports to the output file in delimited or Excel format.

    The output header starts with the first report's header; any column name
    that appears only in a later report is appended in first-seen order, so
    no report's columns are silently dropped.

    Args:
        reports: List of ReferenceSNPReport objects to write
        regions: List of region populations to report.
        output_path: Path to the output file
        delimiter: Character to use as field delimiter (e.g., '\t' or ',').
                   Use 'xlsx' (case-insensitive) for Excel format.

    Returns:
        None

    Raises:
        IOError: If the output file cannot be written
        IndexError: If reports list is empty
    """
    # Copy the first header so the report's own list is never mutated.
    header = list(reports[0].header())
    known = set(header)

    # Merge by column name. Comparing the lists with "<" would be a
    # lexicographic comparison, not a "has more columns" check.
    for report in reports[1:]:
        for column in report.header():
            if column not in known:
                known.add(column)
                header.append(column)

    if delimiter.lower() == "xlsx":
        _write_xlsx(reports, regions, output_path, header)
    else:
        _write_csv(reports, regions, output_path, header, delimiter)
def _write_csv(
    reports: list[ReferenceSNPReport],
    regions: list[str],
    output_path: str,
    header: list[str],
    delimiter: str,
) -> None:
    """Write reports to a delimited text file (CSV/TSV).

    A header row is written first, followed by the rows that
    ``PopulationFilter.apply(regions, report)`` yields for each report.
    Row keys that are not listed in ``header`` are ignored.

    Args:
        reports: List of ReferenceSNPReport objects to write
        regions: List of populations to report.
        output_path: Path to the output file
        header: List of column headers
        delimiter: Character to use as field delimiter

    Returns:
        None

    Raises:
        OSError: If the output file cannot be opened or written
    """
    # Build a real Path from the string instead of calling the unbound
    # Path.open on a str. newline="" hands line-ending control to the csv
    # module, which otherwise produces "\r\r\n" on Windows.
    with Path(output_path).open("w", newline="") as file:
        writer = csv.DictWriter(file, fieldnames=header, delimiter=delimiter, extrasaction="ignore")
        writer.writeheader()
        for report in reports:
            writer.writerows(PopulationFilter.apply(regions, report))
def _write_xlsx(reports: list[ReferenceSNPReport], regions: list[str], output_path: str, header: list[str]) -> None:
    """Write reports to an Excel workbook (XLSX format).

    The workbook's active sheet is titled "Variants". Its first row is the
    header; every following row is one record produced by
    ``PopulationFilter.apply(regions, report)``, laid out in header order.
    Columns missing from a record are written as empty strings.

    Args:
        reports: List of ReferenceSNPReport objects to write
        regions: List of region populations to report.
        output_path: Path to the output file
        header: List of column headers

    Returns:
        None
    """
    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "Variants"

    # First row: column names.
    sheet.append(header)

    # Remaining rows: one per filtered record, ordered to match the header.
    for report in reports:
        for record in PopulationFilter.apply(regions, report):
            sheet.append([record.get(column, "") for column in header])

    workbook.save(output_path)
def check_apis(protein_api: ProteinVariantAPI, frequency_api: VariantFrequencyAPI) -> None:
    """Check if the chosen APIs are available.

    The protein API is checked first; the frequency API is only queried when
    the protein API reports itself available. On failure a message naming
    the unavailable API is printed to stderr before exiting.

    Args:
        protein_api: Instance of the protein variant API
        frequency_api: Instance of the variant frequency API

    Returns:
        None

    Raises:
        SystemExit: With exit status 1 if either API is not available
    """
    checks = (
        ("Protein variant", protein_api),
        ("Variant frequency", frequency_api),
    )
    for label, api in checks:
        if not api.available():
            # Tell the user which service failed instead of exiting silently.
            print(f"Error: {label} API ({type(api).__name__}) is not available.", file=sys.stderr)
            sys.exit(1)
def main() -> None:
    """Main entry point for the freqsap application.

    This function orchestrates the entire workflow:
    1. Parses command line arguments
    2. Instantiates the protein (EBI) and frequency (DBSNP) APIs
    3. Validates API availability
    4. Queries protein variants using the accession
    5. Collects frequency reports for all variants, discarding falsy results
    6. Writes results to the specified output file

    Returns:
        None

    Raises:
        SystemExit: If APIs are unavailable, if no frequency report could be
            collected for the accession, or if argument parsing fails
    """
    args = parse_args()

    # Instantiate the APIs with the user-supplied timeout.
    protein_api = EBI(timeout=args.timeout)
    frequency_api = DBSNP(timeout=args.timeout)

    # Exit early when either remote service cannot be reached.
    check_apis(protein_api, frequency_api)

    # Query protein variants.
    protein = protein_api.get(Accession(args.accession))

    # Collect frequency reports for all variants; falsy results are skipped.
    fetched = [frequency_api.get(variation) for variation in protein.variations]
    reports: list[ReferenceSNPReport] = [report for report in fetched if report]

    # write_reports needs at least one report to derive the header, so stop
    # here with a clear message instead of an IndexError traceback.
    if not reports:
        print(f"Error: no frequency reports found for accession {args.accession}.", file=sys.stderr)
        sys.exit(1)

    # Write reports to output file.
    write_reports(reports, args.regions.split(","), args.output_file, args.delimiter)
# Run the CLI only when this module is executed as a script, not on import.
if __name__ == "__main__": main()