Coverage for src/python/ensembl/io/genomio/genbank/download.py: 76%
32 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-02-21 15:37 +0000
« prev ^ index » next coverage.py v7.6.12, created at 2025-02-21 15:37 +0000
1# See the NOTICE file distributed with this work for additional information
2# regarding copyright ownership.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15"""Download a Genbank file from NCBI from an accession."""
# Names exported as the public API of this module (used by `from ... import *`).
__all__ = ["DownloadError", "download_genbank"]
19import logging
20from os import PathLike
21from pathlib import Path
23import requests
25import ensembl.io.genomio
26from ensembl.utils.argparse import ArgumentParser
27from ensembl.utils.logging import init_logging_with_args
class DownloadError(Exception):
    """In case a download failed.

    Attributes:
        msg: Human-readable description of the failure.

    """

    def __init__(self, msg: str) -> None:
        # Forward the message to `Exception` so that `str(exc)` and `exc.args` always carry it,
        # including when the exception is built with a keyword argument (`DownloadError(msg=...)`).
        super().__init__(msg)
        self.msg = msg
def download_genbank(accession: str, output_file: PathLike) -> None:
    """Given a GenBank accession, download the corresponding file in GenBank format.

    Uses NCBI Entrez service to fetch the data.

    Args:
        accession: INSDC Genbank record accession.
        output_file: Path to the downloaded record in Genbank format.

    Raises:
        DownloadError: If the download fails, i.e. the request itself errors out (connection
            problem, timeout, ...) or the server response is not an HTTP 200.

    """

    # Query the Entrez efetch endpoint for this accession in the "nuccore" database
    entrez_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    entrez_params = {
        "db": "nuccore",
        "rettype": "gbwithparts",
        "retmode": "text",
        "id": accession,
    }
    logging.debug(f"Getting file from {entrez_url} with params {entrez_params}")
    try:
        result = requests.get(entrez_url, params=entrez_params, timeout=60)
    except requests.RequestException as err:
        # Transport-level failures are download failures too: report them with the documented
        # exception type, keeping the original error available as `__cause__`.
        raise DownloadError(f"Could not download the genbank ({accession}) file: {err}") from err

    if not result or result.status_code != 200:
        raise DownloadError(f"Could not download the genbank ({accession}) file: {result}")

    # Write the raw bytes as received, without any re-encoding
    with Path(output_file).open("wb") as gbff:
        gbff.write(result.content)
    logging.info(f"GenBank file written to {output_file}")
def main() -> None:
    """Parse the command-line arguments, set up logging and download the requested GenBank record."""
    cli = ArgumentParser(description="Download a sequence from GenBank.")
    # Keep the registration order: it drives the order of the options in the `--help` output
    cli.add_argument("--accession", required=True, help="Sequence accession")
    cli.add_argument_dst_path("--output_file", required=True, help="Output GenBank file")
    cli.add_argument("--version", action="version", version=ensembl.io.genomio.__version__)
    cli.add_log_arguments()

    options = cli.parse_args()
    init_logging_with_args(options)

    download_genbank(accession=options.accession, output_file=options.output_file)
# Run the command-line entry point only when executed as a script, not when imported.
if __name__ == "__main__":
    main()