Coverage for src/python/ensembl/io/genomio/external_db/db_map.py: 100%
20 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-02-21 15:37 +0000
« prev ^ index » next coverage.py v7.6.12, created at 2025-02-21 15:37 +0000
1# See the NOTICE file distributed with this work for additional information
2# regarding copyright ownership.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15"""Get a mapping for external db names."""
# Names exported by `from <this module> import *`.
__all__ = [
    "DEFAULT_EXTERNAL_DB_MAP",
    "MapFormatError",
    "get_external_db_map",
]
23from importlib.resources import as_file, files
24from pathlib import Path
# Provide the default map from a data file.
# The resource "default.txt" is looked up inside the "ensembl.io.genomio.data.external_db_map" package.
default_map_res = files("ensembl.io.genomio.data.external_db_map").joinpath("default.txt")
# NOTE(review): DEFAULT_EXTERNAL_DB_MAP keeps the path after the `as_file` context has exited.
# If `as_file` had to materialise the resource as a temporary file (e.g. package not installed
# on a regular filesystem), that file may no longer exist afterwards — confirm this cannot
# happen for the supported installation modes.
with as_file(default_map_res) as default_map_path:
    DEFAULT_EXTERNAL_DB_MAP = default_map_path
class MapFormatError(ValueError):
    """Raised when a line of the external db map file cannot be parsed."""
def get_external_db_map(map_file: Path) -> dict[str, str]:
    """Load an external_db name mapping from a header-less tab-separated file.

    Blank lines, comment lines (starting with #) and lines starting with a space are skipped.
    Every other line must hold at least 2 tab-separated columns: the main name first, then the
    alternative name. Any further columns are ignored.

    Args:
        map_file: Path to a file with external DB mapping.

    Returns:
        Dict with the alternative names as keys, and the matching main name as values.

    Raises:
        MapFormatError: If a parsed line has fewer than 2 tab-separated columns.

    """
    mapping: dict[str, str] = {}
    with map_file.open("r") as handle:
        for raw_line in handle:
            entry = raw_line.rstrip()
            # Nothing to parse on blank lines, comments, or lines led by a space
            if not entry or entry[0] in ("#", " "):
                continue
            columns = entry.split("\t")
            if len(columns) < 2:
                raise MapFormatError(f"External db file is not formatted correctly for: {entry}")
            main_name, alt_name = columns[:2]
            mapping[alt_name] = main_name
    return mapping