Coverage for src/python/ensembl/io/genomio/external_db/db_map.py: 100%

20 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-02-21 15:37 +0000

1# See the NOTICE file distributed with this work for additional information 

2# regarding copyright ownership. 

3# 

4# Licensed under the Apache License, Version 2.0 (the "License"); 

5# you may not use this file except in compliance with the License. 

6# You may obtain a copy of the License at 

7# 

8# http://www.apache.org/licenses/LICENSE-2.0 

9# 

10# Unless required by applicable law or agreed to in writing, software 

11# distributed under the License is distributed on an "AS IS" BASIS, 

12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

13# See the License for the specific language governing permissions and 

14# limitations under the License. 

15"""Get a mapping for external db names.""" 

16 

# Public API of this module: the default map path, the parsing error and the map loader.
__all__ = [
    "DEFAULT_EXTERNAL_DB_MAP",
    "MapFormatError",
    "get_external_db_map",
]

22 

import atexit
from contextlib import ExitStack
from importlib.resources import as_file, files
from pathlib import Path

25 

# Provide the default map from a data file.
# `as_file()` may extract the resource to a temporary file (e.g. when the package is not installed on
# a regular filesystem) and removes that file when its context exits. The context is therefore kept
# open until interpreter shutdown so that DEFAULT_EXTERNAL_DB_MAP stays a valid path for as long as
# the module is in use.
default_map_res = files("ensembl.io.genomio.data.external_db_map").joinpath("default.txt")
_default_map_stack = ExitStack()
atexit.register(_default_map_stack.close)
default_map_path = _default_map_stack.enter_context(as_file(default_map_res))
DEFAULT_EXTERNAL_DB_MAP = default_map_path

30 

31 

class MapFormatError(ValueError):
    """Error when parsing the db map file.

    Subclasses `ValueError`, so callers catching `ValueError` also catch this error.
    """

34 

35 

def get_external_db_map(map_file: Path) -> dict[str, str]:
    """Get an external_db map from a tab file without header.

    Empty lines, comments (lines starting with #) and lines starting with a space are ignored.
    The first 2 columns are expected to be the main name, and the alternative name. Any other columns
    after that are ignored. If an alternative name appears more than once, the last occurrence wins.

    Args:
        map_file: Path to a file with external DB mapping.

    Returns:
        Dict with keys as alternate names, and values as standard name.

    Raises:
        MapFormatError: If a non-ignored line has fewer than 2 tab-separated columns.

    """
    db_map: dict[str, str] = {}
    # Decode as UTF-8 explicitly so that parsing does not depend on the platform locale encoding
    with map_file.open("r", encoding="utf-8") as map_fh:
        for line in map_fh:
            # Trailing whitespace (newline included) is dropped before the line is inspected
            line = line.rstrip()
            if not line or line.startswith(("#", " ")):
                continue
            parts = line.split("\t")
            if len(parts) < 2:
                raise MapFormatError(f"External db file is not formatted correctly for: {line}")
            main_name, alt_name = parts[:2]
            db_map[alt_name] = main_name
    return db_map