Coverage for src/python/ensembl/io/genomio/events/format.py: 0%

53 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-02-21 15:37 +0000

1# See the NOTICE file distributed with this work for additional information 

2# regarding copyright ownership. 

3# 

4# Licensed under the Apache License, Version 2.0 (the "License"); 

5# you may not use this file except in compliance with the License. 

6# You may obtain a copy of the License at 

7# 

8# http://www.apache.org/licenses/LICENSE-2.0 

9# 

10# Unless required by applicable law or agreed to in writing, software 

11# distributed under the License is distributed on an "AS IS" BASIS, 

12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

13# See the License for the specific language governing permissions and 

14# limitations under the License. 

15"""Module to map stable ids in a file, given a mapping.""" 

16 

17__all__ = ["IdsMapper", "load_list"] 

18 

19from os import PathLike 

20from pathlib import Path 

21import re 

22from typing import Dict, List 

23 

24import ensembl.io.genomio 

25from ensembl.io.genomio.events.load import EventCollection 

26from ensembl.utils.argparse import ArgumentParser 

27from ensembl.utils.logging import init_logging_with_args 

28 

29 

class IdsMapper:
    """Simple mapper object, to cleanly get a mapping dict.

    Attributes:
        map: Dict associating each `from_id` with its `to_id`, as loaded from the map file.
    """

    def __init__(self, map_file: PathLike) -> None:
        """Load the mapping stored in `map_file`.

        Args:
            map_file: Tab file path with 2 columns: from_id, to_id.
        """
        self.map = self._load_mapping(Path(map_file))

    def _load_mapping(self, map_file: Path) -> Dict[str, str]:
        """Return a mapping in a simple dict from a tab file with 2 columns: from_id, to_id.

        Empty lines are skipped and any column after the second one is ignored. If the same
        `from_id` appears several times, the last occurrence wins.

        Args:
            map_file: Tab file path.

        Raises:
            ValueError: If a non-empty line has fewer than 2 tab-separated columns.
        """
        mapping: Dict[str, str] = {}
        with map_file.open("r") as map_fh:
            for raw_line in map_fh:
                # Lines yielded by a file iterator keep their terminator: remove it so that it
                # does not end up inside the last column, and so that blank lines are detected.
                line = raw_line.rstrip("\r\n")
                if line == "":
                    continue
                items = line.split("\t")
                if len(items) < 2:
                    raise ValueError(f"Not 2 elements in {line}")
                (from_id, to_id) = items[0:2]
                mapping[from_id] = to_id

        return mapping

54 

55 

def load_list(list_file: PathLike) -> List[str]:
    """Return the unique, non-empty entries of a file, one entry per line.

    All whitespace is removed from every line (including whitespace inside the line), and lines
    that end up empty are skipped. Duplicated entries are returned only once, in the order in
    which they first appear in the file, so the result is reproducible from one run to the next.

    Args:
        list_file: Path to the text file to read.
    """
    empty_spaces = re.compile(r"\s+")
    # A dict preserves insertion order, which gives de-duplication with a deterministic order
    # (a set would return the entries in an arbitrary, run-dependent order).
    unique_items: Dict[str, None] = {}
    with Path(list_file).open("r") as list_fh:
        for line in list_fh:
            item = empty_spaces.sub("", line)
            if item:
                unique_items[item] = None

    return list(unique_items)

68 

69 

def main() -> None:
    """Command-line entry point.

    Parses the arguments, sets up logging, then builds an event collection from the extra deletes
    list and the gene diff file, remaps its IDs with the mapping file and writes the result out.
    """
    cli = ArgumentParser(description="Map stable IDs in a file and produce an events file.")
    cli.add_argument_src_path("--input_file", required=True, help="Input file from gene_diff")
    cli.add_argument_src_path(
        "--deletes_file",
        required=True,
        help="Deleted genes file (apart from the deletes from the gene diff)",
    )
    cli.add_argument_src_path(
        "--map_file",
        required=True,
        help="Mapping tab file with 2 columns: old_id, new_id",
    )
    cli.add_argument("--release_name", required=True, metavar="NAME", help="Release name for all events")
    cli.add_argument("--release_date", required=True, metavar="DATE", help="Release date for all events")
    cli.add_argument_dst_path("--output_file", required=True, help="Output formatted event file")
    cli.add_argument("--version", action="version", version=ensembl.io.genomio.__version__)
    cli.add_log_arguments()
    options = cli.parse_args()
    init_logging_with_args(options)

    # The same release name and date are attached to every event.
    release_name = options.release_name
    release_date = options.release_date

    collection = EventCollection()
    # Explicit deletes are registered first, then the events from the gene diff.
    collection.add_deletes(load_list(options.deletes_file), release_name, release_date)
    collection.load_events_from_gene_diff(options.input_file, release_name, release_date)
    # The mapping is applied once all events are loaded, just before writing.
    collection.remap_to_ids(IdsMapper(options.map_file).map)
    collection.write_events_to_file(options.output_file)


if __name__ == "__main__":
    main()