Coverage for src/python/ensembl/io/genomio/events/format.py: 0%
53 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-02-21 15:37 +0000
« prev ^ index » next coverage.py v7.6.12, created at 2025-02-21 15:37 +0000
1# See the NOTICE file distributed with this work for additional information
2# regarding copyright ownership.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15"""Module to map stable ids in a file, given a mapping."""
17__all__ = ["IdsMapper", "load_list"]
19from os import PathLike
20from pathlib import Path
21import re
22from typing import Dict, List
24import ensembl.io.genomio
25from ensembl.io.genomio.events.load import EventCollection
26from ensembl.utils.argparse import ArgumentParser
27from ensembl.utils.logging import init_logging_with_args
class IdsMapper:
    """Simple mapper object, to cleanly get a mapping dict.

    Attributes:
        map: Dict of `from_id` to `to_id`, loaded from the mapping file.
    """

    def __init__(self, map_file: PathLike) -> None:
        """Load the mapping from the given tab file.

        Args:
            map_file: Tab file path with 2 columns: from_id, to_id.
        """
        self.map = self._load_mapping(Path(map_file))

    def _load_mapping(self, map_file: Path) -> Dict[str, str]:
        """Return a mapping in a simple dict from a tab file with 2 columns: from_id, to_id.

        Blank lines are skipped and any column after the second one is ignored.

        Args:
            map_file: Tab file path.

        Raises:
            ValueError: If a non-blank line has fewer than 2 tab-separated columns.
        """
        mapping: Dict[str, str] = {}
        with map_file.open("r") as map_fh:
            for raw_line in map_fh:
                # Lines read from a file keep their terminator: remove it so that it does not
                # end up glued to the last column (i.e. to_id in a 2-column file).
                line = raw_line.rstrip("\r\n")
                if not line.strip():
                    continue
                items = line.split("\t")
                if len(items) < 2:
                    raise ValueError(f"Not 2 elements in {line}")
                from_id, to_id = items[:2]
                mapping[from_id] = to_id

        return mapping
def load_list(list_file: Path) -> List[str]:
    """Return the unique items listed in a file, one item per line.

    All whitespace is removed from each line, and lines that end up empty are skipped.
    Duplicated items are only kept once, and the items are returned in the order in which they
    first appear in the file.

    Args:
        list_file: Path to the file with one item per line.
    """
    empty_spaces = re.compile(r"\s+")
    # A dict keeps insertion order, so it removes duplicates without shuffling the items the way
    # a set would (set order of strings changes from one run to the next).
    items: Dict[str, None] = {}
    with Path(list_file).open("r") as list_fh:
        for line in list_fh:
            item = empty_spaces.sub("", line)
            if item:
                items[item] = None

    return list(items)
def main() -> None:
    """Command-line entry point.

    Parses the arguments, then fills an `EventCollection` with the deletes listed in the deletes
    file and the events from the gene diff file, remaps its IDs with the mapping file and writes
    the result to the output file.
    """
    cli = ArgumentParser(description="Map stable IDs in a file and produce an events file.")
    cli.add_argument_src_path("--input_file", required=True, help="Input file from gene_diff")
    cli.add_argument_src_path(
        "--deletes_file", required=True, help="Deleted genes file (apart from the deletes from the gene diff)"
    )
    cli.add_argument_src_path(
        "--map_file", required=True, help="Mapping tab file with 2 columns: old_id, new_id"
    )
    cli.add_argument("--release_name", required=True, metavar="NAME", help="Release name for all events")
    cli.add_argument("--release_date", required=True, metavar="DATE", help="Release date for all events")
    cli.add_argument_dst_path("--output_file", required=True, help="Output formatted event file")
    cli.add_argument("--version", action="version", version=ensembl.io.genomio.__version__)
    cli.add_log_arguments()
    options = cli.parse_args()
    init_logging_with_args(options)

    # The same release name and date are used for every event
    release_name = options.release_name
    release_date = options.release_date

    collection = EventCollection()
    # Deletes from the separate list are added first, then the events from the gene diff
    collection.add_deletes(load_list(options.deletes_file), release_name, release_date)
    collection.load_events_from_gene_diff(options.input_file, release_name, release_date)
    collection.remap_to_ids(IdsMapper(options.map_file).map)
    collection.write_events_to_file(options.output_file)


if __name__ == "__main__":
    main()