Coverage for src/python/ensembl/io/genomio/schemas/json/factory.py: 84%

39 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-02-21 15:37 +0000

1# See the NOTICE file distributed with this work for additional information 

2# regarding copyright ownership. 

3# 

4# Licensed under the Apache License, Version 2.0 (the "License"); 

5# you may not use this file except in compliance with the License. 

6# You may obtain a copy of the License at 

7# 

8# http://www.apache.org/licenses/LICENSE-2.0 

9# 

10# Unless required by applicable law or agreed to in writing, software 

11# distributed under the License is distributed on an "AS IS" BASIS, 

12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

13# See the License for the specific language governing permissions and 

14# limitations under the License. 

15"""Generates one JSON file per metadata type inside `manifest`, including the manifest itself.""" 

16 

17__all__ = ["schema_factory"] 

18 

19import json 

20from os import PathLike 

21from pathlib import Path 

22import shutil 

23from typing import List 

24 

25import ensembl.io.genomio 

26from ensembl.utils.argparse import ArgumentParser 

27from ensembl.utils.logging import init_logging_with_args 

28 

29 

def schema_factory(manifest_dir: PathLike, metadata_types: List[str], output_dir: PathLike) -> None:
    """Generates one JSON file per metadata type inside `manifest`, including "manifest.json" itself.

    Each JSON file will have the file name of the metadata type, e.g. "seq_region.json". When a
    manifest entry does not have a "file" key itself but groups files under sub-keys, one JSON file
    is generated per sub-key, named "<metadata type>_<sub-key>.json".

    Metadata types requested but not found in the manifest are silently skipped.

    Args:
        manifest_dir: Path to the folder with the manifest JSON file to check.
        metadata_types: Metadata types to extract from `manifest` as JSON files.
        output_dir: Path to the folder where to generate the JSON files.

    """
    manifest_path = Path(manifest_dir, "manifest.json")
    with manifest_path.open() as manifest_file:
        content = json.load(manifest_file)
    shutil.copyfile(manifest_path, Path(output_dir, "manifest.json"))

    # File names listed in the manifest are resolved against the manifest's own folder
    manifest_root = manifest_path.parent
    json_files = {}
    for name, entry in content.items():
        if "file" in entry:
            json_files[name] = manifest_root / entry["file"]
            continue
        for key, sub_entry in entry.items():
            if "file" in sub_entry:
                # Accumulate every sub-key: rebinding a new dict here would keep only the last one
                json_files.setdefault(name, {})[key] = manifest_root / sub_entry["file"]

    # Copy the file(s) of each requested metadata type found in the manifest
    for metadata_key in metadata_types:
        if metadata_key not in json_files:
            continue
        source = json_files[metadata_key]
        if isinstance(source, dict):
            for key, filepath in source.items():
                shutil.copyfile(filepath, Path(output_dir, f"{metadata_key}_{key}.json"))
        else:
            shutil.copyfile(source, Path(output_dir, f"{metadata_key}.json"))

64 

65 

def main() -> None:
    """Main script entry-point.

    Parses the command-line arguments, initialises logging from them and runs `schema_factory()`
    with the manifest folder, metadata types and output folder provided.
    """
    parser = ArgumentParser(
        description="Generates one JSON file per metadata type in the provided manifest, including itself."
    )
    parser.add_argument_src_path(
        "--manifest_dir", required=True, help="Folder containing the 'manifest.json' file to check"
    )
    parser.add_argument(
        "--metadata_types", required=True, nargs="+", metavar="TYPE", help="Metadata types to extract"
    )
    parser.add_argument_dst_path(
        "--output_dir", default=Path.cwd(), help="Folder to store the produced files"
    )
    parser.add_argument("--version", action="version", version=ensembl.io.genomio.__version__)
    parser.add_log_arguments()
    args = parser.parse_args()
    init_logging_with_args(args)

    # Pass only the arguments `schema_factory()` accepts: the parsed namespace may also hold the
    # logging options registered by `add_log_arguments()`, which `**vars(args)` would forward as
    # unexpected keyword arguments.
    schema_factory(args.manifest_dir, args.metadata_types, args.output_dir)