Spaces:
Runtime error
Runtime error
| # Copyright (c) Meta Platforms, Inc. and affiliates. | |
| # All rights reserved. | |
| # | |
| # This source code is licensed under the license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| import typing | |
| import zipfile | |
| from dataclasses import dataclass | |
| from functools import lru_cache | |
| from typing_extensions import Literal | |
# Capacity used for the module-level LRU cache of opened zip archives.
DEFAULT_SIZE = 32
# Mode strings allowed by the ``mode`` annotation of ``_open_zip``, which
# passes the value straight to ``zipfile.ZipFile``.
MODE = Literal['r', 'w', 'x', 'a']
class PathInZip:
    """Class for holding a path of file within a zip file.

    Args:
        path: The convention is <path_to_zip>:<relative_path_inside_zip>
            Let's assume there is a zip file /some/location/foo.zip
            and inside of it is a json file located at /data/file1.json,
            Then we expect path = "/some/location/foo.zip:/data/file1.json"

    Attributes are documented inline below. The combined path must contain
    exactly one separator; anything else fails the assertion in ``__init__``.
    """

    # Separator between the zip archive path and the member path inside it.
    INFO_PATH_SEP = ':'
    # Filesystem path of the zip archive (text before the separator).
    zip_path: str
    # Path of the member inside the archive (text after the separator).
    file_path: str

    def __init__(self, path: str) -> None:
        """Split ``path`` into its archive part and its member part.

        Raises:
            AssertionError: if ``path`` does not contain exactly one
                ``INFO_PATH_SEP``.
        """
        split_path = path.split(self.INFO_PATH_SEP)
        assert len(split_path) == 2, (
            f"expected '<zip_path>{self.INFO_PATH_SEP}<file_path>', got {path!r}"
        )
        self.zip_path, self.file_path = split_path

    @classmethod
    def from_paths(cls, zip_path: str, file_path: str) -> "PathInZip":
        """Alternate constructor taking the archive and member paths separately.

        The two parts are joined with ``INFO_PATH_SEP`` and passed through the
        regular constructor, so the same single-separator rule applies.
        """
        return cls(zip_path + cls.INFO_PATH_SEP + file_path)

    def __str__(self) -> str:
        """Return the combined ``<zip_path>:<file_path>`` representation."""
        return self.zip_path + self.INFO_PATH_SEP + self.file_path
def _open_zip(path: str, mode: MODE = 'r'):
    """Open the zip archive at ``path`` and return the ``ZipFile`` object.

    Args:
        path: location of the zip archive.
        mode: mode handed to ``zipfile.ZipFile`` (read by default).
    """
    archive = zipfile.ZipFile(path, mode)
    return archive
# LRU-cached variant of ``_open_zip``; starts with the default capacity and
# can be rebuilt with another capacity via ``set_zip_cache_size``.
_cached_open_zip = lru_cache(maxsize=DEFAULT_SIZE)(_open_zip)
def set_zip_cache_size(max_size: int):
    """Rebuild the zip-opening LRU cache with a new capacity.

    The module-level cached opener is replaced by a fresh ``lru_cache``
    wrapper around ``_open_zip``; the new cache starts out empty.

    Args:
        max_size: the maximal number of entries of the new LRU cache.
    """
    global _cached_open_zip
    resized_opener = lru_cache(maxsize=max_size)(_open_zip)
    _cached_open_zip = resized_opener
def open_file_in_zip(path_in_zip: PathInZip, mode: str = 'r') -> typing.IO:
    """Return a file-like object for a file stored inside a zip archive.

    The archive itself is obtained through the module-level LRU-cached
    opener, keyed by the archive path.

    Args:
        path_in_zip: A PathInZip object identifying the archive and the
            member inside it.
        mode: The mode in which to open the file.
            NOTE(review): this value is not forwarded to ``ZipFile.open``;
            the member is opened with that method's default mode. Confirm
            whether this is intended before relying on ``mode``.

    Returns:
        A file-like object for PathInZip.
    """
    archive = _cached_open_zip(path_in_zip.zip_path)
    member_name = path_in_zip.file_path
    return archive.open(member_name)