Add files using upload-large-folder tool
This view is limited to 50 files because it contains too many changes.
- .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/INSTALLER +1 -0
- .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/LICENSE +29 -0
- .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/METADATA +167 -0
- .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/RECORD +56 -0
- .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/REQUESTED +0 -0
- .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/WHEEL +5 -0
- .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/top_level.txt +1 -0
- .venv/Lib/site-packages/fsspec/implementations/__init__.py +0 -0
- .venv/Lib/site-packages/fsspec/implementations/jupyter.py +124 -0
- .venv/Lib/site-packages/fsspec/implementations/libarchive.py +213 -0
- .venv/Lib/site-packages/fsspec/implementations/local.py +418 -0
- .venv/Lib/site-packages/fsspec/implementations/memory.py +292 -0
- .venv/Lib/site-packages/fsspec/implementations/reference.py +1160 -0
- .venv/Lib/site-packages/fsspec/implementations/sftp.py +180 -0
- .venv/Lib/site-packages/fsspec/implementations/smb.py +324 -0
- .venv/Lib/site-packages/fsspec/implementations/tar.py +124 -0
- .venv/Lib/site-packages/fsspec/implementations/webhdfs.py +486 -0
- .venv/Lib/site-packages/fsspec/implementations/zip.py +133 -0
- .venv/Lib/site-packages/fsspec/tests/abstract/__init__.py +287 -0
- .venv/Lib/site-packages/fsspec/tests/abstract/common.py +175 -0
- .venv/Lib/site-packages/fsspec/tests/abstract/copy.py +557 -0
- .venv/Lib/site-packages/fsspec/tests/abstract/get.py +587 -0
- .venv/Lib/site-packages/fsspec/tests/abstract/put.py +591 -0
- .venv/Lib/site-packages/fugashi-1.4.0.dist-info/INSTALLER +1 -0
- .venv/Lib/site-packages/fugashi-1.4.0.dist-info/LICENSE +21 -0
- .venv/Lib/site-packages/fugashi-1.4.0.dist-info/LICENSE.mecab +29 -0
- .venv/Lib/site-packages/fugashi-1.4.0.dist-info/METADATA +157 -0
- .venv/Lib/site-packages/fugashi-1.4.0.dist-info/RECORD +16 -0
- .venv/Lib/site-packages/fugashi-1.4.0.dist-info/REQUESTED +0 -0
- .venv/Lib/site-packages/fugashi-1.4.0.dist-info/WHEEL +5 -0
- .venv/Lib/site-packages/fugashi-1.4.0.dist-info/entry_points.txt +4 -0
- .venv/Lib/site-packages/fugashi-1.4.0.dist-info/top_level.txt +1 -0
- .venv/Lib/site-packages/fugashi/__init__.py +2 -0
- .venv/Lib/site-packages/fugashi/__pycache__/__init__.cpython-39.pyc +0 -0
- .venv/Lib/site-packages/fugashi/cli.py +47 -0
- .venv/Lib/site-packages/fugashi/fugashi.cp39-win_amd64.pyd +0 -0
- .venv/Lib/site-packages/functorch/_C.cp39-win_amd64.pyd +0 -0
- .venv/Lib/site-packages/functorch/__init__.py +39 -0
- .venv/Lib/site-packages/functorch/_src/make_functional/__init__.py +4 -0
- .venv/Lib/site-packages/functorch/_src/vmap/__init__.py +16 -0
- .venv/Lib/site-packages/functorch/compile/__init__.py +30 -0
- .venv/Lib/site-packages/functorch/dim/batch_tensor.py +26 -0
- .venv/Lib/site-packages/functorch/dim/delayed_mul_tensor.py +77 -0
- .venv/Lib/site-packages/functorch/dim/dim.py +121 -0
- .venv/Lib/site-packages/functorch/dim/magic_trace.py +42 -0
- .venv/Lib/site-packages/functorch/dim/op_properties.py +312 -0
- .venv/Lib/site-packages/functorch/dim/reference.py +645 -0
- .venv/Lib/site-packages/functorch/dim/tree_map.py +15 -0
- .venv/Lib/site-packages/functorch/dim/wrap_type.py +72 -0
- .venv/Lib/site-packages/huggingface_hub/__init__.py +1002 -0
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
+uv
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/LICENSE
ADDED
@@ -0,0 +1,29 @@
+BSD 3-Clause License
+
+Copyright (c) 2018, Martin Durant
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/METADATA
ADDED
@@ -0,0 +1,167 @@
+Metadata-Version: 2.1
+Name: fsspec
+Version: 2024.2.0
+Summary: File-system specification
+Home-page: https://github.com/fsspec/filesystem_spec
+Maintainer: Martin Durant
+Maintainer-email: [email protected]
+License: BSD
+Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
+Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
+Keywords: file
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Provides-Extra: abfs
+Requires-Dist: adlfs ; extra == 'abfs'
+Provides-Extra: adl
+Requires-Dist: adlfs ; extra == 'adl'
+Provides-Extra: arrow
+Requires-Dist: pyarrow >=1 ; extra == 'arrow'
+Provides-Extra: dask
+Requires-Dist: dask ; extra == 'dask'
+Requires-Dist: distributed ; extra == 'dask'
+Provides-Extra: devel
+Requires-Dist: pytest ; extra == 'devel'
+Requires-Dist: pytest-cov ; extra == 'devel'
+Provides-Extra: dropbox
+Requires-Dist: dropboxdrivefs ; extra == 'dropbox'
+Requires-Dist: requests ; extra == 'dropbox'
+Requires-Dist: dropbox ; extra == 'dropbox'
+Provides-Extra: entrypoints
+Provides-Extra: full
+Requires-Dist: adlfs ; extra == 'full'
+Requires-Dist: aiohttp !=4.0.0a0,!=4.0.0a1 ; extra == 'full'
+Requires-Dist: dask ; extra == 'full'
+Requires-Dist: distributed ; extra == 'full'
+Requires-Dist: dropbox ; extra == 'full'
+Requires-Dist: dropboxdrivefs ; extra == 'full'
+Requires-Dist: fusepy ; extra == 'full'
+Requires-Dist: gcsfs ; extra == 'full'
+Requires-Dist: libarchive-c ; extra == 'full'
+Requires-Dist: ocifs ; extra == 'full'
+Requires-Dist: panel ; extra == 'full'
+Requires-Dist: paramiko ; extra == 'full'
+Requires-Dist: pyarrow >=1 ; extra == 'full'
+Requires-Dist: pygit2 ; extra == 'full'
+Requires-Dist: requests ; extra == 'full'
+Requires-Dist: s3fs ; extra == 'full'
+Requires-Dist: smbprotocol ; extra == 'full'
+Requires-Dist: tqdm ; extra == 'full'
+Provides-Extra: fuse
+Requires-Dist: fusepy ; extra == 'fuse'
+Provides-Extra: gcs
+Requires-Dist: gcsfs ; extra == 'gcs'
+Provides-Extra: git
+Requires-Dist: pygit2 ; extra == 'git'
+Provides-Extra: github
+Requires-Dist: requests ; extra == 'github'
+Provides-Extra: gs
+Requires-Dist: gcsfs ; extra == 'gs'
+Provides-Extra: gui
+Requires-Dist: panel ; extra == 'gui'
+Provides-Extra: hdfs
+Requires-Dist: pyarrow >=1 ; extra == 'hdfs'
+Provides-Extra: http
+Requires-Dist: aiohttp !=4.0.0a0,!=4.0.0a1 ; extra == 'http'
+Provides-Extra: libarchive
+Requires-Dist: libarchive-c ; extra == 'libarchive'
+Provides-Extra: oci
+Requires-Dist: ocifs ; extra == 'oci'
+Provides-Extra: s3
+Requires-Dist: s3fs ; extra == 's3'
+Provides-Extra: sftp
+Requires-Dist: paramiko ; extra == 'sftp'
+Provides-Extra: smb
+Requires-Dist: smbprotocol ; extra == 'smb'
+Provides-Extra: ssh
+Requires-Dist: paramiko ; extra == 'ssh'
+Provides-Extra: tqdm
+Requires-Dist: tqdm ; extra == 'tqdm'
+
+# filesystem_spec
+
+[](https://pypi.python.org/pypi/fsspec/)
+[](https://anaconda.org/conda-forge/fsspec)
+
+[](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
+[](https://pepy.tech/project/fsspec)
+
+A specification for pythonic filesystems.
+
+## Install
+
+```bash
+pip install fsspec
+```
+
+would install the base fsspec. Various optionally supported features might require specification of custom
+extra require, e.g. `pip install fsspec[ssh]` will install dependencies for `ssh` backends support.
+Use `pip install fsspec[full]` for installation of all known extra dependencies.
+
+Up-to-date package also provided through conda-forge distribution:
+
+```bash
+conda install -c conda-forge fsspec
+```
+
+
+## Purpose
+
+To produce a template or specification for a file-system interface, that specific implementations should follow,
+so that applications making use of them can rely on a common behaviour and not have to worry about the specific
+internal implementation decisions with any given backend. Many such implementations are included in this package,
+or in sister projects such as `s3fs` and `gcsfs`.
+
+In addition, if this is well-designed, then additional functionality, such as a key-value store or FUSE
+mounting of the file-system implementation may be available for all implementations "for free".
+
+## Documentation
+
+Please refer to [RTD](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
+
+## Develop
+
+fsspec uses GitHub Actions for CI. Environment files can be found
+in the "ci/" directory. Note that the main environment is called "py38",
+but it is expected that the version of python installed be adjustable at
+CI runtime. For local use, pick a version suitable for you.
+
+### Testing
+
+Tests can be run in the dev environment, if activated, via ``pytest fsspec``.
+
+The full fsspec suite requires a system-level docker, docker-compose, and fuse
+installation. If only making changes to one backend implementation, it is
+not generally necessary to run all tests locally.
+
+It is expected that contributors ensure that any change to fsspec does not
+cause issues or regressions for either other fsspec-related packages such
+as gcsfs and s3fs, nor for downstream users of fsspec. The "downstream" CI
+run and corresponding environment file run a set of tests from the dask
+test suite, and very minimal tests against pandas and zarr from the
+test_downstream.py module in this repo.
+
+### Code Formatting
+
+fsspec uses [Black](https://black.readthedocs.io/en/stable) to ensure
+a consistent code format throughout the project.
+Run ``black fsspec`` from the root of the filesystem_spec repository to
+auto-format your code. Additionally, many editors have plugins that will apply
+``black`` as you edit files. ``black`` is included in the ``tox`` environments.
+
+Optionally, you may wish to setup [pre-commit hooks](https://pre-commit.com) to
+automatically run ``black`` when you make a git commit.
+Run ``pre-commit install --install-hooks`` from the root of the
+filesystem_spec repository to setup pre-commit hooks. ``black`` will now be run
+before you commit, reformatting any changed files. You can format without
+committing via ``pre-commit run`` or skip these checks with ``git commit
+--no-verify``.
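The fsspec README reproduced in the metadata above describes a single pythonic interface over many storage backends. As a minimal, illustrative sketch of that interface (the file path below is a placeholder, not something added by this commit):

```python
import fsspec

# write and read a local file through the generic fsspec interface;
# swapping the URL scheme (e.g. "memory://", "s3://") selects another backend
with fsspec.open("file:///tmp/example.txt", "wt") as f:
    f.write("hello fsspec")

fs = fsspec.filesystem("file")          # a LocalFileSystem instance
print(fs.cat_file("/tmp/example.txt"))  # b'hello fsspec'
```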
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/RECORD
ADDED
@@ -0,0 +1,56 @@
+fsspec-2024.2.0.dist-info/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
+fsspec-2024.2.0.dist-info/METADATA,sha256=uwzW1Braxnd_QGVI8W6J0KHi5KTiTJEm8YzSUdG-_Dc,6786
+fsspec-2024.2.0.dist-info/RECORD,,
+fsspec-2024.2.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+fsspec-2024.2.0.dist-info/top_level.txt,sha256=blt2pDrQDwN3Gklcw13CSPLQRd6aaOgJ8AxqrW395MI,7
+fsspec-2024.2.0.dist-info\INSTALLER,sha256=5hhM4Q4mYTT9z6QB6PGpUAW81PGNFrYrdXMj4oM_6ak,2
+fsspec-2024.2.0.dist-info\REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+fsspec/__init__.py,sha256=2kT62GfFK-AjgS-LgwSsCo_VA2IePvsyv8Ash5oiaFA,1982
+fsspec/_version.py,sha256=onTKKWe4fXkBjQxbTwM82SUT0H3x4U17IYrciFAryaU,500
+fsspec/archive.py,sha256=S__DzfZj-urAN3tp2W6jJ6YDiXG1fAl7FjvWUN73qIE,2386
+fsspec/asyn.py,sha256=kJ45sFFya2lZsmu2v8CVc8ZPRs8AccEzAy6Jot2ylkU,36157
+fsspec/caching.py,sha256=N45pzJdD4w5FOX_sxGvHWirggPNB66JTGP1HH6fpSck,28781
+fsspec/callbacks.py,sha256=BDIwLzK6rr_0V5ch557fSzsivCElpdqhXr5dZ9Te-EE,9210
+fsspec/compression.py,sha256=Yyd8FXw2rwWRtVoRVah_yguv-J7BUcBo4yDu6Qt52a0,4859
+fsspec/config.py,sha256=LF4Zmu1vhJW7Je9Q-cwkRc3xP7Rhyy7Xnwj26Z6sv2g,4279
+fsspec/conftest.py,sha256=fVfx-NLrH_OZS1TIpYNoPzM7efEcMoL62reHOdYeFCA,1245
+fsspec/core.py,sha256=0yCj1Z5MhbSDIQiqFs49VORl9QaGwV6hp9bXdkIoPIo,22363
+fsspec/dircache.py,sha256=YzogWJrhEastHU7vWz-cJiJ7sdtLXFXhEpInGKd4EcM,2717
+fsspec/exceptions.py,sha256=xcS7LiRrQ748kvOB9mrUR14kpjNztrHgEkZWi9M-VaI,330
+fsspec/fuse.py,sha256=66amOa6wdIbS0DMhhfAPUoOB37HPorfXD1izV0prmTY,10145
+fsspec/generic.py,sha256=NuNaP66OaphwMbuLHRFBLda78TD81isa9O4ozJqbUv0,13455
+fsspec/gui.py,sha256=XKoXZpUhRE7jOhRCJH4-jRbKhVu56aS8h9tecvPD3nc,13932
+fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+fsspec/implementations/arrow.py,sha256=_7TLuV6ZzNlpmUU_v6ud56u2wadzsKmY5qugPBxgMEs,8649
+fsspec/implementations/cache_mapper.py,sha256=iHgBA6gjzDJ7_mBboHFzpLTf55HP3UEwUOZ43xyUK4M,2429
+fsspec/implementations/cache_metadata.py,sha256=ZvyA7Y3KK-5Ct4E5pELzD6mH_5T03XqaKVT96qYDADU,8576
+fsspec/implementations/cached.py,sha256=LbbPbeUup07O0y7gXD_atFgajWM9p1vlDKu_BOyLfbo,30943
+fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
+fsspec/implementations/data.py,sha256=Oti0dKzyeadnVIedo3s8CADoh9bNM-96_6viTEYr4lo,1245
+fsspec/implementations/dbfs.py,sha256=cix9OYUveuSOx5UO5uRUwNUkYqjzyY0fkKnca1kTgZ0,15014
+fsspec/implementations/dirfs.py,sha256=inDIRSDPhI1_ud1MMBFrpZQ11VIAMJ_dZQtbE4V08Ng,11384
+fsspec/implementations/ftp.py,sha256=rp6cTog8xqjDPlKdSLKcsyP7K593_ByMabxGbNSEpTo,11655
+fsspec/implementations/git.py,sha256=vKGI-Vd5q4H2RrvhebkPc9NwlfkZ980OUGhebeCw-M0,4034
+fsspec/implementations/github.py,sha256=0kIiKkeAaROuHgdWBHVQFrzJ2ZfoDgymCehL_kJXHYA,7565
+fsspec/implementations/http.py,sha256=PkhfgUV3-T7fG2Jf-NLX9doH52snV5Wmw91uVA9k74M,29454
+fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ibN4Pg,3811
+fsspec/implementations/libarchive.py,sha256=5_I2DiLXwQ1JC8x-K7jXu-tBwhO9dj7tFLnb0bTnVMQ,7102
+fsspec/implementations/local.py,sha256=nxiRKg9FAQHTQss9-ET8ZzDXPGhSOktgkxrg0ffMs2I,13454
+fsspec/implementations/memory.py,sha256=2iU--pOV2KCTrS-d5K8VKSygh9MPk2D7NZ_C8lMMEIw,9701
+fsspec/implementations/reference.py,sha256=0iGu8mscaQ3a5iTlRNByytQ3_-1Bj8__ARqVwyy4q2M,43871
+fsspec/implementations/sftp.py,sha256=fMY9XZcmpjszQ2tCqO_TPaJesaeD_Dv7ptYzgUPGoO0,5631
+fsspec/implementations/smb.py,sha256=k3RtzW97lJtYuw_QpP1rJRFnUBmSsw9twFjUCex0a5U,10591
+fsspec/implementations/tar.py,sha256=dam78Tp_CozybNqCY2JYgGBS3Uc9FuJUAT9oB0lolOs,4111
+fsspec/implementations/webhdfs.py,sha256=wqVfno7z0TY1HepaIvKTUUcl_bi5NkV6qWsST8t_s7Y,16745
+fsspec/implementations/zip.py,sha256=JDX-3HOI15qUl6VTBsNPuDp5RVN6s2n3Bywd4mMu0T0,4347
+fsspec/mapping.py,sha256=WFEXRWxujQwfzzkRP5tpdIE0265okAtlP97qFZGvV1k,8165
+fsspec/parquet.py,sha256=qVxDhwc960SGOt5etcYAJxCr-7HQKP01687KpDR02Gw,19463
+fsspec/registry.py,sha256=-dl7sh2tsfhMA2uxz5KQDsPFehQTgMJIbVjNq6QLoKU,11145
+fsspec/spec.py,sha256=3t96RgizRN_slIuHXnuR0bXjVUfBS1TfuDrEua4oQvE,66277
+fsspec/tests/abstract/__init__.py,sha256=i1wcFixV6QhOwdoB24c8oXjzobISNqiKVz9kl2DvAY8,10028
+fsspec/tests/abstract/common.py,sha256=1GQwNo5AONzAnzZj0fWgn8NJPLXALehbsuGxS3FzWVU,4973
+fsspec/tests/abstract/copy.py,sha256=gU5-d97U3RSde35Vp4RxPY4rWwL744HiSrJ8IBOp9-8,19967
+fsspec/tests/abstract/get.py,sha256=vNR4HztvTR7Cj56AMo7_tx7TeYz1Jgr_2Wb8Lv-UiBY,20755
+fsspec/tests/abstract/put.py,sha256=7aih17OKB_IZZh1Mkq1eBDIjobhtMQmI8x-Pw-S_aZk,21201
+fsspec/transaction.py,sha256=jeexB-H6Aw_gN6Z7hoKKe6v8zizITq39-gyTgpipIKE,2251
+fsspec/utils.py,sha256=_VX_0VwDtoAFSjMYrxvJvnPNX9FMoHO5BlFHXJ0bHFI,23053
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/REQUESTED
ADDED
File without changes
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/WHEEL
ADDED
@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.42.0)
+Root-Is-Purelib: true
+Tag: py3-none-any
+
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+fsspec
.venv/Lib/site-packages/fsspec/implementations/__init__.py
ADDED
File without changes
.venv/Lib/site-packages/fsspec/implementations/jupyter.py
ADDED
@@ -0,0 +1,124 @@
+import base64
+import io
+import re
+
+import requests
+
+import fsspec
+
+
+class JupyterFileSystem(fsspec.AbstractFileSystem):
+    """View of the files as seen by a Jupyter server (notebook or lab)"""
+
+    protocol = ("jupyter", "jlab")
+
+    def __init__(self, url, tok=None, **kwargs):
+        """
+
+        Parameters
+        ----------
+        url : str
+            Base URL of the server, like "http://127.0.0.1:8888". May include
+            token in the string, which is given by the process when starting up
+        tok : str
+            If the token is obtained separately, can be given here
+        kwargs
+        """
+        if "?" in url:
+            if tok is None:
+                try:
+                    tok = re.findall("token=([a-z0-9]+)", url)[0]
+                except IndexError as e:
+                    raise ValueError("Could not determine token") from e
+            url = url.split("?", 1)[0]
+        self.url = url.rstrip("/") + "/api/contents"
+        self.session = requests.Session()
+        if tok:
+            self.session.headers["Authorization"] = f"token {tok}"
+
+        super().__init__(**kwargs)
+
+    def ls(self, path, detail=True, **kwargs):
+        path = self._strip_protocol(path)
+        r = self.session.get(f"{self.url}/{path}")
+        if r.status_code == 404:
+            return FileNotFoundError(path)
+        r.raise_for_status()
+        out = r.json()
+
+        if out["type"] == "directory":
+            out = out["content"]
+        else:
+            out = [out]
+        for o in out:
+            o["name"] = o.pop("path")
+            o.pop("content")
+            if o["type"] == "notebook":
+                o["type"] = "file"
+        if detail:
+            return out
+        return [o["name"] for o in out]
+
+    def cat_file(self, path, start=None, end=None, **kwargs):
+        path = self._strip_protocol(path)
+        r = self.session.get(f"{self.url}/{path}")
+        if r.status_code == 404:
+            return FileNotFoundError(path)
+        r.raise_for_status()
+        out = r.json()
+        if out["format"] == "text":
+            # data should be binary
+            b = out["content"].encode()
+        else:
+            b = base64.b64decode(out["content"])
+        return b[start:end]
+
+    def pipe_file(self, path, value, **_):
+        path = self._strip_protocol(path)
+        json = {
+            "name": path.rsplit("/", 1)[-1],
+            "path": path,
+            "size": len(value),
+            "content": base64.b64encode(value).decode(),
+            "format": "base64",
+            "type": "file",
+        }
+        self.session.put(f"{self.url}/{path}", json=json)
+
+    def mkdir(self, path, create_parents=True, **kwargs):
+        path = self._strip_protocol(path)
+        if create_parents and "/" in path:
+            self.mkdir(path.rsplit("/", 1)[0], True)
+        json = {
+            "name": path.rsplit("/", 1)[-1],
+            "path": path,
+            "size": None,
+            "content": None,
+            "type": "directory",
+        }
+        self.session.put(f"{self.url}/{path}", json=json)
+
+    def _rm(self, path):
+        path = self._strip_protocol(path)
+        self.session.delete(f"{self.url}/{path}")
+
+    def _open(self, path, mode="rb", **kwargs):
+        path = self._strip_protocol(path)
+        if mode == "rb":
+            data = self.cat_file(path)
+            return io.BytesIO(data)
+        else:
+            return SimpleFileWriter(self, path, mode="wb")
+
+
+class SimpleFileWriter(fsspec.spec.AbstractBufferedFile):
+    def _upload_chunk(self, final=False):
+        """Never uploads a chunk until file is done
+
+        Not suitable for large files
+        """
+        if final is False:
+            return False
+        self.buffer.seek(0)
+        data = self.buffer.read()
+        self.fs.pipe_file(self.path, data)
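The JupyterFileSystem added above is a thin wrapper around a Jupyter server's /api/contents REST endpoints. A hedged usage sketch, where the server URL, token, and notebook path are placeholder values:

```python
import fsspec

# placeholder URL/token for illustration; a real Jupyter server prints these at startup
fs = fsspec.filesystem("jupyter", url="http://127.0.0.1:8888", tok="abc123")
print(fs.ls("", detail=False))              # names of top-level files/directories
data = fs.cat_file("notebooks/demo.ipynb")  # raw bytes of one file
```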
.venv/Lib/site-packages/fsspec/implementations/libarchive.py
ADDED
@@ -0,0 +1,213 @@
+from contextlib import contextmanager
+from ctypes import (
+    CFUNCTYPE,
+    POINTER,
+    c_int,
+    c_longlong,
+    c_void_p,
+    cast,
+    create_string_buffer,
+)
+
+import libarchive
+import libarchive.ffi as ffi
+
+from fsspec import open_files
+from fsspec.archive import AbstractArchiveFileSystem
+from fsspec.implementations.memory import MemoryFile
+from fsspec.utils import DEFAULT_BLOCK_SIZE
+
+# Libarchive requires seekable files or memory only for certain archive
+# types. However, since we read the directory first to cache the contents
+# and also allow random access to any file, the file-like object needs
+# to be seekable no matter what.
+
+# Seek call-backs (not provided in the libarchive python wrapper)
+SEEK_CALLBACK = CFUNCTYPE(c_longlong, c_int, c_void_p, c_longlong, c_int)
+read_set_seek_callback = ffi.ffi(
+    "read_set_seek_callback", [ffi.c_archive_p, SEEK_CALLBACK], c_int, ffi.check_int
+)
+new_api = hasattr(ffi, "NO_OPEN_CB")
+
+
+@contextmanager
+def custom_reader(file, format_name="all", filter_name="all", block_size=ffi.page_size):
+    """Read an archive from a seekable file-like object.
+
+    The `file` object must support the standard `readinto` and 'seek' methods.
+    """
+    buf = create_string_buffer(block_size)
+    buf_p = cast(buf, c_void_p)
+
+    def read_func(archive_p, context, ptrptr):
+        # readinto the buffer, returns number of bytes read
+        length = file.readinto(buf)
+        # write the address of the buffer into the pointer
+        ptrptr = cast(ptrptr, POINTER(c_void_p))
+        ptrptr[0] = buf_p
+        # tell libarchive how much data was written into the buffer
+        return length
+
+    def seek_func(archive_p, context, offset, whence):
+        file.seek(offset, whence)
+        # tell libarchvie the current position
+        return file.tell()
+
+    read_cb = ffi.READ_CALLBACK(read_func)
+    seek_cb = SEEK_CALLBACK(seek_func)
+
+    if new_api:
+        open_cb = ffi.NO_OPEN_CB
+        close_cb = ffi.NO_CLOSE_CB
+    else:
+        open_cb = libarchive.read.OPEN_CALLBACK(ffi.VOID_CB)
+        close_cb = libarchive.read.CLOSE_CALLBACK(ffi.VOID_CB)
+
+    with libarchive.read.new_archive_read(format_name, filter_name) as archive_p:
+        read_set_seek_callback(archive_p, seek_cb)
+        ffi.read_open(archive_p, None, open_cb, read_cb, close_cb)
+        yield libarchive.read.ArchiveRead(archive_p)
+
+
+class LibArchiveFileSystem(AbstractArchiveFileSystem):
+    """Compressed archives as a file-system (read-only)
+
+    Supports the following formats:
+    tar, pax , cpio, ISO9660, zip, mtree, shar, ar, raw, xar, lha/lzh, rar
+    Microsoft CAB, 7-Zip, WARC
+
+    See the libarchive documentation for further restrictions.
+    https://www.libarchive.org/
+
+    Keeps file object open while instance lives. It only works in seekable
+    file-like objects. In case the filesystem does not support this kind of
+    file object, it is recommended to cache locally.
+
+    This class is pickleable, but not necessarily thread-safe (depends on the
+    platform). See libarchive documentation for details.
+    """
+
+    root_marker = ""
+    protocol = "libarchive"
+    cachable = False
+
+    def __init__(
+        self,
+        fo="",
+        mode="r",
+        target_protocol=None,
+        target_options=None,
+        block_size=DEFAULT_BLOCK_SIZE,
+        **kwargs,
+    ):
+        """
+        Parameters
+        ----------
+        fo: str or file-like
+            Contains ZIP, and must exist. If a str, will fetch file using
+            :meth:`~fsspec.open_files`, which must return one file exactly.
+        mode: str
+            Currently, only 'r' accepted
+        target_protocol: str (optional)
+            If ``fo`` is a string, this value can be used to override the
+            FS protocol inferred from a URL
+        target_options: dict (optional)
+            Kwargs passed when instantiating the target FS, if ``fo`` is
+            a string.
+        """
+        super().__init__(self, **kwargs)
+        if mode != "r":
+            raise ValueError("Only read from archive files accepted")
+        if isinstance(fo, str):
+            files = open_files(fo, protocol=target_protocol, **(target_options or {}))
+            if len(files) != 1:
+                raise ValueError(
+                    f'Path "{fo}" did not resolve to exactly one file: "{files}"'
+                )
+            fo = files[0]
+        self.of = fo
+        self.fo = fo.__enter__()  # the whole instance is a context
+        self.block_size = block_size
+        self.dir_cache = None
+
+    @contextmanager
+    def _open_archive(self):
+        self.fo.seek(0)
+        with custom_reader(self.fo, block_size=self.block_size) as arc:
+            yield arc
+
+    @classmethod
+    def _strip_protocol(cls, path):
+        # file paths are always relative to the archive root
+        return super()._strip_protocol(path).lstrip("/")
+
+    def _get_dirs(self):
+        fields = {
+            "name": "pathname",
+            "size": "size",
+            "created": "ctime",
+            "mode": "mode",
+            "uid": "uid",
+            "gid": "gid",
+            "mtime": "mtime",
+        }
+
+        if self.dir_cache is not None:
+            return
+
+        self.dir_cache = {}
+        list_names = []
+        with self._open_archive() as arc:
+            for entry in arc:
+                if not entry.isdir and not entry.isfile:
+                    # Skip symbolic links, fifo entries, etc.
+                    continue
+                self.dir_cache.update(
+                    {
+                        dirname: {"name": dirname, "size": 0, "type": "directory"}
+                        for dirname in self._all_dirnames(set(entry.name))
+                    }
+                )
+                f = {key: getattr(entry, fields[key]) for key in fields}
+                f["type"] = "directory" if entry.isdir else "file"
+                list_names.append(entry.name)
+
+                self.dir_cache[f["name"]] = f
+        # libarchive does not seem to return an entry for the directories (at least
+        # not in all formats), so get the directories names from the files names
+        self.dir_cache.update(
+            {
+                dirname: {"name": dirname, "size": 0, "type": "directory"}
+                for dirname in self._all_dirnames(list_names)
+            }
+        )
+
+    def _open(
+        self,
+        path,
+        mode="rb",
+        block_size=None,
+        autocommit=True,
+        cache_options=None,
+        **kwargs,
+    ):
+        path = self._strip_protocol(path)
+        if mode != "rb":
+            raise NotImplementedError
+
+        data = bytes()
+        with self._open_archive() as arc:
+            for entry in arc:
+                if entry.pathname != path:
+                    continue
+
+                if entry.size == 0:
+                    # empty file, so there are no blocks
+                    break
+
+                for block in entry.get_blocks(entry.size):
+                    data = block
+                    break
+                else:
+                    raise ValueError
+        return MemoryFile(fs=self, path=path, data=data)
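LibArchiveFileSystem above exposes a read-only view over any archive format libarchive understands, provided the underlying file object is seekable. A brief sketch (archive and member names are placeholders), which also needs the optional `libarchive-c` dependency listed in the metadata above:

```python
import fsspec

# placeholder archive path; any local or remote seekable file works
fs = fsspec.filesystem("libarchive", fo="bundle.7z")
print(fs.ls("", detail=False))        # member paths at the archive root
with fs.open("inner/data.txt") as f:  # the whole member is read into memory
    print(f.read())
```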
.venv/Lib/site-packages/fsspec/implementations/local.py
ADDED
@@ -0,0 +1,418 @@
+import datetime
+import io
+import logging
+import os
+import os.path as osp
+import re
+import shutil
+import stat
+import tempfile
+
+from fsspec import AbstractFileSystem
+from fsspec.compression import compr
+from fsspec.core import get_compression
+from fsspec.utils import isfilelike, stringify_path
+
+logger = logging.getLogger("fsspec.local")
+
+
+class LocalFileSystem(AbstractFileSystem):
+    """Interface to files on local storage
+
+    Parameters
+    ----------
+    auto_mkdir: bool
+        Whether, when opening a file, the directory containing it should
+        be created (if it doesn't already exist). This is assumed by pyarrow
+        code.
+    """
+
+    root_marker = "/"
+    protocol = "file", "local"
+    local_file = True
+
+    def __init__(self, auto_mkdir=False, **kwargs):
+        super().__init__(**kwargs)
+        self.auto_mkdir = auto_mkdir
+
+    @property
+    def fsid(self):
+        return "local"
+
+    def mkdir(self, path, create_parents=True, **kwargs):
+        path = self._strip_protocol(path)
+        if self.exists(path):
+            raise FileExistsError(path)
+        if create_parents:
+            self.makedirs(path, exist_ok=True)
+        else:
+            os.mkdir(path, **kwargs)
+
+    def makedirs(self, path, exist_ok=False):
+        path = self._strip_protocol(path)
+        os.makedirs(path, exist_ok=exist_ok)
+
+    def rmdir(self, path):
+        path = self._strip_protocol(path)
+        os.rmdir(path)
+
+    def ls(self, path, detail=False, **kwargs):
+        path = self._strip_protocol(path)
+        info = self.info(path)
+        if info["type"] == "directory":
+            with os.scandir(path) as it:
+                infos = [self.info(f) for f in it]
+        else:
+            infos = [info]
+
+        if not detail:
+            return [i["name"] for i in infos]
+        return infos
+
+    def info(self, path, **kwargs):
+        if isinstance(path, os.DirEntry):
+            # scandir DirEntry
+            out = path.stat(follow_symlinks=False)
+            link = path.is_symlink()
+            if path.is_dir(follow_symlinks=False):
+                t = "directory"
+            elif path.is_file(follow_symlinks=False):
+                t = "file"
+            else:
+                t = "other"
+            path = self._strip_protocol(path.path)
+        else:
+            # str or path-like
+            path = self._strip_protocol(path)
+            out = os.stat(path, follow_symlinks=False)
+            link = stat.S_ISLNK(out.st_mode)
+            if link:
+                out = os.stat(path, follow_symlinks=True)
+            if stat.S_ISDIR(out.st_mode):
+                t = "directory"
+            elif stat.S_ISREG(out.st_mode):
+                t = "file"
+            else:
+                t = "other"
+        result = {
+            "name": path,
+            "size": out.st_size,
+            "type": t,
+            "created": out.st_ctime,
+            "islink": link,
+        }
+        for field in ["mode", "uid", "gid", "mtime", "ino", "nlink"]:
+            result[field] = getattr(out, f"st_{field}")
+        if result["islink"]:
+            result["destination"] = os.readlink(path)
+            try:
+                out2 = os.stat(path, follow_symlinks=True)
+                result["size"] = out2.st_size
+            except OSError:
+                result["size"] = 0
+        return result
+
+    def lexists(self, path, **kwargs):
+        return osp.lexists(path)
+
+    def cp_file(self, path1, path2, **kwargs):
+        path1 = self._strip_protocol(path1).rstrip("/")
+        path2 = self._strip_protocol(path2).rstrip("/")
+        if self.auto_mkdir:
+            self.makedirs(self._parent(path2), exist_ok=True)
+        if self.isfile(path1):
+            shutil.copyfile(path1, path2)
+        elif self.isdir(path1):
+            self.mkdirs(path2, exist_ok=True)
+        else:
+            raise FileNotFoundError(path1)
+
+    def get_file(self, path1, path2, callback=None, **kwargs):
+        if isfilelike(path2):
+            with open(path1, "rb") as f:
+                shutil.copyfileobj(f, path2)
+        else:
+            return self.cp_file(path1, path2, **kwargs)
+
+    def put_file(self, path1, path2, callback=None, **kwargs):
+        return self.cp_file(path1, path2, **kwargs)
+
+    def mv_file(self, path1, path2, **kwargs):
+        path1 = self._strip_protocol(path1).rstrip("/")
+        path2 = self._strip_protocol(path2).rstrip("/")
+        shutil.move(path1, path2)
+
+    def link(self, src, dst, **kwargs):
+        src = self._strip_protocol(src)
+        dst = self._strip_protocol(dst)
+        os.link(src, dst, **kwargs)
+
+    def symlink(self, src, dst, **kwargs):
+        src = self._strip_protocol(src)
+        dst = self._strip_protocol(dst)
+        os.symlink(src, dst, **kwargs)
+
+    def islink(self, path) -> bool:
+        return os.path.islink(self._strip_protocol(path))
+
+    def rm_file(self, path):
+        os.remove(self._strip_protocol(path))
+
+    def rm(self, path, recursive=False, maxdepth=None):
+        if not isinstance(path, list):
+            path = [path]
+
+        for p in path:
+            p = self._strip_protocol(p).rstrip("/")
+            if self.isdir(p):
+                if not recursive:
+                    raise ValueError("Cannot delete directory, set recursive=True")
+                if osp.abspath(p) == os.getcwd():
+                    raise ValueError("Cannot delete current working directory")
+                shutil.rmtree(p)
+            else:
+                os.remove(p)
+
+    def unstrip_protocol(self, name):
+        name = self._strip_protocol(name)  # normalise for local/win/...
+        return f"file://{name}"
+
+    def _open(self, path, mode="rb", block_size=None, **kwargs):
+        path = self._strip_protocol(path)
+        if self.auto_mkdir and "w" in mode:
+            self.makedirs(self._parent(path), exist_ok=True)
+        return LocalFileOpener(path, mode, fs=self, **kwargs)
+
+    def touch(self, path, truncate=True, **kwargs):
+        path = self._strip_protocol(path)
+        if self.auto_mkdir:
+            self.makedirs(self._parent(path), exist_ok=True)
+        if self.exists(path):
+            os.utime(path, None)
+        else:
+            open(path, "a").close()
+        if truncate:
+            os.truncate(path, 0)
+
+    def created(self, path):
+        info = self.info(path=path)
+        return datetime.datetime.fromtimestamp(
+            info["created"], tz=datetime.timezone.utc
+        )
+
+    def modified(self, path):
+        info = self.info(path=path)
+        return datetime.datetime.fromtimestamp(info["mtime"], tz=datetime.timezone.utc)
+
+    @classmethod
+    def _parent(cls, path):
+        path = cls._strip_protocol(path).rstrip("/")
+        if "/" in path:
+            return path.rsplit("/", 1)[0]
+        else:
+            return cls.root_marker
+
+    @classmethod
+    def _strip_protocol(cls, path):
+        path = stringify_path(path)
+        if path.startswith("file://"):
+            path = path[7:]
+        elif path.startswith("file:"):
+            path = path[5:]
+        elif path.startswith("local://"):
+            path = path[8:]
+        elif path.startswith("local:"):
+            path = path[6:]
+        return make_path_posix(path).rstrip("/") or cls.root_marker
+
+    def _isfilestore(self):
+        # Inheriting from DaskFileSystem makes this False (S3, etc. were)
+        # the original motivation. But we are a posix-like file system.
+        # See https://github.com/dask/dask/issues/5526
+        return True
+
+    def chmod(self, path, mode):
+        path = stringify_path(path)
+        return os.chmod(path, mode)
+
+
+def make_path_posix(path, sep=os.sep):
+    """Make path generic"""
+    if isinstance(path, (list, set, tuple)):
+        return type(path)(make_path_posix(p) for p in path)
+    if "~" in path:
+        path = osp.expanduser(path)
+    if sep == "/":
+        # most common fast case for posix
+        if path.startswith("/"):
+            return path
+        if path.startswith("./"):
+            path = path[2:]
+            return f"{os.getcwd()}/{path}"
+    if (
+        (sep not in path and "/" not in path)
+        or (sep == "/" and not path.startswith("/"))
+        or (sep == "\\" and ":" not in path and not path.startswith("\\\\"))
+    ):
+        # relative path like "path" or "rel\\path" (win) or rel/path"
+        if os.sep == "\\":
+            # abspath made some more '\\' separators
+            return make_path_posix(osp.abspath(path))
+        else:
+            return f"{os.getcwd()}/{path}"
+    if path.startswith("file://"):
+        path = path[7:]
+    if re.match("/[A-Za-z]:", path):
+        # for windows file URI like "file:///C:/folder/file"
+        # or "file:///C:\\dir\\file"
+        path = path[1:].replace("\\", "/").replace("//", "/")
+    if path.startswith("\\\\"):
+        # special case for windows UNC/DFS-style paths, do nothing,
+        # just flip the slashes around (case below does not work!)
+        return path.replace("\\", "/")
+    if re.match("[A-Za-z]:", path):
+        # windows full path like "C:\\local\\path"
+        return path.lstrip("\\").replace("\\", "/").replace("//", "/")
+    if path.startswith("\\"):
+        # windows network path like "\\server\\path"
+        return "/" + path.lstrip("\\").replace("\\", "/").replace("//", "/")
+    return path
+
+
+def trailing_sep(path):
+    """Return True if the path ends with a path separator.
+
+    A forward slash is always considered a path separator, even on Operating
+    Systems that normally use a backslash.
+    """
+    # TODO: if all incoming paths were posix-compliant then separator would
+    # always be a forward slash, simplifying this function.
+    # See https://github.com/fsspec/filesystem_spec/pull/1250
+    return path.endswith(os.sep) or (os.altsep is not None and path.endswith(os.altsep))
+
+
+class LocalFileOpener(io.IOBase):
+    def __init__(
+        self, path, mode, autocommit=True, fs=None, compression=None, **kwargs
+    ):
+        logger.debug("open file: %s", path)
+        self.path = path
+        self.mode = mode
+        self.fs = fs
+        self.f = None
+        self.autocommit = autocommit
+        self.compression = get_compression(path, compression)
+        self.blocksize = io.DEFAULT_BUFFER_SIZE
+        self._open()
+
+    def _open(self):
+        if self.f is None or self.f.closed:
+            if self.autocommit or "w" not in self.mode:
+                self.f = open(self.path, mode=self.mode)
+                if self.compression:
+                    compress = compr[self.compression]
+                    self.f = compress(self.f, mode=self.mode)
+            else:
+                # TODO: check if path is writable?
+                i, name = tempfile.mkstemp()
+                os.close(i)  # we want normal open and normal buffered file
+                self.temp = name
+                self.f = open(name, mode=self.mode)
+            if "w" not in self.mode:
+                self.size = self.f.seek(0, 2)
+                self.f.seek(0)
+                self.f.size = self.size
+
+    def _fetch_range(self, start, end):
+        # probably only used by cached FS
+        if "r" not in self.mode:
+            raise ValueError
+        self._open()
+        self.f.seek(start)
+        return self.f.read(end - start)
+
+    def __setstate__(self, state):
+        self.f = None
+        loc = state.pop("loc", None)
+        self.__dict__.update(state)
+        if "r" in state["mode"]:
+            self.f = None
+            self._open()
+            self.f.seek(loc)
+
+    def __getstate__(self):
+        d = self.__dict__.copy()
+        d.pop("f")
+        if "r" in self.mode:
+            d["loc"] = self.f.tell()
+        else:
+            if not self.f.closed:
+                raise ValueError("Cannot serialise open write-mode local file")
+        return d
+
+    def commit(self):
+        if self.autocommit:
+            raise RuntimeError("Can only commit if not already set to autocommit")
+        shutil.move(self.temp, self.path)
+
+    def discard(self):
+        if self.autocommit:
+            raise RuntimeError("Cannot discard if set to autocommit")
+        os.remove(self.temp)
+
+    def readable(self) -> bool:
+        return True
+
+    def writable(self) -> bool:
+        return "r" not in self.mode
+
+    def read(self, *args, **kwargs):
+        return self.f.read(*args, **kwargs)
+
+    def write(self, *args, **kwargs):
+        return self.f.write(*args, **kwargs)
+
+    def tell(self, *args, **kwargs):
+        return self.f.tell(*args, **kwargs)
+
+    def seek(self, *args, **kwargs):
+        return self.f.seek(*args, **kwargs)
+
+    def seekable(self, *args, **kwargs):
+        return self.f.seekable(*args, **kwargs)
+
+    def readline(self, *args, **kwargs):
+        return self.f.readline(*args, **kwargs)
+
+    def readlines(self, *args, **kwargs):
+        return self.f.readlines(*args, **kwargs)
+
+    def close(self):
+        return self.f.close()
+
+    def truncate(self, size=None) -> int:
+        return self.f.truncate(size)
+
+    @property
+    def closed(self):
+        return self.f.closed
+
+    def fileno(self):
+        return self.raw.fileno()
+
+    def flush(self) -> None:
+        self.f.flush()
+
+    def __iter__(self):
+        return self.f.__iter__()
+
+    def __getattr__(self, item):
+        return getattr(self.f, item)
+
+    def __enter__(self):
+        self._incontext = True
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self._incontext = False
+        self.f.__exit__(exc_type, exc_value, traceback)
.venv/Lib/site-packages/fsspec/implementations/memory.py
ADDED
@@ -0,0 +1,292 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import logging
|
4 |
+
from datetime import datetime, timezone
|
5 |
+
from errno import ENOTEMPTY
|
6 |
+
from io import BytesIO
|
7 |
+
from typing import Any, ClassVar
|
8 |
+
|
9 |
+
from fsspec import AbstractFileSystem
|
10 |
+
|
11 |
+
logger = logging.getLogger("fsspec.memoryfs")
|
12 |
+
|
13 |
+
|
14 |
+
class MemoryFileSystem(AbstractFileSystem):
|
15 |
+
"""A filesystem based on a dict of BytesIO objects
|
16 |
+
|
17 |
+
This is a global filesystem so instances of this class all point to the same
|
18 |
+
in memory filesystem.
|
19 |
+
"""
|
20 |
+
|
21 |
+
store: ClassVar[dict[str, Any]] = {} # global, do not overwrite!
|
22 |
+
pseudo_dirs = [""] # global, do not overwrite!
|
23 |
+
protocol = "memory"
|
24 |
+
root_marker = "/"
|
25 |
+
|
26 |
+
@classmethod
|
27 |
+
def _strip_protocol(cls, path):
|
28 |
+
if path.startswith("memory://"):
|
29 |
+
path = path[len("memory://") :]
|
30 |
+
if "::" in path or "://" in path:
|
31 |
+
return path.rstrip("/")
|
32 |
+
path = path.lstrip("/").rstrip("/")
|
33 |
+
return "/" + path if path else ""
|
34 |
+
|
35 |
+
def ls(self, path, detail=True, **kwargs):
|
36 |
+
path = self._strip_protocol(path)
|
37 |
+
if path in self.store:
|
38 |
+
# there is a key with this exact name
|
39 |
+
if not detail:
|
40 |
+
return [path]
|
41 |
+
return [
|
42 |
+
{
|
43 |
+
"name": path,
|
44 |
+
"size": self.store[path].size,
|
45 |
+
"type": "file",
|
46 |
+
"created": self.store[path].created.timestamp(),
|
47 |
+
}
|
48 |
+
]
|
49 |
+
paths = set()
|
50 |
+
starter = path + "/"
|
51 |
+
out = []
|
52 |
+
for p2 in tuple(self.store):
|
53 |
+
if p2.startswith(starter):
|
54 |
+
if "/" not in p2[len(starter) :]:
|
55 |
+
# exact child
|
56 |
+
out.append(
|
57 |
+
{
|
58 |
+
"name": p2,
|
59 |
+
"size": self.store[p2].size,
|
60 |
+
"type": "file",
|
61 |
+
"created": self.store[p2].created.timestamp(),
|
62 |
+
}
|
63 |
+
)
|
64 |
+
elif len(p2) > len(starter):
|
65 |
+
# implied child directory
|
66 |
+
ppath = starter + p2[len(starter) :].split("/", 1)[0]
|
67 |
+
if ppath not in paths:
|
68 |
+
out = out or []
|
69 |
+
out.append(
|
70 |
+
{
|
71 |
+
"name": ppath,
|
72 |
+
"size": 0,
|
73 |
+
"type": "directory",
|
74 |
+
}
|
75 |
+
)
|
76 |
+
paths.add(ppath)
|
77 |
+
for p2 in self.pseudo_dirs:
|
78 |
+
if p2.startswith(starter):
|
79 |
+
if "/" not in p2[len(starter) :]:
|
80 |
+
# exact child pdir
|
81 |
+
if p2 not in paths:
|
82 |
+
out.append({"name": p2, "size": 0, "type": "directory"})
|
83 |
+
paths.add(p2)
|
84 |
+
else:
|
85 |
+
# directory implied by deeper pdir
|
86 |
+
ppath = starter + p2[len(starter) :].split("/", 1)[0]
|
87 |
+
if ppath not in paths:
|
88 |
+
out.append({"name": ppath, "size": 0, "type": "directory"})
|
89 |
+
paths.add(ppath)
|
90 |
+
if not out:
|
91 |
+
if path in self.pseudo_dirs:
|
92 |
+
# empty dir
|
93 |
+
return []
|
94 |
+
raise FileNotFoundError(path)
|
95 |
+
if detail:
|
96 |
+
return out
|
97 |
+
return sorted([f["name"] for f in out])
|
98 |
+
|
99 |
+
def mkdir(self, path, create_parents=True, **kwargs):
|
100 |
+
path = self._strip_protocol(path)
|
101 |
+
if path in self.store or path in self.pseudo_dirs:
|
102 |
+
raise FileExistsError(path)
|
103 |
+
if self._parent(path).strip("/") and self.isfile(self._parent(path)):
|
104 |
+
raise NotADirectoryError(self._parent(path))
|
105 |
+
if create_parents and self._parent(path).strip("/"):
|
106 |
+
try:
|
107 |
+
self.mkdir(self._parent(path), create_parents, **kwargs)
|
108 |
+
except FileExistsError:
|
109 |
+
pass
|
110 |
+
if path and path not in self.pseudo_dirs:
|
111 |
+
self.pseudo_dirs.append(path)
|
112 |
+
|
113 |
+
def makedirs(self, path, exist_ok=False):
|
114 |
+
try:
|
115 |
+
self.mkdir(path, create_parents=True)
|
116 |
+
except FileExistsError:
|
117 |
+
if not exist_ok:
|
118 |
+
raise
|
119 |
+
|
120 |
+
def pipe_file(self, path, value, **kwargs):
|
121 |
+
"""Set the bytes of given file
|
122 |
+
|
123 |
+
Avoids copies of the data if possible
|
124 |
+
"""
|
125 |
+
self.open(path, "wb", data=value)
|
126 |
+
|
127 |
+
def rmdir(self, path):
|
128 |
+
path = self._strip_protocol(path)
|
129 |
+
if path == "":
|
130 |
+
# silently avoid deleting FS root
|
131 |
+
return
|
132 |
+
if path in self.pseudo_dirs:
|
133 |
+
if not self.ls(path):
|
134 |
+
self.pseudo_dirs.remove(path)
|
135 |
+
else:
|
136 |
+
raise OSError(ENOTEMPTY, "Directory not empty", path)
|
137 |
+
else:
|
138 |
+
raise FileNotFoundError(path)
|
139 |
+
|
140 |
+
def info(self, path, **kwargs):
|
141 |
+
path = self._strip_protocol(path)
|
142 |
+
if path in self.pseudo_dirs or any(
|
143 |
+
p.startswith(path + "/") for p in list(self.store) + self.pseudo_dirs
|
144 |
+
):
|
145 |
+
return {
|
146 |
+
"name": path,
|
147 |
+
"size": 0,
|
148 |
+
"type": "directory",
|
149 |
+
}
|
150 |
+
elif path in self.store:
|
151 |
+
filelike = self.store[path]
|
152 |
+
return {
|
153 |
+
"name": path,
|
154 |
+
"size": filelike.size,
|
155 |
+
"type": "file",
|
156 |
+
"created": getattr(filelike, "created", None),
|
157 |
+
}
|
158 |
+
else:
|
159 |
+
raise FileNotFoundError(path)
|
160 |
+
|
161 |
+
def _open(
|
162 |
+
self,
|
163 |
+
path,
|
164 |
+
mode="rb",
|
165 |
+
block_size=None,
|
166 |
+
autocommit=True,
|
167 |
+
cache_options=None,
|
168 |
+
**kwargs,
|
169 |
+
):
|
170 |
+
path = self._strip_protocol(path)
|
171 |
+
if path in self.pseudo_dirs:
|
172 |
+
raise IsADirectoryError(path)
|
173 |
+
parent = path
|
174 |
+
while len(parent) > 1:
|
175 |
+
parent = self._parent(parent)
|
176 |
+
if self.isfile(parent):
|
177 |
+
raise FileExistsError(parent)
|
178 |
+
if mode in ["rb", "ab", "r+b"]:
|
179 |
+
if path in self.store:
|
180 |
+
f = self.store[path]
|
181 |
+
if mode == "ab":
|
182 |
+
# position at the end of file
|
183 |
+
f.seek(0, 2)
|
184 |
+
else:
|
185 |
+
# position at the beginning of file
|
186 |
+
f.seek(0)
|
187 |
+
return f
|
188 |
+
else:
|
189 |
+
raise FileNotFoundError(path)
|
190 |
+
elif mode == "wb":
|
191 |
+
m = MemoryFile(self, path, kwargs.get("data"))
|
192 |
+
if not self._intrans:
|
193 |
+
m.commit()
|
194 |
+
return m
|
195 |
+
else:
|
196 |
+
name = self.__class__.__name__
|
197 |
+
raise ValueError(f"unsupported file mode for {name}: {mode!r}")
|
198 |
+
|
199 |
+
def cp_file(self, path1, path2, **kwargs):
|
200 |
+
path1 = self._strip_protocol(path1)
|
201 |
+
path2 = self._strip_protocol(path2)
|
202 |
+
if self.isfile(path1):
|
203 |
+
self.store[path2] = MemoryFile(
|
204 |
+
self, path2, self.store[path1].getvalue()
|
205 |
+
) # implicit copy
|
206 |
+
elif self.isdir(path1):
|
207 |
+
if path2 not in self.pseudo_dirs:
|
208 |
+
self.pseudo_dirs.append(path2)
|
209 |
+
else:
|
210 |
+
raise FileNotFoundError(path1)
|
211 |
+
|
212 |
+
def cat_file(self, path, start=None, end=None, **kwargs):
|
213 |
+
path = self._strip_protocol(path)
|
214 |
+
try:
|
215 |
+
return bytes(self.store[path].getbuffer()[start:end])
|
216 |
+
except KeyError:
|
217 |
+
raise FileNotFoundError(path)
|
218 |
+
|
219 |
+
def _rm(self, path):
|
220 |
+
path = self._strip_protocol(path)
|
221 |
+
try:
|
222 |
+
del self.store[path]
|
223 |
+
except KeyError as e:
|
224 |
+
raise FileNotFoundError(path) from e
|
225 |
+
|
226 |
+
def modified(self, path):
|
227 |
+
path = self._strip_protocol(path)
|
228 |
+
try:
|
229 |
+
return self.store[path].modified
|
230 |
+
except KeyError:
|
231 |
+
raise FileNotFoundError(path)
|
232 |
+
|
233 |
+
def created(self, path):
|
234 |
+
path = self._strip_protocol(path)
|
235 |
+
try:
|
236 |
+
return self.store[path].created
|
237 |
+
except KeyError:
|
238 |
+
raise FileNotFoundError(path)
|
239 |
+
|
240 |
+
def rm(self, path, recursive=False, maxdepth=None):
|
241 |
+
if isinstance(path, str):
|
242 |
+
path = self._strip_protocol(path)
|
243 |
+
else:
|
244 |
+
path = [self._strip_protocol(p) for p in path]
|
245 |
+
paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
|
246 |
+
for p in reversed(paths):
|
247 |
+
# If the expanded path doesn't exist, it is only because the expanded
|
248 |
+
# path was a directory that does not exist in self.pseudo_dirs. This
|
249 |
+
# is possible if you directly create files without making the
|
250 |
+
# directories first.
|
251 |
+
if not self.exists(p):
|
252 |
+
continue
|
253 |
+
if self.isfile(p):
|
254 |
+
self.rm_file(p)
|
255 |
+
else:
|
256 |
+
self.rmdir(p)
|
257 |
+
|
258 |
+
|
259 |
+
class MemoryFile(BytesIO):
|
260 |
+
"""A BytesIO which can't close and works as a context manager
|
261 |
+
|
262 |
+
Can initialise with data. Each path should only be active once at any moment.
|
263 |
+
|
264 |
+
No need to provide fs, path if auto-committing (default)
|
265 |
+
"""
|
266 |
+
|
267 |
+
def __init__(self, fs=None, path=None, data=None):
|
268 |
+
logger.debug("open file %s", path)
|
269 |
+
self.fs = fs
|
270 |
+
self.path = path
|
271 |
+
self.created = datetime.now(tz=timezone.utc)
|
272 |
+
self.modified = datetime.now(tz=timezone.utc)
|
273 |
+
if data:
|
274 |
+
super().__init__(data)
|
275 |
+
self.seek(0)
|
276 |
+
|
277 |
+
@property
|
278 |
+
def size(self):
|
279 |
+
return self.getbuffer().nbytes
|
280 |
+
|
281 |
+
def __enter__(self):
|
282 |
+
return self
|
283 |
+
|
284 |
+
def close(self):
|
285 |
+
pass
|
286 |
+
|
287 |
+
def discard(self):
|
288 |
+
pass
|
289 |
+
|
290 |
+
def commit(self):
|
291 |
+
self.fs.store[self.path] = self
|
292 |
+
self.modified = datetime.now(tz=timezone.utc)
|
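To make the behaviour of the in-memory filesystem above concrete, here is a minimal usage sketch; it is not part of the packaged file, and the paths are invented for illustration.

# Usage sketch for MemoryFileSystem (illustrative only; paths are made up).
import fsspec

fs = fsspec.filesystem("memory")                 # in-process, dict-backed store
fs.mkdir("/project/data")                        # registered as a pseudo-directory
fs.pipe_file("/project/data/a.bin", b"hello")    # stored as a MemoryFile
print(fs.ls("/project/data", detail=False))      # expect ['/project/data/a.bin']
print(fs.cat_file("/project/data/a.bin"))        # expect b'hello'
fs.rm("/project", recursive=True)                # removes the file, then the dirs
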
.venv/Lib/site-packages/fsspec/implementations/reference.py
ADDED
@@ -0,0 +1,1160 @@
1 |
+
import base64
|
2 |
+
import collections
|
3 |
+
import io
|
4 |
+
import itertools
|
5 |
+
import logging
|
6 |
+
import math
|
7 |
+
import os
|
8 |
+
from functools import lru_cache
|
9 |
+
from typing import TYPE_CHECKING
|
10 |
+
|
11 |
+
import fsspec.core
|
12 |
+
|
13 |
+
try:
|
14 |
+
import ujson as json
|
15 |
+
except ImportError:
|
16 |
+
if not TYPE_CHECKING:
|
17 |
+
import json
|
18 |
+
|
19 |
+
from ..asyn import AsyncFileSystem
|
20 |
+
from ..callbacks import DEFAULT_CALLBACK
|
21 |
+
from ..core import filesystem, open, split_protocol
|
22 |
+
from ..utils import isfilelike, merge_offset_ranges, other_paths
|
23 |
+
|
24 |
+
logger = logging.getLogger("fsspec.reference")
|
25 |
+
|
26 |
+
|
27 |
+
class ReferenceNotReachable(RuntimeError):
|
28 |
+
def __init__(self, reference, target, *args):
|
29 |
+
super().__init__(*args)
|
30 |
+
self.reference = reference
|
31 |
+
self.target = target
|
32 |
+
|
33 |
+
def __str__(self):
|
34 |
+
return f'Reference "{self.reference}" failed to fetch target {self.target}'
|
35 |
+
|
36 |
+
|
37 |
+
def _first(d):
|
38 |
+
return list(d.values())[0]
|
39 |
+
|
40 |
+
|
41 |
+
def _prot_in_references(path, references):
|
42 |
+
ref = references.get(path)
|
43 |
+
if isinstance(ref, (list, tuple)):
|
44 |
+
return split_protocol(ref[0])[0] if ref[0] else ref[0]
|
45 |
+
|
46 |
+
|
47 |
+
def _protocol_groups(paths, references):
|
48 |
+
if isinstance(paths, str):
|
49 |
+
return {_prot_in_references(paths, references): [paths]}
|
50 |
+
out = {}
|
51 |
+
for path in paths:
|
52 |
+
protocol = _prot_in_references(path, references)
|
53 |
+
out.setdefault(protocol, []).append(path)
|
54 |
+
return out
|
55 |
+
|
56 |
+
|
57 |
+
class RefsValuesView(collections.abc.ValuesView):
|
58 |
+
def __iter__(self):
|
59 |
+
for val in self._mapping.zmetadata.values():
|
60 |
+
yield json.dumps(val).encode()
|
61 |
+
yield from self._mapping._items.values()
|
62 |
+
for field in self._mapping.listdir():
|
63 |
+
chunk_sizes = self._mapping._get_chunk_sizes(field)
|
64 |
+
if len(chunk_sizes) == 0:
|
65 |
+
yield self._mapping[field + "/0"]
|
66 |
+
continue
|
67 |
+
yield from self._mapping._generate_all_records(field)
|
68 |
+
|
69 |
+
|
70 |
+
class RefsItemsView(collections.abc.ItemsView):
|
71 |
+
def __iter__(self):
|
72 |
+
return zip(self._mapping.keys(), self._mapping.values())
|
73 |
+
|
74 |
+
|
75 |
+
def ravel_multi_index(idx, sizes):
|
76 |
+
val = 0
|
77 |
+
mult = 1
|
78 |
+
for i, s in zip(idx[::-1], sizes[::-1]):
|
79 |
+
val += i * mult
|
80 |
+
mult *= s
|
81 |
+
return val
|
82 |
+
|
83 |
+
|
84 |
+
class LazyReferenceMapper(collections.abc.MutableMapping):
|
85 |
+
"""This interface can be used to read/write references from Parquet stores.
|
86 |
+
It is not intended for other types of references.
|
87 |
+
It can be used with Kerchunk's MultiZarrToZarr method to combine
|
88 |
+
references into a parquet store.
|
89 |
+
Examples of this use-case can be found here:
|
90 |
+
https://fsspec.github.io/kerchunk/advanced.html?highlight=parquet#parquet-storage"""
|
91 |
+
|
92 |
+
# import is class level to prevent numpy dep requirement for fsspec
|
93 |
+
@property
|
94 |
+
def np(self):
|
95 |
+
import numpy as np
|
96 |
+
|
97 |
+
return np
|
98 |
+
|
99 |
+
@property
|
100 |
+
def pd(self):
|
101 |
+
import pandas as pd
|
102 |
+
|
103 |
+
return pd
|
104 |
+
|
105 |
+
def __init__(
|
106 |
+
self, root, fs=None, out_root=None, cache_size=128, categorical_threshold=10
|
107 |
+
):
|
108 |
+
"""
|
109 |
+
|
110 |
+
This instance will be writable, storing changes in memory until full partitions
|
111 |
+
are accumulated or .flush() is called.
|
112 |
+
|
113 |
+
To create an empty lazy store, use .create()
|
114 |
+
|
115 |
+
Parameters
|
116 |
+
----------
|
117 |
+
root : str
|
118 |
+
Root of parquet store
|
119 |
+
fs : fsspec.AbstractFileSystem
|
120 |
+
fsspec filesystem object, default is local filesystem.
|
121 |
+
cache_size : int, default=128
|
122 |
+
Maximum size of LRU cache, where cache_size*record_size denotes
|
123 |
+
the total number of references that can be loaded in memory at once.
|
124 |
+
categorical_threshold : int
|
125 |
+
Encode urls as pandas.Categorical to reduce memory footprint if the ratio
|
126 |
+
of the number of unique urls to total number of refs for each variable
|
127 |
+
is greater than or equal to this number. (default 10)
|
128 |
+
"""
|
129 |
+
self.root = root
|
130 |
+
self.chunk_sizes = {}
|
131 |
+
self.out_root = out_root or self.root
|
132 |
+
self.cat_thresh = categorical_threshold
|
133 |
+
self.cache_size = cache_size
|
134 |
+
self.dirs = None
|
135 |
+
self.url = self.root + "/{field}/refs.{record}.parq"
|
136 |
+
# TODO: derive fs from `root`
|
137 |
+
self.fs = fsspec.filesystem("file") if fs is None else fs
|
138 |
+
|
139 |
+
def __getattr__(self, item):
|
140 |
+
if item in ("_items", "record_size", "zmetadata"):
|
141 |
+
self.setup()
|
142 |
+
# avoid possible recursion if setup fails somehow
|
143 |
+
return self.__dict__[item]
|
144 |
+
raise AttributeError(item)
|
145 |
+
|
146 |
+
def setup(self):
|
147 |
+
self._items = {}
|
148 |
+
self._items[".zmetadata"] = self.fs.cat_file(
|
149 |
+
"/".join([self.root, ".zmetadata"])
|
150 |
+
)
|
151 |
+
met = json.loads(self._items[".zmetadata"])
|
152 |
+
self.record_size = met["record_size"]
|
153 |
+
self.zmetadata = met["metadata"]
|
154 |
+
|
155 |
+
# Define function to open and decompress refs
|
156 |
+
@lru_cache(maxsize=self.cache_size)
|
157 |
+
def open_refs(field, record):
|
158 |
+
"""cached parquet file loader"""
|
159 |
+
path = self.url.format(field=field, record=record)
|
160 |
+
data = io.BytesIO(self.fs.cat_file(path))
|
161 |
+
df = self.pd.read_parquet(data, engine="fastparquet")
|
162 |
+
refs = {c: df[c].values for c in df.columns}
|
163 |
+
return refs
|
164 |
+
|
165 |
+
self.open_refs = open_refs
|
166 |
+
|
167 |
+
@staticmethod
|
168 |
+
def create(root, storage_options=None, fs=None, record_size=10000, **kwargs):
|
169 |
+
"""Make empty parquet reference set
|
170 |
+
|
171 |
+
First deletes the contents of the given directory, if it exists.
|
172 |
+
|
173 |
+
Parameters
|
174 |
+
----------
|
175 |
+
root: str
|
176 |
+
Directory to contain the output; will be created
|
177 |
+
storage_options: dict | None
|
178 |
+
For making the filesystem to use for writing is fs is None
|
179 |
+
fs: FileSystem | None
|
180 |
+
Filesystem for writing
|
181 |
+
record_size: int
|
182 |
+
Number of references per parquet file
|
183 |
+
kwargs: passed to __init__
|
184 |
+
|
185 |
+
Returns
|
186 |
+
-------
|
187 |
+
LazyReferenceMapper instance
|
188 |
+
"""
|
189 |
+
met = {"metadata": {}, "record_size": record_size}
|
190 |
+
if fs is None:
|
191 |
+
fs, root = fsspec.core.url_to_fs(root, **(storage_options or {}))
|
192 |
+
if fs.exists(root):
|
193 |
+
fs.rm(root, recursive=True)
|
194 |
+
fs.makedirs(root, exist_ok=True)
|
195 |
+
fs.pipe("/".join([root, ".zmetadata"]), json.dumps(met).encode())
|
196 |
+
return LazyReferenceMapper(root, fs, **kwargs)
|
197 |
+
|
198 |
+
def listdir(self, basename=True):
|
199 |
+
"""List top-level directories"""
|
200 |
+
# cache me?
|
201 |
+
if self.dirs is None:
|
202 |
+
dirs = [p.split("/", 1)[0] for p in self.zmetadata]
|
203 |
+
self.dirs = {p for p in dirs if p and not p.startswith(".")}
|
204 |
+
listing = self.dirs
|
205 |
+
if basename:
|
206 |
+
listing = [os.path.basename(path) for path in listing]
|
207 |
+
return listing
|
208 |
+
|
209 |
+
def ls(self, path="", detail=True):
|
210 |
+
"""Shortcut file listings"""
|
211 |
+
if not path:
|
212 |
+
dirnames = self.listdir()
|
213 |
+
others = set(
|
214 |
+
[".zmetadata"]
|
215 |
+
+ [name for name in self.zmetadata if "/" not in name]
|
216 |
+
+ [name for name in self._items if "/" not in name]
|
217 |
+
)
|
218 |
+
if detail is False:
|
219 |
+
others.update(dirnames)
|
220 |
+
return sorted(others)
|
221 |
+
dirinfo = [
|
222 |
+
{"name": name, "type": "directory", "size": 0} for name in dirnames
|
223 |
+
]
|
224 |
+
fileinfo = [
|
225 |
+
{
|
226 |
+
"name": name,
|
227 |
+
"type": "file",
|
228 |
+
"size": len(
|
229 |
+
json.dumps(self.zmetadata[name])
|
230 |
+
if name in self.zmetadata
|
231 |
+
else self._items[name]
|
232 |
+
),
|
233 |
+
}
|
234 |
+
for name in others
|
235 |
+
]
|
236 |
+
return sorted(dirinfo + fileinfo, key=lambda s: s["name"])
|
237 |
+
parts = path.split("/", 1)
|
238 |
+
if len(parts) > 1:
|
239 |
+
raise FileNotFoundError("Cannot list within directories right now")
|
240 |
+
field = parts[0]
|
241 |
+
others = set(
|
242 |
+
[name for name in self.zmetadata if name.startswith(f"{path}/")]
|
243 |
+
+ [name for name in self._items if name.startswith(f"{path}/")]
|
244 |
+
)
|
245 |
+
fileinfo = [
|
246 |
+
{
|
247 |
+
"name": name,
|
248 |
+
"type": "file",
|
249 |
+
"size": len(
|
250 |
+
json.dumps(self.zmetadata[name])
|
251 |
+
if name in self.zmetadata
|
252 |
+
else self._items[name]
|
253 |
+
),
|
254 |
+
}
|
255 |
+
for name in others
|
256 |
+
]
|
257 |
+
keys = self._keys_in_field(field)
|
258 |
+
|
259 |
+
if detail is False:
|
260 |
+
return list(others) + list(keys)
|
261 |
+
recs = self._generate_all_records(field)
|
262 |
+
recinfo = [
|
263 |
+
{"name": name, "type": "file", "size": rec[-1]}
|
264 |
+
for name, rec in zip(keys, recs)
|
265 |
+
if rec[0] # filters out path==None, deleted/missing
|
266 |
+
]
|
267 |
+
return fileinfo + recinfo
|
268 |
+
|
269 |
+
def _load_one_key(self, key):
|
270 |
+
"""Get the reference for one key
|
271 |
+
|
272 |
+
Returns bytes, one-element list or three-element list.
|
273 |
+
"""
|
274 |
+
if key in self._items:
|
275 |
+
return self._items[key]
|
276 |
+
elif key in self.zmetadata:
|
277 |
+
return json.dumps(self.zmetadata[key]).encode()
|
278 |
+
elif "/" not in key or self._is_meta(key):
|
279 |
+
raise KeyError(key)
|
280 |
+
field, sub_key = key.split("/")
|
281 |
+
record, ri, chunk_size = self._key_to_record(key)
|
282 |
+
maybe = self._items.get((field, record), {}).get(ri, False)
|
283 |
+
if maybe is None:
|
284 |
+
# explicitly deleted
|
285 |
+
raise KeyError
|
286 |
+
elif maybe:
|
287 |
+
return maybe
|
288 |
+
elif chunk_size == 0:
|
289 |
+
return b""
|
290 |
+
|
291 |
+
# Chunk keys can be loaded from row group and cached in LRU cache
|
292 |
+
try:
|
293 |
+
refs = self.open_refs(field, record)
|
294 |
+
except (ValueError, TypeError, FileNotFoundError):
|
295 |
+
raise KeyError(key)
|
296 |
+
columns = ["path", "offset", "size", "raw"]
|
297 |
+
selection = [refs[c][ri] if c in refs else None for c in columns]
|
298 |
+
raw = selection[-1]
|
299 |
+
if raw is not None:
|
300 |
+
return raw
|
301 |
+
if selection[0] is None:
|
302 |
+
raise KeyError("This reference does not exist or has been deleted")
|
303 |
+
if selection[1:3] == [0, 0]:
|
304 |
+
# URL only
|
305 |
+
return selection[:1]
|
306 |
+
# URL, offset, size
|
307 |
+
return selection[:3]
|
308 |
+
|
309 |
+
@lru_cache(4096)
|
310 |
+
def _key_to_record(self, key):
|
311 |
+
"""Details needed to construct a reference for one key"""
|
312 |
+
field, chunk = key.split("/")
|
313 |
+
chunk_sizes = self._get_chunk_sizes(field)
|
314 |
+
if len(chunk_sizes) == 0:
|
315 |
+
return 0, 0, 0
|
316 |
+
chunk_idx = [int(c) for c in chunk.split(".")]
|
317 |
+
chunk_number = ravel_multi_index(chunk_idx, chunk_sizes)
|
318 |
+
record = chunk_number // self.record_size
|
319 |
+
ri = chunk_number % self.record_size
|
320 |
+
return record, ri, len(chunk_sizes)
|
321 |
+
|
322 |
+
def _get_chunk_sizes(self, field):
|
323 |
+
"""The number of chunks along each axis for a given field"""
|
324 |
+
if field not in self.chunk_sizes:
|
325 |
+
zarray = self.zmetadata[f"{field}/.zarray"]
|
326 |
+
size_ratio = [
|
327 |
+
math.ceil(s / c) for s, c in zip(zarray["shape"], zarray["chunks"])
|
328 |
+
]
|
329 |
+
self.chunk_sizes[field] = size_ratio or [1]
|
330 |
+
return self.chunk_sizes[field]
|
331 |
+
|
332 |
+
def _generate_record(self, field, record):
|
333 |
+
"""The references for a given parquet file of a given field"""
|
334 |
+
refs = self.open_refs(field, record)
|
335 |
+
it = iter(zip(*refs.values()))
|
336 |
+
if len(refs) == 3:
|
337 |
+
# All urls
|
338 |
+
return (list(t) for t in it)
|
339 |
+
elif len(refs) == 1:
|
340 |
+
# All raws
|
341 |
+
return refs["raw"]
|
342 |
+
else:
|
343 |
+
# Mix of urls and raws
|
344 |
+
return (list(t[:3]) if not t[3] else t[3] for t in it)
|
345 |
+
|
346 |
+
def _generate_all_records(self, field):
|
347 |
+
"""Load all the references within a field by iterating over the parquet files"""
|
348 |
+
nrec = 1
|
349 |
+
for ch in self._get_chunk_sizes(field):
|
350 |
+
nrec *= ch
|
351 |
+
nrec = math.ceil(nrec / self.record_size)
|
352 |
+
for record in range(nrec):
|
353 |
+
yield from self._generate_record(field, record)
|
354 |
+
|
355 |
+
def values(self):
|
356 |
+
return RefsValuesView(self)
|
357 |
+
|
358 |
+
def items(self):
|
359 |
+
return RefsItemsView(self)
|
360 |
+
|
361 |
+
def __hash__(self):
|
362 |
+
return id(self)
|
363 |
+
|
364 |
+
def __getitem__(self, key):
|
365 |
+
return self._load_one_key(key)
|
366 |
+
|
367 |
+
def __setitem__(self, key, value):
|
368 |
+
if "/" in key and not self._is_meta(key):
|
369 |
+
field, chunk = key.split("/")
|
370 |
+
record, i, _ = self._key_to_record(key)
|
371 |
+
subdict = self._items.setdefault((field, record), {})
|
372 |
+
subdict[i] = value
|
373 |
+
if len(subdict) == self.record_size:
|
374 |
+
self.write(field, record)
|
375 |
+
else:
|
376 |
+
# metadata or top-level
|
377 |
+
self._items[key] = value
|
378 |
+
new_value = json.loads(
|
379 |
+
value.decode() if isinstance(value, bytes) else value
|
380 |
+
)
|
381 |
+
self.zmetadata[key] = {**self.zmetadata.get(key, {}), **new_value}
|
382 |
+
|
383 |
+
@staticmethod
|
384 |
+
def _is_meta(key):
|
385 |
+
return key.startswith(".z") or "/.z" in key
|
386 |
+
|
387 |
+
def __delitem__(self, key):
|
388 |
+
if key in self._items:
|
389 |
+
del self._items[key]
|
390 |
+
elif key in self.zmetadata:
|
391 |
+
del self.zmetadata[key]
|
392 |
+
else:
|
393 |
+
if "/" in key and not self._is_meta(key):
|
394 |
+
field, chunk = key.split("/")
|
395 |
+
record, i, _ = self._key_to_record(key)
|
396 |
+
subdict = self._items.setdefault((field, record), {})
|
397 |
+
subdict[i] = None
|
398 |
+
if len(subdict) == self.record_size:
|
399 |
+
self.write(field, record)
|
400 |
+
else:
|
401 |
+
# metadata or top-level
|
402 |
+
self._items[key] = None
|
403 |
+
|
404 |
+
def write(self, field, record, base_url=None, storage_options=None):
|
405 |
+
# extra requirements if writing
|
406 |
+
import kerchunk.df
|
407 |
+
import numpy as np
|
408 |
+
import pandas as pd
|
409 |
+
|
410 |
+
partition = self._items[(field, record)]
|
411 |
+
original = False
|
412 |
+
if len(partition) < self.record_size:
|
413 |
+
try:
|
414 |
+
original = self.open_refs(field, record)
|
415 |
+
except IOError:
|
416 |
+
pass
|
417 |
+
|
418 |
+
if original:
|
419 |
+
paths = original["path"]
|
420 |
+
offsets = original["offset"]
|
421 |
+
sizes = original["size"]
|
422 |
+
raws = original["raw"]
|
423 |
+
else:
|
424 |
+
paths = np.full(self.record_size, np.nan, dtype="O")
|
425 |
+
offsets = np.zeros(self.record_size, dtype="int64")
|
426 |
+
sizes = np.zeros(self.record_size, dtype="int64")
|
427 |
+
raws = np.full(self.record_size, np.nan, dtype="O")
|
428 |
+
for j, data in partition.items():
|
429 |
+
if isinstance(data, list):
|
430 |
+
if (
|
431 |
+
str(paths.dtype) == "category"
|
432 |
+
and data[0] not in paths.dtype.categories
|
433 |
+
):
|
434 |
+
paths = paths.add_categories(data[0])
|
435 |
+
paths[j] = data[0]
|
436 |
+
if len(data) > 1:
|
437 |
+
offsets[j] = data[1]
|
438 |
+
sizes[j] = data[2]
|
439 |
+
elif data is None:
|
440 |
+
# delete
|
441 |
+
paths[j] = None
|
442 |
+
offsets[j] = 0
|
443 |
+
sizes[j] = 0
|
444 |
+
raws[j] = None
|
445 |
+
else:
|
446 |
+
# this is the only call into kerchunk, could remove
|
447 |
+
raws[j] = kerchunk.df._proc_raw(data)
|
448 |
+
# TODO: only save needed columns
|
449 |
+
df = pd.DataFrame(
|
450 |
+
{
|
451 |
+
"path": paths,
|
452 |
+
"offset": offsets,
|
453 |
+
"size": sizes,
|
454 |
+
"raw": raws,
|
455 |
+
},
|
456 |
+
copy=False,
|
457 |
+
)
|
458 |
+
if df.path.count() / (df.path.nunique() or 1) > self.cat_thresh:
|
459 |
+
df["path"] = df["path"].astype("category")
|
460 |
+
object_encoding = {"raw": "bytes", "path": "utf8"}
|
461 |
+
has_nulls = ["path", "raw"]
|
462 |
+
|
463 |
+
fn = f"{base_url or self.out_root}/{field}/refs.{record}.parq"
|
464 |
+
self.fs.mkdirs(f"{base_url or self.out_root}/{field}", exist_ok=True)
|
465 |
+
df.to_parquet(
|
466 |
+
fn,
|
467 |
+
engine="fastparquet",
|
468 |
+
storage_options=storage_options
|
469 |
+
or getattr(self.fs, "storage_options", None),
|
470 |
+
compression="zstd",
|
471 |
+
index=False,
|
472 |
+
stats=False,
|
473 |
+
object_encoding=object_encoding,
|
474 |
+
has_nulls=has_nulls,
|
475 |
+
# **kwargs,
|
476 |
+
)
|
477 |
+
partition.clear()
|
478 |
+
self._items.pop((field, record))
|
479 |
+
|
480 |
+
def flush(self, base_url=None, storage_options=None):
|
481 |
+
"""Output any modified or deleted keys
|
482 |
+
|
483 |
+
Parameters
|
484 |
+
----------
|
485 |
+
base_url: str
|
486 |
+
Location of the output
|
487 |
+
"""
|
488 |
+
# write what we have so far and clear sub chunks
|
489 |
+
for thing in list(self._items):
|
490 |
+
if isinstance(thing, tuple):
|
491 |
+
field, record = thing
|
492 |
+
self.write(
|
493 |
+
field,
|
494 |
+
record,
|
495 |
+
base_url=base_url,
|
496 |
+
storage_options=storage_options,
|
497 |
+
)
|
498 |
+
|
499 |
+
# gather .zmetadata from self._items and write that too
|
500 |
+
for k in list(self._items):
|
501 |
+
if k != ".zmetadata" and ".z" in k:
|
502 |
+
self.zmetadata[k] = json.loads(self._items.pop(k))
|
503 |
+
met = {"metadata": self.zmetadata, "record_size": self.record_size}
|
504 |
+
self._items[".zmetadata"] = json.dumps(met).encode()
|
505 |
+
self.fs.pipe(
|
506 |
+
"/".join([base_url or self.out_root, ".zmetadata"]),
|
507 |
+
self._items[".zmetadata"],
|
508 |
+
)
|
509 |
+
|
510 |
+
# TODO: only clear those that we wrote to?
|
511 |
+
self.open_refs.cache_clear()
|
512 |
+
|
513 |
+
def __len__(self):
|
514 |
+
# Caveat: This counts expected references, not actual - but is fast
|
515 |
+
count = 0
|
516 |
+
for field in self.listdir():
|
517 |
+
if field.startswith("."):
|
518 |
+
count += 1
|
519 |
+
else:
|
520 |
+
count += math.prod(self._get_chunk_sizes(field))
|
521 |
+
count += len(self.zmetadata) # all metadata keys
|
522 |
+
# any other files not in reference partitions
|
523 |
+
count += sum(1 for _ in self._items if not isinstance(_, tuple))
|
524 |
+
return count
|
525 |
+
|
526 |
+
def __iter__(self):
|
527 |
+
# Caveat: returns only existing keys, so the number of these does not
|
528 |
+
# match len(self)
|
529 |
+
metas = set(self.zmetadata)
|
530 |
+
metas.update(self._items)
|
531 |
+
for bit in metas:
|
532 |
+
if isinstance(bit, str):
|
533 |
+
yield bit
|
534 |
+
for field in self.listdir():
|
535 |
+
for k in self._keys_in_field(field):
|
536 |
+
if k in self:
|
537 |
+
yield k
|
538 |
+
|
539 |
+
def __contains__(self, item):
|
540 |
+
try:
|
541 |
+
self._load_one_key(item)
|
542 |
+
return True
|
543 |
+
except KeyError:
|
544 |
+
return False
|
545 |
+
|
546 |
+
def _keys_in_field(self, field):
|
547 |
+
"""List key names in given field
|
548 |
+
|
549 |
+
Produces strings like "field/x.y" appropriate from the chunking of the array
|
550 |
+
"""
|
551 |
+
chunk_sizes = self._get_chunk_sizes(field)
|
552 |
+
if len(chunk_sizes) == 0:
|
553 |
+
yield field + "/0"
|
554 |
+
return
|
555 |
+
inds = itertools.product(*(range(i) for i in chunk_sizes))
|
556 |
+
for ind in inds:
|
557 |
+
yield field + "/" + ".".join([str(c) for c in ind])
|
558 |
+
|
559 |
+
|
560 |
+
class ReferenceFileSystem(AsyncFileSystem):
|
561 |
+
"""View byte ranges of some other file as a file system
|
562 |
+
Initial version: single file system target, which must support
|
563 |
+
async, and must allow start and end args in _cat_file. Later versions
|
564 |
+
may allow multiple arbitrary URLs for the targets.
|
565 |
+
This FileSystem is read-only. It is designed to be used with async
|
566 |
+
targets (for now). This FileSystem only allows whole-file access, no
|
567 |
+
``open``. We do not get original file details from the target FS.
|
568 |
+
Configuration is by passing a dict of references at init, or a URL to
|
569 |
+
a JSON file containing the same; this dict
|
570 |
+
can also contain concrete data for some set of paths.
|
571 |
+
Reference dict format:
|
572 |
+
{path0: bytes_data, path1: (target_url, offset, size)}
|
573 |
+
https://github.com/fsspec/kerchunk/blob/main/README.md
|
574 |
+
"""
|
575 |
+
|
576 |
+
protocol = "reference"
|
577 |
+
|
578 |
+
def __init__(
|
579 |
+
self,
|
580 |
+
fo,
|
581 |
+
target=None,
|
582 |
+
ref_storage_args=None,
|
583 |
+
target_protocol=None,
|
584 |
+
target_options=None,
|
585 |
+
remote_protocol=None,
|
586 |
+
remote_options=None,
|
587 |
+
fs=None,
|
588 |
+
template_overrides=None,
|
589 |
+
simple_templates=True,
|
590 |
+
max_gap=64_000,
|
591 |
+
max_block=256_000_000,
|
592 |
+
cache_size=128,
|
593 |
+
**kwargs,
|
594 |
+
):
|
595 |
+
"""
|
596 |
+
Parameters
|
597 |
+
----------
|
598 |
+
fo : dict or str
|
599 |
+
The set of references to use for this instance, with a structure as above.
|
600 |
+
If str referencing a JSON file, will use fsspec.open, in conjunction
|
601 |
+
with target_options and target_protocol to open and parse JSON at this
|
602 |
+
location. If a directory, then assume references are a set of parquet
|
603 |
+
files to be loaded lazily.
|
604 |
+
target : str
|
605 |
+
For any references having target_url as None, this is the default file
|
606 |
+
target to use
|
607 |
+
ref_storage_args : dict
|
608 |
+
If references is a str, use these kwargs for loading the JSON file.
|
609 |
+
Deprecated: use target_options instead.
|
610 |
+
target_protocol : str
|
611 |
+
Used for loading the reference file, if it is a path. If None, protocol
|
612 |
+
will be derived from the given path
|
613 |
+
target_options : dict
|
614 |
+
Extra FS options for loading the reference file ``fo``, if given as a path
|
615 |
+
remote_protocol : str
|
616 |
+
The protocol of the filesystem on which the references will be evaluated
|
617 |
+
(unless fs is provided). If not given, will be derived from the first
|
618 |
+
URL that has a protocol in the templates or in the references, in that
|
619 |
+
order.
|
620 |
+
remote_options : dict
|
621 |
+
kwargs to go with remote_protocol
|
622 |
+
fs : AbstractFileSystem | dict(str, (AbstractFileSystem | dict))
|
623 |
+
Directly provide a file system(s):
|
624 |
+
- a single filesystem instance
|
625 |
+
- a dict of protocol:filesystem, where each value is either a filesystem
|
626 |
+
instance, or a dict of kwargs that can be used to create in
|
627 |
+
instance for the given protocol
|
628 |
+
|
629 |
+
If this is given, remote_options and remote_protocol are ignored.
|
630 |
+
template_overrides : dict
|
631 |
+
Swap out any templates in the references file with these - useful for
|
632 |
+
testing.
|
633 |
+
simple_templates: bool
|
634 |
+
Whether templates can be processed with simple replace (True) or if
|
635 |
+
jinja is needed (False, much slower). All reference sets produced by
|
636 |
+
``kerchunk`` are simple in this sense, but the spec allows for complex.
|
637 |
+
max_gap, max_block: int
|
638 |
+
For merging multiple concurrent requests to the same remote file.
|
639 |
+
Neighboring byte ranges will only be merged when their
|
640 |
+
inter-range gap is <= ``max_gap``. Default is 64KB. Set to 0
|
641 |
+
to only merge when it requires no extra bytes. Pass a negative
|
642 |
+
number to disable merging, appropriate for local target files.
|
643 |
+
Neighboring byte ranges will only be merged when the size of
|
644 |
+
the aggregated range is <= ``max_block``. Default is 256MB.
|
645 |
+
cache_size : int
|
646 |
+
Maximum size of LRU cache, where cache_size*record_size denotes
|
647 |
+
the total number of references that can be loaded in memory at once.
|
648 |
+
Only used for lazily loaded references.
|
649 |
+
kwargs : passed to parent class
|
650 |
+
"""
|
651 |
+
super().__init__(**kwargs)
|
652 |
+
self.target = target
|
653 |
+
self.template_overrides = template_overrides
|
654 |
+
self.simple_templates = simple_templates
|
655 |
+
self.templates = {}
|
656 |
+
self.fss = {}
|
657 |
+
self._dircache = {}
|
658 |
+
self.max_gap = max_gap
|
659 |
+
self.max_block = max_block
|
660 |
+
if isinstance(fo, str):
|
661 |
+
dic = dict(
|
662 |
+
**(ref_storage_args or target_options or {}), protocol=target_protocol
|
663 |
+
)
|
664 |
+
ref_fs, fo2 = fsspec.core.url_to_fs(fo, **dic)
|
665 |
+
if ref_fs.isfile(fo2):
|
666 |
+
# text JSON
|
667 |
+
with fsspec.open(fo, "rb", **dic) as f:
|
668 |
+
logger.info("Read reference from URL %s", fo)
|
669 |
+
text = json.load(f)
|
670 |
+
self._process_references(text, template_overrides)
|
671 |
+
else:
|
672 |
+
# Lazy parquet refs
|
673 |
+
logger.info("Open lazy reference dict from URL %s", fo)
|
674 |
+
self.references = LazyReferenceMapper(
|
675 |
+
fo2,
|
676 |
+
fs=ref_fs,
|
677 |
+
cache_size=cache_size,
|
678 |
+
)
|
679 |
+
else:
|
680 |
+
# dictionaries
|
681 |
+
self._process_references(fo, template_overrides)
|
682 |
+
if isinstance(fs, dict):
|
683 |
+
self.fss = {
|
684 |
+
k: (
|
685 |
+
fsspec.filesystem(k.split(":", 1)[0], **opts)
|
686 |
+
if isinstance(opts, dict)
|
687 |
+
else opts
|
688 |
+
)
|
689 |
+
for k, opts in fs.items()
|
690 |
+
}
|
691 |
+
if None not in self.fss:
|
692 |
+
self.fss[None] = filesystem("file")
|
693 |
+
return
|
694 |
+
if fs is not None:
|
695 |
+
# single remote FS
|
696 |
+
remote_protocol = (
|
697 |
+
fs.protocol[0] if isinstance(fs.protocol, tuple) else fs.protocol
|
698 |
+
)
|
699 |
+
self.fss[remote_protocol] = fs
|
700 |
+
|
701 |
+
if remote_protocol is None:
|
702 |
+
# get single protocol from any templates
|
703 |
+
for ref in self.templates.values():
|
704 |
+
if callable(ref):
|
705 |
+
ref = ref()
|
706 |
+
protocol, _ = fsspec.core.split_protocol(ref)
|
707 |
+
if protocol and protocol not in self.fss:
|
708 |
+
fs = filesystem(protocol, **(remote_options or {}))
|
709 |
+
self.fss[protocol] = fs
|
710 |
+
if remote_protocol is None:
|
711 |
+
# get single protocol from references
|
712 |
+
# TODO: warning here, since this can be very expensive?
|
713 |
+
for ref in self.references.values():
|
714 |
+
if callable(ref):
|
715 |
+
ref = ref()
|
716 |
+
if isinstance(ref, list) and ref[0]:
|
717 |
+
protocol, _ = fsspec.core.split_protocol(ref[0])
|
718 |
+
if protocol not in self.fss:
|
719 |
+
fs = filesystem(protocol, **(remote_options or {}))
|
720 |
+
self.fss[protocol] = fs
|
721 |
+
# only use first remote URL
|
722 |
+
break
|
723 |
+
|
724 |
+
if remote_protocol and remote_protocol not in self.fss:
|
725 |
+
fs = filesystem(remote_protocol, **(remote_options or {}))
|
726 |
+
self.fss[remote_protocol] = fs
|
727 |
+
|
728 |
+
self.fss[None] = fs or filesystem("file") # default one
|
729 |
+
|
730 |
+
def _cat_common(self, path, start=None, end=None):
|
731 |
+
path = self._strip_protocol(path)
|
732 |
+
logger.debug(f"cat: {path}")
|
733 |
+
try:
|
734 |
+
part = self.references[path]
|
735 |
+
except KeyError:
|
736 |
+
raise FileNotFoundError(path)
|
737 |
+
if isinstance(part, str):
|
738 |
+
part = part.encode()
|
739 |
+
if isinstance(part, bytes):
|
740 |
+
logger.debug(f"Reference: {path}, type bytes")
|
741 |
+
if part.startswith(b"base64:"):
|
742 |
+
part = base64.b64decode(part[7:])
|
743 |
+
return part, None, None
|
744 |
+
|
745 |
+
if len(part) == 1:
|
746 |
+
logger.debug(f"Reference: {path}, whole file => {part}")
|
747 |
+
url = part[0]
|
748 |
+
start1, end1 = start, end
|
749 |
+
else:
|
750 |
+
url, start0, size = part
|
751 |
+
logger.debug(f"Reference: {path} => {url}, offset {start0}, size {size}")
|
752 |
+
end0 = start0 + size
|
753 |
+
|
754 |
+
if start is not None:
|
755 |
+
if start >= 0:
|
756 |
+
start1 = start0 + start
|
757 |
+
else:
|
758 |
+
start1 = end0 + start
|
759 |
+
else:
|
760 |
+
start1 = start0
|
761 |
+
if end is not None:
|
762 |
+
if end >= 0:
|
763 |
+
end1 = start0 + end
|
764 |
+
else:
|
765 |
+
end1 = end0 + end
|
766 |
+
else:
|
767 |
+
end1 = end0
|
768 |
+
if url is None:
|
769 |
+
url = self.target
|
770 |
+
return url, start1, end1
|
771 |
+
|
772 |
+
async def _cat_file(self, path, start=None, end=None, **kwargs):
|
773 |
+
part_or_url, start0, end0 = self._cat_common(path, start=start, end=end)
|
774 |
+
if isinstance(part_or_url, bytes):
|
775 |
+
return part_or_url[start:end]
|
776 |
+
protocol, _ = split_protocol(part_or_url)
|
777 |
+
try:
|
778 |
+
await self.fss[protocol]._cat_file(part_or_url, start=start, end=end)
|
779 |
+
except Exception as e:
|
780 |
+
raise ReferenceNotReachable(path, part_or_url) from e
|
781 |
+
|
782 |
+
def cat_file(self, path, start=None, end=None, **kwargs):
|
783 |
+
part_or_url, start0, end0 = self._cat_common(path, start=start, end=end)
|
784 |
+
if isinstance(part_or_url, bytes):
|
785 |
+
return part_or_url[start:end]
|
786 |
+
protocol, _ = split_protocol(part_or_url)
|
787 |
+
try:
|
788 |
+
return self.fss[protocol].cat_file(part_or_url, start=start0, end=end0)
|
789 |
+
except Exception as e:
|
790 |
+
raise ReferenceNotReachable(path, part_or_url) from e
|
791 |
+
|
792 |
+
def pipe_file(self, path, value, **_):
|
793 |
+
"""Temporarily add binary data or reference as a file"""
|
794 |
+
self.references[path] = value
|
795 |
+
|
796 |
+
async def _get_file(self, rpath, lpath, **kwargs):
|
797 |
+
if self.isdir(rpath):
|
798 |
+
return os.makedirs(lpath, exist_ok=True)
|
799 |
+
data = await self._cat_file(rpath)
|
800 |
+
with open(lpath, "wb") as f:
|
801 |
+
f.write(data)
|
802 |
+
|
803 |
+
def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, **kwargs):
|
804 |
+
if self.isdir(rpath):
|
805 |
+
return os.makedirs(lpath, exist_ok=True)
|
806 |
+
data = self.cat_file(rpath, **kwargs)
|
807 |
+
callback.set_size(len(data))
|
808 |
+
if isfilelike(lpath):
|
809 |
+
lpath.write(data)
|
810 |
+
else:
|
811 |
+
with open(lpath, "wb") as f:
|
812 |
+
f.write(data)
|
813 |
+
callback.absolute_update(len(data))
|
814 |
+
|
815 |
+
def get(self, rpath, lpath, recursive=False, **kwargs):
|
816 |
+
if recursive:
|
817 |
+
# trigger directory build
|
818 |
+
self.ls("")
|
819 |
+
rpath = self.expand_path(rpath, recursive=recursive)
|
820 |
+
fs = fsspec.filesystem("file", auto_mkdir=True)
|
821 |
+
targets = other_paths(rpath, lpath)
|
822 |
+
if recursive:
|
823 |
+
data = self.cat([r for r in rpath if not self.isdir(r)])
|
824 |
+
else:
|
825 |
+
data = self.cat(rpath)
|
826 |
+
for remote, local in zip(rpath, targets):
|
827 |
+
if remote in data:
|
828 |
+
fs.pipe_file(local, data[remote])
|
829 |
+
|
830 |
+
def cat(self, path, recursive=False, on_error="raise", **kwargs):
|
831 |
+
if isinstance(path, str) and recursive:
|
832 |
+
raise NotImplementedError
|
833 |
+
if isinstance(path, list) and (recursive or any("*" in p for p in path)):
|
834 |
+
raise NotImplementedError
|
835 |
+
# TODO: if references is lazy, pre-fetch all paths in batch before access
|
836 |
+
proto_dict = _protocol_groups(path, self.references)
|
837 |
+
out = {}
|
838 |
+
for proto, paths in proto_dict.items():
|
839 |
+
fs = self.fss[proto]
|
840 |
+
urls, starts, ends, valid_paths = [], [], [], []
|
841 |
+
for p in paths:
|
842 |
+
# find references or label not-found. Early exit if any not
|
843 |
+
# found and on_error is "raise"
|
844 |
+
try:
|
845 |
+
u, s, e = self._cat_common(p)
|
846 |
+
except FileNotFoundError as err:
|
847 |
+
if on_error == "raise":
|
848 |
+
raise
|
849 |
+
if on_error != "omit":
|
850 |
+
out[p] = err
|
851 |
+
else:
|
852 |
+
urls.append(u)
|
853 |
+
starts.append(s)
|
854 |
+
ends.append(e)
|
855 |
+
valid_paths.append(p)
|
856 |
+
|
857 |
+
# process references into form for merging
|
858 |
+
urls2 = []
|
859 |
+
starts2 = []
|
860 |
+
ends2 = []
|
861 |
+
paths2 = []
|
862 |
+
whole_files = set()
|
863 |
+
for u, s, e, p in zip(urls, starts, ends, valid_paths):
|
864 |
+
if isinstance(u, bytes):
|
865 |
+
# data
|
866 |
+
out[p] = u
|
867 |
+
elif s is None:
|
868 |
+
# whole file - limits are None, None, but no further
|
869 |
+
# entries take for this file
|
870 |
+
whole_files.add(u)
|
871 |
+
urls2.append(u)
|
872 |
+
starts2.append(s)
|
873 |
+
ends2.append(e)
|
874 |
+
paths2.append(p)
|
875 |
+
for u, s, e, p in zip(urls, starts, ends, valid_paths):
|
876 |
+
# second run to account for files that are to be loaded whole
|
877 |
+
if s is not None and u not in whole_files:
|
878 |
+
urls2.append(u)
|
879 |
+
starts2.append(s)
|
880 |
+
ends2.append(e)
|
881 |
+
paths2.append(p)
|
882 |
+
|
883 |
+
# merge and fetch consolidated ranges
|
884 |
+
new_paths, new_starts, new_ends = merge_offset_ranges(
|
885 |
+
list(urls2),
|
886 |
+
list(starts2),
|
887 |
+
list(ends2),
|
888 |
+
sort=True,
|
889 |
+
max_gap=self.max_gap,
|
890 |
+
max_block=self.max_block,
|
891 |
+
)
|
892 |
+
bytes_out = fs.cat_ranges(new_paths, new_starts, new_ends)
|
893 |
+
|
894 |
+
# unbundle from merged bytes - simple approach
|
895 |
+
for u, s, e, p in zip(urls, starts, ends, valid_paths):
|
896 |
+
if p in out:
|
897 |
+
continue # was bytes, already handled
|
898 |
+
for np, ns, ne, b in zip(new_paths, new_starts, new_ends, bytes_out):
|
899 |
+
if np == u and (ns is None or ne is None):
|
900 |
+
if isinstance(b, Exception):
|
901 |
+
out[p] = b
|
902 |
+
else:
|
903 |
+
out[p] = b[s:e]
|
904 |
+
elif np == u and s >= ns and e <= ne:
|
905 |
+
if isinstance(b, Exception):
|
906 |
+
out[p] = b
|
907 |
+
else:
|
908 |
+
out[p] = b[s - ns : (e - ne) or None]
|
909 |
+
|
910 |
+
for k, v in out.copy().items():
|
911 |
+
# these were valid references, but fetch failed, so transform exc
|
912 |
+
if isinstance(v, Exception) and k in self.references:
|
913 |
+
ex = out[k]
|
914 |
+
new_ex = ReferenceNotReachable(k, self.references[k])
|
915 |
+
new_ex.__cause__ = ex
|
916 |
+
if on_error == "raise":
|
917 |
+
raise new_ex
|
918 |
+
elif on_error != "omit":
|
919 |
+
out[k] = new_ex
|
920 |
+
|
921 |
+
if len(out) == 1 and isinstance(path, str) and "*" not in path:
|
922 |
+
return _first(out)
|
923 |
+
return out
|
924 |
+
|
925 |
+
def _process_references(self, references, template_overrides=None):
|
926 |
+
vers = references.get("version", None)
|
927 |
+
if vers is None:
|
928 |
+
self._process_references0(references)
|
929 |
+
elif vers == 1:
|
930 |
+
self._process_references1(references, template_overrides=template_overrides)
|
931 |
+
else:
|
932 |
+
raise ValueError(f"Unknown reference spec version: {vers}")
|
933 |
+
# TODO: we make dircache by iterating over all entries, but for Spec >= 1,
|
934 |
+
# can replace with programmatic. Is it even needed for mapper interface?
|
935 |
+
|
936 |
+
def _process_references0(self, references):
|
937 |
+
"""Make reference dict for Spec Version 0"""
|
938 |
+
self.references = references
|
939 |
+
|
940 |
+
def _process_references1(self, references, template_overrides=None):
|
941 |
+
if not self.simple_templates or self.templates:
|
942 |
+
import jinja2
|
943 |
+
self.references = {}
|
944 |
+
self._process_templates(references.get("templates", {}))
|
945 |
+
|
946 |
+
@lru_cache(1000)
|
947 |
+
def _render_jinja(u):
|
948 |
+
return jinja2.Template(u).render(**self.templates)
|
949 |
+
|
950 |
+
for k, v in references.get("refs", {}).items():
|
951 |
+
if isinstance(v, str):
|
952 |
+
if v.startswith("base64:"):
|
953 |
+
self.references[k] = base64.b64decode(v[7:])
|
954 |
+
self.references[k] = v
|
955 |
+
elif self.templates:
|
956 |
+
u = v[0]
|
957 |
+
if "{{" in u:
|
958 |
+
if self.simple_templates:
|
959 |
+
u = (
|
960 |
+
u.replace("{{", "{")
|
961 |
+
.replace("}}", "}")
|
962 |
+
.format(**self.templates)
|
963 |
+
)
|
964 |
+
else:
|
965 |
+
u = _render_jinja(u)
|
966 |
+
self.references[k] = [u] if len(v) == 1 else [u, v[1], v[2]]
|
967 |
+
else:
|
968 |
+
self.references[k] = v
|
969 |
+
self.references.update(self._process_gen(references.get("gen", [])))
|
970 |
+
|
971 |
+
def _process_templates(self, tmp):
|
972 |
+
self.templates = {}
|
973 |
+
if self.template_overrides is not None:
|
974 |
+
tmp.update(self.template_overrides)
|
975 |
+
for k, v in tmp.items():
|
976 |
+
if "{{" in v:
|
977 |
+
import jinja2
|
978 |
+
|
979 |
+
self.templates[k] = lambda temp=v, **kwargs: jinja2.Template(
|
980 |
+
temp
|
981 |
+
).render(**kwargs)
|
982 |
+
else:
|
983 |
+
self.templates[k] = v
|
984 |
+
|
985 |
+
def _process_gen(self, gens):
|
986 |
+
out = {}
|
987 |
+
for gen in gens:
|
988 |
+
dimension = {
|
989 |
+
k: v
|
990 |
+
if isinstance(v, list)
|
991 |
+
else range(v.get("start", 0), v["stop"], v.get("step", 1))
|
992 |
+
for k, v in gen["dimensions"].items()
|
993 |
+
}
|
994 |
+
products = (
|
995 |
+
dict(zip(dimension.keys(), values))
|
996 |
+
for values in itertools.product(*dimension.values())
|
997 |
+
)
|
998 |
+
for pr in products:
|
999 |
+
import jinja2
|
1000 |
+
|
1001 |
+
key = jinja2.Template(gen["key"]).render(**pr, **self.templates)
|
1002 |
+
url = jinja2.Template(gen["url"]).render(**pr, **self.templates)
|
1003 |
+
if ("offset" in gen) and ("length" in gen):
|
1004 |
+
offset = int(
|
1005 |
+
jinja2.Template(gen["offset"]).render(**pr, **self.templates)
|
1006 |
+
)
|
1007 |
+
length = int(
|
1008 |
+
jinja2.Template(gen["length"]).render(**pr, **self.templates)
|
1009 |
+
)
|
1010 |
+
out[key] = [url, offset, length]
|
1011 |
+
elif ("offset" in gen) ^ ("length" in gen):
|
1012 |
+
raise ValueError(
|
1013 |
+
"Both 'offset' and 'length' are required for a "
|
1014 |
+
"reference generator entry if either is provided."
|
1015 |
+
)
|
1016 |
+
else:
|
1017 |
+
out[key] = [url]
|
1018 |
+
return out
|
1019 |
+
|
1020 |
+
def _dircache_from_items(self):
|
1021 |
+
self.dircache = {"": []}
|
1022 |
+
it = self.references.items()
|
1023 |
+
for path, part in it:
|
1024 |
+
if isinstance(part, (bytes, str)):
|
1025 |
+
size = len(part)
|
1026 |
+
elif len(part) == 1:
|
1027 |
+
size = None
|
1028 |
+
else:
|
1029 |
+
_, _, size = part
|
1030 |
+
par = path.rsplit("/", 1)[0] if "/" in path else ""
|
1031 |
+
par0 = par
|
1032 |
+
subdirs = [par0]
|
1033 |
+
while par0 and par0 not in self.dircache:
|
1034 |
+
# collect parent directories
|
1035 |
+
par0 = self._parent(par0)
|
1036 |
+
subdirs.append(par0)
|
1037 |
+
|
1038 |
+
subdirs = subdirs[::-1]
|
1039 |
+
for parent, child in zip(subdirs, subdirs[1:]):
|
1040 |
+
# register newly discovered directories
|
1041 |
+
assert child not in self.dircache
|
1042 |
+
assert parent in self.dircache
|
1043 |
+
self.dircache[parent].append(
|
1044 |
+
{"name": child, "type": "directory", "size": 0}
|
1045 |
+
)
|
1046 |
+
self.dircache[child] = []
|
1047 |
+
|
1048 |
+
self.dircache[par].append({"name": path, "type": "file", "size": size})
|
1049 |
+
|
1050 |
+
def _open(self, path, mode="rb", block_size=None, cache_options=None, **kwargs):
|
1051 |
+
data = self.cat_file(path) # load whole chunk into memory
|
1052 |
+
return io.BytesIO(data)
|
1053 |
+
|
1054 |
+
def ls(self, path, detail=True, **kwargs):
|
1055 |
+
path = self._strip_protocol(path)
|
1056 |
+
if isinstance(self.references, LazyReferenceMapper):
|
1057 |
+
try:
|
1058 |
+
return self.references.ls(path, detail)
|
1059 |
+
except KeyError:
|
1060 |
+
pass
|
1061 |
+
raise FileNotFoundError(f"'{path}' is not a known key")
|
1062 |
+
if not self.dircache:
|
1063 |
+
self._dircache_from_items()
|
1064 |
+
out = self._ls_from_cache(path)
|
1065 |
+
if out is None:
|
1066 |
+
raise FileNotFoundError(path)
|
1067 |
+
if detail:
|
1068 |
+
return out
|
1069 |
+
return [o["name"] for o in out]
|
1070 |
+
|
1071 |
+
def exists(self, path, **kwargs): # overwrite auto-sync version
|
1072 |
+
return self.isdir(path) or self.isfile(path)
|
1073 |
+
|
1074 |
+
def isdir(self, path): # overwrite auto-sync version
|
1075 |
+
if self.dircache:
|
1076 |
+
return path in self.dircache
|
1077 |
+
elif isinstance(self.references, LazyReferenceMapper):
|
1078 |
+
return path in self.references.listdir("")
|
1079 |
+
else:
|
1080 |
+
# this may be faster than building dircache for single calls, but
|
1081 |
+
# by looping will be slow for many calls; could cache it?
|
1082 |
+
return any(_.startswith(f"{path}/") for _ in self.references)
|
1083 |
+
|
1084 |
+
def isfile(self, path): # overwrite auto-sync version
|
1085 |
+
return path in self.references
|
1086 |
+
|
1087 |
+
async def _ls(self, path, detail=True, **kwargs): # calls fast sync code
|
1088 |
+
return self.ls(path, detail, **kwargs)
|
1089 |
+
|
1090 |
+
def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
|
1091 |
+
if withdirs:
|
1092 |
+
return super().find(
|
1093 |
+
path, maxdepth=maxdepth, withdirs=withdirs, detail=detail, **kwargs
|
1094 |
+
)
|
1095 |
+
if path:
|
1096 |
+
path = self._strip_protocol(path)
|
1097 |
+
r = sorted(k for k in self.references if k.startswith(path))
|
1098 |
+
else:
|
1099 |
+
r = sorted(self.references)
|
1100 |
+
if detail:
|
1101 |
+
if not self.dircache:
|
1102 |
+
self._dircache_from_items()
|
1103 |
+
return {k: self._ls_from_cache(k)[0] for k in r}
|
1104 |
+
else:
|
1105 |
+
return r
|
1106 |
+
|
1107 |
+
def info(self, path, **kwargs):
|
1108 |
+
out = self.references.get(path)
|
1109 |
+
if out is not None:
|
1110 |
+
if isinstance(out, (str, bytes)):
|
1111 |
+
# decode base64 here
|
1112 |
+
return {"name": path, "type": "file", "size": len(out)}
|
1113 |
+
elif len(out) > 1:
|
1114 |
+
return {"name": path, "type": "file", "size": out[2]}
|
1115 |
+
else:
|
1116 |
+
out0 = [{"name": path, "type": "file", "size": None}]
|
1117 |
+
else:
|
1118 |
+
out = self.ls(path, True)
|
1119 |
+
out0 = [o for o in out if o["name"] == path]
|
1120 |
+
if not out0:
|
1121 |
+
return {"name": path, "type": "directory", "size": 0}
|
1122 |
+
if out0[0]["size"] is None:
|
1123 |
+
# if this is a whole remote file, update size using remote FS
|
1124 |
+
prot, _ = split_protocol(self.references[path][0])
|
1125 |
+
out0[0]["size"] = self.fss[prot].size(self.references[path][0])
|
1126 |
+
return out0[0]
|
1127 |
+
|
1128 |
+
async def _info(self, path, **kwargs): # calls fast sync code
|
1129 |
+
return self.info(path)
|
1130 |
+
|
1131 |
+
async def _rm_file(self, path, **kwargs):
|
1132 |
+
self.references.pop(
|
1133 |
+
path, None
|
1134 |
+
) # ignores FileNotFound, just as well for directories
|
1135 |
+
self.dircache.clear() # this is a bit heavy handed
|
1136 |
+
|
1137 |
+
async def _pipe_file(self, path, data):
|
1138 |
+
# can be str or bytes
|
1139 |
+
self.references[path] = data
|
1140 |
+
self.dircache.clear() # this is a bit heavy handed
|
1141 |
+
|
1142 |
+
async def _put_file(self, lpath, rpath, **kwargs):
|
1143 |
+
# puts binary
|
1144 |
+
with open(lpath, "rb") as f:
|
1145 |
+
self.references[rpath] = f.read()
|
1146 |
+
self.dircache.clear() # this is a bit heavy handed
|
1147 |
+
|
1148 |
+
def save_json(self, url, **storage_options):
|
1149 |
+
"""Write modified references into new location"""
|
1150 |
+
out = {}
|
1151 |
+
for k, v in self.references.items():
|
1152 |
+
if isinstance(v, bytes):
|
1153 |
+
try:
|
1154 |
+
out[k] = v.decode("ascii")
|
1155 |
+
except UnicodeDecodeError:
|
1156 |
+
out[k] = (b"base64:" + base64.b64encode(v)).decode()
|
1157 |
+
else:
|
1158 |
+
out[k] = v
|
1159 |
+
with fsspec.open(url, "wb", **storage_options) as f:
|
1160 |
+
f.write(json.dumps({"version": 1, "refs": out}).encode())
|
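The ReferenceFileSystem added above can be exercised with a small in-memory reference dict; the following sketch is not part of the packaged file and the target URL is invented.

# Usage sketch for ReferenceFileSystem (illustrative only; the URL is a placeholder).
import fsspec

refs = {
    "raw": b"inline bytes stored directly in the reference dict",
    "a/b": ["https://example.com/data.bin", 0, 100],  # (target, offset, size)
}
fs = fsspec.filesystem("reference", fo=refs, remote_protocol="https")
print(fs.cat_file("raw"))        # served straight from the dict, no network access
# fs.cat_file("a/b") would fetch bytes 0-99 of the target via the https filesystem
print(fs.ls("", detail=False))   # expect the implied directory 'a' plus 'raw'
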
.venv/Lib/site-packages/fsspec/implementations/sftp.py
ADDED
@@ -0,0 +1,180 @@
1 |
+
import datetime
|
2 |
+
import logging
|
3 |
+
import os
|
4 |
+
import types
|
5 |
+
import uuid
|
6 |
+
from stat import S_ISDIR, S_ISLNK
|
7 |
+
|
8 |
+
import paramiko
|
9 |
+
|
10 |
+
from .. import AbstractFileSystem
|
11 |
+
from ..utils import infer_storage_options
|
12 |
+
|
13 |
+
logger = logging.getLogger("fsspec.sftp")
|
14 |
+
|
15 |
+
|
16 |
+
class SFTPFileSystem(AbstractFileSystem):
|
17 |
+
"""Files over SFTP/SSH
|
18 |
+
|
19 |
+
Peer-to-peer filesystem over SSH using paramiko.
|
20 |
+
|
21 |
+
    Note: if using this with the ``open`` or ``open_files``, with full URLs,
    there is no way to tell if a path is relative, so all paths are assumed
    to be absolute.
    """

    protocol = "sftp", "ssh"

    def __init__(self, host, **ssh_kwargs):
        """

        Parameters
        ----------
        host: str
            Hostname or IP as a string
        temppath: str
            Location on the server to put files, when within a transaction
        ssh_kwargs: dict
            Parameters passed on to connection. See details in
            https://docs.paramiko.org/en/3.3/api/client.html#paramiko.client.SSHClient.connect
            May include port, username, password...
        """
        if self._cached:
            return
        super().__init__(**ssh_kwargs)
        self.temppath = ssh_kwargs.pop("temppath", "/tmp")  # remote temp directory
        self.host = host
        self.ssh_kwargs = ssh_kwargs
        self._connect()

    def _connect(self):
        logger.debug("Connecting to SFTP server %s", self.host)
        self.client = paramiko.SSHClient()
        self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        self.client.connect(self.host, **self.ssh_kwargs)
        self.ftp = self.client.open_sftp()

    @classmethod
    def _strip_protocol(cls, path):
        return infer_storage_options(path)["path"]

    @staticmethod
    def _get_kwargs_from_urls(urlpath):
        out = infer_storage_options(urlpath)
        out.pop("path", None)
        out.pop("protocol", None)
        return out

    def mkdir(self, path, create_parents=True, mode=511):
        logger.debug("Creating folder %s", path)
        if self.exists(path):
            raise FileExistsError(f"File exists: {path}")

        if create_parents:
            self.makedirs(path)
        else:
            self.ftp.mkdir(path, mode)

    def makedirs(self, path, exist_ok=False, mode=511):
        if self.exists(path) and not exist_ok:
            raise FileExistsError(f"File exists: {path}")

        parts = path.split("/")
        new_path = "/" if path[:1] == "/" else ""

        for part in parts:
            if part:
                new_path = f"{new_path}/{part}" if new_path else part
                if not self.exists(new_path):
                    self.ftp.mkdir(new_path, mode)

    def rmdir(self, path):
        logger.debug("Removing folder %s", path)
        self.ftp.rmdir(path)

    def info(self, path):
        stat = self._decode_stat(self.ftp.stat(path))
        stat["name"] = path
        return stat

    @staticmethod
    def _decode_stat(stat, parent_path=None):
        if S_ISDIR(stat.st_mode):
            t = "directory"
        elif S_ISLNK(stat.st_mode):
            t = "link"
        else:
            t = "file"
        out = {
            "name": "",
            "size": stat.st_size,
            "type": t,
            "uid": stat.st_uid,
            "gid": stat.st_gid,
            "time": datetime.datetime.fromtimestamp(
                stat.st_atime, tz=datetime.timezone.utc
            ),
            "mtime": datetime.datetime.fromtimestamp(
                stat.st_mtime, tz=datetime.timezone.utc
            ),
        }
        if parent_path:
            out["name"] = "/".join([parent_path.rstrip("/"), stat.filename])
        return out

    def ls(self, path, detail=False):
        logger.debug("Listing folder %s", path)
        stats = [self._decode_stat(stat, path) for stat in self.ftp.listdir_iter(path)]
        if detail:
            return stats
        else:
            paths = [stat["name"] for stat in stats]
            return sorted(paths)

    def put(self, lpath, rpath, callback=None, **kwargs):
        logger.debug("Put file %s into %s", lpath, rpath)
        self.ftp.put(lpath, rpath)

    def get_file(self, rpath, lpath, **kwargs):
        if self.isdir(rpath):
            os.makedirs(lpath, exist_ok=True)
        else:
            self.ftp.get(self._strip_protocol(rpath), lpath)

    def _open(self, path, mode="rb", block_size=None, **kwargs):
        """
        block_size: int or None
            If 0, no buffering, if 1, line buffering, if >1, buffer that many
            bytes, if None use default from paramiko.
        """
        logger.debug("Opening file %s", path)
        if kwargs.get("autocommit", True) is False:
            # writes to temporary file, move on commit
            path2 = "/".join([self.temppath, str(uuid.uuid4())])
            f = self.ftp.open(path2, mode, bufsize=block_size if block_size else -1)
            f.temppath = path2
            f.targetpath = path
            f.fs = self
            f.commit = types.MethodType(commit_a_file, f)
            f.discard = types.MethodType(discard_a_file, f)
        else:
            f = self.ftp.open(path, mode, bufsize=block_size if block_size else -1)
        return f

    def _rm(self, path):
        if self.isdir(path):
            self.ftp.rmdir(path)
        else:
            self.ftp.remove(path)

    def mv(self, old, new):
        logger.debug("Renaming %s into %s", old, new)
        self.ftp.posix_rename(old, new)


def commit_a_file(self):
    self.fs.mv(self.temppath, self.targetpath)


def discard_a_file(self):
    self.fs._rm(self.temppath)
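For orientation, a minimal usage sketch for the SFTP filesystem shown above (this is not part of the packaged file; the host name, credentials and paths are made-up placeholders):

import fsspec

# "sftp" is one of the registered protocol names of SFTPFileSystem;
# the extra keyword arguments are forwarded to paramiko's SSHClient.connect().
fs = fsspec.filesystem("sftp", host="sftp.example.com", username="demo", password="demo")
print(fs.ls("/upload", detail=False))
with fs.open("/upload/report.txt", "rb") as f:
    print(f.read())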
.venv/Lib/site-packages/fsspec/implementations/smb.py
ADDED
@@ -0,0 +1,324 @@
"""
This module contains SMBFileSystem class responsible for handling access to
Windows Samba network shares by using package smbprotocol
"""

import datetime
import uuid
from stat import S_ISDIR, S_ISLNK

import smbclient

from .. import AbstractFileSystem
from ..utils import infer_storage_options

# ! pylint: disable=bad-continuation


class SMBFileSystem(AbstractFileSystem):
    """Allow reading and writing to Windows and Samba network shares.

    When using `fsspec.open()` for getting a file-like object the URI
    should be specified as this format:
    ``smb://workgroup;user:password@server:port/share/folder/file.csv``.

    Example::

        >>> import fsspec
        >>> with fsspec.open(
        ...     'smb://myuser:mypassword@myhost.com/' 'share/folder/file.csv'
        ... ) as smbfile:
        ...     df = pd.read_csv(smbfile, sep='|', header=None)

    Note that you need to pass in a valid hostname or IP address for the host
    component of the URL. Do not use the Windows/NetBIOS machine name for the
    host component.

    The first component of the path in the URL points to the name of the shared
    folder. Subsequent path components will point to the directory/folder/file.

    The URL components ``workgroup`` , ``user``, ``password`` and ``port`` may be
    optional.

    .. note::

        For working this source require `smbprotocol`_ to be installed, e.g.::

            $ pip install smbprotocol
            # or
            # pip install smbprotocol[kerberos]

    .. _smbprotocol: https://github.com/jborean93/smbprotocol#requirements

    Note: if using this with the ``open`` or ``open_files``, with full URLs,
    there is no way to tell if a path is relative, so all paths are assumed
    to be absolute.
    """

    protocol = "smb"

    # pylint: disable=too-many-arguments
    def __init__(
        self,
        host,
        port=None,
        username=None,
        password=None,
        timeout=60,
        encrypt=None,
        share_access=None,
        **kwargs,
    ):
        """
        You can use _get_kwargs_from_urls to get some kwargs from
        a reasonable SMB url.

        Authentication will be anonymous or integrated if username/password are not
        given.

        Parameters
        ----------
        host: str
            The remote server name/ip to connect to
        port: int or None
            Port to connect with. Usually 445, sometimes 139.
        username: str or None
            Username to connect with. Required if Kerberos auth is not being used.
        password: str or None
            User's password on the server, if using username
        timeout: int
            Connection timeout in seconds
        encrypt: bool
            Whether to force encryption or not, once this has been set to True
            the session cannot be changed back to False.
        share_access: str or None
            Specifies the default access applied to file open operations
            performed with this file system object.
            This affects whether other processes can concurrently open a handle
            to the same file.

            - None (the default): exclusively locks the file until closed.
            - 'r': Allow other handles to be opened with read access.
            - 'w': Allow other handles to be opened with write access.
            - 'd': Allow other handles to be opened with delete access.
        """
        super().__init__(**kwargs)
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.timeout = timeout
        self.encrypt = encrypt
        self.temppath = kwargs.pop("temppath", "")
        self.share_access = share_access
        self._connect()

    @property
    def _port(self):
        return 445 if self.port is None else self.port

    def _connect(self):
        smbclient.register_session(
            self.host,
            username=self.username,
            password=self.password,
            port=self._port,
            encrypt=self.encrypt,
            connection_timeout=self.timeout,
        )

    @classmethod
    def _strip_protocol(cls, path):
        return infer_storage_options(path)["path"]

    @staticmethod
    def _get_kwargs_from_urls(path):
        # smb://workgroup;user:password@host:port/share/folder/file.csv
        out = infer_storage_options(path)
        out.pop("path", None)
        out.pop("protocol", None)
        return out

    def mkdir(self, path, create_parents=True, **kwargs):
        wpath = _as_unc_path(self.host, path)
        if create_parents:
            smbclient.makedirs(wpath, exist_ok=False, port=self._port, **kwargs)
        else:
            smbclient.mkdir(wpath, port=self._port, **kwargs)

    def makedirs(self, path, exist_ok=False):
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            smbclient.makedirs(wpath, exist_ok=exist_ok, port=self._port)

    def rmdir(self, path):
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            smbclient.rmdir(wpath, port=self._port)

    def info(self, path, **kwargs):
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port, **kwargs)
        if S_ISDIR(stats.st_mode):
            stype = "directory"
        elif S_ISLNK(stats.st_mode):
            stype = "link"
        else:
            stype = "file"
        res = {
            "name": path + "/" if stype == "directory" else path,
            "size": stats.st_size,
            "type": stype,
            "uid": stats.st_uid,
            "gid": stats.st_gid,
            "time": stats.st_atime,
            "mtime": stats.st_mtime,
        }
        return res

    def created(self, path):
        """Return the created timestamp of a file as a datetime.datetime"""
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port)
        return datetime.datetime.fromtimestamp(stats.st_ctime, tz=datetime.timezone.utc)

    def modified(self, path):
        """Return the modified timestamp of a file as a datetime.datetime"""
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port)
        return datetime.datetime.fromtimestamp(stats.st_mtime, tz=datetime.timezone.utc)

    def ls(self, path, detail=True, **kwargs):
        unc = _as_unc_path(self.host, path)
        listed = smbclient.listdir(unc, port=self._port, **kwargs)
        dirs = ["/".join([path.rstrip("/"), p]) for p in listed]
        if detail:
            dirs = [self.info(d) for d in dirs]
        return dirs

    # pylint: disable=too-many-arguments
    def _open(
        self,
        path,
        mode="rb",
        block_size=-1,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        """
        block_size: int or None
            If 0, no buffering, 1, line buffering, >1, buffer that many bytes

        Notes
        -----
        By specifying 'share_access' in 'kwargs' it is possible to override the
        default shared access setting applied in the constructor of this object.
        """
        bls = block_size if block_size is not None and block_size >= 0 else -1
        wpath = _as_unc_path(self.host, path)
        share_access = kwargs.pop("share_access", self.share_access)
        if "w" in mode and autocommit is False:
            temp = _as_temp_path(self.host, path, self.temppath)
            return SMBFileOpener(
                wpath, temp, mode, port=self._port, block_size=bls, **kwargs
            )
        return smbclient.open_file(
            wpath,
            mode,
            buffering=bls,
            share_access=share_access,
            port=self._port,
            **kwargs,
        )

    def copy(self, path1, path2, **kwargs):
        """Copy within two locations in the same filesystem"""
        wpath1 = _as_unc_path(self.host, path1)
        wpath2 = _as_unc_path(self.host, path2)
        smbclient.copyfile(wpath1, wpath2, port=self._port, **kwargs)

    def _rm(self, path):
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            stats = smbclient.stat(wpath, port=self._port)
            if S_ISDIR(stats.st_mode):
                smbclient.rmdir(wpath, port=self._port)
            else:
                smbclient.remove(wpath, port=self._port)

    def mv(self, path1, path2, recursive=None, maxdepth=None, **kwargs):
        wpath1 = _as_unc_path(self.host, path1)
        wpath2 = _as_unc_path(self.host, path2)
        smbclient.rename(wpath1, wpath2, port=self._port, **kwargs)


def _as_unc_path(host, path):
    rpath = path.replace("/", "\\")
    unc = f"\\\\{host}{rpath}"
    return unc


def _as_temp_path(host, path, temppath):
    share = path.split("/")[1]
    temp_file = f"/{share}{temppath}/{uuid.uuid4()}"
    unc = _as_unc_path(host, temp_file)
    return unc


def _share_has_path(path):
    parts = path.count("/")
    if path.endswith("/"):
        return parts > 2
    return parts > 1


class SMBFileOpener:
    """writes to remote temporary file, move on commit"""

    def __init__(self, path, temp, mode, port=445, block_size=-1, **kwargs):
        self.path = path
        self.temp = temp
        self.mode = mode
        self.block_size = block_size
        self.kwargs = kwargs
        self.smbfile = None
        self._incontext = False
        self.port = port
        self._open()

    def _open(self):
        if self.smbfile is None or self.smbfile.closed:
            self.smbfile = smbclient.open_file(
                self.temp,
                self.mode,
                port=self.port,
                buffering=self.block_size,
                **self.kwargs,
            )

    def commit(self):
        """Move temp file to definitive on success."""
        # TODO: use transaction support in SMB protocol
        smbclient.replace(self.temp, self.path, port=self.port)

    def discard(self):
        """Remove the temp file on failure."""
        smbclient.remove(self.temp, port=self.port)

    def __fspath__(self):
        return self.path

    def __iter__(self):
        return self.smbfile.__iter__()

    def __getattr__(self, item):
        return getattr(self.smbfile, item)

    def __enter__(self):
        self._incontext = True
        return self.smbfile.__enter__()

    def __exit__(self, exc_type, exc_value, traceback):
        self._incontext = False
        self.smbfile.__exit__(exc_type, exc_value, traceback)
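A minimal, hypothetical usage sketch for the SMBFileSystem class above (the server name, credentials and share path are placeholders, not taken from the file):

from fsspec.implementations.smb import SMBFileSystem

# share_access="r" lets other processes keep read handles open on the same files
fs = SMBFileSystem("fileserver.local", username="myuser", password="secret", share_access="r")
print(fs.ls("/share/folder"))
with fs.open("/share/folder/file.csv", "rb") as f:
    header = f.read(1024)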
.venv/Lib/site-packages/fsspec/implementations/tar.py
ADDED
@@ -0,0 +1,124 @@
import logging
import tarfile

import fsspec
from fsspec.archive import AbstractArchiveFileSystem
from fsspec.compression import compr
from fsspec.utils import infer_compression

typemap = {b"0": "file", b"5": "directory"}

logger = logging.getLogger("tar")


class TarFileSystem(AbstractArchiveFileSystem):
    """Compressed Tar archives as a file-system (read-only)

    Supports the following formats:
    tar.gz, tar.bz2, tar.xz
    """

    root_marker = ""
    protocol = "tar"
    cachable = False

    def __init__(
        self,
        fo="",
        index_store=None,
        target_options=None,
        target_protocol=None,
        compression=None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        target_options = target_options or {}

        if isinstance(fo, str):
            self.of = fsspec.open(fo, protocol=target_protocol, **target_options)
            fo = self.of.open()  # keep the reference

        # Try to infer compression.
        if compression is None:
            name = None

            # Try different ways to get hold of the filename. `fo` might either
            # be a `fsspec.LocalFileOpener`, an `io.BufferedReader` or an
            # `fsspec.AbstractFileSystem` instance.
            try:
                # Amended io.BufferedReader or similar.
                # This uses a "protocol extension" where original filenames are
                # propagated to archive-like filesystems in order to let them
                # infer the right compression appropriately.
                if hasattr(fo, "original"):
                    name = fo.original

                # fsspec.LocalFileOpener
                elif hasattr(fo, "path"):
                    name = fo.path

                # io.BufferedReader
                elif hasattr(fo, "name"):
                    name = fo.name

                # fsspec.AbstractFileSystem
                elif hasattr(fo, "info"):
                    name = fo.info()["name"]

            except Exception as ex:
                logger.warning(
                    f"Unable to determine file name, not inferring compression: {ex}"
                )

            if name is not None:
                compression = infer_compression(name)
                logger.info(f"Inferred compression {compression} from file name {name}")

        if compression is not None:
            # TODO: tarfile already implements compression with modes like "'r:gz'",
            # but then would seek to offset in the file work?
            fo = compr[compression](fo)

        self._fo_ref = fo
        self.fo = fo  # the whole instance is a context
        self.tar = tarfile.TarFile(fileobj=self.fo)
        self.dir_cache = None

        self.index_store = index_store
        self.index = None
        self._index()

    def _index(self):
        # TODO: load and set saved index, if exists
        out = {}
        for ti in self.tar:
            info = ti.get_info()
            info["type"] = typemap.get(info["type"], "file")
            name = ti.get_info()["name"].rstrip("/")
            out[name] = (info, ti.offset_data)

        self.index = out
        # TODO: save index to self.index_store here, if set

    def _get_dirs(self):
        if self.dir_cache is not None:
            return

        # This enables ls to get directories as children as well as files
        self.dir_cache = {
            dirname: {"name": dirname, "size": 0, "type": "directory"}
            for dirname in self._all_dirnames(self.tar.getnames())
        }
        for member in self.tar.getmembers():
            info = member.get_info()
            info["name"] = info["name"].rstrip("/")
            info["type"] = typemap.get(info["type"], "file")
            self.dir_cache[info["name"]] = info

    def _open(self, path, mode="rb", **kwargs):
        if mode != "rb":
            raise ValueError("Read-only filesystem implementation")
        details, offset = self.index[path]
        if details["type"] != "file":
            raise ValueError("Can only handle regular files")
        return self.tar.extractfile(path)
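A small usage sketch for the read-only TarFileSystem above (the archive name and member path are placeholders; compression would be inferred from the ".tar.gz" suffix as the constructor describes):

import fsspec

fs = fsspec.filesystem("tar", fo="archive.tar.gz")
print(fs.ls("/"))  # top-level members of the archive
with fs.open("data/table.csv", "rb") as f:  # a member path inside the archive
    print(f.read(100))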
.venv/Lib/site-packages/fsspec/implementations/webhdfs.py
ADDED
@@ -0,0 +1,486 @@
# https://hadoop.apache.org/docs/r1.0.4/webhdfs.html

import logging
import os
import secrets
import shutil
import tempfile
import uuid
from contextlib import suppress
from urllib.parse import quote

import requests

from ..spec import AbstractBufferedFile, AbstractFileSystem
from ..utils import infer_storage_options, tokenize

logger = logging.getLogger("webhdfs")


class WebHDFS(AbstractFileSystem):
    """
    Interface to HDFS over HTTP using the WebHDFS API. Supports also HttpFS gateways.

    Four auth mechanisms are supported:

    insecure: no auth is done, and the user is assumed to be whoever they
        say they are (parameter ``user``), or a predefined value such as
        "dr.who" if not given
    spnego: when kerberos authentication is enabled, auth is negotiated by
        requests_kerberos https://github.com/requests/requests-kerberos .
        This establishes a session based on existing kinit login and/or
        specified principal/password; parameters are passed with ``kerb_kwargs``
    token: uses an existing Hadoop delegation token from another secured
        service. Indeed, this client can also generate such tokens when
        not insecure. Note that tokens expire, but can be renewed (by a
        previously specified user) and may allow for proxying.
    basic-auth: used when both parameter ``user`` and parameter ``password``
        are provided.

    """

    tempdir = str(tempfile.gettempdir())
    protocol = "webhdfs", "webHDFS"

    def __init__(
        self,
        host,
        port=50070,
        kerberos=False,
        token=None,
        user=None,
        password=None,
        proxy_to=None,
        kerb_kwargs=None,
        data_proxy=None,
        use_https=False,
        session_cert=None,
        session_verify=True,
        **kwargs,
    ):
        """
        Parameters
        ----------
        host: str
            Name-node address
        port: int
            Port for webHDFS
        kerberos: bool
            Whether to authenticate with kerberos for this connection
        token: str or None
            If given, use this token on every call to authenticate. A user
            and user-proxy may be encoded in the token and should not be also
            given
        user: str or None
            If given, assert the user name to connect with
        password: str or None
            If given, assert the password to use for basic auth. If password
            is provided, user must be provided also
        proxy_to: str or None
            If given, the user has the authority to proxy, and this value is
            the user in who's name actions are taken
        kerb_kwargs: dict
            Any extra arguments for HTTPKerberosAuth, see
            `<https://github.com/requests/requests-kerberos/blob/master/requests_kerberos/kerberos_.py>`_
        data_proxy: dict, callable or None
            If given, map data-node addresses. This can be necessary if the
            HDFS cluster is behind a proxy, running on Docker or otherwise has
            a mismatch between the host-names given by the name-node and the
            address by which to refer to them from the client. If a dict,
            maps host names ``host->data_proxy[host]``; if a callable, full
            URLs are passed, and function must conform to
            ``url->data_proxy(url)``.
        use_https: bool
            Whether to connect to the Name-node using HTTPS instead of HTTP
        session_cert: str or Tuple[str, str] or None
            Path to a certificate file, or tuple of (cert, key) files to use
            for the requests.Session
        session_verify: str, bool or None
            Path to a certificate file to use for verifying the requests.Session.
        kwargs
        """
        if self._cached:
            return
        super().__init__(**kwargs)
        self.url = (
            f"{'https' if use_https else 'http'}://{host}:{port}/webhdfs/v1"  # noqa
        )
        self.kerb = kerberos
        self.kerb_kwargs = kerb_kwargs or {}
        self.pars = {}
        self.proxy = data_proxy or {}
        if token is not None:
            if user is not None or proxy_to is not None:
                raise ValueError(
                    "If passing a delegation token, must not set "
                    "user or proxy_to, as these are encoded in the"
                    " token"
                )
            self.pars["delegation"] = token
        self.user = user
        self.password = password

        if password is not None:
            if user is None:
                raise ValueError(
                    "If passing a password, the user must also be"
                    "set in order to set up the basic-auth"
                )
        else:
            if user is not None:
                self.pars["user.name"] = user

        if proxy_to is not None:
            self.pars["doas"] = proxy_to
        if kerberos and user is not None:
            raise ValueError(
                "If using Kerberos auth, do not specify the "
                "user, this is handled by kinit."
            )

        self.session_cert = session_cert
        self.session_verify = session_verify

        self._connect()

        self._fsid = f"webhdfs_{tokenize(host, port)}"

    @property
    def fsid(self):
        return self._fsid

    def _connect(self):
        self.session = requests.Session()

        if self.session_cert:
            self.session.cert = self.session_cert

        self.session.verify = self.session_verify

        if self.kerb:
            from requests_kerberos import HTTPKerberosAuth

            self.session.auth = HTTPKerberosAuth(**self.kerb_kwargs)

        if self.user is not None and self.password is not None:
            from requests.auth import HTTPBasicAuth

            self.session.auth = HTTPBasicAuth(self.user, self.password)

    def _call(self, op, method="get", path=None, data=None, redirect=True, **kwargs):
        url = self._apply_proxy(self.url + quote(path or "", safe="/="))
        args = kwargs.copy()
        args.update(self.pars)
        args["op"] = op.upper()
        logger.debug("sending %s with %s", url, method)
        out = self.session.request(
            method=method.upper(),
            url=url,
            params=args,
            data=data,
            allow_redirects=redirect,
        )
        if out.status_code in [400, 401, 403, 404, 500]:
            try:
                err = out.json()
                msg = err["RemoteException"]["message"]
                exp = err["RemoteException"]["exception"]
            except (ValueError, KeyError):
                pass
            else:
                if exp in ["IllegalArgumentException", "UnsupportedOperationException"]:
                    raise ValueError(msg)
                elif exp in ["SecurityException", "AccessControlException"]:
                    raise PermissionError(msg)
                elif exp in ["FileNotFoundException"]:
                    raise FileNotFoundError(msg)
                else:
                    raise RuntimeError(msg)
        out.raise_for_status()
        return out

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        replication=None,
        permissions=None,
        **kwargs,
    ):
        """

        Parameters
        ----------
        path: str
            File location
        mode: str
            'rb', 'wb', etc.
        block_size: int
            Client buffer size for read-ahead or write buffer
        autocommit: bool
            If False, writes to temporary file that only gets put in final
            location upon commit
        replication: int
            Number of copies of file on the cluster, write mode only
        permissions: str or int
            posix permissions, write mode only
        kwargs

        Returns
        -------
        WebHDFile instance
        """
        block_size = block_size or self.blocksize
        return WebHDFile(
            self,
            path,
            mode=mode,
            block_size=block_size,
            tempdir=self.tempdir,
            autocommit=autocommit,
            replication=replication,
            permissions=permissions,
        )

    @staticmethod
    def _process_info(info):
        info["type"] = info["type"].lower()
        info["size"] = info["length"]
        return info

    @classmethod
    def _strip_protocol(cls, path):
        return infer_storage_options(path)["path"]

    @staticmethod
    def _get_kwargs_from_urls(urlpath):
        out = infer_storage_options(urlpath)
        out.pop("path", None)
        out.pop("protocol", None)
        if "username" in out:
            out["user"] = out.pop("username")
        return out

    def info(self, path):
        out = self._call("GETFILESTATUS", path=path)
        info = out.json()["FileStatus"]
        info["name"] = path
        return self._process_info(info)

    def ls(self, path, detail=False):
        out = self._call("LISTSTATUS", path=path)
        infos = out.json()["FileStatuses"]["FileStatus"]
        for info in infos:
            self._process_info(info)
            info["name"] = path.rstrip("/") + "/" + info["pathSuffix"]
        if detail:
            return sorted(infos, key=lambda i: i["name"])
        else:
            return sorted(info["name"] for info in infos)

    def content_summary(self, path):
        """Total numbers of files, directories and bytes under path"""
        out = self._call("GETCONTENTSUMMARY", path=path)
        return out.json()["ContentSummary"]

    def ukey(self, path):
        """Checksum info of file, giving method and result"""
        out = self._call("GETFILECHECKSUM", path=path, redirect=False)
        if "Location" in out.headers:
            location = self._apply_proxy(out.headers["Location"])
            out2 = self.session.get(location)
            out2.raise_for_status()
            return out2.json()["FileChecksum"]
        else:
            out.raise_for_status()
            return out.json()["FileChecksum"]

    def home_directory(self):
        """Get user's home directory"""
        out = self._call("GETHOMEDIRECTORY")
        return out.json()["Path"]

    def get_delegation_token(self, renewer=None):
        """Retrieve token which can give the same authority to other uses

        Parameters
        ----------
        renewer: str or None
            User who may use this token; if None, will be current user
        """
        if renewer:
            out = self._call("GETDELEGATIONTOKEN", renewer=renewer)
        else:
            out = self._call("GETDELEGATIONTOKEN")
        t = out.json()["Token"]
        if t is None:
            raise ValueError("No token available for this user/security context")
        return t["urlString"]

    def renew_delegation_token(self, token):
        """Make token live longer. Returns new expiry time"""
        out = self._call("RENEWDELEGATIONTOKEN", method="put", token=token)
        return out.json()["long"]

    def cancel_delegation_token(self, token):
        """Stop the token from being useful"""
        self._call("CANCELDELEGATIONTOKEN", method="put", token=token)

    def chmod(self, path, mod):
        """Set the permission at path

        Parameters
        ----------
        path: str
            location to set (file or directory)
        mod: str or int
            posix epresentation or permission, give as oct string, e.g, '777'
            or 0o777
        """
        self._call("SETPERMISSION", method="put", path=path, permission=mod)

    def chown(self, path, owner=None, group=None):
        """Change owning user and/or group"""
        kwargs = {}
        if owner is not None:
            kwargs["owner"] = owner
        if group is not None:
            kwargs["group"] = group
        self._call("SETOWNER", method="put", path=path, **kwargs)

    def set_replication(self, path, replication):
        """
        Set file replication factor

        Parameters
        ----------
        path: str
            File location (not for directories)
        replication: int
            Number of copies of file on the cluster. Should be smaller than
            number of data nodes; normally 3 on most systems.
        """
        self._call("SETREPLICATION", path=path, method="put", replication=replication)

    def mkdir(self, path, **kwargs):
        self._call("MKDIRS", method="put", path=path)

    def makedirs(self, path, exist_ok=False):
        if exist_ok is False and self.exists(path):
            raise FileExistsError(path)
        self.mkdir(path)

    def mv(self, path1, path2, **kwargs):
        self._call("RENAME", method="put", path=path1, destination=path2)

    def rm(self, path, recursive=False, **kwargs):
        self._call(
            "DELETE",
            method="delete",
            path=path,
            recursive="true" if recursive else "false",
        )

    def rm_file(self, path, **kwargs):
        self.rm(path)

    def cp_file(self, lpath, rpath, **kwargs):
        with self.open(lpath) as lstream:
            tmp_fname = "/".join([self._parent(rpath), f".tmp.{secrets.token_hex(16)}"])
            # Perform an atomic copy (stream to a temporary file and
            # move it to the actual destination).
            try:
                with self.open(tmp_fname, "wb") as rstream:
                    shutil.copyfileobj(lstream, rstream)
                self.mv(tmp_fname, rpath)
            except BaseException:  # noqa
                with suppress(FileNotFoundError):
                    self.rm(tmp_fname)
                raise

    def _apply_proxy(self, location):
        if self.proxy and callable(self.proxy):
            location = self.proxy(location)
        elif self.proxy:
            # as a dict
            for k, v in self.proxy.items():
                location = location.replace(k, v, 1)
        return location


class WebHDFile(AbstractBufferedFile):
    """A file living in HDFS over webHDFS"""

    def __init__(self, fs, path, **kwargs):
        super().__init__(fs, path, **kwargs)
        kwargs = kwargs.copy()
        if kwargs.get("permissions", None) is None:
            kwargs.pop("permissions", None)
        if kwargs.get("replication", None) is None:
            kwargs.pop("replication", None)
        self.permissions = kwargs.pop("permissions", 511)
        tempdir = kwargs.pop("tempdir")
        if kwargs.pop("autocommit", False) is False:
            self.target = self.path
            self.path = os.path.join(tempdir, str(uuid.uuid4()))

    def _upload_chunk(self, final=False):
        """Write one part of a multi-block file upload

        Parameters
        ==========
        final: bool
            This is the last block, so should complete file, if
            self.autocommit is True.
        """
        out = self.fs.session.post(
            self.location,
            data=self.buffer.getvalue(),
            headers={"content-type": "application/octet-stream"},
        )
        out.raise_for_status()
        return True

    def _initiate_upload(self):
        """Create remote file/upload"""
        kwargs = self.kwargs.copy()
        if "a" in self.mode:
            op, method = "APPEND", "POST"
        else:
            op, method = "CREATE", "PUT"
            kwargs["overwrite"] = "true"
        out = self.fs._call(op, method, self.path, redirect=False, **kwargs)
        location = self.fs._apply_proxy(out.headers["Location"])
        if "w" in self.mode:
            # create empty file to append to
            out2 = self.fs.session.put(
                location, headers={"content-type": "application/octet-stream"}
            )
            out2.raise_for_status()
            # after creating empty file, change location to append to
            out2 = self.fs._call("APPEND", "POST", self.path, redirect=False, **kwargs)
            self.location = self.fs._apply_proxy(out2.headers["Location"])

    def _fetch_range(self, start, end):
        start = max(start, 0)
        end = min(self.size, end)
        if start >= end or start >= self.size:
            return b""
        out = self.fs._call(
            "OPEN", path=self.path, offset=start, length=end - start, redirect=False
        )
        out.raise_for_status()
        if "Location" in out.headers:
            location = out.headers["Location"]
            out2 = self.fs.session.get(self.fs._apply_proxy(location))
            return out2.content
        else:
            return out.content

    def commit(self):
        self.fs.mv(self.path, self.target)

    def discard(self):
        self.fs.rm(self.path)
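A hedged usage sketch for the WebHDFS client above (the name-node address, port and paths are invented for illustration; port 9870 is common on recent Hadoop, while the constructor defaults to 50070):

from fsspec.implementations.webhdfs import WebHDFS

fs = WebHDFS(host="namenode.example.com", port=9870, user="hdfs")
print(fs.ls("/user/hdfs"))
with fs.open("/user/hdfs/example.csv", "rb") as f:
    print(f.read(100))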
.venv/Lib/site-packages/fsspec/implementations/zip.py
ADDED
@@ -0,0 +1,133 @@
import zipfile

import fsspec
from fsspec.archive import AbstractArchiveFileSystem


class ZipFileSystem(AbstractArchiveFileSystem):
    """Read/Write contents of ZIP archive as a file-system

    Keeps file object open while instance lives.

    This class is pickleable, but not necessarily thread-safe
    """

    root_marker = ""
    protocol = "zip"
    cachable = False

    def __init__(
        self,
        fo="",
        mode="r",
        target_protocol=None,
        target_options=None,
        compression=zipfile.ZIP_STORED,
        allowZip64=True,
        compresslevel=None,
        **kwargs,
    ):
        """
        Parameters
        ----------
        fo: str or file-like
            Contains ZIP, and must exist. If a str, will fetch file using
            :meth:`~fsspec.open_files`, which must return one file exactly.
        mode: str
            Accept: "r", "w", "a"
        target_protocol: str (optional)
            If ``fo`` is a string, this value can be used to override the
            FS protocol inferred from a URL
        target_options: dict (optional)
            Kwargs passed when instantiating the target FS, if ``fo`` is
            a string.
        compression, allowZip64, compresslevel: passed to ZipFile
            Only relevant when creating a ZIP
        """
        super().__init__(self, **kwargs)
        if mode not in set("rwa"):
            raise ValueError(f"mode '{mode}' no understood")
        self.mode = mode
        if isinstance(fo, str):
            if mode == "a":
                m = "r+b"
            else:
                m = mode + "b"
            fo = fsspec.open(
                fo, mode=m, protocol=target_protocol, **(target_options or {})
            )
        self.of = fo
        self.fo = fo.__enter__()  # the whole instance is a context
        self.zip = zipfile.ZipFile(
            self.fo,
            mode=mode,
            compression=compression,
            allowZip64=allowZip64,
            compresslevel=compresslevel,
        )
        self.dir_cache = None

    @classmethod
    def _strip_protocol(cls, path):
        # zip file paths are always relative to the archive root
        return super()._strip_protocol(path).lstrip("/")

    def __del__(self):
        if hasattr(self, "zip"):
            self.close()
            del self.zip

    def close(self):
        """Commits any write changes to the file. Done on ``del`` too."""
        self.zip.close()

    def _get_dirs(self):
        if self.dir_cache is None or self.mode in set("wa"):
            # when writing, dir_cache is always in the ZipFile's attributes,
            # not read from the file.
            files = self.zip.infolist()
            self.dir_cache = {
                dirname.rstrip("/"): {
                    "name": dirname.rstrip("/"),
                    "size": 0,
                    "type": "directory",
                }
                for dirname in self._all_dirnames(self.zip.namelist())
            }
            for z in files:
                f = {s: getattr(z, s, None) for s in zipfile.ZipInfo.__slots__}
                f.update(
                    {
                        "name": z.filename.rstrip("/"),
                        "size": z.file_size,
                        "type": ("directory" if z.is_dir() else "file"),
                    }
                )
                self.dir_cache[f["name"]] = f

    def pipe_file(self, path, value, **kwargs):
        # override upstream, because we know the exact file size in this case
        self.zip.writestr(path, value, **kwargs)

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        path = self._strip_protocol(path)
        if "r" in mode and self.mode in set("wa"):
            if self.exists(path):
                raise OSError("ZipFS can only be open for reading or writing, not both")
            raise FileNotFoundError(path)
        if "r" in self.mode and "w" in mode:
            raise OSError("ZipFS can only be open for reading or writing, not both")
        out = self.zip.open(path, mode.strip("b"))
        if "r" in mode:
            info = self.info(path)
            out.size = info["size"]
            out.name = info["name"]
        return out
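A minimal sketch of writing and then re-reading an archive with the ZipFileSystem above (the archive and member names are placeholders):

from fsspec.implementations.zip import ZipFileSystem

fs = ZipFileSystem("example.zip", mode="w")
fs.pipe_file("data/hello.txt", b"hello zip")  # writes one member directly
fs.close()                                    # flush and close the archive

fs = ZipFileSystem("example.zip")             # reopen read-only
with fs.open("data/hello.txt", "rb") as f:
    print(f.read())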
.venv/Lib/site-packages/fsspec/tests/abstract/__init__.py
ADDED
@@ -0,0 +1,287 @@
import os
from hashlib import md5

import pytest

from fsspec.implementations.local import LocalFileSystem
from fsspec.tests.abstract.copy import AbstractCopyTests  # noqa
from fsspec.tests.abstract.get import AbstractGetTests  # noqa
from fsspec.tests.abstract.put import AbstractPutTests  # noqa


class BaseAbstractFixtures:
    """
    Abstract base class containing fixtures that are used by but never need to
    be overridden in derived filesystem-specific classes to run the abstract
    tests on such filesystems.
    """

    @pytest.fixture
    def fs_bulk_operations_scenario_0(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used for many cp/get/put tests.

        Cleans up at the end of each test it which it is used.
        """
        source = self._bulk_operations_scenario_0(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_glob_edge_cases_files(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used for glob edge cases cp/get/put tests.

        Cleans up at the end of each test it which it is used.
        """
        source = self._glob_edge_cases_files(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_dir_and_file_with_same_name_prefix(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used to check cp/get/put on directory
        and file with the same name prefixes.

        Cleans up at the end of each test it which it is used.
        """
        source = self._dir_and_file_with_same_name_prefix(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_10_files_with_hashed_names(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used to check cp/get/put files order
        when source and destination are lists.

        Cleans up at the end of each test it which it is used.
        """
        source = self._10_files_with_hashed_names(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_target(self, fs, fs_join, fs_path):
        """
        Return name of remote directory that does not yet exist to copy into.

        Cleans up at the end of each test it which it is used.
        """
        target = fs_join(fs_path, "target")
        yield target
        if fs.exists(target):
            fs.rm(target, recursive=True)

    @pytest.fixture
    def local_bulk_operations_scenario_0(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used for many cp/get/put tests.

        Cleans up at the end of each test it which it is used.
        """
        source = self._bulk_operations_scenario_0(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_glob_edge_cases_files(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used for glob edge cases cp/get/put tests.

        Cleans up at the end of each test it which it is used.
        """
        source = self._glob_edge_cases_files(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_dir_and_file_with_same_name_prefix(
        self, local_fs, local_join, local_path
    ):
        """
        Scenario on local filesystem that is used to check cp/get/put on directory
        and file with the same name prefixes.

        Cleans up at the end of each test it which it is used.
        """
        source = self._dir_and_file_with_same_name_prefix(
            local_fs, local_join, local_path
        )
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_10_files_with_hashed_names(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used to check cp/get/put files order
        when source and destination are lists.

        Cleans up at the end of each test it which it is used.
        """
        source = self._10_files_with_hashed_names(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_target(self, local_fs, local_join, local_path):
        """
        Return name of local directory that does not yet exist to copy into.

        Cleans up at the end of each test it which it is used.
        """
        target = local_join(local_path, "target")
        yield target
        if local_fs.exists(target):
            local_fs.rm(target, recursive=True)

    def _glob_edge_cases_files(self, some_fs, some_join, some_path):
        """
        Scenario that is used for glob edge cases cp/get/put tests.
        Creates the following directory and file structure:

        📁 source
        ├── 📄 file1
        ├── 📄 file2
        ├── 📁 subdir0
        │   ├── 📄 subfile1
        │   ├── 📄 subfile2
        │   └── 📁 nesteddir
        │       └── 📄 nestedfile
        └── 📁 subdir1
            ├── 📄 subfile1
            ├── 📄 subfile2
            └── 📁 nesteddir
                └── 📄 nestedfile
        """
        source = some_join(some_path, "source")
        some_fs.touch(some_join(source, "file1"))
        some_fs.touch(some_join(source, "file2"))

        for subdir_idx in range(2):
            subdir = some_join(source, f"subdir{subdir_idx}")
            nesteddir = some_join(subdir, "nesteddir")
            some_fs.makedirs(nesteddir)
            some_fs.touch(some_join(subdir, "subfile1"))
            some_fs.touch(some_join(subdir, "subfile2"))
            some_fs.touch(some_join(nesteddir, "nestedfile"))

        return source

    def _bulk_operations_scenario_0(self, some_fs, some_join, some_path):
        """
        Scenario that is used for many cp/get/put tests. Creates the following
        directory and file structure:

        📁 source
        ├── 📄 file1
        ├── 📄 file2
        └── 📁 subdir
            ├── 📄 subfile1
            ├── 📄 subfile2
            └── 📁 nesteddir
                └── 📄 nestedfile
        """
        source = some_join(some_path, "source")
        subdir = some_join(source, "subdir")
        nesteddir = some_join(subdir, "nesteddir")
        some_fs.makedirs(nesteddir)
        some_fs.touch(some_join(source, "file1"))
        some_fs.touch(some_join(source, "file2"))
        some_fs.touch(some_join(subdir, "subfile1"))
        some_fs.touch(some_join(subdir, "subfile2"))
        some_fs.touch(some_join(nesteddir, "nestedfile"))
        return source

    def _dir_and_file_with_same_name_prefix(self, some_fs, some_join, some_path):
        """
        Scenario that is used to check cp/get/put on directory and file with
        the same name prefixes. Creates the following directory and file structure:

        📁 source
        ├── 📄 subdir.txt
        └── 📁 subdir
            └── 📄 subfile.txt
        """
        source = some_join(some_path, "source")
        subdir = some_join(source, "subdir")
        file = some_join(source, "subdir.txt")
        subfile = some_join(subdir, "subfile.txt")
        some_fs.makedirs(subdir)
        some_fs.touch(file)
        some_fs.touch(subfile)
        return source

    def _10_files_with_hashed_names(self, some_fs, some_join, some_path):
        """
        Scenario that is used to check cp/get/put files order when source and
        destination are lists. Creates the following directory and file structure:

        📁 source
        └── 📄 {hashed([0-9])}.txt
        """
        source = some_join(some_path, "source")
        for i in range(10):
            hashed_i = md5(str(i).encode("utf-8")).hexdigest()
            path = some_join(source, f"{hashed_i}.txt")
            some_fs.pipe(path=path, value=f"{i}".encode("utf-8"))
        return source


class AbstractFixtures(BaseAbstractFixtures):
    """
    Abstract base class containing fixtures that may be overridden in derived
    filesystem-specific classes to run the abstract tests on such filesystems.

    For any particular filesystem some of these fixtures must be overridden,
    such as ``fs`` and ``fs_path``, and others may be overridden if the
    default functions here are not appropriate, such as ``fs_join``.
    """

    @pytest.fixture
    def fs(self):
        raise NotImplementedError("This function must be overridden in derived classes")

    @pytest.fixture
    def fs_join(self):
        """
        Return a function that joins its arguments together into a path.

        Most fsspec implementations join paths in a platform-dependent way,
        but some will override this to always use a forward slash.
        """
        return os.path.join

    @pytest.fixture
    def fs_path(self):
        raise NotImplementedError("This function must be overridden in derived classes")

    @pytest.fixture(scope="class")
    def local_fs(self):
        # Maybe need an option for auto_mkdir=False? This is only relevant
        # for certain implementations.
        return LocalFileSystem(auto_mkdir=True)

    @pytest.fixture
    def local_join(self):
        """
        Return a function that joins its arguments together into a path, on
        the local filesystem.
        """
        return os.path.join

    @pytest.fixture
    def local_path(self, tmpdir):
        return tmpdir

    @pytest.fixture
    def supports_empty_directories(self):
        """
        Return whether this implementation supports empty directories.
        """
        return True

    @pytest.fixture
    def fs_sanitize_path(self):
        return lambda x: x
.venv/Lib/site-packages/fsspec/tests/abstract/common.py
ADDED
@@ -0,0 +1,175 @@
GLOB_EDGE_CASES_TESTS = {
    "argnames": ("path", "recursive", "maxdepth", "expected"),
    "argvalues": [
        ("fil?1", False, None, ["file1"]),
        ("fil?1", True, None, ["file1"]),
        ("file[1-2]", False, None, ["file1", "file2"]),
        ("file[1-2]", True, None, ["file1", "file2"]),
        ("*", False, None, ["file1", "file2"]),
        ("*", True, None, [
            "file1", "file2",
            "subdir0/subfile1", "subdir0/subfile2", "subdir0/nesteddir/nestedfile",
            "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
        ]),
        ("*", True, 1, ["file1", "file2"]),
        ("*", True, 2, [
            "file1", "file2",
            "subdir0/subfile1", "subdir0/subfile2",
            "subdir1/subfile1", "subdir1/subfile2",
        ]),
        ("*1", False, None, ["file1"]),
        ("*1", True, None, [
            "file1",
            "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
        ]),
        ("*1", True, 2, ["file1", "subdir1/subfile1", "subdir1/subfile2"]),
        ("**", False, None, [
            "file1", "file2",
            "subdir0/subfile1", "subdir0/subfile2", "subdir0/nesteddir/nestedfile",
            "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
        ]),
        ("**", True, None, [
            "file1", "file2",
            "subdir0/subfile1", "subdir0/subfile2", "subdir0/nesteddir/nestedfile",
            "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
        ]),
        ("**", True, 1, ["file1", "file2"]),
        ("**", True, 2, [
            "file1", "file2",
            "subdir0/subfile1", "subdir0/subfile2", "subdir0/nesteddir/nestedfile",
            "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
        ]),
        ("**", False, 2, [
            "file1", "file2",
            "subdir0/subfile1", "subdir0/subfile2",
            "subdir1/subfile1", "subdir1/subfile2",
        ]),
        ("**/*1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
        ("**/*1", True, None, [
            "file1",
            "subdir0/subfile1",
            "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
        ]),
        ("**/*1", True, 1, ["file1"]),
        ("**/*1", True, 2, [
            "file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2",
        ]),
        ("**/*1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
        ("**/subdir0", False, None, []),
        ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
        ("**/subdir0/nested*", False, 2, []),
        ("**/subdir0/nested*", True, 2, ["nestedfile"]),
        ("subdir[1-2]", False, None, []),
        ("subdir[1-2]", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
        ("subdir[1-2]", True, 2, ["subfile1", "subfile2"]),
        ("subdir[0-1]", False, None, []),
        ("subdir[0-1]", True, None, [
            "subdir0/subfile1", "subdir0/subfile2", "subdir0/nesteddir/nestedfile",
            "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
        ]),
        ("subdir[0-1]/*fil[e]*", False, None, [
            "subdir0/subfile1", "subdir0/subfile2",
            "subdir1/subfile1", "subdir1/subfile2",
        ]),
        ("subdir[0-1]/*fil[e]*", True, None, [
            "subdir0/subfile1", "subdir0/subfile2",
            "subdir1/subfile1", "subdir1/subfile2",
        ]),
    ],
}
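GLOB_EDGE_CASES_TESTS is shaped so it can be splatted straight into pytest.mark.parametrize, which is how copy.py, get.py and put.py below consume it. A minimal standalone sketch of that pattern (the test function name here is made up for illustration):

import pytest

from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS


@pytest.mark.parametrize(
    GLOB_EDGE_CASES_TESTS["argnames"], GLOB_EDGE_CASES_TESTS["argvalues"]
)
def test_glob_case_shape(path, recursive, maxdepth, expected):
    # Each case is: a glob pattern relative to the scenario root, the
    # recursive/maxdepth arguments to pass to cp/get/put, and the relative
    # paths expected at the destination afterwards.
    assert isinstance(path, str)
    assert isinstance(recursive, bool)
    assert maxdepth is None or maxdepth >= 1
    assert all(isinstance(p, str) for p in expected)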
.venv/Lib/site-packages/fsspec/tests/abstract/copy.py
ADDED
@@ -0,0 +1,557 @@
from hashlib import md5
from itertools import product

import pytest

from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS


class AbstractCopyTests:
    def test_copy_file_to_existing_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target, supports_empty_directories
    ):
        # Copy scenario 1a
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            fs.touch(fs_join(target, "dummy"))
        assert fs.isdir(target)

        target_file2 = fs_join(target, "file2")
        target_subfile1 = fs_join(target, "subfile1")

        # Copy from source directory
        fs.cp(fs_join(source, "file2"), target)
        assert fs.isfile(target_file2)

        # Copy from sub directory
        fs.cp(fs_join(source, "subdir", "subfile1"), target)
        assert fs.isfile(target_subfile1)

        # Remove copied files
        fs.rm([target_file2, target_subfile1])
        assert not fs.exists(target_file2)
        assert not fs.exists(target_subfile1)

        # Repeat with trailing slash on target
        fs.cp(fs_join(source, "file2"), target + "/")
        assert fs.isdir(target)
        assert fs.isfile(target_file2)

        fs.cp(fs_join(source, "subdir", "subfile1"), target + "/")
        assert fs.isfile(target_subfile1)

    def test_copy_file_to_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        # Copy scenario 1b
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        fs.cp(
            fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
        )  # Note trailing slash
        assert fs.isdir(target)
        assert fs.isdir(fs_join(target, "newdir"))
        assert fs.isfile(fs_join(target, "newdir", "subfile1"))

    def test_copy_file_to_file_in_existing_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target, supports_empty_directories
    ):
        # Copy scenario 1c
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            fs.touch(fs_join(target, "dummy"))
        assert fs.isdir(target)

        fs.cp(fs_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
        assert fs.isfile(fs_join(target, "newfile"))

    def test_copy_file_to_file_in_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        # Copy scenario 1d
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        fs.cp(
            fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir", "newfile")
        )
        assert fs.isdir(fs_join(target, "newdir"))
        assert fs.isfile(fs_join(target, "newdir", "newfile"))

    def test_copy_directory_to_existing_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target, supports_empty_directories
    ):
        # Copy scenario 1e
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            dummy = fs_join(target, "dummy")
            fs.touch(dummy)
        assert fs.isdir(target)

        for source_slash, target_slash in zip([False, True], [False, True]):
            s = fs_join(source, "subdir")
            if source_slash:
                s += "/"
            t = target + "/" if target_slash else target

            # Without recursive does nothing
            fs.cp(s, t)
            assert fs.ls(target, detail=False) == (
                [] if supports_empty_directories else [dummy]
            )

            # With recursive
            fs.cp(s, t, recursive=True)
            if source_slash:
                assert fs.isfile(fs_join(target, "subfile1"))
                assert fs.isfile(fs_join(target, "subfile2"))
                assert fs.isdir(fs_join(target, "nesteddir"))
                assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
                assert not fs.exists(fs_join(target, "subdir"))

                fs.rm(
                    [
                        fs_join(target, "subfile1"),
                        fs_join(target, "subfile2"),
                        fs_join(target, "nesteddir"),
                    ],
                    recursive=True,
                )
            else:
                assert fs.isdir(fs_join(target, "subdir"))
                assert fs.isfile(fs_join(target, "subdir", "subfile1"))
                assert fs.isfile(fs_join(target, "subdir", "subfile2"))
                assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
                assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))

                fs.rm(fs_join(target, "subdir"), recursive=True)
            assert fs.ls(target, detail=False) == (
                [] if supports_empty_directories else [dummy]
            )

            # Limit recursive by maxdepth
            fs.cp(s, t, recursive=True, maxdepth=1)
            if source_slash:
                assert fs.isfile(fs_join(target, "subfile1"))
                assert fs.isfile(fs_join(target, "subfile2"))
                assert not fs.exists(fs_join(target, "nesteddir"))
                assert not fs.exists(fs_join(target, "subdir"))

                fs.rm(
                    [
                        fs_join(target, "subfile1"),
                        fs_join(target, "subfile2"),
                    ],
                    recursive=True,
                )
            else:
                assert fs.isdir(fs_join(target, "subdir"))
                assert fs.isfile(fs_join(target, "subdir", "subfile1"))
                assert fs.isfile(fs_join(target, "subdir", "subfile2"))
                assert not fs.exists(fs_join(target, "subdir", "nesteddir"))

                fs.rm(fs_join(target, "subdir"), recursive=True)
            assert fs.ls(target, detail=False) == (
                [] if supports_empty_directories else [dummy]
            )

    def test_copy_directory_to_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target, supports_empty_directories
    ):
        # Copy scenario 1f
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        for source_slash, target_slash in zip([False, True], [False, True]):
            s = fs_join(source, "subdir")
            if source_slash:
                s += "/"
            t = fs_join(target, "newdir")
            if target_slash:
                t += "/"

            # Without recursive does nothing
            fs.cp(s, t)
            if supports_empty_directories:
                assert fs.ls(target) == []
            else:
                with pytest.raises(FileNotFoundError):
                    fs.ls(target)

            # With recursive
            fs.cp(s, t, recursive=True)
            assert fs.isdir(fs_join(target, "newdir"))
            assert fs.isfile(fs_join(target, "newdir", "subfile1"))
            assert fs.isfile(fs_join(target, "newdir", "subfile2"))
            assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
            assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
            assert not fs.exists(fs_join(target, "subdir"))

            fs.rm(fs_join(target, "newdir"), recursive=True)
            assert not fs.exists(fs_join(target, "newdir"))

            # Limit recursive by maxdepth
            fs.cp(s, t, recursive=True, maxdepth=1)
            assert fs.isdir(fs_join(target, "newdir"))
            assert fs.isfile(fs_join(target, "newdir", "subfile1"))
            assert fs.isfile(fs_join(target, "newdir", "subfile2"))
            assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
            assert not fs.exists(fs_join(target, "subdir"))

            fs.rm(fs_join(target, "newdir"), recursive=True)
            assert not fs.exists(fs_join(target, "newdir"))

    def test_copy_glob_to_existing_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target, supports_empty_directories
    ):
        # Copy scenario 1g
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            dummy = fs_join(target, "dummy")
            fs.touch(dummy)
        assert fs.isdir(target)

        for target_slash in [False, True]:
            t = target + "/" if target_slash else target

            # Without recursive
            fs.cp(fs_join(source, "subdir", "*"), t)
            assert fs.isfile(fs_join(target, "subfile1"))
            assert fs.isfile(fs_join(target, "subfile2"))
            assert not fs.isdir(fs_join(target, "nesteddir"))
            assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
            assert not fs.exists(fs_join(target, "subdir"))

            fs.rm(
                [fs_join(target, "subfile1"), fs_join(target, "subfile2")],
                recursive=True,
            )
            assert fs.ls(target, detail=False) == (
                [] if supports_empty_directories else [dummy]
            )

            # With recursive
            for glob, recursive in zip(["*", "**"], [True, False]):
                fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
                assert fs.isfile(fs_join(target, "subfile1"))
                assert fs.isfile(fs_join(target, "subfile2"))
                assert fs.isdir(fs_join(target, "nesteddir"))
                assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
                assert not fs.exists(fs_join(target, "subdir"))

                fs.rm(
                    [
                        fs_join(target, "subfile1"),
                        fs_join(target, "subfile2"),
                        fs_join(target, "nesteddir"),
                    ],
                    recursive=True,
                )
                assert fs.ls(target, detail=False) == (
                    [] if supports_empty_directories else [dummy]
                )

                # Limit recursive by maxdepth
                fs.cp(
                    fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
                )
                assert fs.isfile(fs_join(target, "subfile1"))
                assert fs.isfile(fs_join(target, "subfile2"))
                assert not fs.exists(fs_join(target, "nesteddir"))
                assert not fs.exists(fs_join(target, "subdir"))

                fs.rm(
                    [fs_join(target, "subfile1"), fs_join(target, "subfile2")],
                    recursive=True,
                )
                assert fs.ls(target, detail=False) == (
                    [] if supports_empty_directories else [dummy]
                )

    def test_copy_glob_to_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        # Copy scenario 1h
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        for target_slash in [False, True]:
            t = fs_join(target, "newdir")
            if target_slash:
                t += "/"

            # Without recursive
            fs.cp(fs_join(source, "subdir", "*"), t)
            assert fs.isdir(fs_join(target, "newdir"))
            assert fs.isfile(fs_join(target, "newdir", "subfile1"))
            assert fs.isfile(fs_join(target, "newdir", "subfile2"))
            assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
            assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
            assert not fs.exists(fs_join(target, "subdir"))
            assert not fs.exists(fs_join(target, "newdir", "subdir"))

            fs.rm(fs_join(target, "newdir"), recursive=True)
            assert not fs.exists(fs_join(target, "newdir"))

            # With recursive
            for glob, recursive in zip(["*", "**"], [True, False]):
                fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
                assert fs.isdir(fs_join(target, "newdir"))
                assert fs.isfile(fs_join(target, "newdir", "subfile1"))
                assert fs.isfile(fs_join(target, "newdir", "subfile2"))
                assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
                assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
                assert not fs.exists(fs_join(target, "subdir"))
                assert not fs.exists(fs_join(target, "newdir", "subdir"))

                fs.rm(fs_join(target, "newdir"), recursive=True)
                assert not fs.exists(fs_join(target, "newdir"))

                # Limit recursive by maxdepth
                fs.cp(
                    fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
                )
                assert fs.isdir(fs_join(target, "newdir"))
                assert fs.isfile(fs_join(target, "newdir", "subfile1"))
                assert fs.isfile(fs_join(target, "newdir", "subfile2"))
                assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
                assert not fs.exists(fs_join(target, "subdir"))
                assert not fs.exists(fs_join(target, "newdir", "subdir"))

                fs.rm(fs_join(target, "newdir"), recursive=True)
                assert not fs.exists(fs_join(target, "newdir"))

    @pytest.mark.parametrize(
        GLOB_EDGE_CASES_TESTS["argnames"],
        GLOB_EDGE_CASES_TESTS["argvalues"],
    )
    def test_copy_glob_edge_cases(
        self, path, recursive, maxdepth, expected, fs, fs_join, fs_glob_edge_cases_files, fs_target, fs_sanitize_path
    ):
        # Copy scenario 1g
        source = fs_glob_edge_cases_files

        target = fs_target

        for new_dir, target_slash in product([True, False], [True, False]):
            fs.mkdir(target)

            t = fs_join(target, "newdir") if new_dir else target
            t = t + "/" if target_slash else t

            fs.copy(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)

            output = fs.find(target)
            if new_dir:
                prefixed_expected = [
                    fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
                ]
            else:
                prefixed_expected = [
                    fs_sanitize_path(fs_join(target, p)) for p in expected
                ]
            assert sorted(output) == sorted(prefixed_expected)

            try:
                fs.rm(target, recursive=True)
            except FileNotFoundError:
                pass

    def test_copy_list_of_files_to_existing_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target, supports_empty_directories
    ):
        # Copy scenario 2a
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            dummy = fs_join(target, "dummy")
            fs.touch(dummy)
        assert fs.isdir(target)

        source_files = [
            fs_join(source, "file1"),
            fs_join(source, "file2"),
            fs_join(source, "subdir", "subfile1"),
        ]

        for target_slash in [False, True]:
            t = target + "/" if target_slash else target

            fs.cp(source_files, t)
            assert fs.isfile(fs_join(target, "file1"))
            assert fs.isfile(fs_join(target, "file2"))
            assert fs.isfile(fs_join(target, "subfile1"))

            fs.rm(
                [
                    fs_join(target, "file1"),
                    fs_join(target, "file2"),
                    fs_join(target, "subfile1"),
                ],
                recursive=True,
            )
            assert fs.ls(target, detail=False) == (
                [] if supports_empty_directories else [dummy]
            )

    def test_copy_list_of_files_to_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        # Copy scenario 2b
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        source_files = [
            fs_join(source, "file1"),
            fs_join(source, "file2"),
            fs_join(source, "subdir", "subfile1"),
        ]

        fs.cp(source_files, fs_join(target, "newdir") + "/")  # Note trailing slash
        assert fs.isdir(fs_join(target, "newdir"))
        assert fs.isfile(fs_join(target, "newdir", "file1"))
        assert fs.isfile(fs_join(target, "newdir", "file2"))
        assert fs.isfile(fs_join(target, "newdir", "subfile1"))

    def test_copy_two_files_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        # This is a duplicate of test_copy_list_of_files_to_new_directory and
        # can eventually be removed.
        source = fs_bulk_operations_scenario_0

        target = fs_target
        assert not fs.exists(target)
        fs.cp([fs_join(source, "file1"), fs_join(source, "file2")], target)

        assert fs.isdir(target)
        assert fs.isfile(fs_join(target, "file1"))
        assert fs.isfile(fs_join(target, "file2"))

    def test_copy_directory_without_files_with_same_name_prefix(
        self, fs, fs_join, fs_target, fs_dir_and_file_with_same_name_prefix, supports_empty_directories
    ):
        # Create the test dirs
        source = fs_dir_and_file_with_same_name_prefix
        target = fs_target

        # Test without glob
        fs.cp(fs_join(source, "subdir"), target, recursive=True)

        assert fs.isfile(fs_join(target, "subfile.txt"))
        assert not fs.isfile(fs_join(target, "subdir.txt"))

        fs.rm([fs_join(target, "subfile.txt")])
        if supports_empty_directories:
            assert fs.ls(target) == []
        else:
            assert not fs.exists(target)

        # Test with glob
        fs.cp(fs_join(source, "subdir*"), target, recursive=True)

        assert fs.isdir(fs_join(target, "subdir"))
        assert fs.isfile(fs_join(target, "subdir", "subfile.txt"))
        assert fs.isfile(fs_join(target, "subdir.txt"))

    def test_copy_with_source_and_destination_as_list(
        self, fs, fs_target, fs_join, fs_10_files_with_hashed_names
    ):
        # Create the test dir
        source = fs_10_files_with_hashed_names
        target = fs_target

        # Create list of files for source and destination
        source_files = []
        destination_files = []
        for i in range(10):
            hashed_i = md5(str(i).encode("utf-8")).hexdigest()
            source_files.append(fs_join(source, f"{hashed_i}.txt"))
            destination_files.append(fs_join(target, f"{hashed_i}.txt"))

        # Copy and assert order was kept
        fs.copy(path1=source_files, path2=destination_files)

        for i in range(10):
            file_content = fs.cat(destination_files[i]).decode("utf-8")
            assert file_content == str(i)
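With a fixture class such as the MemoryFixtures sketch shown after the __init__.py listing above, running this entire class against a concrete backend is just a mixin; the test-class name below is illustrative, not from this diff:

from fsspec.tests.abstract.copy import AbstractCopyTests


class TestMemoryCopy(MemoryFixtures, AbstractCopyTests):
    # pytest collects every test_copy_* method from AbstractCopyTests and runs
    # it with the fixtures supplied by MemoryFixtures (the sketch above).
    pass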
.venv/Lib/site-packages/fsspec/tests/abstract/get.py
ADDED
@@ -0,0 +1,587 @@
1 |
+
from hashlib import md5
|
2 |
+
from itertools import product
|
3 |
+
|
4 |
+
import pytest
|
5 |
+
|
6 |
+
from fsspec.implementations.local import make_path_posix
|
7 |
+
from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
|
8 |
+
|
9 |
+
|
10 |
+
class AbstractGetTests:
|
11 |
+
def test_get_file_to_existing_directory(
|
12 |
+
self,
|
13 |
+
fs,
|
14 |
+
fs_join,
|
15 |
+
fs_bulk_operations_scenario_0,
|
16 |
+
local_fs,
|
17 |
+
local_join,
|
18 |
+
local_target,
|
19 |
+
):
|
20 |
+
# Copy scenario 1a
|
21 |
+
source = fs_bulk_operations_scenario_0
|
22 |
+
|
23 |
+
target = local_target
|
24 |
+
local_fs.mkdir(target)
|
25 |
+
assert local_fs.isdir(target)
|
26 |
+
|
27 |
+
target_file2 = local_join(target, "file2")
|
28 |
+
target_subfile1 = local_join(target, "subfile1")
|
29 |
+
|
30 |
+
# Copy from source directory
|
31 |
+
fs.get(fs_join(source, "file2"), target)
|
32 |
+
assert local_fs.isfile(target_file2)
|
33 |
+
|
34 |
+
# Copy from sub directory
|
35 |
+
fs.get(fs_join(source, "subdir", "subfile1"), target)
|
36 |
+
assert local_fs.isfile(target_subfile1)
|
37 |
+
|
38 |
+
# Remove copied files
|
39 |
+
local_fs.rm([target_file2, target_subfile1])
|
40 |
+
assert not local_fs.exists(target_file2)
|
41 |
+
assert not local_fs.exists(target_subfile1)
|
42 |
+
|
43 |
+
# Repeat with trailing slash on target
|
44 |
+
fs.get(fs_join(source, "file2"), target + "/")
|
45 |
+
assert local_fs.isdir(target)
|
46 |
+
assert local_fs.isfile(target_file2)
|
47 |
+
|
48 |
+
fs.get(fs_join(source, "subdir", "subfile1"), target + "/")
|
49 |
+
assert local_fs.isfile(target_subfile1)
|
50 |
+
|
51 |
+
def test_get_file_to_new_directory(
|
52 |
+
self,
|
53 |
+
fs,
|
54 |
+
fs_join,
|
55 |
+
fs_bulk_operations_scenario_0,
|
56 |
+
local_fs,
|
57 |
+
local_join,
|
58 |
+
local_target,
|
59 |
+
):
|
60 |
+
# Copy scenario 1b
|
61 |
+
source = fs_bulk_operations_scenario_0
|
62 |
+
|
63 |
+
target = local_target
|
64 |
+
local_fs.mkdir(target)
|
65 |
+
|
66 |
+
fs.get(
|
67 |
+
fs_join(source, "subdir", "subfile1"), local_join(target, "newdir/")
|
68 |
+
) # Note trailing slash
|
69 |
+
|
70 |
+
assert local_fs.isdir(target)
|
71 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
72 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
73 |
+
|
74 |
+
def test_get_file_to_file_in_existing_directory(
|
75 |
+
self,
|
76 |
+
fs,
|
77 |
+
fs_join,
|
78 |
+
fs_bulk_operations_scenario_0,
|
79 |
+
local_fs,
|
80 |
+
local_join,
|
81 |
+
local_target,
|
82 |
+
):
|
83 |
+
# Copy scenario 1c
|
84 |
+
source = fs_bulk_operations_scenario_0
|
85 |
+
|
86 |
+
target = local_target
|
87 |
+
local_fs.mkdir(target)
|
88 |
+
|
89 |
+
fs.get(fs_join(source, "subdir", "subfile1"), local_join(target, "newfile"))
|
90 |
+
assert local_fs.isfile(local_join(target, "newfile"))
|
91 |
+
|
92 |
+
def test_get_file_to_file_in_new_directory(
|
93 |
+
self,
|
94 |
+
fs,
|
95 |
+
fs_join,
|
96 |
+
fs_bulk_operations_scenario_0,
|
97 |
+
local_fs,
|
98 |
+
local_join,
|
99 |
+
local_target,
|
100 |
+
):
|
101 |
+
# Copy scenario 1d
|
102 |
+
source = fs_bulk_operations_scenario_0
|
103 |
+
|
104 |
+
target = local_target
|
105 |
+
local_fs.mkdir(target)
|
106 |
+
|
107 |
+
fs.get(
|
108 |
+
fs_join(source, "subdir", "subfile1"),
|
109 |
+
local_join(target, "newdir", "newfile"),
|
110 |
+
)
|
111 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
112 |
+
assert local_fs.isfile(local_join(target, "newdir", "newfile"))
|
113 |
+
|
114 |
+
def test_get_directory_to_existing_directory(
|
115 |
+
self,
|
116 |
+
fs,
|
117 |
+
fs_join,
|
118 |
+
fs_bulk_operations_scenario_0,
|
119 |
+
local_fs,
|
120 |
+
local_join,
|
121 |
+
local_target,
|
122 |
+
):
|
123 |
+
# Copy scenario 1e
|
124 |
+
source = fs_bulk_operations_scenario_0
|
125 |
+
|
126 |
+
target = local_target
|
127 |
+
local_fs.mkdir(target)
|
128 |
+
assert local_fs.isdir(target)
|
129 |
+
|
130 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
131 |
+
s = fs_join(source, "subdir")
|
132 |
+
if source_slash:
|
133 |
+
s += "/"
|
134 |
+
t = target + "/" if target_slash else target
|
135 |
+
|
136 |
+
# Without recursive does nothing
|
137 |
+
fs.get(s, t)
|
138 |
+
assert local_fs.ls(target) == []
|
139 |
+
|
140 |
+
# With recursive
|
141 |
+
fs.get(s, t, recursive=True)
|
142 |
+
if source_slash:
|
143 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
144 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
145 |
+
assert local_fs.isdir(local_join(target, "nesteddir"))
|
146 |
+
assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
|
147 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
148 |
+
|
149 |
+
local_fs.rm(
|
150 |
+
[
|
151 |
+
local_join(target, "subfile1"),
|
152 |
+
local_join(target, "subfile2"),
|
153 |
+
local_join(target, "nesteddir"),
|
154 |
+
],
|
155 |
+
recursive=True,
|
156 |
+
)
|
157 |
+
else:
|
158 |
+
assert local_fs.isdir(local_join(target, "subdir"))
|
159 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
|
160 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
|
161 |
+
assert local_fs.isdir(local_join(target, "subdir", "nesteddir"))
|
162 |
+
assert local_fs.isfile(
|
163 |
+
local_join(target, "subdir", "nesteddir", "nestedfile")
|
164 |
+
)
|
165 |
+
|
166 |
+
local_fs.rm(local_join(target, "subdir"), recursive=True)
|
167 |
+
assert local_fs.ls(target) == []
|
168 |
+
|
169 |
+
# Limit recursive by maxdepth
|
170 |
+
fs.get(s, t, recursive=True, maxdepth=1)
|
171 |
+
if source_slash:
|
172 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
173 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
174 |
+
assert not local_fs.exists(local_join(target, "nesteddir"))
|
175 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
176 |
+
|
177 |
+
local_fs.rm(
|
178 |
+
[
|
179 |
+
local_join(target, "subfile1"),
|
180 |
+
local_join(target, "subfile2"),
|
181 |
+
],
|
182 |
+
recursive=True,
|
183 |
+
)
|
184 |
+
else:
|
185 |
+
assert local_fs.isdir(local_join(target, "subdir"))
|
186 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
|
187 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
|
188 |
+
assert not local_fs.exists(local_join(target, "subdir", "nesteddir"))
|
189 |
+
|
190 |
+
local_fs.rm(local_join(target, "subdir"), recursive=True)
|
191 |
+
assert local_fs.ls(target) == []
|
192 |
+
|
193 |
+
def test_get_directory_to_new_directory(
|
194 |
+
self,
|
195 |
+
fs,
|
196 |
+
fs_join,
|
197 |
+
fs_bulk_operations_scenario_0,
|
198 |
+
local_fs,
|
199 |
+
local_join,
|
200 |
+
local_target,
|
201 |
+
):
|
202 |
+
# Copy scenario 1f
|
203 |
+
source = fs_bulk_operations_scenario_0
|
204 |
+
|
205 |
+
target = local_target
|
206 |
+
local_fs.mkdir(target)
|
207 |
+
|
208 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
209 |
+
s = fs_join(source, "subdir")
|
210 |
+
if source_slash:
|
211 |
+
s += "/"
|
212 |
+
t = local_join(target, "newdir")
|
213 |
+
if target_slash:
|
214 |
+
t += "/"
|
215 |
+
|
216 |
+
# Without recursive does nothing
|
217 |
+
fs.get(s, t)
|
218 |
+
assert local_fs.ls(target) == []
|
219 |
+
|
220 |
+
# With recursive
|
221 |
+
fs.get(s, t, recursive=True)
|
222 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
223 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
224 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
225 |
+
assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
|
226 |
+
assert local_fs.isfile(
|
227 |
+
local_join(target, "newdir", "nesteddir", "nestedfile")
|
228 |
+
)
|
229 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
230 |
+
|
231 |
+
local_fs.rm(local_join(target, "newdir"), recursive=True)
|
232 |
+
assert local_fs.ls(target) == []
|
233 |
+
|
234 |
+
# Limit recursive by maxdepth
|
235 |
+
fs.get(s, t, recursive=True, maxdepth=1)
|
236 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
237 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
238 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
239 |
+
assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
|
240 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
241 |
+
|
242 |
+
local_fs.rm(local_join(target, "newdir"), recursive=True)
|
243 |
+
assert not local_fs.exists(local_join(target, "newdir"))
|
244 |
+
|
245 |
+
def test_get_glob_to_existing_directory(
|
246 |
+
self,
|
247 |
+
fs,
|
248 |
+
fs_join,
|
249 |
+
fs_bulk_operations_scenario_0,
|
250 |
+
local_fs,
|
251 |
+
local_join,
|
252 |
+
local_target,
|
253 |
+
):
|
254 |
+
# Copy scenario 1g
|
255 |
+
source = fs_bulk_operations_scenario_0
|
256 |
+
|
257 |
+
target = local_target
|
258 |
+
local_fs.mkdir(target)
|
259 |
+
|
260 |
+
for target_slash in [False, True]:
|
261 |
+
t = target + "/" if target_slash else target
|
262 |
+
|
263 |
+
# Without recursive
|
264 |
+
fs.get(fs_join(source, "subdir", "*"), t)
|
265 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
266 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
267 |
+
assert not local_fs.isdir(local_join(target, "nesteddir"))
|
268 |
+
assert not local_fs.exists(local_join(target, "nesteddir", "nestedfile"))
|
269 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
270 |
+
|
271 |
+
local_fs.rm(
|
272 |
+
[
|
273 |
+
local_join(target, "subfile1"),
|
274 |
+
local_join(target, "subfile2"),
|
275 |
+
],
|
276 |
+
recursive=True,
|
277 |
+
)
|
278 |
+
assert local_fs.ls(target) == []
|
279 |
+
|
280 |
+
# With recursive
|
281 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
282 |
+
fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
|
283 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
284 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
285 |
+
assert local_fs.isdir(local_join(target, "nesteddir"))
|
286 |
+
assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
|
287 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
288 |
+
|
289 |
+
local_fs.rm(
|
290 |
+
[
|
291 |
+
local_join(target, "subfile1"),
|
292 |
+
local_join(target, "subfile2"),
|
293 |
+
local_join(target, "nesteddir"),
|
294 |
+
],
|
295 |
+
recursive=True,
|
296 |
+
)
|
297 |
+
assert local_fs.ls(target) == []
|
298 |
+
|
299 |
+
# Limit recursive by maxdepth
|
300 |
+
fs.get(
|
301 |
+
fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
|
302 |
+
)
|
303 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
304 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
305 |
+
assert not local_fs.exists(local_join(target, "nesteddir"))
|
306 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
307 |
+
|
308 |
+
local_fs.rm(
|
309 |
+
[
|
310 |
+
local_join(target, "subfile1"),
|
311 |
+
local_join(target, "subfile2"),
|
312 |
+
],
|
313 |
+
recursive=True,
|
314 |
+
)
|
315 |
+
assert local_fs.ls(target) == []
|
316 |
+
|
317 |
+
def test_get_glob_to_new_directory(
|
318 |
+
self,
|
319 |
+
fs,
|
320 |
+
fs_join,
|
321 |
+
fs_bulk_operations_scenario_0,
|
322 |
+
local_fs,
|
323 |
+
local_join,
|
324 |
+
local_target,
|
325 |
+
):
|
326 |
+
# Copy scenario 1h
|
327 |
+
source = fs_bulk_operations_scenario_0
|
328 |
+
|
329 |
+
target = local_target
|
330 |
+
local_fs.mkdir(target)
|
331 |
+
|
332 |
+
for target_slash in [False, True]:
|
333 |
+
t = fs_join(target, "newdir")
|
334 |
+
if target_slash:
|
335 |
+
t += "/"
|
336 |
+
|
337 |
+
# Without recursive
|
338 |
+
fs.get(fs_join(source, "subdir", "*"), t)
|
339 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
340 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
341 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
342 |
+
assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
|
343 |
+
assert not local_fs.exists(
|
344 |
+
local_join(target, "newdir", "nesteddir", "nestedfile")
|
345 |
+
)
|
346 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
347 |
+
assert not local_fs.exists(local_join(target, "newdir", "subdir"))
|
348 |
+
|
349 |
+
local_fs.rm(local_join(target, "newdir"), recursive=True)
|
350 |
+
assert local_fs.ls(target) == []
|
351 |
+
|
352 |
+
# With recursive
|
353 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
354 |
+
fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
|
355 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
356 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
357 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
358 |
+
assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
|
359 |
+
assert local_fs.isfile(
|
360 |
+
local_join(target, "newdir", "nesteddir", "nestedfile")
|
361 |
+
)
|
362 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
363 |
+
assert not local_fs.exists(local_join(target, "newdir", "subdir"))
|
364 |
+
|
365 |
+
local_fs.rm(local_join(target, "newdir"), recursive=True)
|
366 |
+
assert not local_fs.exists(local_join(target, "newdir"))
|
367 |
+
|
368 |
+
# Limit recursive by maxdepth
|
369 |
+
fs.get(
|
370 |
+
fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
|
371 |
+
)
|
372 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
373 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
374 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
375 |
+
assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
|
376 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
377 |
+
assert not local_fs.exists(local_join(target, "newdir", "subdir"))
|
378 |
+
|
379 |
+
local_fs.rm(local_fs.ls(target, detail=False), recursive=True)
|
380 |
+
assert not local_fs.exists(local_join(target, "newdir"))
|
381 |
+
|
382 |
+
@pytest.mark.parametrize(
|
383 |
+
GLOB_EDGE_CASES_TESTS["argnames"],
|
384 |
+
GLOB_EDGE_CASES_TESTS["argvalues"],
|
385 |
+
)
|
386 |
+
def test_get_glob_edge_cases(
|
387 |
+
self,
|
388 |
+
path,
|
389 |
+
recursive,
|
390 |
+
maxdepth,
|
391 |
+
expected,
|
392 |
+
fs,
|
393 |
+
fs_join,
|
394 |
+
fs_glob_edge_cases_files,
|
395 |
+
local_fs,
|
396 |
+
local_join,
|
397 |
+
local_target,
|
398 |
+
):
|
399 |
+
# Copy scenario 1g
|
400 |
+
source = fs_glob_edge_cases_files
|
401 |
+
|
402 |
+
target = local_target
|
403 |
+
|
404 |
+
for new_dir, target_slash in product([True, False], [True, False]):
|
405 |
+
local_fs.mkdir(target)
|
406 |
+
|
407 |
+
t = local_join(target, "newdir") if new_dir else target
|
408 |
+
t = t + "/" if target_slash else t
|
409 |
+
|
410 |
+
fs.get(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
|
411 |
+
|
412 |
+
output = local_fs.find(target)
|
413 |
+
if new_dir:
|
414 |
+
prefixed_expected = [
|
415 |
+
make_path_posix(local_join(target, "newdir", p)) for p in expected
|
416 |
+
]
|
417 |
+
else:
|
418 |
+
prefixed_expected = [
|
419 |
+
make_path_posix(local_join(target, p)) for p in expected
|
420 |
+
]
|
421 |
+
assert sorted(output) == sorted(prefixed_expected)
|
422 |
+
|
423 |
+
try:
|
424 |
+
local_fs.rm(target, recursive=True)
|
425 |
+
except FileNotFoundError:
|
426 |
+
pass
|
427 |
+
|
428 |
+
def test_get_list_of_files_to_existing_directory(
|
429 |
+
self,
|
430 |
+
fs,
|
431 |
+
fs_join,
|
432 |
+
fs_bulk_operations_scenario_0,
|
433 |
+
local_fs,
|
434 |
+
local_join,
|
435 |
+
local_target,
|
436 |
+
):
|
437 |
+
# Copy scenario 2a
|
438 |
+
source = fs_bulk_operations_scenario_0
|
439 |
+
|
440 |
+
target = local_target
|
441 |
+
local_fs.mkdir(target)
|
442 |
+
|
443 |
+
source_files = [
|
444 |
+
fs_join(source, "file1"),
|
445 |
+
fs_join(source, "file2"),
|
446 |
+
fs_join(source, "subdir", "subfile1"),
|
447 |
+
]
|
448 |
+
|
449 |
+
for target_slash in [False, True]:
|
450 |
+
t = target + "/" if target_slash else target
|
451 |
+
|
452 |
+
fs.get(source_files, t)
|
453 |
+
assert local_fs.isfile(local_join(target, "file1"))
|
454 |
+
assert local_fs.isfile(local_join(target, "file2"))
|
455 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
456 |
+
|
457 |
+
local_fs.rm(
|
458 |
+
[
|
459 |
+
local_join(target, "file1"),
|
460 |
+
local_join(target, "file2"),
|
461 |
+
local_join(target, "subfile1"),
|
462 |
+
],
|
463 |
+
recursive=True,
|
464 |
+
)
|
465 |
+
assert local_fs.ls(target) == []
|
466 |
+
|
467 |
+
def test_get_list_of_files_to_new_directory(
|
468 |
+
self,
|
469 |
+
fs,
|
470 |
+
fs_join,
|
471 |
+
fs_bulk_operations_scenario_0,
|
472 |
+
local_fs,
|
473 |
+
local_join,
|
474 |
+
local_target,
|
475 |
+
):
|
476 |
+
# Copy scenario 2b
|
477 |
+
source = fs_bulk_operations_scenario_0
|
478 |
+
|
479 |
+
target = local_target
|
480 |
+
local_fs.mkdir(target)
|
481 |
+
|
482 |
+
source_files = [
|
483 |
+
fs_join(source, "file1"),
|
484 |
+
fs_join(source, "file2"),
|
485 |
+
fs_join(source, "subdir", "subfile1"),
|
486 |
+
]
|
487 |
+
|
488 |
+
fs.get(source_files, local_join(target, "newdir") + "/") # Note trailing slash
|
489 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
490 |
+
assert local_fs.isfile(local_join(target, "newdir", "file1"))
|
491 |
+
assert local_fs.isfile(local_join(target, "newdir", "file2"))
|
492 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
493 |
+
|
494 |
+
def test_get_directory_recursive(
|
495 |
+
self, fs, fs_join, fs_path, local_fs, local_join, local_target
|
496 |
+
):
|
497 |
+
# https://github.com/fsspec/filesystem_spec/issues/1062
|
498 |
+
# Recursive cp/get/put of source directory into non-existent target directory.
|
499 |
+
src = fs_join(fs_path, "src")
|
500 |
+
src_file = fs_join(src, "file")
|
501 |
+
fs.mkdir(src)
|
502 |
+
fs.touch(src_file)
|
503 |
+
|
504 |
+
target = local_target
|
505 |
+
|
506 |
+
# get without slash
|
507 |
+
assert not local_fs.exists(target)
|
508 |
+
for loop in range(2):
|
509 |
+
fs.get(src, target, recursive=True)
|
510 |
+
assert local_fs.isdir(target)
|
511 |
+
|
512 |
+
if loop == 0:
|
513 |
+
assert local_fs.isfile(local_join(target, "file"))
|
514 |
+
assert not local_fs.exists(local_join(target, "src"))
|
515 |
+
else:
|
516 |
+
assert local_fs.isfile(local_join(target, "file"))
|
517 |
+
assert local_fs.isdir(local_join(target, "src"))
|
518 |
+
assert local_fs.isfile(local_join(target, "src", "file"))
|
519 |
+
|
520 |
+
local_fs.rm(target, recursive=True)
|
521 |
+
|
522 |
+
# get with slash
|
523 |
+
assert not local_fs.exists(target)
|
524 |
+
for loop in range(2):
|
525 |
+
fs.get(src + "/", target, recursive=True)
|
526 |
+
assert local_fs.isdir(target)
|
527 |
+
assert local_fs.isfile(local_join(target, "file"))
|
528 |
+
assert not local_fs.exists(local_join(target, "src"))
|
529 |
+
|
530 |
+
def test_get_directory_without_files_with_same_name_prefix(
|
531 |
+
self,
|
532 |
+
fs,
|
533 |
+
fs_join,
|
534 |
+
local_fs,
|
535 |
+
local_join,
|
536 |
+
local_target,
|
537 |
+
fs_dir_and_file_with_same_name_prefix,
|
538 |
+
):
|
539 |
+
# Create the test dirs
|
540 |
+
source = fs_dir_and_file_with_same_name_prefix
|
541 |
+
target = local_target
|
542 |
+
|
543 |
+
# Test without glob
|
544 |
+
fs.get(fs_join(source, "subdir"), target, recursive=True)
|
545 |
+
|
546 |
+
assert local_fs.isfile(local_join(target, "subfile.txt"))
|
547 |
+
assert not local_fs.isfile(local_join(target, "subdir.txt"))
|
548 |
+
|
549 |
+
local_fs.rm([local_join(target, "subfile.txt")])
|
550 |
+
assert local_fs.ls(target) == []
|
551 |
+
|
552 |
+
# Test with glob
|
553 |
+
fs.get(fs_join(source, "subdir*"), target, recursive=True)
|
554 |
+
|
555 |
+
assert local_fs.isdir(local_join(target, "subdir"))
|
556 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile.txt"))
|
557 |
+
assert local_fs.isfile(local_join(target, "subdir.txt"))
|
558 |
+
|
559 |
+
def test_get_with_source_and_destination_as_list(
|
560 |
+
self,
|
561 |
+
fs,
|
562 |
+
fs_join,
|
563 |
+
local_fs,
|
564 |
+
local_join,
|
565 |
+
local_target,
|
566 |
+
fs_10_files_with_hashed_names,
|
567 |
+
):
|
568 |
+
# Create the test dir
|
569 |
+
source = fs_10_files_with_hashed_names
|
570 |
+
target = local_target
|
571 |
+
|
572 |
+
# Create list of files for source and destination
|
573 |
+
source_files = []
|
574 |
+
destination_files = []
|
575 |
+
for i in range(10):
|
576 |
+
hashed_i = md5(str(i).encode("utf-8")).hexdigest()
|
577 |
+
source_files.append(fs_join(source, f"{hashed_i}.txt"))
|
578 |
+
destination_files.append(
|
579 |
+
make_path_posix(local_join(target, f"{hashed_i}.txt"))
|
580 |
+
)
|
581 |
+
|
582 |
+
# Copy and assert order was kept
|
583 |
+
fs.get(rpath=source_files, lpath=destination_files)
|
584 |
+
|
585 |
+
for i in range(10):
|
586 |
+
file_content = local_fs.cat(destination_files[i]).decode("utf-8")
|
587 |
+
assert file_content == str(i)
|
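get.py differs from copy.py mainly in that the destination is the local filesystem, so expected paths are normalized with make_path_posix before comparison (LocalFileSystem reports forward-slash paths even on Windows). A small standalone illustration of that normalization step, under the assumption that a temporary directory is an acceptable stand-in for the local_target fixture:

import os
import tempfile

from fsspec.implementations.local import LocalFileSystem, make_path_posix

local_fs = LocalFileSystem(auto_mkdir=True)

with tempfile.TemporaryDirectory() as tmp:
    # auto_mkdir=True lets touch() create the intermediate "newdir" directory.
    local_fs.touch(os.path.join(tmp, "newdir", "subfile1"))

    found = local_fs.find(tmp)  # LocalFileSystem returns posix-style paths
    expected = [make_path_posix(os.path.join(tmp, "newdir", "subfile1"))]
    assert sorted(found) == sorted(expected)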
.venv/Lib/site-packages/fsspec/tests/abstract/put.py
ADDED
@@ -0,0 +1,591 @@
1 |
+
from hashlib import md5
|
2 |
+
from itertools import product
|
3 |
+
|
4 |
+
import pytest
|
5 |
+
|
6 |
+
from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
|
7 |
+
|
8 |
+
|
9 |
+
class AbstractPutTests:
|
10 |
+
def test_put_file_to_existing_directory(
|
11 |
+
self,
|
12 |
+
fs,
|
13 |
+
fs_join,
|
14 |
+
fs_target,
|
15 |
+
local_join,
|
16 |
+
local_bulk_operations_scenario_0,
|
17 |
+
supports_empty_directories,
|
18 |
+
):
|
19 |
+
# Copy scenario 1a
|
20 |
+
source = local_bulk_operations_scenario_0
|
21 |
+
|
22 |
+
target = fs_target
|
23 |
+
fs.mkdir(target)
|
24 |
+
if not supports_empty_directories:
|
25 |
+
# Force target directory to exist by adding a dummy file
|
26 |
+
fs.touch(fs_join(target, "dummy"))
|
27 |
+
assert fs.isdir(target)
|
28 |
+
|
29 |
+
target_file2 = fs_join(target, "file2")
|
30 |
+
target_subfile1 = fs_join(target, "subfile1")
|
31 |
+
|
32 |
+
# Copy from source directory
|
33 |
+
fs.put(local_join(source, "file2"), target)
|
34 |
+
assert fs.isfile(target_file2)
|
35 |
+
|
36 |
+
# Copy from sub directory
|
37 |
+
fs.put(local_join(source, "subdir", "subfile1"), target)
|
38 |
+
assert fs.isfile(target_subfile1)
|
39 |
+
|
40 |
+
# Remove copied files
|
41 |
+
fs.rm([target_file2, target_subfile1])
|
42 |
+
assert not fs.exists(target_file2)
|
43 |
+
assert not fs.exists(target_subfile1)
|
44 |
+
|
45 |
+
# Repeat with trailing slash on target
|
46 |
+
fs.put(local_join(source, "file2"), target + "/")
|
47 |
+
assert fs.isdir(target)
|
48 |
+
assert fs.isfile(target_file2)
|
49 |
+
|
50 |
+
fs.put(local_join(source, "subdir", "subfile1"), target + "/")
|
51 |
+
assert fs.isfile(target_subfile1)
|
52 |
+
|
53 |
+
def test_put_file_to_new_directory(
|
54 |
+
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
55 |
+
):
|
56 |
+
# Copy scenario 1b
|
57 |
+
source = local_bulk_operations_scenario_0
|
58 |
+
|
59 |
+
target = fs_target
|
60 |
+
fs.mkdir(target)
|
61 |
+
|
62 |
+
fs.put(
|
63 |
+
local_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
|
64 |
+
) # Note trailing slash
|
65 |
+
assert fs.isdir(target)
|
66 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
67 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
68 |
+
|
69 |
+
def test_put_file_to_file_in_existing_directory(
|
70 |
+
self,
|
71 |
+
fs,
|
72 |
+
fs_join,
|
73 |
+
fs_target,
|
74 |
+
local_join,
|
75 |
+
supports_empty_directories,
|
76 |
+
local_bulk_operations_scenario_0,
|
77 |
+
):
|
78 |
+
# Copy scenario 1c
|
79 |
+
source = local_bulk_operations_scenario_0
|
80 |
+
|
81 |
+
target = fs_target
|
82 |
+
fs.mkdir(target)
|
83 |
+
if not supports_empty_directories:
|
84 |
+
# Force target directory to exist by adding a dummy file
|
85 |
+
fs.touch(fs_join(target, "dummy"))
|
86 |
+
assert fs.isdir(target)
|
87 |
+
|
88 |
+
fs.put(local_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
|
89 |
+
assert fs.isfile(fs_join(target, "newfile"))
|
90 |
+
|
91 |
+
def test_put_file_to_file_in_new_directory(
|
92 |
+
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
93 |
+
):
|
94 |
+
# Copy scenario 1d
|
95 |
+
source = local_bulk_operations_scenario_0
|
96 |
+
|
97 |
+
target = fs_target
|
98 |
+
fs.mkdir(target)
|
99 |
+
|
100 |
+
fs.put(
|
101 |
+
local_join(source, "subdir", "subfile1"),
|
102 |
+
fs_join(target, "newdir", "newfile"),
|
103 |
+
)
|
104 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
105 |
+
assert fs.isfile(fs_join(target, "newdir", "newfile"))
|
106 |
+
|
107 |
+
def test_put_directory_to_existing_directory(
|
108 |
+
self,
|
109 |
+
fs,
|
110 |
+
fs_join,
|
111 |
+
fs_target,
|
112 |
+
local_bulk_operations_scenario_0,
|
113 |
+
supports_empty_directories,
|
114 |
+
):
|
115 |
+
# Copy scenario 1e
|
116 |
+
source = local_bulk_operations_scenario_0
|
117 |
+
|
118 |
+
target = fs_target
|
119 |
+
fs.mkdir(target)
|
120 |
+
if not supports_empty_directories:
|
121 |
+
# Force target directory to exist by adding a dummy file
|
122 |
+
dummy = fs_join(target, "dummy")
|
123 |
+
fs.touch(dummy)
|
124 |
+
assert fs.isdir(target)
|
125 |
+
|
126 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
127 |
+
s = fs_join(source, "subdir")
|
128 |
+
if source_slash:
|
129 |
+
s += "/"
|
130 |
+
t = target + "/" if target_slash else target
|
131 |
+
|
132 |
+
# Without recursive does nothing
|
133 |
+
fs.put(s, t)
|
134 |
+
assert fs.ls(target, detail=False) == (
|
135 |
+
[] if supports_empty_directories else [dummy]
|
136 |
+
)
|
137 |
+
|
138 |
+
# With recursive
|
139 |
+
fs.put(s, t, recursive=True)
|
140 |
+
if source_slash:
|
141 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
142 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
143 |
+
assert fs.isdir(fs_join(target, "nesteddir"))
|
144 |
+
assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
|
145 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
146 |
+
|
147 |
+
fs.rm(
|
148 |
+
[
|
149 |
+
fs_join(target, "subfile1"),
|
150 |
+
fs_join(target, "subfile2"),
|
151 |
+
fs_join(target, "nesteddir"),
|
152 |
+
],
|
153 |
+
recursive=True,
|
154 |
+
)
|
155 |
+
else:
|
156 |
+
assert fs.isdir(fs_join(target, "subdir"))
|
157 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile1"))
|
158 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile2"))
|
159 |
+
assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
|
160 |
+
assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
|
161 |
+
|
162 |
+
fs.rm(fs_join(target, "subdir"), recursive=True)
|
163 |
+
assert fs.ls(target, detail=False) == (
|
164 |
+
[] if supports_empty_directories else [dummy]
|
165 |
+
)
|
166 |
+
|
167 |
+
# Limit recursive by maxdepth
|
168 |
+
fs.put(s, t, recursive=True, maxdepth=1)
|
169 |
+
if source_slash:
|
170 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
171 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
172 |
+
assert not fs.exists(fs_join(target, "nesteddir"))
|
173 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
174 |
+
|
175 |
+
fs.rm(
|
176 |
+
[
|
177 |
+
fs_join(target, "subfile1"),
|
178 |
+
fs_join(target, "subfile2"),
|
179 |
+
],
|
180 |
+
recursive=True,
|
181 |
+
)
|
182 |
+
else:
|
183 |
+
assert fs.isdir(fs_join(target, "subdir"))
|
184 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile1"))
|
185 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile2"))
|
186 |
+
assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
|
187 |
+
|
188 |
+
fs.rm(fs_join(target, "subdir"), recursive=True)
|
189 |
+
assert fs.ls(target, detail=False) == (
|
190 |
+
[] if supports_empty_directories else [dummy]
|
191 |
+
)
|
192 |
+
|
193 |
+
def test_put_directory_to_new_directory(
|
194 |
+
self,
|
195 |
+
fs,
|
196 |
+
fs_join,
|
197 |
+
fs_target,
|
198 |
+
local_bulk_operations_scenario_0,
|
199 |
+
supports_empty_directories,
|
200 |
+
):
|
201 |
+
# Copy scenario 1f
|
202 |
+
source = local_bulk_operations_scenario_0
|
203 |
+
|
204 |
+
target = fs_target
|
205 |
+
fs.mkdir(target)
|
206 |
+
|
207 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
208 |
+
s = fs_join(source, "subdir")
|
209 |
+
if source_slash:
|
210 |
+
s += "/"
|
211 |
+
t = fs_join(target, "newdir")
|
212 |
+
if target_slash:
|
213 |
+
t += "/"
|
214 |
+
|
215 |
+
# Without recursive does nothing
|
216 |
+
fs.put(s, t)
|
217 |
+
if supports_empty_directories:
|
218 |
+
assert fs.ls(target) == []
|
219 |
+
else:
|
220 |
+
with pytest.raises(FileNotFoundError):
|
221 |
+
fs.ls(target)
|
222 |
+
|
223 |
+
# With recursive
|
224 |
+
fs.put(s, t, recursive=True)
|
225 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
226 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
227 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
228 |
+
assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
|
229 |
+
assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
230 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
231 |
+
|
232 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
233 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
234 |
+
|
235 |
+
# Limit recursive by maxdepth
|
236 |
+
fs.put(s, t, recursive=True, maxdepth=1)
|
237 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
238 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
239 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
240 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
241 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
242 |
+
|
243 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
244 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
245 |
+
|
246 |
+
def test_put_glob_to_existing_directory(
|
247 |
+
self,
|
248 |
+
fs,
|
249 |
+
fs_join,
|
250 |
+
fs_target,
|
251 |
+
local_join,
|
252 |
+
supports_empty_directories,
|
253 |
+
local_bulk_operations_scenario_0,
|
254 |
+
):
|
255 |
+
# Copy scenario 1g
|
256 |
+
source = local_bulk_operations_scenario_0
|
257 |
+
|
258 |
+
target = fs_target
|
259 |
+
fs.mkdir(target)
|
260 |
+
if not supports_empty_directories:
|
261 |
+
# Force target directory to exist by adding a dummy file
|
262 |
+
dummy = fs_join(target, "dummy")
|
263 |
+
fs.touch(dummy)
|
264 |
+
assert fs.isdir(target)
|
265 |
+
|
266 |
+
for target_slash in [False, True]:
|
267 |
+
t = target + "/" if target_slash else target
|
268 |
+
|
269 |
+
# Without recursive
|
270 |
+
fs.put(local_join(source, "subdir", "*"), t)
|
271 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
272 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
273 |
+
assert not fs.isdir(fs_join(target, "nesteddir"))
|
274 |
+
assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
|
275 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
276 |
+
|
277 |
+
fs.rm(
|
278 |
+
[
|
279 |
+
fs_join(target, "subfile1"),
|
280 |
+
fs_join(target, "subfile2"),
|
281 |
+
],
|
282 |
+
recursive=True,
|
283 |
+
)
|
284 |
+
assert fs.ls(target, detail=False) == (
|
285 |
+
[] if supports_empty_directories else [dummy]
|
286 |
+
)
|
287 |
+
|
288 |
+
# With recursive
|
289 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
290 |
+
fs.put(local_join(source, "subdir", glob), t, recursive=recursive)
|
291 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
292 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
293 |
+
assert fs.isdir(fs_join(target, "nesteddir"))
|
294 |
+
assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
|
295 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
296 |
+
|
297 |
+
fs.rm(
|
298 |
+
[
|
299 |
+
fs_join(target, "subfile1"),
|
300 |
+
fs_join(target, "subfile2"),
|
301 |
+
fs_join(target, "nesteddir"),
|
302 |
+
],
|
303 |
+
recursive=True,
|
304 |
+
)
|
305 |
+
assert fs.ls(target, detail=False) == (
|
306 |
+
[] if supports_empty_directories else [dummy]
|
307 |
+
)
|
308 |
+
|
309 |
+
# Limit recursive by maxdepth
|
310 |
+
fs.put(
|
311 |
+
local_join(source, "subdir", glob),
|
312 |
+
t,
|
313 |
+
recursive=recursive,
|
314 |
+
maxdepth=1,
|
315 |
+
)
|
316 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
317 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
318 |
+
assert not fs.exists(fs_join(target, "nesteddir"))
|
319 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
320 |
+
|
321 |
+
fs.rm(
|
322 |
+
[
|
323 |
+
fs_join(target, "subfile1"),
|
324 |
+
fs_join(target, "subfile2"),
|
325 |
+
],
|
326 |
+
recursive=True,
|
327 |
+
)
|
328 |
+
assert fs.ls(target, detail=False) == (
|
329 |
+
[] if supports_empty_directories else [dummy]
|
330 |
+
)
|
331 |
+
|
332 |
+
def test_put_glob_to_new_directory(
|
333 |
+
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
334 |
+
):
|
335 |
+
# Copy scenario 1h
|
336 |
+
source = local_bulk_operations_scenario_0
|
337 |
+
|
338 |
+
target = fs_target
|
339 |
+
fs.mkdir(target)
|
340 |
+
|
341 |
+
for target_slash in [False, True]:
|
342 |
+
t = fs_join(target, "newdir")
|
343 |
+
if target_slash:
|
344 |
+
t += "/"
|
345 |
+
|
346 |
+
# Without recursive
|
347 |
+
fs.put(local_join(source, "subdir", "*"), t)
|
348 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
349 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
350 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
351 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
352 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
353 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
354 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
355 |
+
|
356 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
357 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
358 |
+
|
359 |
+
# With recursive
|
360 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
361 |
+
fs.put(local_join(source, "subdir", glob), t, recursive=recursive)
|
362 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
363 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
364 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
365 |
+
assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
|
366 |
+
assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
367 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
368 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
369 |
+
|
370 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
371 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
372 |
+
|
373 |
+
# Limit recursive by maxdepth
|
374 |
+
fs.put(
|
375 |
+
local_join(source, "subdir", glob),
|
376 |
+
t,
|
377 |
+
recursive=recursive,
|
378 |
+
maxdepth=1,
|
379 |
+
)
|
380 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
381 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
382 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
383 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
384 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
385 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
386 |
+
|
387 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
388 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
389 |
+
|
390 |
+
@pytest.mark.parametrize(
|
391 |
+
GLOB_EDGE_CASES_TESTS["argnames"],
|
392 |
+
GLOB_EDGE_CASES_TESTS["argvalues"],
|
393 |
+
)
|
394 |
+
def test_put_glob_edge_cases(
|
395 |
+
self,
|
396 |
+
path,
|
397 |
+
recursive,
|
398 |
+
maxdepth,
|
399 |
+
expected,
|
400 |
+
fs,
|
401 |
+
fs_join,
|
402 |
+
fs_target,
|
403 |
+
local_glob_edge_cases_files,
|
404 |
+
local_join,
|
405 |
+
fs_sanitize_path,
|
406 |
+
):
|
407 |
+
# Copy scenario 1g
|
408 |
+
source = local_glob_edge_cases_files
|
409 |
+
|
410 |
+
target = fs_target
|
411 |
+
|
412 |
+
for new_dir, target_slash in product([True, False], [True, False]):
|
413 |
+
fs.mkdir(target)
|
414 |
+
|
415 |
+
t = fs_join(target, "newdir") if new_dir else target
|
416 |
+
t = t + "/" if target_slash else t
|
417 |
+
|
418 |
+
fs.put(local_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
|
419 |
+
|
420 |
+
output = fs.find(target)
|
421 |
+
if new_dir:
|
422 |
+
prefixed_expected = [
|
423 |
+
fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
|
424 |
+
]
|
425 |
+
else:
|
426 |
+
prefixed_expected = [
|
427 |
+
fs_sanitize_path(fs_join(target, p)) for p in expected
|
428 |
+
]
|
429 |
+
assert sorted(output) == sorted(prefixed_expected)
|
430 |
+
|
431 |
+
try:
|
432 |
+
fs.rm(target, recursive=True)
|
433 |
+
except FileNotFoundError:
|
434 |
+
pass
|
435 |
+
|
436 |
+
def test_put_list_of_files_to_existing_directory(
|
437 |
+
self,
|
438 |
+
fs,
|
439 |
+
fs_join,
|
440 |
+
fs_target,
|
441 |
+
local_join,
|
442 |
+
local_bulk_operations_scenario_0,
|
443 |
+
supports_empty_directories,
|
444 |
+
):
|
445 |
+
# Copy scenario 2a
|
446 |
+
source = local_bulk_operations_scenario_0
|
447 |
+
|
448 |
+
target = fs_target
|
449 |
+
fs.mkdir(target)
|
450 |
+
if not supports_empty_directories:
|
451 |
+
# Force target directory to exist by adding a dummy file
|
452 |
+
dummy = fs_join(target, "dummy")
|
453 |
+
fs.touch(dummy)
|
454 |
+
assert fs.isdir(target)
|
455 |
+
|
456 |
+
source_files = [
|
457 |
+
local_join(source, "file1"),
|
458 |
+
local_join(source, "file2"),
|
459 |
+
local_join(source, "subdir", "subfile1"),
|
460 |
+
]
|
461 |
+
|
462 |
+
for target_slash in [False, True]:
|
463 |
+
t = target + "/" if target_slash else target
|
464 |
+
|
465 |
+
fs.put(source_files, t)
|
466 |
+
assert fs.isfile(fs_join(target, "file1"))
|
467 |
+
assert fs.isfile(fs_join(target, "file2"))
|
468 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
469 |
+
|
470 |
+
fs.rm(
|
471 |
+
[
|
472 |
+
fs_join(target, "file1"),
|
473 |
+
fs_join(target, "file2"),
|
474 |
+
fs_join(target, "subfile1"),
|
475 |
+
],
|
476 |
+
recursive=True,
|
477 |
+
)
|
478 |
+
assert fs.ls(target, detail=False) == (
|
479 |
+
[] if supports_empty_directories else [dummy]
|
480 |
+
)
|
481 |
+
|
482 |
+
def test_put_list_of_files_to_new_directory(
|
483 |
+
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
484 |
+
):
|
485 |
+
# Copy scenario 2b
|
486 |
+
source = local_bulk_operations_scenario_0
|
487 |
+
|
488 |
+
target = fs_target
|
489 |
+
fs.mkdir(target)
|
490 |
+
|
491 |
+
source_files = [
|
492 |
+
local_join(source, "file1"),
|
493 |
+
local_join(source, "file2"),
|
494 |
+
local_join(source, "subdir", "subfile1"),
|
495 |
+
]
|
496 |
+
|
497 |
+
fs.put(source_files, fs_join(target, "newdir") + "/") # Note trailing slash
|
498 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
499 |
+
assert fs.isfile(fs_join(target, "newdir", "file1"))
|
500 |
+
assert fs.isfile(fs_join(target, "newdir", "file2"))
|
501 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
502 |
+
|
503 |
+
def test_put_directory_recursive(
|
504 |
+
self, fs, fs_join, fs_target, local_fs, local_join, local_path
|
505 |
+
):
|
506 |
+
# https://github.com/fsspec/filesystem_spec/issues/1062
|
507 |
+
# Recursive cp/get/put of source directory into non-existent target directory.
|
508 |
+
src = local_join(local_path, "src")
|
509 |
+
src_file = local_join(src, "file")
|
510 |
+
local_fs.mkdir(src)
|
511 |
+
local_fs.touch(src_file)
|
512 |
+
|
513 |
+
target = fs_target
|
514 |
+
|
515 |
+
# put without slash
|
516 |
+
assert not fs.exists(target)
|
517 |
+
for loop in range(2):
|
518 |
+
fs.put(src, target, recursive=True)
|
519 |
+
assert fs.isdir(target)
|
520 |
+
|
521 |
+
if loop == 0:
|
522 |
+
assert fs.isfile(fs_join(target, "file"))
|
523 |
+
assert not fs.exists(fs_join(target, "src"))
|
524 |
+
else:
|
525 |
+
assert fs.isfile(fs_join(target, "file"))
|
526 |
+
assert fs.isdir(fs_join(target, "src"))
|
527 |
+
assert fs.isfile(fs_join(target, "src", "file"))
|
528 |
+
|
529 |
+
fs.rm(target, recursive=True)
|
530 |
+
|
531 |
+
# put with slash
|
532 |
+
assert not fs.exists(target)
|
533 |
+
for loop in range(2):
|
534 |
+
fs.put(src + "/", target, recursive=True)
|
535 |
+
assert fs.isdir(target)
|
536 |
+
assert fs.isfile(fs_join(target, "file"))
|
537 |
+
assert not fs.exists(fs_join(target, "src"))
|
538 |
+
|
539 |
+
def test_put_directory_without_files_with_same_name_prefix(
|
540 |
+
self,
|
541 |
+
fs,
|
542 |
+
fs_join,
|
543 |
+
fs_target,
|
544 |
+
local_join,
|
545 |
+
local_dir_and_file_with_same_name_prefix,
|
546 |
+
supports_empty_directories,
|
547 |
+
):
|
548 |
+
# Create the test dirs
|
549 |
+
source = local_dir_and_file_with_same_name_prefix
|
550 |
+
target = fs_target
|
551 |
+
|
552 |
+
# Test without glob
|
553 |
+
fs.put(local_join(source, "subdir"), fs_target, recursive=True)
|
554 |
+
|
555 |
+
assert fs.isfile(fs_join(fs_target, "subfile.txt"))
|
556 |
+
assert not fs.isfile(fs_join(fs_target, "subdir.txt"))
|
557 |
+
|
558 |
+
fs.rm([fs_join(target, "subfile.txt")])
|
559 |
+
if supports_empty_directories:
|
560 |
+
assert fs.ls(target) == []
|
561 |
+
else:
|
562 |
+
assert not fs.exists(target)
|
563 |
+
|
564 |
+
# Test with glob
|
565 |
+
fs.put(local_join(source, "subdir*"), fs_target, recursive=True)
|
566 |
+
|
567 |
+
assert fs.isdir(fs_join(fs_target, "subdir"))
|
568 |
+
assert fs.isfile(fs_join(fs_target, "subdir", "subfile.txt"))
|
569 |
+
assert fs.isfile(fs_join(fs_target, "subdir.txt"))
|
570 |
+
|
571 |
+
def test_copy_with_source_and_destination_as_list(
|
572 |
+
self, fs, fs_target, fs_join, local_join, local_10_files_with_hashed_names
|
573 |
+
):
|
574 |
+
# Create the test dir
|
575 |
+
source = local_10_files_with_hashed_names
|
576 |
+
target = fs_target
|
577 |
+
|
578 |
+
# Create list of files for source and destination
|
579 |
+
source_files = []
|
580 |
+
destination_files = []
|
581 |
+
for i in range(10):
|
582 |
+
hashed_i = md5(str(i).encode("utf-8")).hexdigest()
|
583 |
+
source_files.append(local_join(source, f"{hashed_i}.txt"))
|
584 |
+
destination_files.append(fs_join(target, f"{hashed_i}.txt"))
|
585 |
+
|
586 |
+
# Copy and assert order was kept
|
587 |
+
fs.put(lpath=source_files, rpath=destination_files)
|
588 |
+
|
589 |
+
for i in range(10):
|
590 |
+
file_content = fs.cat(destination_files[i]).decode("utf-8")
|
591 |
+
assert file_content == str(i)
|
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
+uv
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 Paul O'Leary McCann
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/LICENSE.mecab
ADDED
@@ -0,0 +1,29 @@
+Copyright (c) 2001-2008, Taku Kudo
+Copyright (c) 2004-2008, Nippon Telegraph and Telephone Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are
+permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above
+   copyright notice, this list of conditions and the
+   following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the
+   following disclaimer in the documentation and/or other
+   materials provided with the distribution.
+
+ * Neither the name of the Nippon Telegraph and Telegraph Corporation
+   nor the names of its contributors may be used to endorse or
+   promote products derived from this software without specific
+   prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/METADATA
ADDED
@@ -0,0 +1,157 @@
+Metadata-Version: 2.1
+Name: fugashi
+Version: 1.4.0
+Summary: A Cython MeCab wrapper for fast, pythonic Japanese tokenization.
+Home-page: https://github.com/polm/fugashi
+Author: Paul O'Leary McCann
+Author-email: [email protected]
+License: MIT
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Natural Language :: Japanese
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+License-File: LICENSE.mecab
+Provides-Extra: unidic
+Requires-Dist: unidic; extra == "unidic"
+Provides-Extra: unidic-lite
+Requires-Dist: unidic-lite; extra == "unidic-lite"
+
+[](https://fugashi.streamlit.app)
+[](https://pypi.org/project/fugashi/)
+
+[](https://pypi.org/project/fugashi/)
+
+
+# fugashi
+
+<img src="https://github.com/polm/fugashi/raw/master/fugashi.png" width=125 height=125 alt="fugashi by Irasutoya" />
+
+fugashi is a Cython wrapper for [MeCab](https://taku910.github.io/mecab/), a
+Japanese tokenizer and morphological analysis tool. Wheels are provided for
+Linux, OSX (Intel), and Win64, and UniDic is [easy to install](#installing-a-dictionary).
+
+**issueを英語で書く必要はありません。**
+
+Check out the [interactive demo][], see the [blog post](https://www.dampfkraft.com/nlp/fugashi.html) for background
+on why fugashi exists and some of the design decisions, or see [this
+guide][guide] for a basic introduction to Japanese tokenization.
+
+[guide]: https://www.dampfkraft.com/nlp/how-to-tokenize-japanese.html
+[interactive demo]: https://fugashi.streamlit.app
+
+If you are on a platform for which wheels are not provided, you'll need to
+install MeCab first. It's recommended you install [from
+source](https://github.com/taku910/mecab). If you need to build from source on
+Windows, [@chezou's fork](https://github.com/chezou/mecab) is recommended; see
+[issue #44](https://github.com/polm/fugashi/issues/44#issuecomment-954426115)
+for an explanation of the problems with the official repo.
+
+Known platforms without wheels:
+
+- musl-based distros like alpine [#77](https://github.com/polm/fugashi/issues/77)
+- PowerPC
+- Windows 32bit
+
+## Usage
+
+```python
+from fugashi import Tagger
+
+tagger = Tagger('-Owakati')
+text = "麩菓子は、麩を主材料とした日本の菓子。"
+tagger.parse(text)
+# => '麩 菓子 は 、 麩 を 主材 料 と し た 日本 の 菓子 。'
+for word in tagger(text):
+    print(word, word.feature.lemma, word.pos, sep='\t')
+    # "feature" is the Unidic feature data as a named tuple
+```
+
+## Installing a Dictionary
+
+fugashi requires a dictionary. [UniDic](https://unidic.ninjal.ac.jp/) is
+recommended, and two easy-to-install versions are provided.
+
+- [unidic-lite](https://github.com/polm/unidic-lite), a slightly modified version 2.1.2 of Unidic (from 2013) that's relatively small
+- [unidic](https://github.com/polm/unidic-py), the latest UniDic 3.1.0, which is 770MB on disk and requires a separate download step
+
+If you just want to make sure things work you can start with `unidic-lite`, but
+for more serious processing `unidic` is recommended. For production use you'll
+generally want to generate your own dictionary too; for details see the [MeCab
+documentation](https://taku910.github.io/mecab/learn.html).
+
+To get either of these dictionaries, you can install them directly using `pip`
+or do the below:
+
+```sh
+pip install 'fugashi[unidic-lite]'
+
+# The full version of UniDic requires a separate download step
+pip install 'fugashi[unidic]'
+python -m unidic download
+```
+
+For more information on the different MeCab dictionaries available, see [this article](https://www.dampfkraft.com/nlp/japanese-tokenizer-dictionaries.html).
+
+## Dictionary Use
+
+fugashi is written with the assumption you'll use Unidic to process Japanese,
+but it supports arbitrary dictionaries.
+
+If you're using a dictionary besides Unidic you can use the GenericTagger like this:
+
+```python
+from fugashi import GenericTagger
+tagger = GenericTagger()
+
+# parse can be used as normal
+tagger.parse('something')
+# features from the dictionary can be accessed by field numbers
+for word in tagger(text):
+    print(word.surface, word.feature[0])
+```
+
+You can also create a dictionary wrapper to get feature information as a named tuple.
+
+```python
+from fugashi import GenericTagger, create_feature_wrapper
+CustomFeatures = create_feature_wrapper('CustomFeatures', 'alpha beta gamma')
+tagger = GenericTagger(wrapper=CustomFeatures)
+for word in tagger.parseToNodeList(text):
+    print(word.surface, word.feature.alpha)
+```
+
+## Citation
+
+If you use fugashi in research, it would be appreciated if you cite this paper. You can read it at [the ACL Anthology](https://www.aclweb.org/anthology/2020.nlposs-1.7/) or [on Arxiv](https://arxiv.org/abs/2010.06858).
+
+    @inproceedings{mccann-2020-fugashi,
+        title = "fugashi, a Tool for Tokenizing {J}apanese in Python",
+        author = "McCann, Paul",
+        booktitle = "Proceedings of Second Workshop for NLP Open Source Software (NLP-OSS)",
+        month = nov,
+        year = "2020",
+        address = "Online",
+        publisher = "Association for Computational Linguistics",
+        url = "https://www.aclweb.org/anthology/2020.nlposs-1.7",
+        pages = "44--51",
+        abstract = "Recent years have seen an increase in the number of large-scale multilingual NLP projects. However, even in such projects, languages with special processing requirements are often excluded. One such language is Japanese. Japanese is written without spaces, tokenization is non-trivial, and while high quality open source tokenizers exist they can be hard to use and lack English documentation. This paper introduces fugashi, a MeCab wrapper for Python, and gives an introduction to tokenizing Japanese.",
+    }
+
+## Alternatives
+
+If you have a problem with fugashi feel free to open an issue. However, there
+are some cases where it might be better to use a different library.
+
+- If you don't want to deal with installing MeCab at all, try [SudachiPy](https://github.com/WorksApplications/sudachi.rs).
+- If you need to work with Korean, try [pymecab-ko](https://github.com/NoUnique/pymecab-ko) or [KoNLPy](https://konlpy.org/en/latest/).
+
+## License and Copyright Notice
+
+fugashi is released under the terms of the [MIT license](./LICENSE). Please
+copy it far and wide.
+
+fugashi is a wrapper for MeCab, and fugashi wheels include MeCab binaries.
+MeCab is copyrighted free software by Taku Kudo `<[email protected]>` and Nippon
+Telegraph and Telephone Corporation, and is redistributed under the [BSD
+License](./LICENSE.mecab).
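Editor's note (not part of the vendored METADATA above): for quick reference, here is a minimal sketch that combines the Tagger usage shown in that README with the `unidic-lite` extra. It assumes `pip install 'fugashi[unidic-lite]'` has already been run so that `Tagger()` can find a dictionary.

```python
# Minimal sketch based on the fugashi README above; assumes the unidic-lite
# extra is installed (pip install 'fugashi[unidic-lite]').
from fugashi import Tagger

tagger = Tagger()  # picks up the installed UniDic dictionary by default
text = "麩菓子は、麩を主材料とした日本の菓子。"

# Each word exposes its surface form and UniDic features, as in the README.
for word in tagger(text):
    print(word.surface, word.feature.lemma, word.pos, sep="\t")
```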
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/RECORD
ADDED
@@ -0,0 +1,16 @@
+..\..\Scripts\fugashi-build-dict.exe,sha256=LZb2-amxDd5IoFQbVAzDDziSB4U-xnLpaTEW1UJqCRU,41432
+..\..\Scripts\fugashi-info.exe,sha256=gy05XrnUBSd9AORdNnNmHG6DkudXGw03U4uul0n6NTo,41420
+..\..\Scripts\fugashi.exe,sha256=dgp3IR-hWxA25GVyVKrbnMck3KTU7emSHsucry_rwZY,41420
+..\..\lib\site-packages\fugashi\libmecab.dll,sha256=2N3AeRQ3zoxKGHrnxpovaDzkI2g7el7P2hxM70NsHKs,1910784
+fugashi-1.4.0.dist-info/LICENSE,sha256=2vfu3p70KKWeqFRofnatHm5flYb_aZjXy2GJqHiQRvk,1097
+fugashi-1.4.0.dist-info/LICENSE.mecab,sha256=Pb-TvC2ag2gCYgej6C7fwu67r-83z1cBIU9C_dP4pxk,1631
+fugashi-1.4.0.dist-info/METADATA,sha256=lPJ1OXNya8_ikeo7cUopng_cDpk8Np9LOdULri2-X1g,7059
+fugashi-1.4.0.dist-info/RECORD,,
+fugashi-1.4.0.dist-info/WHEEL,sha256=zq3MnTB53_Huh0eFGROKhLNn5cmUbG6gUFCG6-LWXTY,99
+fugashi-1.4.0.dist-info/entry_points.txt,sha256=jV282mMQTVkhqOVFTdm_ZQ03pJndByW2JtrSa_a2Wms,121
+fugashi-1.4.0.dist-info/top_level.txt,sha256=1CQTgPUFi4hjTQg2nHdIR-oH6EfyXtpLhiUglCmuOoM,8
+fugashi-1.4.0.dist-info\INSTALLER,sha256=5hhM4Q4mYTT9z6QB6PGpUAW81PGNFrYrdXMj4oM_6ak,2
+fugashi-1.4.0.dist-info\REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+fugashi/__init__.py,sha256=KW98SIOE-cAtzst_n-YNtEhYznwWLTX6tm_5XJJlOPA,26
+fugashi/cli.py,sha256=wwLj3Nkl1Dtx1SjDeAAaYB3KWsRp5PALqmhdvKN4ZAk,1553
+fugashi/fugashi.cp39-win_amd64.pyd,sha256=XRyL_8gC8WWR6OLV-mdqnFtQHJ387AqiLnT6aiQzVag,112640
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/REQUESTED
ADDED
File without changes
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/WHEEL
ADDED
@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (75.3.0)
+Root-Is-Purelib: false
+Tag: cp39-cp39-win_amd64
+
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/entry_points.txt
ADDED
@@ -0,0 +1,4 @@
+[console_scripts]
+fugashi = fugashi.cli:main
+fugashi-build-dict = fugashi.cli:build_dict
+fugashi-info = fugashi.cli:info
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+fugashi
.venv/Lib/site-packages/fugashi/__init__.py
ADDED
@@ -0,0 +1,2 @@
+from .fugashi import *
+
.venv/Lib/site-packages/fugashi/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (203 Bytes).
.venv/Lib/site-packages/fugashi/cli.py
ADDED
@@ -0,0 +1,47 @@
+from fugashi import GenericTagger, Tagger, build_dictionary
+import sys
+import fileinput
+
+def main():
+    """
+    This is a simple wrapper for fugashi so you can test it from the command line.
+    Like the mecab binary, it treats each line of stdin as one sentence. You can
+    pass tagger arguments here too.
+    """
+    args = ' '.join(sys.argv[1:])
+
+    # This should work if you specify a different dictionary,
+    # but it should also work with the pip unidic.
+    # Try the GenericTagger and then try the Unidic tagger.
+    try:
+        tagger = GenericTagger(args, quiet=True)
+    except RuntimeError:
+        tagger = Tagger(args)
+
+    for line in fileinput.input([]):
+        print(tagger.parse(line.strip()))
+
+def info():
+    """Print configuration info."""
+    args = ' '.join(sys.argv[1:])
+    try:
+        tagger = GenericTagger(args, quiet=True)
+    except RuntimeError:
+        tagger = Tagger(args)
+    #TODO get the fugashi version here too
+    print("Fugashi dictionary info:")
+    print("-----")
+    for di in tagger.dictionary_info:
+        for field in 'version size charset filename'.split():
+            print( (field + ':').ljust(10), di[field])
+    print('-----')
+
+def build_dict():
+    """EXPERIMENTAL A wrapper for MeCab's user dictionary building command.
+
+    This also defaults to utf8.
+    """
+    # TODO simplify using pip-installed dictionaries as base
+    args = sys.argv[0] + " -f utf8 -t utf8 " + ' '.join(sys.argv[1:])
+    print(args)
+    build_dictionary(args)
.venv/Lib/site-packages/fugashi/fugashi.cp39-win_amd64.pyd
ADDED
Binary file (113 kB).
.venv/Lib/site-packages/functorch/_C.cp39-win_amd64.pyd
ADDED
Binary file (322 kB).
.venv/Lib/site-packages/functorch/__init__.py
ADDED
@@ -0,0 +1,39 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+import torch
+from torch._functorch.deprecated import (
+    combine_state_for_ensemble,
+    functionalize,
+    grad,
+    grad_and_value,
+    hessian,
+    jacfwd,
+    jacrev,
+    jvp,
+    make_functional,
+    make_functional_with_buffers,
+    vjp,
+    vmap,
+)
+
+# utilities. Maybe these should go in their own namespace in the future?
+from torch._functorch.make_functional import (
+    FunctionalModule,
+    FunctionalModuleWithBuffers,
+)
+
+# Was never documented
+from torch._functorch.python_key import make_fx
+
+
+# Top-level APIs. Please think carefully before adding something to the
+# top-level namespace:
+# - private helper functions should go into torch._functorch
+# - very experimental things should go into functorch.experimental
+# - compilation related things should go into functorch.compile
+
+
+__version__ = torch.__version__
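Editor's note (not part of the vendored file above): the deprecated top-level aliases re-exported there, such as `grad` and `vmap`, can still be composed as in the upstream functorch examples. A minimal sketch, assuming a torch build where these aliases still forward to `torch._functorch`:

```python
# Minimal sketch using the deprecated top-level functorch aliases re-exported above.
import torch
from functorch import grad, vmap

def f(x):
    return torch.sin(x).sum()

x = torch.randn(3)
print(grad(f)(x))             # elementwise cos(x), the gradient of sin(x).sum()
print(vmap(torch.square)(x))  # batched square over the leading dimension
```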
.venv/Lib/site-packages/functorch/_src/make_functional/__init__.py
ADDED
@@ -0,0 +1,4 @@
+# This file has moved to under torch/_functorch. It is not public API.
+# If you are not a PyTorch developer and you are relying on the following
+# imports, please file an issue.
+from torch._functorch.make_functional import _swap_state
.venv/Lib/site-packages/functorch/_src/vmap/__init__.py
ADDED
@@ -0,0 +1,16 @@
+# This file has moved to under torch/_functorch. It is not public API.
+# If you are not a PyTorch developer and you are relying on the following
+# imports, please file an issue.
+from torch._functorch.vmap import (
+    _add_batch_dim,
+    _broadcast_to_and_flatten,
+    _create_batched_inputs,
+    _get_name,
+    _process_batched_inputs,
+    _remove_batch_dim,
+    _unwrap_batched,
+    _validate_and_get_batch_size,
+    Tensor,
+    tree_flatten,
+    tree_unflatten,
+)
.venv/Lib/site-packages/functorch/compile/__init__.py
ADDED
@@ -0,0 +1,30 @@
+from torch._functorch import config
+from torch._functorch.aot_autograd import (
+    aot_function,
+    aot_module,
+    aot_module_simplified,
+    compiled_function,
+    compiled_module,
+    get_aot_compilation_context,
+    get_aot_graph_name,
+    get_graph_being_compiled,
+    make_boxed_compiler,
+    make_boxed_func,
+)
+from torch._functorch.compilers import (
+    debug_compile,
+    default_decompositions,
+    draw_graph_compile,
+    memory_efficient_fusion,
+    nnc_jit,
+    nop,
+    print_compile,
+    ts_compile,
+)
+from torch._functorch.fx_minifier import minifier
+from torch._functorch.partitioners import (
+    default_partition,
+    draw_graph,
+    min_cut_rematerialization_partition,
+)
+from torch._functorch.python_key import pythonkey_decompose
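Editor's note (not part of the vendored file above): a minimal sketch of the AOT Autograd entry points re-exported there. It assumes they behave as in upstream `torch._functorch`; `nop` is the pass-through compiler from the same module, and a real setup would swap in `ts_compile` or a custom FX-graph compiler.

```python
# Minimal sketch of the AOT Autograd entry points re-exported above.
# `nop` is the pass-through compiler; this only traces and replays the graphs.
import torch
from functorch.compile import aot_function, nop

def f(x):
    return torch.sin(x).sum()

aot_f = aot_function(f, fw_compiler=nop)
x = torch.randn(4, requires_grad=True)
aot_f(x).backward()   # backward also runs through the (pass-through) compiled graph
print(x.grad)
```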
.venv/Lib/site-packages/functorch/dim/batch_tensor.py
ADDED
@@ -0,0 +1,26 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from contextlib import contextmanager
+
+from torch._C._functorch import _vmap_add_layers, _vmap_remove_layers
+
+
+_enabled = False
+
+
+@contextmanager
+def _enable_layers(dims):
+    global _enabled
+    assert not _enabled
+    input = sorted((d._level, d.size) for d in dims if not isinstance(d, int))
+    n = len(input)
+    try:
+        _vmap_add_layers(input)
+        _enabled = True
+        yield
+    finally:
+        _enabled = False
+        _vmap_remove_layers(n)
.venv/Lib/site-packages/functorch/dim/delayed_mul_tensor.py
ADDED
@@ -0,0 +1,77 @@
1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
2 |
+
# All rights reserved.
|
3 |
+
#
|
4 |
+
# This source code is licensed under the BSD-style license found in the
|
5 |
+
# LICENSE file in the root directory of this source tree.
|
6 |
+
import torch
|
7 |
+
|
8 |
+
from . import _Tensor, Tensor
|
9 |
+
from .reference import _dims, _enable_layers, llist, ltuple
|
10 |
+
|
11 |
+
|
12 |
+
class DelayedMulTensor(_Tensor):
|
13 |
+
def __init__(self, lhs, rhs):
|
14 |
+
self._lhs, self._rhs = lhs, rhs
|
15 |
+
self._data = None
|
16 |
+
self._levels_data = None
|
17 |
+
self._has_device = lhs._has_device or rhs._has_device
|
18 |
+
self._batchtensor_data = None
|
19 |
+
self._tensor_data = None
|
20 |
+
|
21 |
+
@property
|
22 |
+
def _levels(self):
|
23 |
+
if self._levels_data is None:
|
24 |
+
levels = llist(self._lhs._levels)
|
25 |
+
for l in self._rhs._levels:
|
26 |
+
if l not in levels:
|
27 |
+
levels.append(l)
|
28 |
+
self._levels_data = ltuple(levels)
|
29 |
+
return self._levels_data
|
30 |
+
|
31 |
+
@property
|
32 |
+
def _batchtensor(self):
|
33 |
+
if self._batchtensor_data is None:
|
34 |
+
with _enable_layers(self._levels):
|
35 |
+
print("bt multiply fallback")
|
36 |
+
self._batchtensor_data = self._lhs._batchtensor * self._rhs._batchtensor
|
37 |
+
return self._batchtensor_data
|
38 |
+
|
39 |
+
@property
|
40 |
+
def _tensor(self):
|
41 |
+
if self._tensor_data is None:
|
42 |
+
self._tensor_data = Tensor.from_batched(
|
43 |
+
self._batchtensor, self._has_device
|
44 |
+
)._tensor
|
45 |
+
return self._tensor_data
|
46 |
+
|
47 |
+
@property
|
48 |
+
def ndim(self):
|
49 |
+
return self._batchtensor.ndim
|
50 |
+
|
51 |
+
@property
|
52 |
+
def dims(self):
|
53 |
+
return ltuple(super().dims)
|
54 |
+
|
55 |
+
def sum(self, dim):
|
56 |
+
dims = _dims(dim, 0, False, False)
|
57 |
+
n = ord("a")
|
58 |
+
all_levels = self._levels
|
59 |
+
|
60 |
+
def to_char(d):
|
61 |
+
return chr(n + all_levels.index(d))
|
62 |
+
|
63 |
+
plhs, levelslhs = self._lhs._tensor, self._lhs._levels
|
64 |
+
prhs, levelsrhs = self._rhs._tensor, self._rhs._levels
|
65 |
+
new_dims = tuple(d for d in self.dims if d not in dims)
|
66 |
+
new_levels = [l for l in self._levels if l not in dims]
|
67 |
+
fmt = "".join(
|
68 |
+
[
|
69 |
+
*(to_char(d) for d in levelslhs),
|
70 |
+
",",
|
71 |
+
*(to_char(d) for d in levelsrhs),
|
72 |
+
"->",
|
73 |
+
*(to_char(d) for d in new_levels),
|
74 |
+
]
|
75 |
+
)
|
76 |
+
result_data = torch.einsum(fmt, (plhs, prhs))
|
77 |
+
return Tensor.from_positional(result_data, new_levels, True)
|
.venv/Lib/site-packages/functorch/dim/dim.py
ADDED
@@ -0,0 +1,121 @@
1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
2 |
+
# All rights reserved.
|
3 |
+
#
|
4 |
+
# This source code is licensed under the BSD-style license found in the
|
5 |
+
# LICENSE file in the root directory of this source tree.
|
6 |
+
import dis
|
7 |
+
import inspect
|
8 |
+
from dataclasses import dataclass
|
9 |
+
from typing import Union
|
10 |
+
|
11 |
+
from . import DimList
|
12 |
+
|
13 |
+
|
14 |
+
_vmap_levels = []
|
15 |
+
|
16 |
+
|
17 |
+
@dataclass
|
18 |
+
class LevelInfo:
|
19 |
+
level: int
|
20 |
+
alive: bool = True
|
21 |
+
|
22 |
+
|
23 |
+
class Dim:
|
24 |
+
def __init__(self, name: str, size: Union[None, int] = None):
|
25 |
+
self.name = name
|
26 |
+
self._size = None
|
27 |
+
self._vmap_level = None
|
28 |
+
if size is not None:
|
29 |
+
self.size = size
|
30 |
+
|
31 |
+
def __del__(self):
|
32 |
+
if self._vmap_level is not None:
|
33 |
+
_vmap_active_levels[self._vmap_stack].alive = False # noqa: F821
|
34 |
+
while (
|
35 |
+
not _vmap_levels[-1].alive
|
36 |
+
and current_level() == _vmap_levels[-1].level # noqa: F821
|
37 |
+
):
|
38 |
+
_vmap_decrement_nesting() # noqa: F821
|
39 |
+
_vmap_levels.pop()
|
40 |
+
|
41 |
+
@property
|
42 |
+
def size(self):
|
43 |
+
assert self.is_bound
|
44 |
+
return self._size
|
45 |
+
|
46 |
+
@size.setter
|
47 |
+
def size(self, size: int):
|
48 |
+
from . import DimensionBindError
|
49 |
+
|
50 |
+
if self._size is None:
|
51 |
+
self._size = size
|
52 |
+
self._vmap_level = _vmap_increment_nesting(size, "same") # noqa: F821
|
53 |
+
self._vmap_stack = len(_vmap_levels)
|
54 |
+
_vmap_levels.append(LevelInfo(self._vmap_level))
|
55 |
+
|
56 |
+
elif self._size != size:
|
57 |
+
raise DimensionBindError(
|
58 |
+
f"Dim '{self}' previously bound to a dimension of size {self._size} cannot bind to a dimension of size {size}"
|
59 |
+
)
|
60 |
+
|
61 |
+
@property
|
62 |
+
def is_bound(self):
|
63 |
+
return self._size is not None
|
64 |
+
|
65 |
+
def __repr__(self):
|
66 |
+
return self.name
|
67 |
+
|
68 |
+
|
69 |
+
def extract_name(inst):
|
70 |
+
assert inst.opname == "STORE_FAST" or inst.opname == "STORE_NAME"
|
71 |
+
return inst.argval
|
72 |
+
|
73 |
+
|
74 |
+
_cache = {}
|
75 |
+
|
76 |
+
|
77 |
+
def dims(lists=0):
|
78 |
+
frame = inspect.currentframe()
|
79 |
+
assert frame is not None
|
80 |
+
calling_frame = frame.f_back
|
81 |
+
assert calling_frame is not None
|
82 |
+
code, lasti = calling_frame.f_code, calling_frame.f_lasti
|
83 |
+
key = (code, lasti)
|
84 |
+
if key not in _cache:
|
85 |
+
first = lasti // 2 + 1
|
86 |
+
instructions = list(dis.get_instructions(calling_frame.f_code))
|
87 |
+
unpack = instructions[first]
|
88 |
+
|
89 |
+
if unpack.opname == "STORE_FAST" or unpack.opname == "STORE_NAME":
|
90 |
+
# just a single dim, not a list
|
91 |
+
name = unpack.argval
|
92 |
+
ctor = Dim if lists == 0 else DimList
|
93 |
+
_cache[key] = lambda: ctor(name=name)
|
94 |
+
else:
|
95 |
+
assert unpack.opname == "UNPACK_SEQUENCE"
|
96 |
+
ndims = unpack.argval
|
97 |
+
names = tuple(
|
98 |
+
extract_name(instructions[first + 1 + i]) for i in range(ndims)
|
99 |
+
)
|
100 |
+
first_list = len(names) - lists
|
101 |
+
_cache[key] = lambda: tuple(
|
102 |
+
Dim(n) if i < first_list else DimList(name=n)
|
103 |
+
for i, n in enumerate(names)
|
104 |
+
)
|
105 |
+
return _cache[key]()
|
106 |
+
|
107 |
+
|
108 |
+
def _dim_set(positional, arg):
|
109 |
+
def convert(a):
|
110 |
+
if isinstance(a, Dim):
|
111 |
+
return a
|
112 |
+
else:
|
113 |
+
assert isinstance(a, int)
|
114 |
+
return positional[a]
|
115 |
+
|
116 |
+
if arg is None:
|
117 |
+
return positional
|
118 |
+
elif not isinstance(arg, (Dim, int)):
|
119 |
+
return tuple(convert(a) for a in arg)
|
120 |
+
else:
|
121 |
+
return (convert(arg),)
|
.venv/Lib/site-packages/functorch/dim/magic_trace.py
ADDED
@@ -0,0 +1,42 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+import os
+import signal
+import subprocess
+from contextlib import contextmanager
+
+
+@contextmanager
+def magic_trace(output="trace.fxt", magic_trace_cache="/tmp/magic-trace"):
+    pid = os.getpid()
+    if not os.path.exists(magic_trace_cache):
+        print(f"Downloading magic_trace to: {magic_trace_cache}")
+        subprocess.run(
+            [
+                "wget",
+                "-O",
+                magic_trace_cache,
+                "-q",
+                "https://github.com/janestreet/magic-trace/releases/download/v1.0.2/magic-trace",
+            ]
+        )
+        subprocess.run(["chmod", "+x", magic_trace_cache])
+    args = [magic_trace_cache, "attach", "-pid", str(pid), "-o", output]
+    p = subprocess.Popen(args, stderr=subprocess.PIPE, encoding="utf-8")
+    while True:
+        x = p.stderr.readline()
+        print(x)
+        if "Attached" in x:
+            break
+    try:
+        yield
+    finally:
+        p.send_signal(signal.SIGINT)
+        r = p.wait()
+        print(p.stderr.read())
+        p.stderr.close()
+        if r != 0:
+            raise ValueError(f"magic_trace exited abnormally: {r}")
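Editor's note (not part of the vendored file above): a minimal usage sketch for the `magic_trace` context manager defined there. It assumes a Linux host where the downloaded magic-trace binary is allowed to attach a profiler to the current process; the trace is written to `trace.fxt` by default.

```python
# Minimal sketch: profile a hot loop with the magic_trace context manager above.
# Assumes a Linux host with permission to attach to the current process.
import torch
from functorch.dim.magic_trace import magic_trace

def hot_loop():
    x = torch.randn(1024, 1024)
    for _ in range(100):
        x = x @ x.T  # repeated matmuls to give the profiler something to see
    return x

with magic_trace(output="trace.fxt"):
    hot_loop()
```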
.venv/Lib/site-packages/functorch/dim/op_properties.py
ADDED
@@ -0,0 +1,312 @@
1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
2 |
+
# All rights reserved.
|
3 |
+
#
|
4 |
+
# This source code is licensed under the BSD-style license found in the
|
5 |
+
# LICENSE file in the root directory of this source tree.
|
6 |
+
import torch
|
7 |
+
|
8 |
+
|
9 |
+
# pointwise operators can go through a faster pathway
|
10 |
+
|
11 |
+
tensor_magic_methods = ["add", ""]
|
12 |
+
pointwise_magic_methods_with_reverse = (
|
13 |
+
"add",
|
14 |
+
"sub",
|
15 |
+
"mul",
|
16 |
+
"floordiv",
|
17 |
+
"div",
|
18 |
+
"truediv",
|
19 |
+
"mod",
|
20 |
+
"pow",
|
21 |
+
"lshift",
|
22 |
+
"rshift",
|
23 |
+
"and",
|
24 |
+
"or",
|
25 |
+
"xor",
|
26 |
+
)
|
27 |
+
pointwise_magic_methods = (
|
28 |
+
*(x for m in pointwise_magic_methods_with_reverse for x in (m, "r" + m)),
|
29 |
+
"eq",
|
30 |
+
"gt",
|
31 |
+
"le",
|
32 |
+
"lt",
|
33 |
+
"ge",
|
34 |
+
"gt",
|
35 |
+
"ne",
|
36 |
+
"neg",
|
37 |
+
"pos",
|
38 |
+
"abs",
|
39 |
+
"invert",
|
40 |
+
"iadd",
|
41 |
+
"isub",
|
42 |
+
"imul",
|
43 |
+
"ifloordiv",
|
44 |
+
"idiv",
|
45 |
+
"itruediv",
|
46 |
+
"imod",
|
47 |
+
"ipow",
|
48 |
+
"ilshift",
|
49 |
+
"irshift",
|
50 |
+
"iand",
|
51 |
+
"ior",
|
52 |
+
"ixor",
|
53 |
+
"int",
|
54 |
+
"long",
|
55 |
+
"float",
|
56 |
+
"complex",
|
57 |
+
)
|
58 |
+
|
59 |
+
pointwise_methods = (*(f"__{m}__" for m in pointwise_magic_methods),)
|
60 |
+
|
61 |
+
pointwise = (
|
62 |
+
*(getattr(torch.Tensor, m) for m in pointwise_methods),
|
63 |
+
torch.nn.functional.dropout,
|
64 |
+
torch.where,
|
65 |
+
torch.Tensor.abs,
|
66 |
+
torch.abs,
|
67 |
+
torch.Tensor.acos,
|
68 |
+
torch.acos,
|
69 |
+
torch.Tensor.acosh,
|
70 |
+
torch.acosh,
|
71 |
+
torch.Tensor.add,
|
72 |
+
torch.add,
|
73 |
+
torch.Tensor.addcdiv,
|
74 |
+
torch.addcdiv,
|
75 |
+
torch.Tensor.addcmul,
|
76 |
+
torch.addcmul,
|
77 |
+
torch.Tensor.addr,
|
78 |
+
torch.addr,
|
79 |
+
torch.Tensor.angle,
|
80 |
+
torch.angle,
|
81 |
+
torch.Tensor.asin,
|
82 |
+
torch.asin,
|
83 |
+
torch.Tensor.asinh,
|
84 |
+
torch.asinh,
|
85 |
+
torch.Tensor.atan,
|
86 |
+
torch.atan,
|
87 |
+
torch.Tensor.atan2,
|
88 |
+
torch.atan2,
|
89 |
+
torch.Tensor.atanh,
|
90 |
+
torch.atanh,
|
91 |
+
torch.Tensor.bitwise_and,
|
92 |
+
torch.bitwise_and,
|
93 |
+
torch.Tensor.bitwise_left_shift,
|
94 |
+
torch.bitwise_left_shift,
|
95 |
+
torch.Tensor.bitwise_not,
|
96 |
+
torch.bitwise_not,
|
97 |
+
torch.Tensor.bitwise_or,
|
98 |
+
torch.bitwise_or,
|
99 |
+
torch.Tensor.bitwise_right_shift,
|
100 |
+
torch.bitwise_right_shift,
|
101 |
+
torch.Tensor.bitwise_xor,
|
102 |
+
torch.bitwise_xor,
|
103 |
+
torch.Tensor.ceil,
|
104 |
+
torch.ceil,
|
105 |
+
torch.celu,
|
106 |
+
torch.nn.functional.celu,
|
107 |
+
torch.Tensor.clamp,
|
108 |
+
torch.clamp,
|
109 |
+
torch.Tensor.clamp_max,
|
110 |
+
torch.clamp_max,
|
111 |
+
torch.Tensor.clamp_min,
|
112 |
+
torch.clamp_min,
|
113 |
+
torch.Tensor.copysign,
|
114 |
+
torch.copysign,
|
115 |
+
torch.Tensor.cos,
|
116 |
+
torch.cos,
|
117 |
+
torch.Tensor.cosh,
|
118 |
+
torch.cosh,
|
119 |
+
torch.Tensor.deg2rad,
|
120 |
+
torch.deg2rad,
|
121 |
+
torch.Tensor.digamma,
|
122 |
+
torch.digamma,
|
123 |
+
torch.Tensor.div,
|
124 |
+
torch.div,
|
125 |
+
torch.dropout,
|
126 |
+
torch.nn.functional.dropout,
|
127 |
+
torch.nn.functional.elu,
|
128 |
+
torch.Tensor.eq,
|
129 |
+
torch.eq,
|
130 |
+
torch.Tensor.erf,
|
131 |
+
torch.erf,
|
132 |
+
torch.Tensor.erfc,
|
133 |
+
torch.erfc,
|
134 |
+
torch.Tensor.erfinv,
|
135 |
+
torch.erfinv,
|
136 |
+
torch.Tensor.exp,
|
137 |
+
torch.exp,
|
138 |
+
torch.Tensor.exp2,
|
139 |
+
torch.exp2,
|
140 |
+
torch.Tensor.expm1,
|
141 |
+
torch.expm1,
|
142 |
+
torch.feature_dropout,
|
143 |
+
torch.Tensor.float_power,
|
144 |
+
torch.float_power,
|
145 |
+
torch.Tensor.floor,
|
146 |
+
torch.floor,
|
147 |
+
torch.Tensor.floor_divide,
|
148 |
+
torch.floor_divide,
|
149 |
+
torch.Tensor.fmod,
|
150 |
+
torch.fmod,
|
151 |
+
torch.Tensor.frac,
|
152 |
+
torch.frac,
|
153 |
+
torch.Tensor.frexp,
|
154 |
+
torch.frexp,
|
155 |
+
torch.Tensor.gcd,
|
156 |
+
torch.gcd,
|
157 |
+
torch.Tensor.ge,
|
158 |
+
torch.ge,
|
159 |
+
torch.nn.functional.gelu,
|
160 |
+
torch.nn.functional.glu,
|
161 |
+
torch.Tensor.gt,
|
162 |
+
torch.gt,
|
163 |
+
torch.Tensor.hardshrink,
|
164 |
+
torch.hardshrink,
|
165 |
+
torch.nn.functional.hardshrink,
|
166 |
+
torch.nn.functional.hardsigmoid,
|
167 |
+
torch.nn.functional.hardswish,
|
168 |
+
torch.nn.functional.hardtanh,
|
169 |
+
torch.Tensor.heaviside,
|
170 |
+
torch.heaviside,
|
171 |
+
torch.Tensor.hypot,
|
172 |
+
torch.hypot,
|
173 |
+
torch.Tensor.i0,
|
174 |
+
torch.i0,
|
175 |
+
torch.Tensor.igamma,
|
176 |
+
torch.igamma,
|
177 |
+
torch.Tensor.igammac,
|
178 |
+
torch.igammac,
|
179 |
+
torch.Tensor.isclose,
|
180 |
+
torch.isclose,
|
181 |
+
torch.Tensor.isfinite,
|
182 |
+
torch.isfinite,
|
183 |
+
torch.Tensor.isinf,
|
184 |
+
torch.isinf,
|
185 |
+
torch.Tensor.isnan,
|
186 |
+
torch.isnan,
|
187 |
+
torch.Tensor.isneginf,
|
188 |
+
torch.isneginf,
|
189 |
+
torch.Tensor.isposinf,
|
190 |
+
torch.isposinf,
|
191 |
+
torch.Tensor.isreal,
|
192 |
+
torch.isreal,
|
193 |
+
torch.Tensor.kron,
|
194 |
+
torch.kron,
|
195 |
+
torch.Tensor.lcm,
|
196 |
+
torch.lcm,
|
197 |
+
torch.Tensor.ldexp,
|
198 |
+
torch.ldexp,
|
199 |
+
torch.Tensor.le,
|
200 |
+
torch.le,
|
201 |
+
torch.nn.functional.leaky_relu,
|
202 |
+
torch.Tensor.lerp,
|
203 |
+
torch.lerp,
|
204 |
+
torch.Tensor.lgamma,
|
205 |
+
torch.lgamma,
|
206 |
+
torch.Tensor.log,
|
207 |
+
torch.log,
|
208 |
+
torch.Tensor.log10,
|
209 |
+
torch.log10,
|
210 |
+
torch.Tensor.log1p,
|
211 |
+
torch.log1p,
|
212 |
+
torch.Tensor.log2,
|
213 |
+
torch.log2,
|
214 |
+
torch.nn.functional.logsigmoid,
|
215 |
+
torch.Tensor.logical_and,
|
216 |
+
torch.logical_and,
|
217 |
+
torch.Tensor.logical_not,
|
218 |
+
torch.logical_not,
|
219 |
+
torch.Tensor.logical_or,
|
220 |
+
torch.logical_or,
|
221 |
+
torch.Tensor.logical_xor,
|
222 |
+
torch.logical_xor,
|
223 |
+
torch.Tensor.logit,
|
224 |
+
torch.logit,
|
225 |
+
torch.Tensor.lt,
|
226 |
+
torch.lt,
|
227 |
+
torch.Tensor.maximum,
|
228 |
+
torch.maximum,
|
229 |
+
torch.Tensor.minimum,
|
230 |
+
torch.minimum,
|
231 |
+
torch.nn.functional.mish,
|
232 |
+
torch.Tensor.mvlgamma,
|
233 |
+
torch.mvlgamma,
|
234 |
+
torch.Tensor.nan_to_num,
|
235 |
+
torch.nan_to_num,
|
236 |
+
torch.Tensor.ne,
|
237 |
+
torch.ne,
|
238 |
+
torch.Tensor.neg,
|
239 |
+
torch.neg,
|
240 |
+
torch.Tensor.nextafter,
|
241 |
+
torch.nextafter,
|
242 |
+
torch.Tensor.outer,
|
243 |
+
torch.outer,
|
244 |
+
torch.polar,
|
245 |
+
torch.Tensor.polygamma,
|
246 |
+
torch.polygamma,
|
247 |
+
torch.Tensor.positive,
|
248 |
+
torch.positive,
|
249 |
+
torch.Tensor.pow,
|
250 |
+
torch.pow,
|
251 |
+
torch.Tensor.prelu,
|
252 |
+
torch.prelu,
|
253 |
+
torch.nn.functional.prelu,
|
254 |
+
torch.Tensor.rad2deg,
|
255 |
+
torch.rad2deg,
|
256 |
+
torch.Tensor.reciprocal,
|
257 |
+
torch.reciprocal,
|
258 |
+
torch.Tensor.relu,
|
259 |
+
torch.relu,
|
260 |
+
torch.nn.functional.relu,
|
261 |
+
torch.nn.functional.relu6,
|
262 |
+
torch.Tensor.remainder,
|
263 |
+
torch.remainder,
|
264 |
+
torch.Tensor.round,
|
265 |
+
torch.round,
|
266 |
+
torch.rrelu,
|
267 |
+
torch.nn.functional.rrelu,
|
268 |
+
torch.Tensor.rsqrt,
|
269 |
+
torch.rsqrt,
|
270 |
+
torch.rsub,
|
271 |
+
torch.selu,
|
272 |
+
torch.nn.functional.selu,
|
273 |
+
torch.Tensor.sgn,
|
274 |
+
torch.sgn,
|
275 |
+
torch.Tensor.sigmoid,
|
276 |
+
torch.sigmoid,
|
277 |
+
torch.nn.functional.sigmoid,
|
278 |
+
torch.Tensor.sign,
|
279 |
+
torch.sign,
|
280 |
+
torch.Tensor.signbit,
|
281 |
+
torch.signbit,
|
282 |
+
torch.nn.functional.silu,
|
283 |
+
torch.Tensor.sin,
|
284 |
+
torch.sin,
|
285 |
+
torch.Tensor.sinc,
|
286 |
+
torch.sinc,
|
287 |
+
torch.Tensor.sinh,
|
288 |
+
torch.sinh,
|
289 |
+
torch.nn.functional.softplus,
|
290 |
+
torch.nn.functional.softshrink,
|
291 |
+
torch.Tensor.sqrt,
|
292 |
+
torch.sqrt,
|
293 |
+
torch.Tensor.square,
|
294 |
+
torch.square,
|
295 |
+
torch.Tensor.sub,
|
296 |
+
torch.sub,
|
297 |
+
torch.Tensor.tan,
|
298 |
+
torch.tan,
|
299 |
+
torch.Tensor.tanh,
|
300 |
+
torch.tanh,
|
301 |
+
torch.nn.functional.tanh,
|
302 |
+
torch.threshold,
|
303 |
+
torch.nn.functional.threshold,
|
304 |
+
torch.trapz,
|
305 |
+
torch.Tensor.true_divide,
|
306 |
+
torch.true_divide,
|
307 |
+
torch.Tensor.trunc,
|
308 |
+
torch.trunc,
|
309 |
+
torch.Tensor.xlogy,
|
310 |
+
torch.xlogy,
|
311 |
+
torch.rand_like,
|
312 |
+
)
|
.venv/Lib/site-packages/functorch/dim/reference.py
ADDED
@@ -0,0 +1,645 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
2 |
+
# All rights reserved.
|
3 |
+
#
|
4 |
+
# This source code is licensed under the BSD-style license found in the
|
5 |
+
# LICENSE file in the root directory of this source tree.
|
6 |
+
|
7 |
+
# reference python implementations for C ops
|
8 |
+
import torch
|
9 |
+
from functorch._C import dim as _C
|
10 |
+
|
11 |
+
from . import op_properties
|
12 |
+
from .batch_tensor import _enable_layers
|
13 |
+
from .tree_map import tree_flatten, tree_map
|
14 |
+
|
15 |
+
|
16 |
+
DimList = _C.DimList
|
17 |
+
import operator
|
18 |
+
from functools import reduce
|
19 |
+
|
20 |
+
|
21 |
+
# use dict to avoid writing C++ bindings for set
|
22 |
+
pointwise = set(op_properties.pointwise)
|
23 |
+
|
24 |
+
|
25 |
+
def prod(x):
|
26 |
+
return reduce(operator.mul, x, 1)
|
27 |
+
|
28 |
+
|
29 |
+
def _wrap_dim(d, N, keepdim):
|
30 |
+
from . import Dim
|
31 |
+
|
32 |
+
if isinstance(d, Dim):
|
33 |
+
assert not keepdim, "cannot preserve first-class dimensions with keepdim=True"
|
34 |
+
return d
|
35 |
+
elif d >= 0:
|
36 |
+
return d - N
|
37 |
+
else:
|
38 |
+
return d
|
39 |
+
|
40 |
+
|
41 |
+
def _dims(d, N, keepdim, single_dim):
|
42 |
+
from . import Dim
|
43 |
+
|
44 |
+
if isinstance(d, (Dim, int)):
|
45 |
+
return ltuple((_wrap_dim(d, N, keepdim),))
|
46 |
+
assert not single_dim, f"expected a single dimension or int but found: {d}"
|
47 |
+
return ltuple(_wrap_dim(x, N, keepdim) for x in d)
|
48 |
+
|
49 |
+
|
50 |
+
def _bind_dims_to_size(lhs_size, rhs, lhs_debug):
|
51 |
+
from . import DimensionMismatchError
|
52 |
+
|
53 |
+
not_bound = tuple((i, r) for i, r in enumerate(rhs) if not r.is_bound)
|
54 |
+
if len(not_bound) == 1:
|
55 |
+
idx, d = not_bound[0]
|
56 |
+
rhs_so_far = prod(r.size for r in rhs if r.is_bound)
|
57 |
+
if lhs_size % rhs_so_far != 0:
|
58 |
+
rhs_s = tuple("?" if not r.is_bound else str(r.size) for r in rhs)
|
59 |
+
raise DimensionMismatchError(
|
60 |
+
f"inferred dimension does not evenly fit into larger dimension: {lhs_size} vs {rhs_s}"
|
61 |
+
)
|
62 |
+
new_size = lhs_size // rhs_so_far
|
63 |
+
d.size = new_size
|
64 |
+
elif len(not_bound) > 1:
|
65 |
+
rhs_s = tuple("?" if not r.is_bound else str(r.size) for r in rhs)
|
66 |
+
raise DimensionMismatchError(
|
67 |
+
f"cannot infer the size of two dimensions at once: {rhs} with sizes {rhs_s}"
|
68 |
+
)
|
69 |
+
else:
|
70 |
+
rhs_size = prod(r.size for r in rhs)
|
71 |
+
if lhs_size != rhs_size:
|
72 |
+
raise DimensionMismatchError(
|
73 |
+
f"Dimension sizes to do not match ({lhs_size} != {rhs_size}) when matching {lhs_debug} to {rhs}"
|
74 |
+
)
|
75 |
+
|
76 |
+
|
77 |
+
def _tensor_levels(inp):
|
78 |
+
from . import _Tensor
|
79 |
+
|
80 |
+
if isinstance(inp, _Tensor):
|
81 |
+
return inp._tensor, llist(inp._levels), inp._has_device
|
82 |
+
else:
|
83 |
+
return inp, llist(range(-inp.ndim, 0)), True
|
84 |
+
|
85 |
+
|
86 |
+
def _match_levels(v, from_levels, to_levels):
|
87 |
+
view = []
|
88 |
+
permute = []
|
89 |
+
requires_view = False
|
90 |
+
size = v.size()
|
91 |
+
for t in to_levels:
|
92 |
+
try:
|
93 |
+
idx = from_levels.index(t)
|
94 |
+
permute.append(idx)
|
95 |
+
view.append(size[idx])
|
96 |
+
except ValueError:
|
97 |
+
view.append(1)
|
98 |
+
requires_view = True
|
99 |
+
if permute != list(range(len(permute))):
|
100 |
+
v = v.permute(*permute)
|
101 |
+
if requires_view:
|
102 |
+
v = v.view(*view)
|
103 |
+
return v
|
104 |
+
|
105 |
+
|
106 |
+
# make a single dimension positional but do not permute it,
|
107 |
+
# used to do multi-tensor operators where the dim being acted on
|
108 |
+
# should not physically move if possible
|
109 |
+
def _positional_no_permute(self, dim, expand_dim=False):
|
110 |
+
from . import Tensor
|
111 |
+
|
112 |
+
ptensor, levels = self._tensor, llist(self._levels)
|
113 |
+
try:
|
114 |
+
idx = levels.index(dim)
|
115 |
+
except ValueError:
|
116 |
+
if not expand_dim:
|
117 |
+
raise
|
118 |
+
idx = 0
|
119 |
+
ptensor = ptensor.expand(dim.size, *ptensor.size())
|
120 |
+
levels.insert(0, 0)
|
121 |
+
idx_batched = 0
|
122 |
+
for i in range(idx):
|
123 |
+
if isinstance(levels[i], int):
|
124 |
+
levels[i] -= 1
|
125 |
+
idx_batched += 1
|
126 |
+
levels[idx] = -idx_batched - 1
|
127 |
+
return Tensor.from_positional(ptensor, levels, self._has_device), idx_batched
|
128 |
+
|
129 |
+
|
130 |
+
def seq(a, b):
|
131 |
+
from . import Dim
|
132 |
+
|
133 |
+
if isinstance(a, Dim) != isinstance(b, Dim):
|
134 |
+
return False
|
135 |
+
if isinstance(a, Dim):
|
136 |
+
return a is b
|
137 |
+
else:
|
138 |
+
return a == b
|
139 |
+
|
140 |
+
|
141 |
+
class isin:
|
142 |
+
def __contains__(self, item):
|
143 |
+
for x in self:
|
144 |
+
if seq(item, x):
|
145 |
+
return True
|
146 |
+
return False
|
147 |
+
|
148 |
+
def index(self, item):
|
149 |
+
for i, x in enumerate(self):
|
150 |
+
if seq(item, x):
|
151 |
+
return i
|
152 |
+
raise ValueError
|
153 |
+
|
154 |
+
|
155 |
+
class llist(isin, list):
|
156 |
+
pass
|
157 |
+
|
158 |
+
|
159 |
+
class ltuple(isin, tuple):
|
160 |
+
pass
|
161 |
+
|
162 |
+
|
163 |
+
empty_dict = {}
|
164 |
+
|
165 |
+
|
166 |
+
@classmethod
|
167 |
+
def __torch_function__(self, orig, cls, args, kwargs=empty_dict):
|
168 |
+
from . import _Tensor, Tensor, TensorLike
|
169 |
+
from .delayed_mul_tensor import DelayedMulTensor
|
170 |
+
|
171 |
+
if orig is torch.Tensor.__mul__:
|
172 |
+
lhs, rhs = args
|
173 |
+
if (
|
174 |
+
isinstance(lhs, _Tensor)
|
175 |
+
and isinstance(rhs, _Tensor)
|
176 |
+
and lhs.ndim == 0
|
177 |
+
and rhs.ndim == 0
|
178 |
+
):
|
179 |
+
return DelayedMulTensor(lhs, rhs)
|
180 |
+
all_dims = llist()
|
181 |
+
flat_args, unflatten = tree_flatten((args, kwargs))
|
182 |
+
device_holding_tensor = None
|
183 |
+
for f in flat_args:
|
184 |
+
if isinstance(f, _Tensor):
|
185 |
+
if f._has_device:
|
186 |
+
device_holding_tensor = f._batchtensor
|
187 |
+
for d in f.dims:
|
188 |
+
if d not in all_dims:
|
189 |
+
all_dims.append(d)
|
190 |
+
|
191 |
+
def unwrap(t):
|
192 |
+
if isinstance(t, _Tensor):
|
193 |
+
r = t._batchtensor
|
194 |
+
if device_holding_tensor is not None and not t._has_device:
|
195 |
+
r = r.to(device=device_holding_tensor.device)
|
196 |
+
return r
|
197 |
+
return t
|
198 |
+
|
199 |
+
if orig in pointwise:
|
200 |
+
result_levels = llist()
|
201 |
+
arg_levels = llist()
|
202 |
+
to_expand = []
|
203 |
+
for i, f in enumerate(flat_args):
|
204 |
+
if isinstance(f, TensorLike):
|
205 |
+
ptensor, levels, _ = _tensor_levels(f)
|
206 |
+
if (
|
207 |
+
isinstance(f, _Tensor)
|
208 |
+
and not f._has_device
|
209 |
+
and device_holding_tensor is not None
|
210 |
+
):
|
211 |
+
ptensor = ptensor.to(device=device_holding_tensor.device)
|
212 |
+
flat_args[i] = ptensor
|
213 |
+
for l in levels:
|
214 |
+
if l not in result_levels:
|
215 |
+
result_levels.append(l)
|
216 |
+
to_expand.append((i, levels))
|
217 |
+
|
218 |
+
for i, levels in to_expand:
|
219 |
+
flat_args[i] = _match_levels(flat_args[i], levels, result_levels)
|
220 |
+
args, kwargs = unflatten(flat_args)
|
221 |
+
result = orig(*args, **kwargs)
|
222 |
+
|
223 |
+
def wrap(t):
|
224 |
+
if isinstance(t, TensorLike):
|
225 |
+
return Tensor.from_positional(
|
226 |
+
t, result_levels, device_holding_tensor is not None
|
227 |
+
)
|
228 |
+
return t
|
229 |
+
|
230 |
+
return tree_map(wrap, result)
|
231 |
+
else:
|
232 |
+
|
233 |
+
def wrap(t):
|
234 |
+
if isinstance(t, TensorLike):
|
235 |
+
return Tensor.from_batched(t, device_holding_tensor is not None)
|
236 |
+
return t
|
237 |
+
|
238 |
+
with _enable_layers(all_dims):
|
239 |
+
print(f"batch_tensor for {orig}")
|
240 |
+
args, kwargs = unflatten(unwrap(f) for f in flat_args)
|
241 |
+
result = orig(*args, **kwargs)
|
242 |
+
# print("END", orig)
|
243 |
+
return tree_map(wrap, result)
|
244 |
+
|
245 |
+
|
246 |
+
def positional(self, *dims):
|
247 |
+
from . import Dim, DimensionBindError, Tensor
|
248 |
+
|
249 |
+
ptensor, levels = self._tensor, llist(self._levels)
|
250 |
+
flat_dims = llist()
|
251 |
+
view = []
|
252 |
+
needs_view = False
|
253 |
+
ndim = self.ndim
|
254 |
+
for d in dims:
|
255 |
+
if isinstance(d, DimList):
|
256 |
+
flat_dims.extend(d)
|
257 |
+
view.extend(e.size for e in d)
|
258 |
+
elif isinstance(d, Dim):
|
259 |
+
flat_dims.append(d)
|
260 |
+
view.append(d.size)
|
261 |
+
elif isinstance(d, int):
|
262 |
+
d = _wrap_dim(d, ndim, False)
|
263 |
+
flat_dims.append(d)
|
264 |
+
view.append(ptensor.size(d))
|
265 |
+
else:
|
266 |
+
flat_dims.extend(d)
|
267 |
+
view.append(prod(e.size for e in d))
|
268 |
+
needs_view = True
|
269 |
+
|
270 |
+
permute = list(range(len(levels)))
|
271 |
+
nflat = len(flat_dims)
|
272 |
+
for i, d in enumerate(flat_dims):
|
273 |
+
try:
|
274 |
+
idx = levels.index(d)
|
275 |
+
except ValueError as e:
|
276 |
+
raise DimensionBindError(
|
277 |
+
f"tensor of dimensions {self.dims} does not contain dim {d}"
|
278 |
+
) from e
|
279 |
+
p = permute[idx]
|
280 |
+
del levels[idx]
|
281 |
+
del permute[idx]
|
282 |
+
levels.insert(i, 0)
|
283 |
+
permute.insert(i, p)
|
284 |
+
ptensor = ptensor.permute(*permute)
|
285 |
+
seen = 0
|
286 |
+
for i in range(len(levels) - 1, -1, -1):
|
287 |
+
if isinstance(levels[i], int):
|
288 |
+
seen += 1
|
289 |
+
levels[i] = -seen
|
290 |
+
result = Tensor.from_positional(ptensor, levels, self._has_device)
|
291 |
+
if needs_view:
|
292 |
+
result = result.reshape(*view, *result.size()[len(flat_dims) :])
|
293 |
+
return result
|
294 |
+
|
295 |
+
|
296 |
+
def _contains_dim(input):
|
297 |
+
from . import Dim
|
298 |
+
|
299 |
+
for i in input:
|
300 |
+
if isinstance(i, Dim):
|
301 |
+
return True
|
302 |
+
|
303 |
+
|
304 |
+
def expand(self, *sizes):
|
305 |
+
if not _contains_dim(sizes):
|
306 |
+
return self.__torch_function__(torch.Tensor.expand, None, (self, *sizes))
|
307 |
+
dims = sizes
|
308 |
+
sizes = [d.size for d in dims] + [-1] * self.ndim
|
309 |
+
self = self.expand(*sizes)
|
310 |
+
return self[dims]
|
311 |
+
|
312 |
+
|
313 |
+
_not_present = object()
|
314 |
+
|
315 |
+
|
316 |
+
def _getarg(name, offset, args, kwargs, default):
|
317 |
+
if len(args) > offset:
|
318 |
+
return args[offset]
|
319 |
+
return kwargs.get(name, default)
|
320 |
+
|
321 |
+
|
322 |
+
def _patcharg(name, offset, args, kwargs, value):
|
323 |
+
if len(args) > offset:
|
324 |
+
args[offset] = value
|
325 |
+
else:
|
326 |
+
kwargs[name] = value
|
327 |
+
|
328 |
+
|
329 |
+
def _wrap(
|
330 |
+
orig, dim_offset=0, keepdim_offset=1, dim_name="dim", single_dim=False, reduce=True
|
331 |
+
):
|
332 |
+
from . import Dim, Tensor, TensorLike
|
333 |
+
|
334 |
+
def fn(self, *args, **kwargs):
|
335 |
+
dim = _getarg(dim_name, dim_offset, args, kwargs, _not_present)
|
336 |
+
if dim is _not_present or (single_dim and not isinstance(dim, Dim)):
|
337 |
+
with _enable_layers(self.dims):
|
338 |
+
print(f"dim fallback batch_tensor for {orig}")
|
339 |
+
return Tensor.from_batched(
|
340 |
+
orig(self._batchtensor, *args, **kwargs), self._has_device
|
341 |
+
)
|
342 |
+
keepdim = (
|
343 |
+
_getarg("keepdim", keepdim_offset, args, kwargs, False) if reduce else False
|
344 |
+
)
|
345 |
+
t, levels = self._tensor, llist(self._levels)
|
346 |
+
dims = _dims(dim, self._batchtensor.ndim, keepdim, single_dim)
|
347 |
+
dim_indices = tuple(levels.index(d) for d in dims)
|
348 |
+
if reduce and not keepdim:
|
349 |
+
new_levels = [l for i, l in enumerate(levels) if i not in dim_indices]
|
350 |
+
else:
|
351 |
+
new_levels = levels
|
352 |
+
|
353 |
+
if len(dim_indices) == 1:
|
354 |
+
dim_indices = dim_indices[
|
355 |
+
0
|
356 |
+
] # so that dims that really only take a single argument work...
|
357 |
+
args = list(args)
|
358 |
+
_patcharg(dim_name, dim_offset, args, kwargs, dim_indices)
|
359 |
+
|
360 |
+
def wrap(t):
|
361 |
+
if isinstance(t, TensorLike):
|
362 |
+
return Tensor.from_positional(t, new_levels, self._has_device)
|
363 |
+
return t
|
364 |
+
|
365 |
+
with _enable_layers(new_levels):
|
366 |
+
print(f"dim used batch_tensor for {orig}")
|
367 |
+
r = orig(t, *args, **kwargs)
|
368 |
+
return tree_map(wrap, r)
|
369 |
+
|
370 |
+
return fn
|
371 |
+
|
372 |
+
|
373 |
+
def _def(name, *args, **kwargs):
|
374 |
+
from . import _Tensor
|
375 |
+
|
376 |
+
orig = getattr(torch.Tensor, name)
|
377 |
+
setattr(_Tensor, name, _wrap(orig, *args, **kwargs))
|
378 |
+
|
379 |
+
|
380 |
+
no_slice = slice(None)
|
381 |
+
|
382 |
+
_orig_getitem = torch.Tensor.__getitem__
|
383 |
+
|
384 |
+
|
385 |
+
class dim_tracker:
|
386 |
+
def __init__(self) -> None:
|
387 |
+
self.dims = llist()
|
388 |
+
self.count = []
|
389 |
+
|
390 |
+
def record(self, d):
|
391 |
+
if d not in self.dims:
|
392 |
+
self.dims.append(d)
|
393 |
+
self.count.append(1)
|
394 |
+
|
395 |
+
def __getitem__(self, d):
|
396 |
+
return self.count[self.dims.index(d)]
|
397 |
+
|
398 |
+
|
399 |
+
def t__getitem__(self, input):
|
400 |
+
from . import _Tensor, Dim, DimensionBindError, DimList, Tensor, TensorLike
|
401 |
+
|
402 |
+
# * bail to original example if we have a single non-Dim tensor, or a non-tensor
|
403 |
+
# * locate ... or an unbound tensor list, and determine its size, bind dim list
|
404 |
+
# (remember that None does not count to the total dim count)
|
405 |
+
# * bind simple dims and dim-packs to their sizes, count the number of uses of each dim,
|
406 |
+
# produce the re-view if needed
|
407 |
+
# * for each single-use dim index, replace with no_slice and mark that it will be added
|
408 |
+
# (keep track of whether we have to call super)
|
409 |
+
# * call super if needed
|
410 |
+
# * if we have dims to bind, bind them (it will help if we eliminated ... and None before)
|
411 |
+
# this handles bool indexing handling, as well as some other simple cases.
|
412 |
+
|
413 |
+
is_simple = (
|
414 |
+
not isinstance(input, Dim)
|
415 |
+
and not isinstance(input, (tuple, list))
|
416 |
+
and
|
417 |
+
# WAR for functorch bug where zero time tensors in getitem are not handled correctly.
|
418 |
+
not (isinstance(input, TensorLike) and input.ndim == 0)
|
419 |
+
)
|
420 |
+
|
421 |
+
if is_simple:
|
422 |
+
if isinstance(self, _Tensor):
|
423 |
+
return _Tensor.__torch_function__(_orig_getitem, None, (self, input))
|
424 |
+
else:
|
425 |
+
return _orig_getitem(self, input)
|
426 |
+
|
427 |
+
# can further optimize this case
|
428 |
+
if not isinstance(input, tuple):
|
429 |
+
input = [input]
|
430 |
+
else:
|
431 |
+
input = list(input)
|
432 |
+
|
433 |
+
dims_indexed = 0
|
434 |
+
expanding_object = None
|
435 |
+
dimlists = []
|
436 |
+
for i, s in enumerate(input):
|
437 |
+
if s is ... or isinstance(s, DimList) and not s.is_bound:
|
438 |
+
if expanding_object is not None:
|
439 |
+
msg = (
|
440 |
+
"at most one ... or unbound dimension list can exist in indexing list but"
|
441 |
+
f" found 2 at offsets {i} and {expanding_object}"
|
442 |
+
)
|
443 |
+
raise DimensionBindError(msg)
|
444 |
+
expanding_object = i
|
445 |
+
|
446 |
+
if isinstance(s, DimList):
|
447 |
+
dims_indexed += len(s) if s.is_bound else 0
|
448 |
+
dimlists.append(i)
|
449 |
+
elif s is not None and s is not ...:
|
450 |
+
dims_indexed += 1
|
451 |
+
|
452 |
+
ndim = self.ndim
|
453 |
+
if dims_indexed > ndim:
|
454 |
+
raise IndexError(
|
455 |
+
f"at least {dims_indexed} indices were supplied but the tensor only has {ndim} dimensions."
|
456 |
+
)
|
457 |
+
if expanding_object is not None:
|
458 |
+
expanding_ndims = ndim - dims_indexed
|
459 |
+
obj = input[expanding_object]
|
460 |
+
if obj is ...:
|
461 |
+
input[expanding_object : expanding_object + 1] = [
|
462 |
+
no_slice
|
463 |
+
] * expanding_ndims
|
464 |
+
else:
|
465 |
+
obj.bind_len(expanding_ndims)
|
466 |
+
# flatten the dimslists into the indexing
|
467 |
+
for i in reversed(dimlists):
|
468 |
+
input[i : i + 1] = input[i]
|
469 |
+
dims_indexed = 0
|
470 |
+
requires_view = False
|
471 |
+
size = self.size()
|
472 |
+
view_sizes = []
|
473 |
+
dims_seen = dim_tracker()
|
474 |
+
|
475 |
+
def add_dims(t):
|
476 |
+
if not isinstance(t, _Tensor):
|
477 |
+
return
|
478 |
+
for d in t.dims:
|
479 |
+
dims_seen.record(d)
|
480 |
+
|
481 |
+
add_dims(self)
|
482 |
+
dim_packs = []
|
483 |
+
for i, idx in enumerate(input):
|
484 |
+
if idx is None:
|
485 |
+
input[i] = no_slice
|
486 |
+
view_sizes.append(1)
|
487 |
+
requires_view = True
|
488 |
+
else:
|
489 |
+
sz = size[dims_indexed]
|
490 |
+
if isinstance(idx, Dim):
|
491 |
+
idx.size = sz
|
492 |
+
dims_seen.record(idx)
|
493 |
+
view_sizes.append(sz)
|
494 |
+
elif isinstance(idx, (tuple, list)) and idx and isinstance(idx[0], Dim):
|
495 |
+
for d in idx:
|
496 |
+
dims_seen.record(idx)
|
497 |
+
_bind_dims_to_size(sz, idx, f"offset {i}")
|
498 |
+
view_sizes.extend(d.size for d in idx)
|
499 |
+
requires_view = True
|
500 |
+
dim_packs.append(i)
|
501 |
+
else:
|
502 |
+
add_dims(idx)
|
503 |
+
view_sizes.append(sz)
|
504 |
+
dims_indexed += 1
|
505 |
+
if requires_view:
|
506 |
+
self = self.view(*view_sizes)
|
507 |
+
for i in reversed(dim_packs):
|
508 |
+
input[i : i + 1] = input[i]
|
509 |
+
|
510 |
+
# currenty:
|
511 |
+
# input is flat, containing either Dim, or Tensor, or something valid for standard indexing
|
512 |
+
# self may have first-class dims as well.
|
513 |
+
|
514 |
+
# to index:
|
515 |
+
# drop the first class dims from self, they just become direct indices of their positions
|
516 |
+
|
517 |
+
# figure out the dimensions of the indexing tensors: union of all the dims in the tensors in the index.
|
518 |
+
# these dimensions will appear and need to be bound at the first place tensor occures
|
519 |
+
|
520 |
+
if isinstance(self, _Tensor):
|
521 |
+
ptensor_self, levels = self._tensor, list(self._levels)
|
522 |
+
# indices to ptensor rather than self which has first-class dimensions
|
523 |
+
input_it = iter(input)
|
524 |
+
flat_inputs = [next(input_it) if isinstance(l, int) else l for l in levels]
|
525 |
+
has_device = self._has_device
|
526 |
+
to_pad = 0
|
527 |
+
else:
|
528 |
+
ptensor_self, flat_inputs = self, input
|
529 |
+
to_pad = ptensor_self.ndim - len(flat_inputs)
|
530 |
+
has_device = True
|
531 |
+
|
532 |
+
result_levels = []
|
533 |
+
index_levels = []
|
534 |
+
tensor_insert_point = None
|
535 |
+
to_expand = {}
|
536 |
+
requires_getindex = False
|
537 |
+
for i, inp in enumerate(flat_inputs):
|
538 |
+
if isinstance(inp, Dim) and dims_seen[inp] == 1:
|
539 |
+
flat_inputs[i] = no_slice
|
540 |
+
result_levels.append(inp)
|
541 |
+
elif isinstance(inp, TensorLike):
|
542 |
+
requires_getindex = True
|
543 |
+
if tensor_insert_point is None:
|
544 |
+
tensor_insert_point = len(result_levels)
|
545 |
+
ptensor, levels, _ = _tensor_levels(inp)
|
546 |
+
to_expand[i] = levels
|
547 |
+
flat_inputs[i] = ptensor
|
548 |
+
for l in levels:
|
549 |
+
if l not in index_levels:
|
550 |
+
index_levels.append(l)
|
551 |
+
else:
|
552 |
+
requires_getindex = True
|
553 |
+
result_levels.append(0)
|
554 |
+
|
555 |
+
if tensor_insert_point is not None:
|
556 |
+
result_levels[tensor_insert_point:tensor_insert_point] = index_levels
|
557 |
+
|
558 |
+
for i, levels in to_expand.items():
|
559 |
+
flat_inputs[i] = _match_levels(flat_inputs[i], levels, index_levels)
|
560 |
+
|
561 |
+
if requires_getindex:
|
562 |
+
result = _orig_getitem(ptensor_self, flat_inputs)
|
563 |
+
else:
|
564 |
+
result = ptensor_self
|
565 |
+
|
566 |
+
next_positional = -1
|
567 |
+
if to_pad > 0:
|
568 |
+
result_levels.extend([0] * to_pad)
|
569 |
+
for i, r in enumerate(reversed(result_levels)):
|
570 |
+
if isinstance(r, int):
|
571 |
+
result_levels[-1 - i] = next_positional
|
572 |
+
next_positional -= 1
|
573 |
+
|
574 |
+
return Tensor.from_positional(result, result_levels, has_device)
|
575 |
+
|
576 |
+
|
577 |
+
# XXX - dim is optional and can be the outer-most dimension...
|
578 |
+
def stack(tensors, new_dim, dim=0, out=None):
|
579 |
+
if isinstance(dim, int):
|
580 |
+
return torch.stack(tensors, dim, out).index(dim, new_dim)
|
581 |
+
index = None
|
582 |
+
if out is not None:
|
583 |
+
out, index = _positional_no_permute(out, dim, expand_dim=True)
|
584 |
+
ptensors = []
|
585 |
+
for t in tensors:
|
586 |
+
pt, pi = _positional_no_permute(t, dim, expand_dim=True)
|
587 |
+
if index is not None and pi != index:
|
588 |
+
pt = pt.move_dim(pi, index)
|
589 |
+
else:
|
590 |
+
index = pi
|
591 |
+
ptensors.append(pt)
|
592 |
+
pr = torch.stack(ptensors, index, out=out)
|
593 |
+
return pr.index((index, index + 1), (new_dim, dim))
|
594 |
+
|
595 |
+
|
596 |
+
_orig_split = torch.Tensor.split
|
597 |
+
|
598 |
+
|
599 |
+
def split(self, split_size_or_sections, dim=0):
|
600 |
+
from . import _Tensor, Dim
|
601 |
+
|
602 |
+
if isinstance(split_size_or_sections, int) or any(
|
603 |
+
isinstance(t, int) for t in split_size_or_sections
|
604 |
+
):
|
605 |
+
if isinstance(dim, Dim):
|
606 |
+
raise ValueError(
|
607 |
+
"when dim is specified as a Dim object, split sizes must also be dimensions."
|
608 |
+
)
|
609 |
+
return _orig_split(self, split_size_or_sections, dim=dim)
|
610 |
+
|
611 |
+
if isinstance(dim, Dim):
|
612 |
+
assert isinstance(self, _Tensor), f"Tensor does not have dimension {dim}"
|
613 |
+
self, dim = _positional_no_permute(self, dim)
|
614 |
+
|
615 |
+
size = self.size(dim)
|
616 |
+
total_bound_size = 0
|
617 |
+
unbound = []
|
618 |
+
sizes = []
|
619 |
+
for i, d in enumerate(split_size_or_sections):
|
620 |
+
if d.is_bound:
|
621 |
+
sizes.append(d.size)
|
622 |
+
total_bound_size += d.size
|
623 |
+
else:
|
624 |
+
sizes.append(0)
|
625 |
+
unbound.append(i)
|
626 |
+
|
627 |
+
if unbound:
|
628 |
+
assert (
|
629 |
+
total_bound_size <= size
|
630 |
+
), f"result dimensions are larger than original: {total_bound_size} vs {size} ({split_size_or_sections})"
|
631 |
+
remaining_size = size - total_bound_size
|
632 |
+
chunk_size = -(-remaining_size // len(unbound))
|
633 |
+
for u in unbound:
|
634 |
+
sz = min(chunk_size, remaining_size)
|
635 |
+
split_size_or_sections[u].size = sz
|
636 |
+
sizes[u] = sz
|
637 |
+
remaining_size -= sz
|
638 |
+
else:
|
639 |
+
assert (
|
640 |
+
total_bound_size == size
|
641 |
+
), f"result dimensions do not match original: {total_bound_size} vs {size} ({split_size_or_sections})"
|
642 |
+
return tuple(
|
643 |
+
t.index(dim, d)
|
644 |
+
for d, t in zip(split_size_or_sections, _orig_split(self, sizes, dim=dim))
|
645 |
+
)
|
.venv/Lib/site-packages/functorch/dim/tree_map.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
2 |
+
# All rights reserved.
|
3 |
+
#
|
4 |
+
# This source code is licensed under the BSD-style license found in the
|
5 |
+
# LICENSE file in the root directory of this source tree.
|
6 |
+
|
7 |
+
from functorch._C import dim
|
8 |
+
|
9 |
+
|
10 |
+
tree_flatten = dim.tree_flatten
|
11 |
+
|
12 |
+
|
13 |
+
def tree_map(fn, tree):
|
14 |
+
vs, unflatten = tree_flatten(tree)
|
15 |
+
return unflatten(fn(v) for v in vs)
|
.venv/Lib/site-packages/functorch/dim/wrap_type.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
2 |
+
# All rights reserved.
|
3 |
+
#
|
4 |
+
# This source code is licensed under the BSD-style license found in the
|
5 |
+
# LICENSE file in the root directory of this source tree.
|
6 |
+
|
7 |
+
from types import (
|
8 |
+
BuiltinMethodType,
|
9 |
+
FunctionType,
|
10 |
+
GetSetDescriptorType,
|
11 |
+
MethodDescriptorType,
|
12 |
+
WrapperDescriptorType,
|
13 |
+
)
|
14 |
+
|
15 |
+
from functorch._C import dim as _C
|
16 |
+
|
17 |
+
|
18 |
+
_wrap_method = _C._wrap_method
|
19 |
+
|
20 |
+
FUNC_TYPES = (
|
21 |
+
FunctionType,
|
22 |
+
MethodDescriptorType,
|
23 |
+
BuiltinMethodType,
|
24 |
+
WrapperDescriptorType,
|
25 |
+
)
|
26 |
+
PROPERTY_TYPES = (GetSetDescriptorType, property)
|
27 |
+
|
28 |
+
|
29 |
+
def _py_wrap_method(orig, __torch_function__):
|
30 |
+
def impl(*args, **kwargs):
|
31 |
+
return __torch_function__(orig, None, args, kwargs)
|
32 |
+
|
33 |
+
return impl
|
34 |
+
|
35 |
+
|
36 |
+
def wrap_type(use_c, to_patch, pattern, __torch_function__):
|
37 |
+
if use_c:
|
38 |
+
wrap_method = _wrap_method
|
39 |
+
else:
|
40 |
+
wrap_method = _py_wrap_method
|
41 |
+
|
42 |
+
all = {}
|
43 |
+
for t in reversed(pattern.mro()[:-1]): # skip object
|
44 |
+
all.update(t.__dict__)
|
45 |
+
|
46 |
+
def wrap_attr(orig):
|
47 |
+
return property(wrap_method(orig.__get__, __torch_function__))
|
48 |
+
|
49 |
+
for name, obj in all.items():
|
50 |
+
if name in (
|
51 |
+
"__dict__",
|
52 |
+
"__new__",
|
53 |
+
"__init__",
|
54 |
+
"__repr__",
|
55 |
+
"__weakref__",
|
56 |
+
"__doc__",
|
57 |
+
"__module__",
|
58 |
+
"__dir__",
|
59 |
+
):
|
60 |
+
continue
|
61 |
+
|
62 |
+
# skip things that have been overloaded
|
63 |
+
# things that come from object like `__eq__` still need to be patched, however.
|
64 |
+
if hasattr(to_patch, name) and getattr(to_patch, name) is not getattr(
|
65 |
+
object, name, None
|
66 |
+
):
|
67 |
+
continue
|
68 |
+
|
69 |
+
if isinstance(obj, FUNC_TYPES):
|
70 |
+
setattr(to_patch, name, wrap_method(obj, __torch_function__))
|
71 |
+
elif isinstance(obj, PROPERTY_TYPES):
|
72 |
+
setattr(to_patch, name, wrap_attr(obj))
|
.venv/Lib/site-packages/huggingface_hub/__init__.py
ADDED
@@ -0,0 +1,1002 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2020 The HuggingFace Team. All rights reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
# ***********
|
16 |
+
# `huggingface_hub` init has 2 modes:
|
17 |
+
# - Normal usage:
|
18 |
+
# If imported to use it, all modules and functions are lazy-loaded. This means
|
19 |
+
# they exist at top level in module but are imported only the first time they are
|
20 |
+
# used. This way, `from huggingface_hub import something` will import `something`
|
21 |
+
# quickly without the hassle of importing all the features from `huggingface_hub`.
|
22 |
+
# - Static check:
|
23 |
+
# If statically analyzed, all modules and functions are loaded normally. This way
|
24 |
+
# static typing check works properly as well as autocomplete in text editors and
|
25 |
+
# IDEs.
|
26 |
+
#
|
27 |
+
# The static model imports are done inside the `if TYPE_CHECKING:` statement at
|
28 |
+
# the bottom of this file. Since module/functions imports are duplicated, it is
|
29 |
+
# mandatory to make sure to add them twice when adding one. This is checked in the
|
30 |
+
# `make quality` command.
|
31 |
+
#
|
32 |
+
# To update the static imports, please run the following command and commit the changes.
|
33 |
+
# ```
|
34 |
+
# # Use script
|
35 |
+
# python utils/check_static_imports.py --update-file
|
36 |
+
#
|
37 |
+
# # Or run style on codebase
|
38 |
+
# make style
|
39 |
+
# ```
|
40 |
+
#
|
41 |
+
# ***********
|
42 |
+
# Lazy loader vendored from https://github.com/scientific-python/lazy_loader
|
43 |
+
import importlib
|
44 |
+
import os
|
45 |
+
import sys
|
46 |
+
from typing import TYPE_CHECKING
|
47 |
+
|
48 |
+
|
49 |
+
__version__ = "0.26.5"
|
50 |
+
|
51 |
+
# Alphabetical order of definitions is ensured in tests
|
52 |
+
# WARNING: any comment added in this dictionary definition will be lost when
|
53 |
+
# re-generating the file !
|
54 |
+
_SUBMOD_ATTRS = {
|
55 |
+
"_commit_scheduler": [
|
56 |
+
"CommitScheduler",
|
57 |
+
],
|
58 |
+
"_inference_endpoints": [
|
59 |
+
"InferenceEndpoint",
|
60 |
+
"InferenceEndpointError",
|
61 |
+
"InferenceEndpointStatus",
|
62 |
+
"InferenceEndpointTimeoutError",
|
63 |
+
"InferenceEndpointType",
|
64 |
+
],
|
65 |
+
"_login": [
|
66 |
+
"auth_list",
|
67 |
+
"auth_switch",
|
68 |
+
"interpreter_login",
|
69 |
+
"login",
|
70 |
+
"logout",
|
71 |
+
"notebook_login",
|
72 |
+
],
|
73 |
+
"_multi_commits": [
|
74 |
+
"MultiCommitException",
|
75 |
+
"plan_multi_commits",
|
76 |
+
],
|
77 |
+
"_snapshot_download": [
|
78 |
+
"snapshot_download",
|
79 |
+
],
|
80 |
+
"_space_api": [
|
81 |
+
"SpaceHardware",
|
82 |
+
"SpaceRuntime",
|
83 |
+
"SpaceStage",
|
84 |
+
"SpaceStorage",
|
85 |
+
"SpaceVariable",
|
86 |
+
],
|
87 |
+
"_tensorboard_logger": [
|
88 |
+
"HFSummaryWriter",
|
89 |
+
],
|
90 |
+
"_webhooks_payload": [
|
91 |
+
"WebhookPayload",
|
92 |
+
"WebhookPayloadComment",
|
93 |
+
"WebhookPayloadDiscussion",
|
94 |
+
"WebhookPayloadDiscussionChanges",
|
95 |
+
"WebhookPayloadEvent",
|
96 |
+
"WebhookPayloadMovedTo",
|
97 |
+
"WebhookPayloadRepo",
|
98 |
+
"WebhookPayloadUrl",
|
99 |
+
"WebhookPayloadWebhook",
|
100 |
+
],
|
101 |
+
"_webhooks_server": [
|
102 |
+
"WebhooksServer",
|
103 |
+
"webhook_endpoint",
|
104 |
+
],
|
105 |
+
"community": [
|
106 |
+
"Discussion",
|
107 |
+
"DiscussionComment",
|
108 |
+
"DiscussionCommit",
|
109 |
+
"DiscussionEvent",
|
110 |
+
"DiscussionStatusChange",
|
111 |
+
"DiscussionTitleChange",
|
112 |
+
"DiscussionWithDetails",
|
113 |
+
],
|
114 |
+
"constants": [
|
115 |
+
"CONFIG_NAME",
|
116 |
+
"FLAX_WEIGHTS_NAME",
|
117 |
+
"HUGGINGFACE_CO_URL_HOME",
|
118 |
+
"HUGGINGFACE_CO_URL_TEMPLATE",
|
119 |
+
"PYTORCH_WEIGHTS_NAME",
|
120 |
+
"REPO_TYPE_DATASET",
|
121 |
+
"REPO_TYPE_MODEL",
|
122 |
+
"REPO_TYPE_SPACE",
|
123 |
+
"TF2_WEIGHTS_NAME",
|
124 |
+
"TF_WEIGHTS_NAME",
|
125 |
+
],
|
126 |
+
"fastai_utils": [
|
127 |
+
"_save_pretrained_fastai",
|
128 |
+
"from_pretrained_fastai",
|
129 |
+
"push_to_hub_fastai",
|
130 |
+
],
|
131 |
+
"file_download": [
|
132 |
+
"HfFileMetadata",
|
133 |
+
"_CACHED_NO_EXIST",
|
134 |
+
"get_hf_file_metadata",
|
135 |
+
"hf_hub_download",
|
136 |
+
"hf_hub_url",
|
137 |
+
"try_to_load_from_cache",
|
138 |
+
],
|
139 |
+
"hf_api": [
|
140 |
+
"Collection",
|
141 |
+
"CollectionItem",
|
142 |
+
"CommitInfo",
|
143 |
+
"CommitOperation",
|
144 |
+
"CommitOperationAdd",
|
145 |
+
"CommitOperationCopy",
|
146 |
+
"CommitOperationDelete",
|
147 |
+
"DatasetInfo",
|
148 |
+
"GitCommitInfo",
|
149 |
+
"GitRefInfo",
|
150 |
+
"GitRefs",
|
151 |
+
"HfApi",
|
152 |
+
"ModelInfo",
|
153 |
+
"RepoUrl",
|
154 |
+
"SpaceInfo",
|
155 |
+
"User",
|
156 |
+
"UserLikes",
|
157 |
+
"WebhookInfo",
|
158 |
+
"WebhookWatchedItem",
|
159 |
+
"accept_access_request",
|
160 |
+
"add_collection_item",
|
161 |
+
"add_space_secret",
|
162 |
+
"add_space_variable",
|
163 |
+
"auth_check",
|
164 |
+
"cancel_access_request",
|
165 |
+
"change_discussion_status",
|
166 |
+
"comment_discussion",
|
167 |
+
"create_branch",
|
168 |
+
"create_collection",
|
169 |
+
"create_commit",
|
170 |
+
"create_commits_on_pr",
|
171 |
+
"create_discussion",
|
172 |
+
"create_inference_endpoint",
|
173 |
+
"create_pull_request",
|
174 |
+
"create_repo",
|
175 |
+
"create_tag",
|
176 |
+
"create_webhook",
|
177 |
+
"dataset_info",
|
178 |
+
"delete_branch",
|
179 |
+
"delete_collection",
|
180 |
+
"delete_collection_item",
|
181 |
+
"delete_file",
|
182 |
+
"delete_folder",
|
183 |
+
"delete_inference_endpoint",
|
184 |
+
"delete_repo",
|
185 |
+
"delete_space_secret",
|
186 |
+
"delete_space_storage",
|
187 |
+
"delete_space_variable",
|
188 |
+
"delete_tag",
|
189 |
+
"delete_webhook",
|
190 |
+
"disable_webhook",
|
191 |
+
"duplicate_space",
|
192 |
+
"edit_discussion_comment",
|
193 |
+
"enable_webhook",
|
194 |
+
"file_exists",
|
195 |
+
"get_collection",
|
196 |
+
"get_dataset_tags",
|
197 |
+
"get_discussion_details",
|
198 |
+
"get_full_repo_name",
|
199 |
+
"get_inference_endpoint",
|
200 |
+
"get_model_tags",
|
201 |
+
"get_paths_info",
|
202 |
+
"get_repo_discussions",
|
203 |
+
"get_safetensors_metadata",
|
204 |
+
"get_space_runtime",
|
205 |
+
"get_space_variables",
|
206 |
+
"get_token_permission",
|
207 |
+
"get_user_overview",
|
208 |
+
"get_webhook",
|
209 |
+
"grant_access",
|
210 |
+
"like",
|
211 |
+
"list_accepted_access_requests",
|
212 |
+
"list_collections",
|
213 |
+
"list_datasets",
|
214 |
+
"list_inference_endpoints",
|
215 |
+
"list_liked_repos",
|
216 |
+
"list_metrics",
|
217 |
+
"list_models",
|
218 |
+
"list_organization_members",
|
219 |
+
"list_papers",
|
220 |
+
"list_pending_access_requests",
|
221 |
+
"list_rejected_access_requests",
|
222 |
+
"list_repo_commits",
|
223 |
+
"list_repo_files",
|
224 |
+
"list_repo_likers",
|
225 |
+
"list_repo_refs",
|
226 |
+
"list_repo_tree",
|
227 |
+
"list_spaces",
|
228 |
+
"list_user_followers",
|
229 |
+
"list_user_following",
|
230 |
+
"list_webhooks",
|
231 |
+
"merge_pull_request",
|
232 |
+
"model_info",
|
233 |
+
"move_repo",
|
234 |
+
"paper_info",
|
235 |
+
"parse_safetensors_file_metadata",
|
236 |
+
"pause_inference_endpoint",
|
237 |
+
"pause_space",
|
238 |
+
"preupload_lfs_files",
|
239 |
+
"reject_access_request",
|
240 |
+
"rename_discussion",
|
241 |
+
"repo_exists",
|
242 |
+
"repo_info",
|
243 |
+
"repo_type_and_id_from_hf_id",
|
244 |
+
"request_space_hardware",
|
245 |
+
"request_space_storage",
|
246 |
+
"restart_space",
|
247 |
+
"resume_inference_endpoint",
|
248 |
+
"revision_exists",
|
249 |
+
"run_as_future",
|
250 |
+
"scale_to_zero_inference_endpoint",
|
251 |
+
"set_space_sleep_time",
|
252 |
+
"space_info",
|
253 |
+
"super_squash_history",
|
254 |
+
"unlike",
|
255 |
+
"update_collection_item",
|
256 |
+
"update_collection_metadata",
|
257 |
+
"update_inference_endpoint",
|
258 |
+
"update_repo_settings",
|
259 |
+
"update_repo_visibility",
|
260 |
+
"update_webhook",
|
261 |
+
"upload_file",
|
262 |
+
"upload_folder",
|
263 |
+
"upload_large_folder",
|
264 |
+
"whoami",
|
265 |
+
],
|
266 |
+
"hf_file_system": [
|
267 |
+
"HfFileSystem",
|
268 |
+
"HfFileSystemFile",
|
269 |
+
"HfFileSystemResolvedPath",
|
270 |
+
"HfFileSystemStreamFile",
|
271 |
+
],
|
272 |
+
"hub_mixin": [
|
273 |
+
"ModelHubMixin",
|
274 |
+
"PyTorchModelHubMixin",
|
275 |
+
],
|
276 |
+
"inference._client": [
|
277 |
+
"InferenceClient",
|
278 |
+
"InferenceTimeoutError",
|
279 |
+
],
|
280 |
+
"inference._generated._async_client": [
|
281 |
+
"AsyncInferenceClient",
|
282 |
+
],
|
283 |
+
"inference._generated.types": [
|
284 |
+
"AudioClassificationInput",
|
285 |
+
"AudioClassificationOutputElement",
|
286 |
+
"AudioClassificationOutputTransform",
|
287 |
+
"AudioClassificationParameters",
|
288 |
+
"AudioToAudioInput",
|
289 |
+
"AudioToAudioOutputElement",
|
290 |
+
"AutomaticSpeechRecognitionEarlyStoppingEnum",
|
291 |
+
"AutomaticSpeechRecognitionGenerationParameters",
|
292 |
+
"AutomaticSpeechRecognitionInput",
|
293 |
+
"AutomaticSpeechRecognitionOutput",
|
294 |
+
"AutomaticSpeechRecognitionOutputChunk",
|
295 |
+
"AutomaticSpeechRecognitionParameters",
|
296 |
+
"ChatCompletionInput",
|
297 |
+
"ChatCompletionInputFunctionDefinition",
|
298 |
+
"ChatCompletionInputFunctionName",
|
299 |
+
"ChatCompletionInputGrammarType",
|
300 |
+
"ChatCompletionInputMessage",
|
301 |
+
"ChatCompletionInputMessageChunk",
|
302 |
+
"ChatCompletionInputStreamOptions",
|
303 |
+
"ChatCompletionInputToolType",
|
304 |
+
"ChatCompletionInputURL",
|
305 |
+
"ChatCompletionOutput",
|
306 |
+
"ChatCompletionOutputComplete",
|
307 |
+
"ChatCompletionOutputFunctionDefinition",
|
308 |
+
"ChatCompletionOutputLogprob",
|
309 |
+
"ChatCompletionOutputLogprobs",
|
310 |
+
"ChatCompletionOutputMessage",
|
311 |
+
"ChatCompletionOutputToolCall",
|
312 |
+
"ChatCompletionOutputTopLogprob",
|
313 |
+
"ChatCompletionOutputUsage",
|
314 |
+
"ChatCompletionStreamOutput",
|
315 |
+
"ChatCompletionStreamOutputChoice",
|
316 |
+
"ChatCompletionStreamOutputDelta",
|
317 |
+
"ChatCompletionStreamOutputDeltaToolCall",
|
318 |
+
"ChatCompletionStreamOutputFunction",
|
319 |
+
"ChatCompletionStreamOutputLogprob",
|
320 |
+
"ChatCompletionStreamOutputLogprobs",
|
321 |
+
"ChatCompletionStreamOutputTopLogprob",
|
322 |
+
"ChatCompletionStreamOutputUsage",
|
323 |
+
"DepthEstimationInput",
|
324 |
+
"DepthEstimationOutput",
|
325 |
+
"DocumentQuestionAnsweringInput",
|
326 |
+
"DocumentQuestionAnsweringInputData",
|
327 |
+
"DocumentQuestionAnsweringOutputElement",
|
328 |
+
"DocumentQuestionAnsweringParameters",
|
329 |
+
"FeatureExtractionInput",
|
330 |
+
"FillMaskInput",
|
331 |
+
"FillMaskOutputElement",
|
332 |
+
"FillMaskParameters",
|
333 |
+
"ImageClassificationInput",
|
334 |
+
"ImageClassificationOutputElement",
|
335 |
+
"ImageClassificationOutputTransform",
|
336 |
+
"ImageClassificationParameters",
|
337 |
+
"ImageSegmentationInput",
|
338 |
+
"ImageSegmentationOutputElement",
|
339 |
+
"ImageSegmentationParameters",
|
340 |
+
"ImageToImageInput",
|
341 |
+
"ImageToImageOutput",
|
342 |
+
"ImageToImageParameters",
|
343 |
+
"ImageToImageTargetSize",
|
344 |
+
"ImageToTextEarlyStoppingEnum",
|
345 |
+
"ImageToTextGenerationParameters",
|
346 |
+
"ImageToTextInput",
|
347 |
+
"ImageToTextOutput",
|
348 |
+
"ImageToTextParameters",
|
349 |
+
"ObjectDetectionBoundingBox",
|
350 |
+
"ObjectDetectionInput",
|
351 |
+
"ObjectDetectionOutputElement",
|
352 |
+
"ObjectDetectionParameters",
|
353 |
+
"QuestionAnsweringInput",
|
354 |
+
"QuestionAnsweringInputData",
|
355 |
+
"QuestionAnsweringOutputElement",
|
356 |
+
"QuestionAnsweringParameters",
|
357 |
+
"SentenceSimilarityInput",
|
358 |
+
"SentenceSimilarityInputData",
|
359 |
+
"SummarizationInput",
|
360 |
+
"SummarizationOutput",
|
361 |
+
"SummarizationParameters",
|
362 |
+
"TableQuestionAnsweringInput",
|
363 |
+
"TableQuestionAnsweringInputData",
|
364 |
+
"TableQuestionAnsweringOutputElement",
|
365 |
+
"Text2TextGenerationInput",
|
366 |
+
"Text2TextGenerationOutput",
|
367 |
+
"Text2TextGenerationParameters",
|
368 |
+
"TextClassificationInput",
|
369 |
+
"TextClassificationOutputElement",
|
370 |
+
"TextClassificationOutputTransform",
|
371 |
+
"TextClassificationParameters",
|
372 |
+
"TextGenerationInput",
|
373 |
+
"TextGenerationInputGenerateParameters",
|
374 |
+
"TextGenerationInputGrammarType",
|
375 |
+
"TextGenerationOutput",
|
376 |
+
"TextGenerationOutputBestOfSequence",
|
377 |
+
"TextGenerationOutputDetails",
|
378 |
+
"TextGenerationOutputPrefillToken",
|
379 |
+
"TextGenerationOutputToken",
|
380 |
+
"TextGenerationStreamOutput",
|
381 |
+
"TextGenerationStreamOutputStreamDetails",
|
382 |
+
"TextGenerationStreamOutputToken",
|
383 |
+
"TextToAudioEarlyStoppingEnum",
|
384 |
+
"TextToAudioGenerationParameters",
|
385 |
+
"TextToAudioInput",
|
386 |
+
"TextToAudioOutput",
|
387 |
+
"TextToAudioParameters",
|
388 |
+
"TextToImageInput",
|
389 |
+
"TextToImageOutput",
|
390 |
+
"TextToImageParameters",
|
391 |
+
"TextToImageTargetSize",
|
392 |
+
"TextToSpeechEarlyStoppingEnum",
|
393 |
+
"TextToSpeechGenerationParameters",
|
394 |
+
"TextToSpeechInput",
|
395 |
+
"TextToSpeechOutput",
|
396 |
+
"TextToSpeechParameters",
|
397 |
+
"TokenClassificationInput",
|
398 |
+
"TokenClassificationOutputElement",
|
399 |
+
"TokenClassificationParameters",
|
400 |
+
"ToolElement",
|
401 |
+
"TranslationInput",
|
402 |
+
"TranslationOutput",
|
403 |
+
"TranslationParameters",
|
404 |
+
"VideoClassificationInput",
|
405 |
+
"VideoClassificationOutputElement",
|
406 |
+
"VideoClassificationOutputTransform",
|
407 |
+
"VideoClassificationParameters",
|
408 |
+
"VisualQuestionAnsweringInput",
|
409 |
+
"VisualQuestionAnsweringInputData",
|
410 |
+
"VisualQuestionAnsweringOutputElement",
|
411 |
+
"VisualQuestionAnsweringParameters",
|
412 |
+
"ZeroShotClassificationInput",
|
413 |
+
"ZeroShotClassificationInputData",
|
414 |
+
"ZeroShotClassificationOutputElement",
|
415 |
+
"ZeroShotClassificationParameters",
|
416 |
+
"ZeroShotImageClassificationInput",
|
417 |
+
"ZeroShotImageClassificationInputData",
|
418 |
+
"ZeroShotImageClassificationOutputElement",
|
419 |
+
"ZeroShotImageClassificationParameters",
|
420 |
+
"ZeroShotObjectDetectionBoundingBox",
|
421 |
+
"ZeroShotObjectDetectionInput",
|
422 |
+
"ZeroShotObjectDetectionInputData",
|
423 |
+
"ZeroShotObjectDetectionOutputElement",
|
424 |
+
],
|
425 |
+
"inference_api": [
|
426 |
+
"InferenceApi",
|
427 |
+
],
|
428 |
+
"keras_mixin": [
|
429 |
+
"KerasModelHubMixin",
|
430 |
+
"from_pretrained_keras",
|
431 |
+
"push_to_hub_keras",
|
432 |
+
"save_pretrained_keras",
|
433 |
+
],
|
434 |
+
"repocard": [
|
435 |
+
"DatasetCard",
|
436 |
+
"ModelCard",
|
437 |
+
"RepoCard",
|
438 |
+
"SpaceCard",
|
439 |
+
"metadata_eval_result",
|
440 |
+
"metadata_load",
|
441 |
+
"metadata_save",
|
442 |
+
"metadata_update",
|
443 |
+
],
|
444 |
+
"repocard_data": [
|
445 |
+
"CardData",
|
446 |
+
"DatasetCardData",
|
447 |
+
"EvalResult",
|
448 |
+
"ModelCardData",
|
449 |
+
"SpaceCardData",
|
450 |
+
],
|
451 |
+
"repository": [
|
452 |
+
"Repository",
|
453 |
+
],
|
454 |
+
"serialization": [
|
455 |
+
"StateDictSplit",
|
456 |
+
"get_tf_storage_size",
|
457 |
+
"get_torch_storage_id",
|
458 |
+
"get_torch_storage_size",
|
459 |
+
"save_torch_model",
|
460 |
+
"save_torch_state_dict",
|
461 |
+
"split_state_dict_into_shards_factory",
|
462 |
+
"split_tf_state_dict_into_shards",
|
463 |
+
"split_torch_state_dict_into_shards",
|
464 |
+
],
|
465 |
+
"utils": [
|
466 |
+
"CacheNotFound",
|
467 |
+
"CachedFileInfo",
|
468 |
+
"CachedRepoInfo",
|
469 |
+
"CachedRevisionInfo",
|
470 |
+
"CorruptedCacheException",
|
471 |
+
"DeleteCacheStrategy",
|
472 |
+
"HFCacheInfo",
|
473 |
+
"HfFolder",
|
474 |
+
"cached_assets_path",
|
475 |
+
"configure_http_backend",
|
476 |
+
"dump_environment_info",
|
477 |
+
"get_session",
|
478 |
+
"get_token",
|
479 |
+
"logging",
|
480 |
+
"scan_cache_dir",
|
481 |
+
],
|
482 |
+
}
|
483 |
+
|
484 |
+
|
485 |
+
def _attach(package_name, submodules=None, submod_attrs=None):
|
486 |
+
"""Attach lazily loaded submodules, functions, or other attributes.
|
487 |
+
|
488 |
+
Typically, modules import submodules and attributes as follows:
|
489 |
+
|
490 |
+
```py
|
491 |
+
import mysubmodule
|
492 |
+
import anothersubmodule
|
493 |
+
|
494 |
+
from .foo import someattr
|
495 |
+
```
|
496 |
+
|
497 |
+
The idea is to replace a package's `__getattr__`, `__dir__`, and
|
498 |
+
`__all__`, such that all imports work exactly the way they would
|
499 |
+
with normal imports, except that the import occurs upon first use.
|
500 |
+
|
501 |
+
The typical way to call this function, replacing the above imports, is:
|
502 |
+
|
503 |
+
```python
|
504 |
+
__getattr__, __dir__, __all__ = lazy.attach(
|
505 |
+
__name__,
|
506 |
+
['mysubmodule', 'anothersubmodule'],
|
507 |
+
{'foo': ['someattr']}
|
508 |
+
)
|
509 |
+
```
|
510 |
+
This functionality requires Python 3.7 or higher.
|
511 |
+
|
512 |
+
Args:
|
513 |
+
package_name (`str`):
|
514 |
+
Typically use `__name__`.
|
515 |
+
submodules (`set`):
|
516 |
+
List of submodules to attach.
|
517 |
+
submod_attrs (`dict`):
|
518 |
+
Dictionary of submodule -> list of attributes / functions.
|
519 |
+
These attributes are imported as they are used.
|
520 |
+
|
521 |
+
Returns:
|
522 |
+
__getattr__, __dir__, __all__
|
523 |
+
|
524 |
+
"""
|
525 |
+
if submod_attrs is None:
|
526 |
+
submod_attrs = {}
|
527 |
+
|
528 |
+
if submodules is None:
|
529 |
+
submodules = set()
|
530 |
+
else:
|
531 |
+
submodules = set(submodules)
|
532 |
+
|
533 |
+
attr_to_modules = {attr: mod for mod, attrs in submod_attrs.items() for attr in attrs}
|
534 |
+
|
535 |
+
__all__ = list(submodules | attr_to_modules.keys())
|
536 |
+
|
537 |
+
def __getattr__(name):
|
538 |
+
if name in submodules:
|
539 |
+
try:
|
540 |
+
return importlib.import_module(f"{package_name}.{name}")
|
541 |
+
except Exception as e:
|
542 |
+
print(f"Error importing {package_name}.{name}: {e}")
|
543 |
+
raise
|
544 |
+
elif name in attr_to_modules:
|
545 |
+
submod_path = f"{package_name}.{attr_to_modules[name]}"
|
546 |
+
try:
|
547 |
+
submod = importlib.import_module(submod_path)
|
548 |
+
except Exception as e:
|
549 |
+
print(f"Error importing {submod_path}: {e}")
|
550 |
+
raise
|
551 |
+
attr = getattr(submod, name)
|
552 |
+
|
553 |
+
# If the attribute lives in a file (module) with the same
|
554 |
+
# name as the attribute, ensure that the attribute and *not*
|
555 |
+
# the module is accessible on the package.
|
556 |
+
if name == attr_to_modules[name]:
|
557 |
+
pkg = sys.modules[package_name]
|
558 |
+
pkg.__dict__[name] = attr
|
559 |
+
|
560 |
+
return attr
|
561 |
+
else:
|
562 |
+
raise AttributeError(f"No {package_name} attribute {name}")
|
563 |
+
|
564 |
+
def __dir__():
|
565 |
+
return __all__
|
566 |
+
|
567 |
+
return __getattr__, __dir__, list(__all__)
|
568 |
+
|
569 |
+
|
570 |
+
__getattr__, __dir__, __all__ = _attach(__name__, submodules=[], submod_attrs=_SUBMOD_ATTRS)
|
571 |
+
|
572 |
+
if os.environ.get("EAGER_IMPORT", ""):
|
573 |
+
for attr in __all__:
|
574 |
+
__getattr__(attr)
|
575 |
+
|
576 |
+
# WARNING: any content below this statement is generated automatically. Any manual edit
|
577 |
+
# will be lost when re-generating this file !
|
578 |
+
#
|
579 |
+
# To update the static imports, please run the following command and commit the changes.
|
580 |
+
# ```
|
581 |
+
# # Use script
|
+# python utils/check_static_imports.py --update-file
+#
+# # Or run style on codebase
+# make style
+# ```
+if TYPE_CHECKING:  # pragma: no cover
+    from ._commit_scheduler import CommitScheduler  # noqa: F401
+    from ._inference_endpoints import (
+        InferenceEndpoint,  # noqa: F401
+        InferenceEndpointError,  # noqa: F401
+        InferenceEndpointStatus,  # noqa: F401
+        InferenceEndpointTimeoutError,  # noqa: F401
+        InferenceEndpointType,  # noqa: F401
+    )
+    from ._login import (
+        auth_list,  # noqa: F401
+        auth_switch,  # noqa: F401
+        interpreter_login,  # noqa: F401
+        login,  # noqa: F401
+        logout,  # noqa: F401
+        notebook_login,  # noqa: F401
+    )
+    from ._multi_commits import (
+        MultiCommitException,  # noqa: F401
+        plan_multi_commits,  # noqa: F401
+    )
+    from ._snapshot_download import snapshot_download  # noqa: F401
+    from ._space_api import (
+        SpaceHardware,  # noqa: F401
+        SpaceRuntime,  # noqa: F401
+        SpaceStage,  # noqa: F401
+        SpaceStorage,  # noqa: F401
+        SpaceVariable,  # noqa: F401
+    )
+    from ._tensorboard_logger import HFSummaryWriter  # noqa: F401
+    from ._webhooks_payload import (
+        WebhookPayload,  # noqa: F401
+        WebhookPayloadComment,  # noqa: F401
+        WebhookPayloadDiscussion,  # noqa: F401
+        WebhookPayloadDiscussionChanges,  # noqa: F401
+        WebhookPayloadEvent,  # noqa: F401
+        WebhookPayloadMovedTo,  # noqa: F401
+        WebhookPayloadRepo,  # noqa: F401
+        WebhookPayloadUrl,  # noqa: F401
+        WebhookPayloadWebhook,  # noqa: F401
+    )
+    from ._webhooks_server import (
+        WebhooksServer,  # noqa: F401
+        webhook_endpoint,  # noqa: F401
+    )
+    from .community import (
+        Discussion,  # noqa: F401
+        DiscussionComment,  # noqa: F401
+        DiscussionCommit,  # noqa: F401
+        DiscussionEvent,  # noqa: F401
+        DiscussionStatusChange,  # noqa: F401
+        DiscussionTitleChange,  # noqa: F401
+        DiscussionWithDetails,  # noqa: F401
+    )
+    from .constants import (
+        CONFIG_NAME,  # noqa: F401
+        FLAX_WEIGHTS_NAME,  # noqa: F401
+        HUGGINGFACE_CO_URL_HOME,  # noqa: F401
+        HUGGINGFACE_CO_URL_TEMPLATE,  # noqa: F401
+        PYTORCH_WEIGHTS_NAME,  # noqa: F401
+        REPO_TYPE_DATASET,  # noqa: F401
+        REPO_TYPE_MODEL,  # noqa: F401
+        REPO_TYPE_SPACE,  # noqa: F401
+        TF2_WEIGHTS_NAME,  # noqa: F401
+        TF_WEIGHTS_NAME,  # noqa: F401
+    )
+    from .fastai_utils import (
+        _save_pretrained_fastai,  # noqa: F401
+        from_pretrained_fastai,  # noqa: F401
+        push_to_hub_fastai,  # noqa: F401
+    )
+    from .file_download import (
+        _CACHED_NO_EXIST,  # noqa: F401
+        HfFileMetadata,  # noqa: F401
+        get_hf_file_metadata,  # noqa: F401
+        hf_hub_download,  # noqa: F401
+        hf_hub_url,  # noqa: F401
+        try_to_load_from_cache,  # noqa: F401
+    )
+    from .hf_api import (
+        Collection,  # noqa: F401
+        CollectionItem,  # noqa: F401
+        CommitInfo,  # noqa: F401
+        CommitOperation,  # noqa: F401
+        CommitOperationAdd,  # noqa: F401
+        CommitOperationCopy,  # noqa: F401
+        CommitOperationDelete,  # noqa: F401
+        DatasetInfo,  # noqa: F401
+        GitCommitInfo,  # noqa: F401
+        GitRefInfo,  # noqa: F401
+        GitRefs,  # noqa: F401
+        HfApi,  # noqa: F401
+        ModelInfo,  # noqa: F401
+        RepoUrl,  # noqa: F401
+        SpaceInfo,  # noqa: F401
+        User,  # noqa: F401
+        UserLikes,  # noqa: F401
+        WebhookInfo,  # noqa: F401
+        WebhookWatchedItem,  # noqa: F401
+        accept_access_request,  # noqa: F401
+        add_collection_item,  # noqa: F401
+        add_space_secret,  # noqa: F401
+        add_space_variable,  # noqa: F401
+        auth_check,  # noqa: F401
+        cancel_access_request,  # noqa: F401
+        change_discussion_status,  # noqa: F401
+        comment_discussion,  # noqa: F401
+        create_branch,  # noqa: F401
+        create_collection,  # noqa: F401
+        create_commit,  # noqa: F401
+        create_commits_on_pr,  # noqa: F401
+        create_discussion,  # noqa: F401
+        create_inference_endpoint,  # noqa: F401
+        create_pull_request,  # noqa: F401
+        create_repo,  # noqa: F401
+        create_tag,  # noqa: F401
+        create_webhook,  # noqa: F401
+        dataset_info,  # noqa: F401
+        delete_branch,  # noqa: F401
+        delete_collection,  # noqa: F401
+        delete_collection_item,  # noqa: F401
+        delete_file,  # noqa: F401
+        delete_folder,  # noqa: F401
+        delete_inference_endpoint,  # noqa: F401
+        delete_repo,  # noqa: F401
+        delete_space_secret,  # noqa: F401
+        delete_space_storage,  # noqa: F401
+        delete_space_variable,  # noqa: F401
+        delete_tag,  # noqa: F401
+        delete_webhook,  # noqa: F401
+        disable_webhook,  # noqa: F401
+        duplicate_space,  # noqa: F401
+        edit_discussion_comment,  # noqa: F401
+        enable_webhook,  # noqa: F401
+        file_exists,  # noqa: F401
+        get_collection,  # noqa: F401
+        get_dataset_tags,  # noqa: F401
+        get_discussion_details,  # noqa: F401
+        get_full_repo_name,  # noqa: F401
+        get_inference_endpoint,  # noqa: F401
+        get_model_tags,  # noqa: F401
+        get_paths_info,  # noqa: F401
+        get_repo_discussions,  # noqa: F401
+        get_safetensors_metadata,  # noqa: F401
+        get_space_runtime,  # noqa: F401
+        get_space_variables,  # noqa: F401
+        get_token_permission,  # noqa: F401
+        get_user_overview,  # noqa: F401
+        get_webhook,  # noqa: F401
+        grant_access,  # noqa: F401
+        like,  # noqa: F401
+        list_accepted_access_requests,  # noqa: F401
+        list_collections,  # noqa: F401
+        list_datasets,  # noqa: F401
+        list_inference_endpoints,  # noqa: F401
+        list_liked_repos,  # noqa: F401
+        list_metrics,  # noqa: F401
+        list_models,  # noqa: F401
+        list_organization_members,  # noqa: F401
+        list_papers,  # noqa: F401
+        list_pending_access_requests,  # noqa: F401
+        list_rejected_access_requests,  # noqa: F401
+        list_repo_commits,  # noqa: F401
+        list_repo_files,  # noqa: F401
+        list_repo_likers,  # noqa: F401
+        list_repo_refs,  # noqa: F401
+        list_repo_tree,  # noqa: F401
+        list_spaces,  # noqa: F401
+        list_user_followers,  # noqa: F401
+        list_user_following,  # noqa: F401
+        list_webhooks,  # noqa: F401
+        merge_pull_request,  # noqa: F401
+        model_info,  # noqa: F401
+        move_repo,  # noqa: F401
+        paper_info,  # noqa: F401
+        parse_safetensors_file_metadata,  # noqa: F401
+        pause_inference_endpoint,  # noqa: F401
+        pause_space,  # noqa: F401
+        preupload_lfs_files,  # noqa: F401
+        reject_access_request,  # noqa: F401
+        rename_discussion,  # noqa: F401
+        repo_exists,  # noqa: F401
+        repo_info,  # noqa: F401
+        repo_type_and_id_from_hf_id,  # noqa: F401
+        request_space_hardware,  # noqa: F401
+        request_space_storage,  # noqa: F401
+        restart_space,  # noqa: F401
+        resume_inference_endpoint,  # noqa: F401
+        revision_exists,  # noqa: F401
+        run_as_future,  # noqa: F401
+        scale_to_zero_inference_endpoint,  # noqa: F401
+        set_space_sleep_time,  # noqa: F401
+        space_info,  # noqa: F401
+        super_squash_history,  # noqa: F401
+        unlike,  # noqa: F401
+        update_collection_item,  # noqa: F401
+        update_collection_metadata,  # noqa: F401
+        update_inference_endpoint,  # noqa: F401
+        update_repo_settings,  # noqa: F401
+        update_repo_visibility,  # noqa: F401
+        update_webhook,  # noqa: F401
+        upload_file,  # noqa: F401
+        upload_folder,  # noqa: F401
+        upload_large_folder,  # noqa: F401
+        whoami,  # noqa: F401
+    )
+    from .hf_file_system import (
+        HfFileSystem,  # noqa: F401
+        HfFileSystemFile,  # noqa: F401
+        HfFileSystemResolvedPath,  # noqa: F401
+        HfFileSystemStreamFile,  # noqa: F401
+    )
+    from .hub_mixin import (
+        ModelHubMixin,  # noqa: F401
+        PyTorchModelHubMixin,  # noqa: F401
+    )
+    from .inference._client import (
+        InferenceClient,  # noqa: F401
+        InferenceTimeoutError,  # noqa: F401
+    )
+    from .inference._generated._async_client import AsyncInferenceClient  # noqa: F401
+    from .inference._generated.types import (
+        AudioClassificationInput,  # noqa: F401
+        AudioClassificationOutputElement,  # noqa: F401
+        AudioClassificationOutputTransform,  # noqa: F401
+        AudioClassificationParameters,  # noqa: F401
+        AudioToAudioInput,  # noqa: F401
+        AudioToAudioOutputElement,  # noqa: F401
+        AutomaticSpeechRecognitionEarlyStoppingEnum,  # noqa: F401
+        AutomaticSpeechRecognitionGenerationParameters,  # noqa: F401
+        AutomaticSpeechRecognitionInput,  # noqa: F401
+        AutomaticSpeechRecognitionOutput,  # noqa: F401
+        AutomaticSpeechRecognitionOutputChunk,  # noqa: F401
+        AutomaticSpeechRecognitionParameters,  # noqa: F401
+        ChatCompletionInput,  # noqa: F401
+        ChatCompletionInputFunctionDefinition,  # noqa: F401
+        ChatCompletionInputFunctionName,  # noqa: F401
+        ChatCompletionInputGrammarType,  # noqa: F401
+        ChatCompletionInputMessage,  # noqa: F401
+        ChatCompletionInputMessageChunk,  # noqa: F401
+        ChatCompletionInputStreamOptions,  # noqa: F401
+        ChatCompletionInputToolType,  # noqa: F401
+        ChatCompletionInputURL,  # noqa: F401
+        ChatCompletionOutput,  # noqa: F401
+        ChatCompletionOutputComplete,  # noqa: F401
+        ChatCompletionOutputFunctionDefinition,  # noqa: F401
+        ChatCompletionOutputLogprob,  # noqa: F401
+        ChatCompletionOutputLogprobs,  # noqa: F401
+        ChatCompletionOutputMessage,  # noqa: F401
+        ChatCompletionOutputToolCall,  # noqa: F401
+        ChatCompletionOutputTopLogprob,  # noqa: F401
+        ChatCompletionOutputUsage,  # noqa: F401
+        ChatCompletionStreamOutput,  # noqa: F401
+        ChatCompletionStreamOutputChoice,  # noqa: F401
+        ChatCompletionStreamOutputDelta,  # noqa: F401
+        ChatCompletionStreamOutputDeltaToolCall,  # noqa: F401
+        ChatCompletionStreamOutputFunction,  # noqa: F401
+        ChatCompletionStreamOutputLogprob,  # noqa: F401
+        ChatCompletionStreamOutputLogprobs,  # noqa: F401
+        ChatCompletionStreamOutputTopLogprob,  # noqa: F401
+        ChatCompletionStreamOutputUsage,  # noqa: F401
+        DepthEstimationInput,  # noqa: F401
+        DepthEstimationOutput,  # noqa: F401
+        DocumentQuestionAnsweringInput,  # noqa: F401
+        DocumentQuestionAnsweringInputData,  # noqa: F401
+        DocumentQuestionAnsweringOutputElement,  # noqa: F401
+        DocumentQuestionAnsweringParameters,  # noqa: F401
+        FeatureExtractionInput,  # noqa: F401
+        FillMaskInput,  # noqa: F401
+        FillMaskOutputElement,  # noqa: F401
+        FillMaskParameters,  # noqa: F401
+        ImageClassificationInput,  # noqa: F401
+        ImageClassificationOutputElement,  # noqa: F401
+        ImageClassificationOutputTransform,  # noqa: F401
+        ImageClassificationParameters,  # noqa: F401
+        ImageSegmentationInput,  # noqa: F401
+        ImageSegmentationOutputElement,  # noqa: F401
+        ImageSegmentationParameters,  # noqa: F401
+        ImageToImageInput,  # noqa: F401
+        ImageToImageOutput,  # noqa: F401
+        ImageToImageParameters,  # noqa: F401
+        ImageToImageTargetSize,  # noqa: F401
+        ImageToTextEarlyStoppingEnum,  # noqa: F401
+        ImageToTextGenerationParameters,  # noqa: F401
+        ImageToTextInput,  # noqa: F401
+        ImageToTextOutput,  # noqa: F401
+        ImageToTextParameters,  # noqa: F401
+        ObjectDetectionBoundingBox,  # noqa: F401
+        ObjectDetectionInput,  # noqa: F401
+        ObjectDetectionOutputElement,  # noqa: F401
+        ObjectDetectionParameters,  # noqa: F401
+        QuestionAnsweringInput,  # noqa: F401
+        QuestionAnsweringInputData,  # noqa: F401
+        QuestionAnsweringOutputElement,  # noqa: F401
+        QuestionAnsweringParameters,  # noqa: F401
+        SentenceSimilarityInput,  # noqa: F401
+        SentenceSimilarityInputData,  # noqa: F401
+        SummarizationInput,  # noqa: F401
+        SummarizationOutput,  # noqa: F401
+        SummarizationParameters,  # noqa: F401
+        TableQuestionAnsweringInput,  # noqa: F401
+        TableQuestionAnsweringInputData,  # noqa: F401
+        TableQuestionAnsweringOutputElement,  # noqa: F401
+        Text2TextGenerationInput,  # noqa: F401
+        Text2TextGenerationOutput,  # noqa: F401
+        Text2TextGenerationParameters,  # noqa: F401
+        TextClassificationInput,  # noqa: F401
+        TextClassificationOutputElement,  # noqa: F401
+        TextClassificationOutputTransform,  # noqa: F401
+        TextClassificationParameters,  # noqa: F401
+        TextGenerationInput,  # noqa: F401
+        TextGenerationInputGenerateParameters,  # noqa: F401
+        TextGenerationInputGrammarType,  # noqa: F401
+        TextGenerationOutput,  # noqa: F401
+        TextGenerationOutputBestOfSequence,  # noqa: F401
+        TextGenerationOutputDetails,  # noqa: F401
+        TextGenerationOutputPrefillToken,  # noqa: F401
+        TextGenerationOutputToken,  # noqa: F401
+        TextGenerationStreamOutput,  # noqa: F401
+        TextGenerationStreamOutputStreamDetails,  # noqa: F401
+        TextGenerationStreamOutputToken,  # noqa: F401
+        TextToAudioEarlyStoppingEnum,  # noqa: F401
+        TextToAudioGenerationParameters,  # noqa: F401
+        TextToAudioInput,  # noqa: F401
+        TextToAudioOutput,  # noqa: F401
+        TextToAudioParameters,  # noqa: F401
+        TextToImageInput,  # noqa: F401
+        TextToImageOutput,  # noqa: F401
+        TextToImageParameters,  # noqa: F401
+        TextToImageTargetSize,  # noqa: F401
+        TextToSpeechEarlyStoppingEnum,  # noqa: F401
+        TextToSpeechGenerationParameters,  # noqa: F401
+        TextToSpeechInput,  # noqa: F401
+        TextToSpeechOutput,  # noqa: F401
+        TextToSpeechParameters,  # noqa: F401
+        TokenClassificationInput,  # noqa: F401
+        TokenClassificationOutputElement,  # noqa: F401
+        TokenClassificationParameters,  # noqa: F401
+        ToolElement,  # noqa: F401
+        TranslationInput,  # noqa: F401
+        TranslationOutput,  # noqa: F401
+        TranslationParameters,  # noqa: F401
+        VideoClassificationInput,  # noqa: F401
+        VideoClassificationOutputElement,  # noqa: F401
+        VideoClassificationOutputTransform,  # noqa: F401
+        VideoClassificationParameters,  # noqa: F401
+        VisualQuestionAnsweringInput,  # noqa: F401
+        VisualQuestionAnsweringInputData,  # noqa: F401
+        VisualQuestionAnsweringOutputElement,  # noqa: F401
+        VisualQuestionAnsweringParameters,  # noqa: F401
+        ZeroShotClassificationInput,  # noqa: F401
+        ZeroShotClassificationInputData,  # noqa: F401
+        ZeroShotClassificationOutputElement,  # noqa: F401
+        ZeroShotClassificationParameters,  # noqa: F401
+        ZeroShotImageClassificationInput,  # noqa: F401
+        ZeroShotImageClassificationInputData,  # noqa: F401
+        ZeroShotImageClassificationOutputElement,  # noqa: F401
+        ZeroShotImageClassificationParameters,  # noqa: F401
+        ZeroShotObjectDetectionBoundingBox,  # noqa: F401
+        ZeroShotObjectDetectionInput,  # noqa: F401
+        ZeroShotObjectDetectionInputData,  # noqa: F401
+        ZeroShotObjectDetectionOutputElement,  # noqa: F401
+    )
+    from .inference_api import InferenceApi  # noqa: F401
+    from .keras_mixin import (
+        KerasModelHubMixin,  # noqa: F401
+        from_pretrained_keras,  # noqa: F401
+        push_to_hub_keras,  # noqa: F401
+        save_pretrained_keras,  # noqa: F401
+    )
+    from .repocard import (
+        DatasetCard,  # noqa: F401
+        ModelCard,  # noqa: F401
+        RepoCard,  # noqa: F401
+        SpaceCard,  # noqa: F401
+        metadata_eval_result,  # noqa: F401
+        metadata_load,  # noqa: F401
+        metadata_save,  # noqa: F401
+        metadata_update,  # noqa: F401
+    )
+    from .repocard_data import (
+        CardData,  # noqa: F401
+        DatasetCardData,  # noqa: F401
+        EvalResult,  # noqa: F401
+        ModelCardData,  # noqa: F401
+        SpaceCardData,  # noqa: F401
+    )
+    from .repository import Repository  # noqa: F401
+    from .serialization import (
+        StateDictSplit,  # noqa: F401
+        get_tf_storage_size,  # noqa: F401
+        get_torch_storage_id,  # noqa: F401
+        get_torch_storage_size,  # noqa: F401
+        save_torch_model,  # noqa: F401
+        save_torch_state_dict,  # noqa: F401
+        split_state_dict_into_shards_factory,  # noqa: F401
+        split_tf_state_dict_into_shards,  # noqa: F401
+        split_torch_state_dict_into_shards,  # noqa: F401
+    )
+    from .utils import (
+        CachedFileInfo,  # noqa: F401
+        CachedRepoInfo,  # noqa: F401
+        CachedRevisionInfo,  # noqa: F401
+        CacheNotFound,  # noqa: F401
+        CorruptedCacheException,  # noqa: F401
+        DeleteCacheStrategy,  # noqa: F401
+        HFCacheInfo,  # noqa: F401
+        HfFolder,  # noqa: F401
+        cached_assets_path,  # noqa: F401
+        configure_http_backend,  # noqa: F401
+        dump_environment_info,  # noqa: F401
+        get_session,  # noqa: F401
+        get_token,  # noqa: F401
+        logging,  # noqa: F401
+        scan_cache_dir,  # noqa: F401
+    )
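The `if TYPE_CHECKING:` block above only matters for type checkers and IDEs; the comment preceding it notes that it is regenerated with `python utils/check_static_imports.py --update-file` (or `make style`) so it stays in sync with the names the package exposes lazily at runtime. As a rough illustration of the idea (a minimal sketch of the generic PEP 562 lazy-import pattern, not the library's exact implementation; the mapping and the two names below are reduced examples):

# __init__.py sketch -- illustrative only.
import importlib
from typing import TYPE_CHECKING

# Hypothetical mapping: public attribute name -> submodule that defines it.
_SUBMOD_ATTRS = {
    "hf_hub_download": "file_download",
    "HfApi": "hf_api",
}

def __getattr__(name: str):
    # PEP 562: import the defining submodule only on first attribute access,
    # so `import huggingface_hub` stays cheap.
    if name in _SUBMOD_ATTRS:
        module = importlib.import_module(f".{_SUBMOD_ATTRS[name]}", __name__)
        return getattr(module, name)
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

if TYPE_CHECKING:  # static mirror so type checkers and IDEs see the same names
    from .file_download import hf_hub_download  # noqa: F401
    from .hf_api import HfApi  # noqa: F401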