Add files using upload-large-folder tool
This view is limited to 50 files because it contains too many changes.
- .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/INSTALLER +1 -0
- .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/LICENSE +29 -0
- .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/METADATA +167 -0
- .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/RECORD +56 -0
- .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/REQUESTED +0 -0
- .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/WHEEL +5 -0
- .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/top_level.txt +1 -0
- .venv/Lib/site-packages/fsspec/implementations/__init__.py +0 -0
- .venv/Lib/site-packages/fsspec/implementations/jupyter.py +124 -0
- .venv/Lib/site-packages/fsspec/implementations/libarchive.py +213 -0
- .venv/Lib/site-packages/fsspec/implementations/local.py +418 -0
- .venv/Lib/site-packages/fsspec/implementations/memory.py +292 -0
- .venv/Lib/site-packages/fsspec/implementations/reference.py +1160 -0
- .venv/Lib/site-packages/fsspec/implementations/sftp.py +180 -0
- .venv/Lib/site-packages/fsspec/implementations/smb.py +324 -0
- .venv/Lib/site-packages/fsspec/implementations/tar.py +124 -0
- .venv/Lib/site-packages/fsspec/implementations/webhdfs.py +486 -0
- .venv/Lib/site-packages/fsspec/implementations/zip.py +133 -0
- .venv/Lib/site-packages/fsspec/tests/abstract/__init__.py +287 -0
- .venv/Lib/site-packages/fsspec/tests/abstract/common.py +175 -0
- .venv/Lib/site-packages/fsspec/tests/abstract/copy.py +557 -0
- .venv/Lib/site-packages/fsspec/tests/abstract/get.py +587 -0
- .venv/Lib/site-packages/fsspec/tests/abstract/put.py +591 -0
- .venv/Lib/site-packages/fugashi-1.4.0.dist-info/INSTALLER +1 -0
- .venv/Lib/site-packages/fugashi-1.4.0.dist-info/LICENSE +21 -0
- .venv/Lib/site-packages/fugashi-1.4.0.dist-info/LICENSE.mecab +29 -0
- .venv/Lib/site-packages/fugashi-1.4.0.dist-info/METADATA +157 -0
- .venv/Lib/site-packages/fugashi-1.4.0.dist-info/RECORD +16 -0
- .venv/Lib/site-packages/fugashi-1.4.0.dist-info/REQUESTED +0 -0
- .venv/Lib/site-packages/fugashi-1.4.0.dist-info/WHEEL +5 -0
- .venv/Lib/site-packages/fugashi-1.4.0.dist-info/entry_points.txt +4 -0
- .venv/Lib/site-packages/fugashi-1.4.0.dist-info/top_level.txt +1 -0
- .venv/Lib/site-packages/fugashi/__init__.py +2 -0
- .venv/Lib/site-packages/fugashi/__pycache__/__init__.cpython-39.pyc +0 -0
- .venv/Lib/site-packages/fugashi/cli.py +47 -0
- .venv/Lib/site-packages/fugashi/fugashi.cp39-win_amd64.pyd +0 -0
- .venv/Lib/site-packages/functorch/_C.cp39-win_amd64.pyd +0 -0
- .venv/Lib/site-packages/functorch/__init__.py +39 -0
- .venv/Lib/site-packages/functorch/_src/make_functional/__init__.py +4 -0
- .venv/Lib/site-packages/functorch/_src/vmap/__init__.py +16 -0
- .venv/Lib/site-packages/functorch/compile/__init__.py +30 -0
- .venv/Lib/site-packages/functorch/dim/batch_tensor.py +26 -0
- .venv/Lib/site-packages/functorch/dim/delayed_mul_tensor.py +77 -0
- .venv/Lib/site-packages/functorch/dim/dim.py +121 -0
- .venv/Lib/site-packages/functorch/dim/magic_trace.py +42 -0
- .venv/Lib/site-packages/functorch/dim/op_properties.py +312 -0
- .venv/Lib/site-packages/functorch/dim/reference.py +645 -0
- .venv/Lib/site-packages/functorch/dim/tree_map.py +15 -0
- .venv/Lib/site-packages/functorch/dim/wrap_type.py +72 -0
- .venv/Lib/site-packages/huggingface_hub/__init__.py +1002 -0
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
+uv
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/LICENSE
ADDED
@@ -0,0 +1,29 @@
+BSD 3-Clause License
+
+Copyright (c) 2018, Martin Durant
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/METADATA
ADDED
@@ -0,0 +1,167 @@
+Metadata-Version: 2.1
+Name: fsspec
+Version: 2024.2.0
+Summary: File-system specification
+Home-page: https://github.com/fsspec/filesystem_spec
+Maintainer: Martin Durant
+Maintainer-email: [email protected]
+License: BSD
+Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
+Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
+Keywords: file
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Provides-Extra: abfs
+Requires-Dist: adlfs ; extra == 'abfs'
+Provides-Extra: adl
+Requires-Dist: adlfs ; extra == 'adl'
+Provides-Extra: arrow
+Requires-Dist: pyarrow >=1 ; extra == 'arrow'
+Provides-Extra: dask
+Requires-Dist: dask ; extra == 'dask'
+Requires-Dist: distributed ; extra == 'dask'
+Provides-Extra: devel
+Requires-Dist: pytest ; extra == 'devel'
+Requires-Dist: pytest-cov ; extra == 'devel'
+Provides-Extra: dropbox
+Requires-Dist: dropboxdrivefs ; extra == 'dropbox'
+Requires-Dist: requests ; extra == 'dropbox'
+Requires-Dist: dropbox ; extra == 'dropbox'
+Provides-Extra: entrypoints
+Provides-Extra: full
+Requires-Dist: adlfs ; extra == 'full'
+Requires-Dist: aiohttp !=4.0.0a0,!=4.0.0a1 ; extra == 'full'
+Requires-Dist: dask ; extra == 'full'
+Requires-Dist: distributed ; extra == 'full'
+Requires-Dist: dropbox ; extra == 'full'
+Requires-Dist: dropboxdrivefs ; extra == 'full'
+Requires-Dist: fusepy ; extra == 'full'
+Requires-Dist: gcsfs ; extra == 'full'
+Requires-Dist: libarchive-c ; extra == 'full'
+Requires-Dist: ocifs ; extra == 'full'
+Requires-Dist: panel ; extra == 'full'
+Requires-Dist: paramiko ; extra == 'full'
+Requires-Dist: pyarrow >=1 ; extra == 'full'
+Requires-Dist: pygit2 ; extra == 'full'
+Requires-Dist: requests ; extra == 'full'
+Requires-Dist: s3fs ; extra == 'full'
+Requires-Dist: smbprotocol ; extra == 'full'
+Requires-Dist: tqdm ; extra == 'full'
+Provides-Extra: fuse
+Requires-Dist: fusepy ; extra == 'fuse'
+Provides-Extra: gcs
+Requires-Dist: gcsfs ; extra == 'gcs'
+Provides-Extra: git
+Requires-Dist: pygit2 ; extra == 'git'
+Provides-Extra: github
+Requires-Dist: requests ; extra == 'github'
+Provides-Extra: gs
+Requires-Dist: gcsfs ; extra == 'gs'
+Provides-Extra: gui
+Requires-Dist: panel ; extra == 'gui'
+Provides-Extra: hdfs
+Requires-Dist: pyarrow >=1 ; extra == 'hdfs'
+Provides-Extra: http
+Requires-Dist: aiohttp !=4.0.0a0,!=4.0.0a1 ; extra == 'http'
+Provides-Extra: libarchive
+Requires-Dist: libarchive-c ; extra == 'libarchive'
+Provides-Extra: oci
+Requires-Dist: ocifs ; extra == 'oci'
+Provides-Extra: s3
+Requires-Dist: s3fs ; extra == 's3'
+Provides-Extra: sftp
+Requires-Dist: paramiko ; extra == 'sftp'
+Provides-Extra: smb
+Requires-Dist: smbprotocol ; extra == 'smb'
+Provides-Extra: ssh
+Requires-Dist: paramiko ; extra == 'ssh'
+Provides-Extra: tqdm
+Requires-Dist: tqdm ; extra == 'tqdm'
+
+# filesystem_spec
+
+[](https://pypi.python.org/pypi/fsspec/)
+[](https://anaconda.org/conda-forge/fsspec)
+
+[](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
+[](https://pepy.tech/project/fsspec)
+
+A specification for pythonic filesystems.
+
+## Install
+
+```bash
+pip install fsspec
+```
+
+would install the base fsspec. Various optionally supported features might require specification of custom
+extra require, e.g. `pip install fsspec[ssh]` will install dependencies for `ssh` backends support.
+Use `pip install fsspec[full]` for installation of all known extra dependencies.
+
+Up-to-date package also provided through conda-forge distribution:
+
+```bash
+conda install -c conda-forge fsspec
+```
+
+
+## Purpose
+
+To produce a template or specification for a file-system interface, that specific implementations should follow,
+so that applications making use of them can rely on a common behaviour and not have to worry about the specific
+internal implementation decisions with any given backend. Many such implementations are included in this package,
+or in sister projects such as `s3fs` and `gcsfs`.
+
+In addition, if this is well-designed, then additional functionality, such as a key-value store or FUSE
+mounting of the file-system implementation may be available for all implementations "for free".
+
+## Documentation
+
+Please refer to [RTD](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
+
+## Develop
+
+fsspec uses GitHub Actions for CI. Environment files can be found
+in the "ci/" directory. Note that the main environment is called "py38",
+but it is expected that the version of python installed be adjustable at
+CI runtime. For local use, pick a version suitable for you.
+
+### Testing
+
+Tests can be run in the dev environment, if activated, via ``pytest fsspec``.
+
+The full fsspec suite requires a system-level docker, docker-compose, and fuse
+installation. If only making changes to one backend implementation, it is
+not generally necessary to run all tests locally.
+
+It is expected that contributors ensure that any change to fsspec does not
+cause issues or regressions for either other fsspec-related packages such
+as gcsfs and s3fs, nor for downstream users of fsspec. The "downstream" CI
+run and corresponding environment file run a set of tests from the dask
+test suite, and very minimal tests against pandas and zarr from the
+test_downstream.py module in this repo.
+
+### Code Formatting
+
+fsspec uses [Black](https://black.readthedocs.io/en/stable) to ensure
+a consistent code format throughout the project.
+Run ``black fsspec`` from the root of the filesystem_spec repository to
+auto-format your code. Additionally, many editors have plugins that will apply
+``black`` as you edit files. ``black`` is included in the ``tox`` environments.
+
+Optionally, you may wish to setup [pre-commit hooks](https://pre-commit.com) to
+automatically run ``black`` when you make a git commit.
+Run ``pre-commit install --install-hooks`` from the root of the
+filesystem_spec repository to setup pre-commit hooks. ``black`` will now be run
+before you commit, reformatting any changed files. You can format without
+committing via ``pre-commit run`` or skip these checks with ``git commit
+--no-verify``.
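The fsspec README reproduced in the metadata above describes a single pythonic interface over many storage backends. As a minimal, illustrative sketch of that interface (the file path below is a placeholder, not something added by this commit):

```python
import fsspec

# write and read a local file through the generic fsspec interface;
# swapping the URL scheme (e.g. "memory://", "s3://") selects another backend
with fsspec.open("file:///tmp/example.txt", "wt") as f:
    f.write("hello fsspec")

fs = fsspec.filesystem("file")          # a LocalFileSystem instance
print(fs.cat_file("/tmp/example.txt"))  # b'hello fsspec'
```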
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/RECORD
ADDED
@@ -0,0 +1,56 @@
+fsspec-2024.2.0.dist-info/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
+fsspec-2024.2.0.dist-info/METADATA,sha256=uwzW1Braxnd_QGVI8W6J0KHi5KTiTJEm8YzSUdG-_Dc,6786
+fsspec-2024.2.0.dist-info/RECORD,,
+fsspec-2024.2.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+fsspec-2024.2.0.dist-info/top_level.txt,sha256=blt2pDrQDwN3Gklcw13CSPLQRd6aaOgJ8AxqrW395MI,7
+fsspec-2024.2.0.dist-info\INSTALLER,sha256=5hhM4Q4mYTT9z6QB6PGpUAW81PGNFrYrdXMj4oM_6ak,2
+fsspec-2024.2.0.dist-info\REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+fsspec/__init__.py,sha256=2kT62GfFK-AjgS-LgwSsCo_VA2IePvsyv8Ash5oiaFA,1982
+fsspec/_version.py,sha256=onTKKWe4fXkBjQxbTwM82SUT0H3x4U17IYrciFAryaU,500
+fsspec/archive.py,sha256=S__DzfZj-urAN3tp2W6jJ6YDiXG1fAl7FjvWUN73qIE,2386
+fsspec/asyn.py,sha256=kJ45sFFya2lZsmu2v8CVc8ZPRs8AccEzAy6Jot2ylkU,36157
+fsspec/caching.py,sha256=N45pzJdD4w5FOX_sxGvHWirggPNB66JTGP1HH6fpSck,28781
+fsspec/callbacks.py,sha256=BDIwLzK6rr_0V5ch557fSzsivCElpdqhXr5dZ9Te-EE,9210
+fsspec/compression.py,sha256=Yyd8FXw2rwWRtVoRVah_yguv-J7BUcBo4yDu6Qt52a0,4859
+fsspec/config.py,sha256=LF4Zmu1vhJW7Je9Q-cwkRc3xP7Rhyy7Xnwj26Z6sv2g,4279
+fsspec/conftest.py,sha256=fVfx-NLrH_OZS1TIpYNoPzM7efEcMoL62reHOdYeFCA,1245
+fsspec/core.py,sha256=0yCj1Z5MhbSDIQiqFs49VORl9QaGwV6hp9bXdkIoPIo,22363
+fsspec/dircache.py,sha256=YzogWJrhEastHU7vWz-cJiJ7sdtLXFXhEpInGKd4EcM,2717
+fsspec/exceptions.py,sha256=xcS7LiRrQ748kvOB9mrUR14kpjNztrHgEkZWi9M-VaI,330
+fsspec/fuse.py,sha256=66amOa6wdIbS0DMhhfAPUoOB37HPorfXD1izV0prmTY,10145
+fsspec/generic.py,sha256=NuNaP66OaphwMbuLHRFBLda78TD81isa9O4ozJqbUv0,13455
+fsspec/gui.py,sha256=XKoXZpUhRE7jOhRCJH4-jRbKhVu56aS8h9tecvPD3nc,13932
+fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+fsspec/implementations/arrow.py,sha256=_7TLuV6ZzNlpmUU_v6ud56u2wadzsKmY5qugPBxgMEs,8649
+fsspec/implementations/cache_mapper.py,sha256=iHgBA6gjzDJ7_mBboHFzpLTf55HP3UEwUOZ43xyUK4M,2429
+fsspec/implementations/cache_metadata.py,sha256=ZvyA7Y3KK-5Ct4E5pELzD6mH_5T03XqaKVT96qYDADU,8576
+fsspec/implementations/cached.py,sha256=LbbPbeUup07O0y7gXD_atFgajWM9p1vlDKu_BOyLfbo,30943
+fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
+fsspec/implementations/data.py,sha256=Oti0dKzyeadnVIedo3s8CADoh9bNM-96_6viTEYr4lo,1245
+fsspec/implementations/dbfs.py,sha256=cix9OYUveuSOx5UO5uRUwNUkYqjzyY0fkKnca1kTgZ0,15014
+fsspec/implementations/dirfs.py,sha256=inDIRSDPhI1_ud1MMBFrpZQ11VIAMJ_dZQtbE4V08Ng,11384
+fsspec/implementations/ftp.py,sha256=rp6cTog8xqjDPlKdSLKcsyP7K593_ByMabxGbNSEpTo,11655
+fsspec/implementations/git.py,sha256=vKGI-Vd5q4H2RrvhebkPc9NwlfkZ980OUGhebeCw-M0,4034
+fsspec/implementations/github.py,sha256=0kIiKkeAaROuHgdWBHVQFrzJ2ZfoDgymCehL_kJXHYA,7565
+fsspec/implementations/http.py,sha256=PkhfgUV3-T7fG2Jf-NLX9doH52snV5Wmw91uVA9k74M,29454
+fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ibN4Pg,3811
+fsspec/implementations/libarchive.py,sha256=5_I2DiLXwQ1JC8x-K7jXu-tBwhO9dj7tFLnb0bTnVMQ,7102
+fsspec/implementations/local.py,sha256=nxiRKg9FAQHTQss9-ET8ZzDXPGhSOktgkxrg0ffMs2I,13454
+fsspec/implementations/memory.py,sha256=2iU--pOV2KCTrS-d5K8VKSygh9MPk2D7NZ_C8lMMEIw,9701
+fsspec/implementations/reference.py,sha256=0iGu8mscaQ3a5iTlRNByytQ3_-1Bj8__ARqVwyy4q2M,43871
+fsspec/implementations/sftp.py,sha256=fMY9XZcmpjszQ2tCqO_TPaJesaeD_Dv7ptYzgUPGoO0,5631
+fsspec/implementations/smb.py,sha256=k3RtzW97lJtYuw_QpP1rJRFnUBmSsw9twFjUCex0a5U,10591
+fsspec/implementations/tar.py,sha256=dam78Tp_CozybNqCY2JYgGBS3Uc9FuJUAT9oB0lolOs,4111
+fsspec/implementations/webhdfs.py,sha256=wqVfno7z0TY1HepaIvKTUUcl_bi5NkV6qWsST8t_s7Y,16745
+fsspec/implementations/zip.py,sha256=JDX-3HOI15qUl6VTBsNPuDp5RVN6s2n3Bywd4mMu0T0,4347
+fsspec/mapping.py,sha256=WFEXRWxujQwfzzkRP5tpdIE0265okAtlP97qFZGvV1k,8165
+fsspec/parquet.py,sha256=qVxDhwc960SGOt5etcYAJxCr-7HQKP01687KpDR02Gw,19463
+fsspec/registry.py,sha256=-dl7sh2tsfhMA2uxz5KQDsPFehQTgMJIbVjNq6QLoKU,11145
+fsspec/spec.py,sha256=3t96RgizRN_slIuHXnuR0bXjVUfBS1TfuDrEua4oQvE,66277
+fsspec/tests/abstract/__init__.py,sha256=i1wcFixV6QhOwdoB24c8oXjzobISNqiKVz9kl2DvAY8,10028
+fsspec/tests/abstract/common.py,sha256=1GQwNo5AONzAnzZj0fWgn8NJPLXALehbsuGxS3FzWVU,4973
+fsspec/tests/abstract/copy.py,sha256=gU5-d97U3RSde35Vp4RxPY4rWwL744HiSrJ8IBOp9-8,19967
+fsspec/tests/abstract/get.py,sha256=vNR4HztvTR7Cj56AMo7_tx7TeYz1Jgr_2Wb8Lv-UiBY,20755
+fsspec/tests/abstract/put.py,sha256=7aih17OKB_IZZh1Mkq1eBDIjobhtMQmI8x-Pw-S_aZk,21201
+fsspec/transaction.py,sha256=jeexB-H6Aw_gN6Z7hoKKe6v8zizITq39-gyTgpipIKE,2251
+fsspec/utils.py,sha256=_VX_0VwDtoAFSjMYrxvJvnPNX9FMoHO5BlFHXJ0bHFI,23053
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/REQUESTED
ADDED
File without changes
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/WHEEL
ADDED
@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.42.0)
+Root-Is-Purelib: true
+Tag: py3-none-any
+
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+fsspec
.venv/Lib/site-packages/fsspec/implementations/__init__.py
ADDED
File without changes
.venv/Lib/site-packages/fsspec/implementations/jupyter.py
ADDED
@@ -0,0 +1,124 @@
+import base64
+import io
+import re
+
+import requests
+
+import fsspec
+
+
+class JupyterFileSystem(fsspec.AbstractFileSystem):
+    """View of the files as seen by a Jupyter server (notebook or lab)"""
+
+    protocol = ("jupyter", "jlab")
+
+    def __init__(self, url, tok=None, **kwargs):
+        """
+
+        Parameters
+        ----------
+        url : str
+            Base URL of the server, like "http://127.0.0.1:8888". May include
+            token in the string, which is given by the process when starting up
+        tok : str
+            If the token is obtained separately, can be given here
+        kwargs
+        """
+        if "?" in url:
+            if tok is None:
+                try:
+                    tok = re.findall("token=([a-z0-9]+)", url)[0]
+                except IndexError as e:
+                    raise ValueError("Could not determine token") from e
+            url = url.split("?", 1)[0]
+        self.url = url.rstrip("/") + "/api/contents"
+        self.session = requests.Session()
+        if tok:
+            self.session.headers["Authorization"] = f"token {tok}"
+
+        super().__init__(**kwargs)
+
+    def ls(self, path, detail=True, **kwargs):
+        path = self._strip_protocol(path)
+        r = self.session.get(f"{self.url}/{path}")
+        if r.status_code == 404:
+            return FileNotFoundError(path)
+        r.raise_for_status()
+        out = r.json()
+
+        if out["type"] == "directory":
+            out = out["content"]
+        else:
+            out = [out]
+        for o in out:
+            o["name"] = o.pop("path")
+            o.pop("content")
+            if o["type"] == "notebook":
+                o["type"] = "file"
+        if detail:
+            return out
+        return [o["name"] for o in out]
+
+    def cat_file(self, path, start=None, end=None, **kwargs):
+        path = self._strip_protocol(path)
+        r = self.session.get(f"{self.url}/{path}")
+        if r.status_code == 404:
+            return FileNotFoundError(path)
+        r.raise_for_status()
+        out = r.json()
+        if out["format"] == "text":
+            # data should be binary
+            b = out["content"].encode()
+        else:
+            b = base64.b64decode(out["content"])
+        return b[start:end]
+
+    def pipe_file(self, path, value, **_):
+        path = self._strip_protocol(path)
+        json = {
+            "name": path.rsplit("/", 1)[-1],
+            "path": path,
+            "size": len(value),
+            "content": base64.b64encode(value).decode(),
+            "format": "base64",
+            "type": "file",
+        }
+        self.session.put(f"{self.url}/{path}", json=json)
+
+    def mkdir(self, path, create_parents=True, **kwargs):
+        path = self._strip_protocol(path)
+        if create_parents and "/" in path:
+            self.mkdir(path.rsplit("/", 1)[0], True)
+        json = {
+            "name": path.rsplit("/", 1)[-1],
+            "path": path,
+            "size": None,
+            "content": None,
+            "type": "directory",
+        }
+        self.session.put(f"{self.url}/{path}", json=json)
+
+    def _rm(self, path):
+        path = self._strip_protocol(path)
+        self.session.delete(f"{self.url}/{path}")
+
+    def _open(self, path, mode="rb", **kwargs):
+        path = self._strip_protocol(path)
+        if mode == "rb":
+            data = self.cat_file(path)
+            return io.BytesIO(data)
+        else:
+            return SimpleFileWriter(self, path, mode="wb")
+
+
+class SimpleFileWriter(fsspec.spec.AbstractBufferedFile):
+    def _upload_chunk(self, final=False):
+        """Never uploads a chunk until file is done
+
+        Not suitable for large files
+        """
+        if final is False:
+            return False
+        self.buffer.seek(0)
+        data = self.buffer.read()
+        self.fs.pipe_file(self.path, data)
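The JupyterFileSystem added above is a thin wrapper around a Jupyter server's /api/contents REST endpoints. A hedged usage sketch, where the server URL, token, and notebook path are placeholder values:

```python
import fsspec

# placeholder URL/token for illustration; a real Jupyter server prints these at startup
fs = fsspec.filesystem("jupyter", url="http://127.0.0.1:8888", tok="abc123")
print(fs.ls("", detail=False))              # names of top-level files/directories
data = fs.cat_file("notebooks/demo.ipynb")  # raw bytes of one file
```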
.venv/Lib/site-packages/fsspec/implementations/libarchive.py
ADDED
@@ -0,0 +1,213 @@
+from contextlib import contextmanager
+from ctypes import (
+    CFUNCTYPE,
+    POINTER,
+    c_int,
+    c_longlong,
+    c_void_p,
+    cast,
+    create_string_buffer,
+)
+
+import libarchive
+import libarchive.ffi as ffi
+
+from fsspec import open_files
+from fsspec.archive import AbstractArchiveFileSystem
+from fsspec.implementations.memory import MemoryFile
+from fsspec.utils import DEFAULT_BLOCK_SIZE
+
+# Libarchive requires seekable files or memory only for certain archive
+# types. However, since we read the directory first to cache the contents
+# and also allow random access to any file, the file-like object needs
+# to be seekable no matter what.
+
+# Seek call-backs (not provided in the libarchive python wrapper)
+SEEK_CALLBACK = CFUNCTYPE(c_longlong, c_int, c_void_p, c_longlong, c_int)
+read_set_seek_callback = ffi.ffi(
+    "read_set_seek_callback", [ffi.c_archive_p, SEEK_CALLBACK], c_int, ffi.check_int
+)
+new_api = hasattr(ffi, "NO_OPEN_CB")
+
+
+@contextmanager
+def custom_reader(file, format_name="all", filter_name="all", block_size=ffi.page_size):
+    """Read an archive from a seekable file-like object.
+
+    The `file` object must support the standard `readinto` and 'seek' methods.
+    """
+    buf = create_string_buffer(block_size)
+    buf_p = cast(buf, c_void_p)
+
+    def read_func(archive_p, context, ptrptr):
+        # readinto the buffer, returns number of bytes read
+        length = file.readinto(buf)
+        # write the address of the buffer into the pointer
+        ptrptr = cast(ptrptr, POINTER(c_void_p))
+        ptrptr[0] = buf_p
+        # tell libarchive how much data was written into the buffer
+        return length
+
+    def seek_func(archive_p, context, offset, whence):
+        file.seek(offset, whence)
+        # tell libarchvie the current position
+        return file.tell()
+
+    read_cb = ffi.READ_CALLBACK(read_func)
+    seek_cb = SEEK_CALLBACK(seek_func)
+
+    if new_api:
+        open_cb = ffi.NO_OPEN_CB
+        close_cb = ffi.NO_CLOSE_CB
+    else:
+        open_cb = libarchive.read.OPEN_CALLBACK(ffi.VOID_CB)
+        close_cb = libarchive.read.CLOSE_CALLBACK(ffi.VOID_CB)
+
+    with libarchive.read.new_archive_read(format_name, filter_name) as archive_p:
+        read_set_seek_callback(archive_p, seek_cb)
+        ffi.read_open(archive_p, None, open_cb, read_cb, close_cb)
+        yield libarchive.read.ArchiveRead(archive_p)
+
+
+class LibArchiveFileSystem(AbstractArchiveFileSystem):
+    """Compressed archives as a file-system (read-only)
+
+    Supports the following formats:
+    tar, pax , cpio, ISO9660, zip, mtree, shar, ar, raw, xar, lha/lzh, rar
+    Microsoft CAB, 7-Zip, WARC
+
+    See the libarchive documentation for further restrictions.
+    https://www.libarchive.org/
+
+    Keeps file object open while instance lives. It only works in seekable
+    file-like objects. In case the filesystem does not support this kind of
+    file object, it is recommended to cache locally.
+
+    This class is pickleable, but not necessarily thread-safe (depends on the
+    platform). See libarchive documentation for details.
+    """
+
+    root_marker = ""
+    protocol = "libarchive"
+    cachable = False
+
+    def __init__(
+        self,
+        fo="",
+        mode="r",
+        target_protocol=None,
+        target_options=None,
+        block_size=DEFAULT_BLOCK_SIZE,
+        **kwargs,
+    ):
+        """
+        Parameters
+        ----------
+        fo: str or file-like
+            Contains ZIP, and must exist. If a str, will fetch file using
+            :meth:`~fsspec.open_files`, which must return one file exactly.
+        mode: str
+            Currently, only 'r' accepted
+        target_protocol: str (optional)
+            If ``fo`` is a string, this value can be used to override the
+            FS protocol inferred from a URL
+        target_options: dict (optional)
+            Kwargs passed when instantiating the target FS, if ``fo`` is
+            a string.
+        """
+        super().__init__(self, **kwargs)
+        if mode != "r":
+            raise ValueError("Only read from archive files accepted")
+        if isinstance(fo, str):
+            files = open_files(fo, protocol=target_protocol, **(target_options or {}))
+            if len(files) != 1:
+                raise ValueError(
+                    f'Path "{fo}" did not resolve to exactly one file: "{files}"'
+                )
+            fo = files[0]
+        self.of = fo
+        self.fo = fo.__enter__()  # the whole instance is a context
+        self.block_size = block_size
+        self.dir_cache = None
+
+    @contextmanager
+    def _open_archive(self):
+        self.fo.seek(0)
+        with custom_reader(self.fo, block_size=self.block_size) as arc:
+            yield arc
+
+    @classmethod
+    def _strip_protocol(cls, path):
+        # file paths are always relative to the archive root
+        return super()._strip_protocol(path).lstrip("/")
+
+    def _get_dirs(self):
+        fields = {
+            "name": "pathname",
+            "size": "size",
+            "created": "ctime",
+            "mode": "mode",
+            "uid": "uid",
+            "gid": "gid",
+            "mtime": "mtime",
+        }
+
+        if self.dir_cache is not None:
+            return
+
+        self.dir_cache = {}
+        list_names = []
+        with self._open_archive() as arc:
+            for entry in arc:
+                if not entry.isdir and not entry.isfile:
+                    # Skip symbolic links, fifo entries, etc.
+                    continue
+                self.dir_cache.update(
+                    {
+                        dirname: {"name": dirname, "size": 0, "type": "directory"}
+                        for dirname in self._all_dirnames(set(entry.name))
+                    }
+                )
+                f = {key: getattr(entry, fields[key]) for key in fields}
+                f["type"] = "directory" if entry.isdir else "file"
+                list_names.append(entry.name)
+
+                self.dir_cache[f["name"]] = f
+        # libarchive does not seem to return an entry for the directories (at least
+        # not in all formats), so get the directories names from the files names
+        self.dir_cache.update(
+            {
+                dirname: {"name": dirname, "size": 0, "type": "directory"}
+                for dirname in self._all_dirnames(list_names)
+            }
+        )
+
+    def _open(
+        self,
+        path,
+        mode="rb",
+        block_size=None,
+        autocommit=True,
+        cache_options=None,
+        **kwargs,
+    ):
+        path = self._strip_protocol(path)
+        if mode != "rb":
+            raise NotImplementedError
+
+        data = bytes()
+        with self._open_archive() as arc:
+            for entry in arc:
+                if entry.pathname != path:
+                    continue
+
+                if entry.size == 0:
+                    # empty file, so there are no blocks
+                    break
+
+                for block in entry.get_blocks(entry.size):
+                    data = block
+                    break
+                else:
+                    raise ValueError
+        return MemoryFile(fs=self, path=path, data=data)
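LibArchiveFileSystem above exposes a read-only view over any archive format libarchive understands, provided the underlying file object is seekable. A brief sketch (archive and member names are placeholders), which also needs the optional `libarchive-c` dependency listed in the metadata above:

```python
import fsspec

# placeholder archive path; any local or remote seekable file works
fs = fsspec.filesystem("libarchive", fo="bundle.7z")
print(fs.ls("", detail=False))        # member paths at the archive root
with fs.open("inner/data.txt") as f:  # the whole member is read into memory
    print(f.read())
```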
.venv/Lib/site-packages/fsspec/implementations/local.py
ADDED
@@ -0,0 +1,418 @@
+import datetime
+import io
+import logging
+import os
+import os.path as osp
+import re
+import shutil
+import stat
+import tempfile
+
+from fsspec import AbstractFileSystem
+from fsspec.compression import compr
+from fsspec.core import get_compression
+from fsspec.utils import isfilelike, stringify_path
+
+logger = logging.getLogger("fsspec.local")
+
+
+class LocalFileSystem(AbstractFileSystem):
+    """Interface to files on local storage
+
+    Parameters
+    ----------
+    auto_mkdir: bool
+        Whether, when opening a file, the directory containing it should
+        be created (if it doesn't already exist). This is assumed by pyarrow
+        code.
+    """
+
+    root_marker = "/"
+    protocol = "file", "local"
+    local_file = True
+
+    def __init__(self, auto_mkdir=False, **kwargs):
+        super().__init__(**kwargs)
+        self.auto_mkdir = auto_mkdir
+
+    @property
+    def fsid(self):
+        return "local"
+
+    def mkdir(self, path, create_parents=True, **kwargs):
+        path = self._strip_protocol(path)
+        if self.exists(path):
+            raise FileExistsError(path)
+        if create_parents:
+            self.makedirs(path, exist_ok=True)
+        else:
+            os.mkdir(path, **kwargs)
+
+    def makedirs(self, path, exist_ok=False):
+        path = self._strip_protocol(path)
+        os.makedirs(path, exist_ok=exist_ok)
+
+    def rmdir(self, path):
+        path = self._strip_protocol(path)
+        os.rmdir(path)
+
+    def ls(self, path, detail=False, **kwargs):
+        path = self._strip_protocol(path)
+        info = self.info(path)
+        if info["type"] == "directory":
+            with os.scandir(path) as it:
+                infos = [self.info(f) for f in it]
+        else:
+            infos = [info]
+
+        if not detail:
+            return [i["name"] for i in infos]
+        return infos
+
+    def info(self, path, **kwargs):
+        if isinstance(path, os.DirEntry):
+            # scandir DirEntry
+            out = path.stat(follow_symlinks=False)
+            link = path.is_symlink()
+            if path.is_dir(follow_symlinks=False):
+                t = "directory"
+            elif path.is_file(follow_symlinks=False):
+                t = "file"
+            else:
+                t = "other"
+            path = self._strip_protocol(path.path)
+        else:
+            # str or path-like
+            path = self._strip_protocol(path)
+            out = os.stat(path, follow_symlinks=False)
+            link = stat.S_ISLNK(out.st_mode)
+            if link:
+                out = os.stat(path, follow_symlinks=True)
+            if stat.S_ISDIR(out.st_mode):
+                t = "directory"
+            elif stat.S_ISREG(out.st_mode):
+                t = "file"
+            else:
+                t = "other"
+        result = {
+            "name": path,
+            "size": out.st_size,
+            "type": t,
+            "created": out.st_ctime,
+            "islink": link,
+        }
+        for field in ["mode", "uid", "gid", "mtime", "ino", "nlink"]:
+            result[field] = getattr(out, f"st_{field}")
+        if result["islink"]:
+            result["destination"] = os.readlink(path)
+            try:
+                out2 = os.stat(path, follow_symlinks=True)
+                result["size"] = out2.st_size
+            except OSError:
+                result["size"] = 0
+        return result
+
+    def lexists(self, path, **kwargs):
+        return osp.lexists(path)
+
+    def cp_file(self, path1, path2, **kwargs):
+        path1 = self._strip_protocol(path1).rstrip("/")
+        path2 = self._strip_protocol(path2).rstrip("/")
+        if self.auto_mkdir:
+            self.makedirs(self._parent(path2), exist_ok=True)
+        if self.isfile(path1):
+            shutil.copyfile(path1, path2)
+        elif self.isdir(path1):
+            self.mkdirs(path2, exist_ok=True)
+        else:
+            raise FileNotFoundError(path1)
+
+    def get_file(self, path1, path2, callback=None, **kwargs):
+        if isfilelike(path2):
+            with open(path1, "rb") as f:
+                shutil.copyfileobj(f, path2)
+        else:
+            return self.cp_file(path1, path2, **kwargs)
+
+    def put_file(self, path1, path2, callback=None, **kwargs):
+        return self.cp_file(path1, path2, **kwargs)
+
+    def mv_file(self, path1, path2, **kwargs):
+        path1 = self._strip_protocol(path1).rstrip("/")
+        path2 = self._strip_protocol(path2).rstrip("/")
+        shutil.move(path1, path2)
+
+    def link(self, src, dst, **kwargs):
+        src = self._strip_protocol(src)
+        dst = self._strip_protocol(dst)
+        os.link(src, dst, **kwargs)
+
+    def symlink(self, src, dst, **kwargs):
+        src = self._strip_protocol(src)
+        dst = self._strip_protocol(dst)
+        os.symlink(src, dst, **kwargs)
+
+    def islink(self, path) -> bool:
+        return os.path.islink(self._strip_protocol(path))
+
+    def rm_file(self, path):
+        os.remove(self._strip_protocol(path))
+
+    def rm(self, path, recursive=False, maxdepth=None):
+        if not isinstance(path, list):
+            path = [path]
+
+        for p in path:
+            p = self._strip_protocol(p).rstrip("/")
+            if self.isdir(p):
+                if not recursive:
+                    raise ValueError("Cannot delete directory, set recursive=True")
+                if osp.abspath(p) == os.getcwd():
+                    raise ValueError("Cannot delete current working directory")
+                shutil.rmtree(p)
+            else:
+                os.remove(p)
+
+    def unstrip_protocol(self, name):
+        name = self._strip_protocol(name)  # normalise for local/win/...
+        return f"file://{name}"
+
+    def _open(self, path, mode="rb", block_size=None, **kwargs):
+        path = self._strip_protocol(path)
+        if self.auto_mkdir and "w" in mode:
+            self.makedirs(self._parent(path), exist_ok=True)
+        return LocalFileOpener(path, mode, fs=self, **kwargs)
+
+    def touch(self, path, truncate=True, **kwargs):
+        path = self._strip_protocol(path)
+        if self.auto_mkdir:
+            self.makedirs(self._parent(path), exist_ok=True)
+        if self.exists(path):
+            os.utime(path, None)
+        else:
+            open(path, "a").close()
+        if truncate:
+            os.truncate(path, 0)
+
+    def created(self, path):
+        info = self.info(path=path)
+        return datetime.datetime.fromtimestamp(
+            info["created"], tz=datetime.timezone.utc
+        )
+
+    def modified(self, path):
+        info = self.info(path=path)
+        return datetime.datetime.fromtimestamp(info["mtime"], tz=datetime.timezone.utc)
+
+    @classmethod
+    def _parent(cls, path):
+        path = cls._strip_protocol(path).rstrip("/")
+        if "/" in path:
+            return path.rsplit("/", 1)[0]
+        else:
+            return cls.root_marker
+
+    @classmethod
+    def _strip_protocol(cls, path):
+        path = stringify_path(path)
+        if path.startswith("file://"):
+            path = path[7:]
+        elif path.startswith("file:"):
+            path = path[5:]
+        elif path.startswith("local://"):
+            path = path[8:]
+        elif path.startswith("local:"):
+            path = path[6:]
+        return make_path_posix(path).rstrip("/") or cls.root_marker
+
+    def _isfilestore(self):
+        # Inheriting from DaskFileSystem makes this False (S3, etc. were)
+        # the original motivation. But we are a posix-like file system.
+        # See https://github.com/dask/dask/issues/5526
+        return True
+
+    def chmod(self, path, mode):
+        path = stringify_path(path)
+        return os.chmod(path, mode)
+
+
+def make_path_posix(path, sep=os.sep):
+    """Make path generic"""
+    if isinstance(path, (list, set, tuple)):
+        return type(path)(make_path_posix(p) for p in path)
+    if "~" in path:
+        path = osp.expanduser(path)
+    if sep == "/":
+        # most common fast case for posix
+        if path.startswith("/"):
+            return path
+        if path.startswith("./"):
+            path = path[2:]
+            return f"{os.getcwd()}/{path}"
+    if (
+        (sep not in path and "/" not in path)
+        or (sep == "/" and not path.startswith("/"))
+        or (sep == "\\" and ":" not in path and not path.startswith("\\\\"))
+    ):
+        # relative path like "path" or "rel\\path" (win) or rel/path"
+        if os.sep == "\\":
+            # abspath made some more '\\' separators
+            return make_path_posix(osp.abspath(path))
+        else:
+            return f"{os.getcwd()}/{path}"
+    if path.startswith("file://"):
+        path = path[7:]
+    if re.match("/[A-Za-z]:", path):
+        # for windows file URI like "file:///C:/folder/file"
+        # or "file:///C:\\dir\\file"
+        path = path[1:].replace("\\", "/").replace("//", "/")
+    if path.startswith("\\\\"):
+        # special case for windows UNC/DFS-style paths, do nothing,
+        # just flip the slashes around (case below does not work!)
+        return path.replace("\\", "/")
+    if re.match("[A-Za-z]:", path):
+        # windows full path like "C:\\local\\path"
+        return path.lstrip("\\").replace("\\", "/").replace("//", "/")
+    if path.startswith("\\"):
+        # windows network path like "\\server\\path"
+        return "/" + path.lstrip("\\").replace("\\", "/").replace("//", "/")
+    return path
+
+
+def trailing_sep(path):
+    """Return True if the path ends with a path separator.
+
+    A forward slash is always considered a path separator, even on Operating
+    Systems that normally use a backslash.
+    """
+    # TODO: if all incoming paths were posix-compliant then separator would
+    # always be a forward slash, simplifying this function.
+    # See https://github.com/fsspec/filesystem_spec/pull/1250
+    return path.endswith(os.sep) or (os.altsep is not None and path.endswith(os.altsep))
+
+
+class LocalFileOpener(io.IOBase):
+    def __init__(
+        self, path, mode, autocommit=True, fs=None, compression=None, **kwargs
+    ):
+        logger.debug("open file: %s", path)
+        self.path = path
+        self.mode = mode
+        self.fs = fs
+        self.f = None
+        self.autocommit = autocommit
+        self.compression = get_compression(path, compression)
+        self.blocksize = io.DEFAULT_BUFFER_SIZE
+        self._open()
+
+    def _open(self):
+        if self.f is None or self.f.closed:
+            if self.autocommit or "w" not in self.mode:
+                self.f = open(self.path, mode=self.mode)
+                if self.compression:
+                    compress = compr[self.compression]
+                    self.f = compress(self.f, mode=self.mode)
+            else:
+                # TODO: check if path is writable?
+                i, name = tempfile.mkstemp()
+                os.close(i)  # we want normal open and normal buffered file
+                self.temp = name
+                self.f = open(name, mode=self.mode)
+            if "w" not in self.mode:
+                self.size = self.f.seek(0, 2)
+                self.f.seek(0)
+                self.f.size = self.size
+
+    def _fetch_range(self, start, end):
+        # probably only used by cached FS
+        if "r" not in self.mode:
+            raise ValueError
+        self._open()
+        self.f.seek(start)
+        return self.f.read(end - start)
+
+    def __setstate__(self, state):
+        self.f = None
+        loc = state.pop("loc", None)
+        self.__dict__.update(state)
+        if "r" in state["mode"]:
+            self.f = None
+            self._open()
+            self.f.seek(loc)
+
+    def __getstate__(self):
+        d = self.__dict__.copy()
+        d.pop("f")
+        if "r" in self.mode:
+            d["loc"] = self.f.tell()
+        else:
+            if not self.f.closed:
+                raise ValueError("Cannot serialise open write-mode local file")
+        return d
+
+    def commit(self):
+        if self.autocommit:
+            raise RuntimeError("Can only commit if not already set to autocommit")
+        shutil.move(self.temp, self.path)
+
+    def discard(self):
+        if self.autocommit:
+            raise RuntimeError("Cannot discard if set to autocommit")
+        os.remove(self.temp)
+
+    def readable(self) -> bool:
+        return True
+
+    def writable(self) -> bool:
+        return "r" not in self.mode
+
+    def read(self, *args, **kwargs):
+        return self.f.read(*args, **kwargs)
+
+    def write(self, *args, **kwargs):
+        return self.f.write(*args, **kwargs)
+
+    def tell(self, *args, **kwargs):
+        return self.f.tell(*args, **kwargs)
+
+    def seek(self, *args, **kwargs):
+        return self.f.seek(*args, **kwargs)
+
+    def seekable(self, *args, **kwargs):
+        return self.f.seekable(*args, **kwargs)
+
+    def readline(self, *args, **kwargs):
+        return self.f.readline(*args, **kwargs)
+
+    def readlines(self, *args, **kwargs):
+        return self.f.readlines(*args, **kwargs)
+
+    def close(self):
+        return self.f.close()
+
+    def truncate(self, size=None) -> int:
+        return self.f.truncate(size)
+
+    @property
+    def closed(self):
+        return self.f.closed
+
+    def fileno(self):
+        return self.raw.fileno()
+
+    def flush(self) -> None:
+        self.f.flush()
+
+    def __iter__(self):
+        return self.f.__iter__()
+
+    def __getattr__(self, item):
+        return getattr(self.f, item)
+
+    def __enter__(self):
+        self._incontext = True
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self._incontext = False
+        self.f.__exit__(exc_type, exc_value, traceback)
.venv/Lib/site-packages/fsspec/implementations/memory.py
ADDED
@@ -0,0 +1,292 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import logging
|
4 |
+
from datetime import datetime, timezone
|
5 |
+
from errno import ENOTEMPTY
|
6 |
+
from io import BytesIO
|
7 |
+
from typing import Any, ClassVar
|
8 |
+
|
9 |
+
from fsspec import AbstractFileSystem
|
10 |
+
|
11 |
+
logger = logging.getLogger("fsspec.memoryfs")
|
12 |
+
|
13 |
+
|
14 |
+
class MemoryFileSystem(AbstractFileSystem):
|
15 |
+
"""A filesystem based on a dict of BytesIO objects
|
16 |
+
|
17 |
+
This is a global filesystem so instances of this class all point to the same
|
18 |
+
in memory filesystem.
|
19 |
+
"""
|
20 |
+
|
21 |
+
store: ClassVar[dict[str, Any]] = {} # global, do not overwrite!
|
22 |
+
pseudo_dirs = [""] # global, do not overwrite!
|
23 |
+
protocol = "memory"
|
24 |
+
root_marker = "/"
|
25 |
+
|
26 |
+
@classmethod
|
27 |
+
def _strip_protocol(cls, path):
|
28 |
+
if path.startswith("memory://"):
|
29 |
+
path = path[len("memory://") :]
|
30 |
+
if "::" in path or "://" in path:
|
31 |
+
return path.rstrip("/")
|
32 |
+
path = path.lstrip("/").rstrip("/")
|
33 |
+
return "/" + path if path else ""
|
34 |
+
|
35 |
+
def ls(self, path, detail=True, **kwargs):
|
36 |
+
path = self._strip_protocol(path)
|
37 |
+
if path in self.store:
|
38 |
+
# there is a key with this exact name
|
39 |
+
if not detail:
|
40 |
+
return [path]
|
41 |
+
return [
|
42 |
+
{
|
43 |
+
"name": path,
|
44 |
+
"size": self.store[path].size,
|
45 |
+
"type": "file",
|
46 |
+
"created": self.store[path].created.timestamp(),
|
47 |
+
}
|
48 |
+
]
|
49 |
+
paths = set()
|
50 |
+
starter = path + "/"
|
51 |
+
out = []
|
52 |
+
for p2 in tuple(self.store):
|
53 |
+
if p2.startswith(starter):
|
54 |
+
if "/" not in p2[len(starter) :]:
|
55 |
+
# exact child
|
56 |
+
out.append(
|
57 |
+
{
|
58 |
+
"name": p2,
|
59 |
+
"size": self.store[p2].size,
|
60 |
+
"type": "file",
|
61 |
+
"created": self.store[p2].created.timestamp(),
|
62 |
+
}
|
63 |
+
)
|
64 |
+
elif len(p2) > len(starter):
|
65 |
+
# implied child directory
|
66 |
+
ppath = starter + p2[len(starter) :].split("/", 1)[0]
|
67 |
+
if ppath not in paths:
|
68 |
+
out = out or []
|
69 |
+
out.append(
|
70 |
+
{
|
71 |
+
"name": ppath,
|
72 |
+
"size": 0,
|
73 |
+
"type": "directory",
|
74 |
+
}
|
75 |
+
)
|
76 |
+
paths.add(ppath)
|
77 |
+
for p2 in self.pseudo_dirs:
|
78 |
+
if p2.startswith(starter):
|
79 |
+
if "/" not in p2[len(starter) :]:
|
80 |
+
# exact child pdir
|
81 |
+
if p2 not in paths:
|
82 |
+
out.append({"name": p2, "size": 0, "type": "directory"})
|
83 |
+
paths.add(p2)
|
84 |
+
else:
|
85 |
+
# directory implied by deeper pdir
|
86 |
+
ppath = starter + p2[len(starter) :].split("/", 1)[0]
|
87 |
+
if ppath not in paths:
|
88 |
+
out.append({"name": ppath, "size": 0, "type": "directory"})
|
89 |
+
paths.add(ppath)
|
90 |
+
if not out:
|
91 |
+
if path in self.pseudo_dirs:
|
92 |
+
# empty dir
|
93 |
+
return []
|
94 |
+
raise FileNotFoundError(path)
|
95 |
+
if detail:
|
96 |
+
return out
|
97 |
+
return sorted([f["name"] for f in out])
|
98 |
+
|
99 |
+
def mkdir(self, path, create_parents=True, **kwargs):
|
100 |
+
path = self._strip_protocol(path)
|
101 |
+
if path in self.store or path in self.pseudo_dirs:
|
102 |
+
raise FileExistsError(path)
|
103 |
+
if self._parent(path).strip("/") and self.isfile(self._parent(path)):
|
104 |
+
raise NotADirectoryError(self._parent(path))
|
105 |
+
if create_parents and self._parent(path).strip("/"):
|
106 |
+
try:
|
107 |
+
self.mkdir(self._parent(path), create_parents, **kwargs)
|
108 |
+
except FileExistsError:
|
109 |
+
pass
|
110 |
+
if path and path not in self.pseudo_dirs:
|
111 |
+
self.pseudo_dirs.append(path)
|
112 |
+
|
113 |
+
def makedirs(self, path, exist_ok=False):
|
114 |
+
try:
|
115 |
+
self.mkdir(path, create_parents=True)
|
116 |
+
except FileExistsError:
|
117 |
+
if not exist_ok:
|
118 |
+
raise
|
119 |
+
|
120 |
+
def pipe_file(self, path, value, **kwargs):
|
121 |
+
"""Set the bytes of given file
|
122 |
+
|
123 |
+
Avoids copies of the data if possible
|
124 |
+
"""
|
125 |
+
self.open(path, "wb", data=value)
|
126 |
+
|
127 |
+
def rmdir(self, path):
|
128 |
+
path = self._strip_protocol(path)
|
129 |
+
if path == "":
|
130 |
+
# silently avoid deleting FS root
|
131 |
+
return
|
132 |
+
if path in self.pseudo_dirs:
|
133 |
+
if not self.ls(path):
|
134 |
+
self.pseudo_dirs.remove(path)
|
135 |
+
else:
|
136 |
+
raise OSError(ENOTEMPTY, "Directory not empty", path)
|
137 |
+
else:
|
138 |
+
raise FileNotFoundError(path)
|
139 |
+
|
140 |
+
def info(self, path, **kwargs):
|
141 |
+
path = self._strip_protocol(path)
|
142 |
+
if path in self.pseudo_dirs or any(
|
143 |
+
p.startswith(path + "/") for p in list(self.store) + self.pseudo_dirs
|
144 |
+
):
|
145 |
+
return {
|
146 |
+
"name": path,
|
147 |
+
"size": 0,
|
148 |
+
"type": "directory",
|
149 |
+
}
|
150 |
+
elif path in self.store:
|
151 |
+
filelike = self.store[path]
|
152 |
+
return {
|
153 |
+
"name": path,
|
154 |
+
"size": filelike.size,
|
155 |
+
"type": "file",
|
156 |
+
"created": getattr(filelike, "created", None),
|
157 |
+
}
|
158 |
+
else:
|
159 |
+
raise FileNotFoundError(path)
|
160 |
+
|
161 |
+
def _open(
|
162 |
+
self,
|
163 |
+
path,
|
164 |
+
mode="rb",
|
165 |
+
block_size=None,
|
166 |
+
autocommit=True,
|
167 |
+
cache_options=None,
|
168 |
+
**kwargs,
|
169 |
+
):
|
170 |
+
path = self._strip_protocol(path)
|
171 |
+
if path in self.pseudo_dirs:
|
172 |
+
raise IsADirectoryError(path)
|
173 |
+
parent = path
|
174 |
+
while len(parent) > 1:
|
175 |
+
parent = self._parent(parent)
|
176 |
+
if self.isfile(parent):
|
177 |
+
raise FileExistsError(parent)
|
178 |
+
if mode in ["rb", "ab", "r+b"]:
|
179 |
+
if path in self.store:
|
180 |
+
f = self.store[path]
|
181 |
+
if mode == "ab":
|
182 |
+
# position at the end of file
|
183 |
+
f.seek(0, 2)
|
184 |
+
else:
|
185 |
+
# position at the beginning of file
|
186 |
+
f.seek(0)
|
187 |
+
return f
|
188 |
+
else:
|
189 |
+
raise FileNotFoundError(path)
|
190 |
+
elif mode == "wb":
|
191 |
+
m = MemoryFile(self, path, kwargs.get("data"))
|
192 |
+
if not self._intrans:
|
193 |
+
m.commit()
|
194 |
+
return m
|
195 |
+
else:
|
196 |
+
name = self.__class__.__name__
|
197 |
+
raise ValueError(f"unsupported file mode for {name}: {mode!r}")
|
198 |
+
|
199 |
+
def cp_file(self, path1, path2, **kwargs):
|
200 |
+
path1 = self._strip_protocol(path1)
|
201 |
+
path2 = self._strip_protocol(path2)
|
202 |
+
if self.isfile(path1):
|
203 |
+
self.store[path2] = MemoryFile(
|
204 |
+
self, path2, self.store[path1].getvalue()
|
205 |
+
) # implicit copy
|
206 |
+
elif self.isdir(path1):
|
207 |
+
if path2 not in self.pseudo_dirs:
|
208 |
+
self.pseudo_dirs.append(path2)
|
209 |
+
else:
|
210 |
+
raise FileNotFoundError(path1)
|
211 |
+
|
212 |
+
def cat_file(self, path, start=None, end=None, **kwargs):
|
213 |
+
path = self._strip_protocol(path)
|
214 |
+
try:
|
215 |
+
return bytes(self.store[path].getbuffer()[start:end])
|
216 |
+
except KeyError:
|
217 |
+
raise FileNotFoundError(path)
|
218 |
+
|
219 |
+
def _rm(self, path):
|
220 |
+
path = self._strip_protocol(path)
|
221 |
+
try:
|
222 |
+
del self.store[path]
|
223 |
+
except KeyError as e:
|
224 |
+
raise FileNotFoundError(path) from e
|
225 |
+
|
226 |
+
def modified(self, path):
|
227 |
+
path = self._strip_protocol(path)
|
228 |
+
try:
|
229 |
+
return self.store[path].modified
|
230 |
+
except KeyError:
|
231 |
+
raise FileNotFoundError(path)
|
232 |
+
|
233 |
+
def created(self, path):
|
234 |
+
path = self._strip_protocol(path)
|
235 |
+
try:
|
236 |
+
return self.store[path].created
|
237 |
+
except KeyError:
|
238 |
+
raise FileNotFoundError(path)
|
239 |
+
|
240 |
+
def rm(self, path, recursive=False, maxdepth=None):
|
241 |
+
if isinstance(path, str):
|
242 |
+
path = self._strip_protocol(path)
|
243 |
+
else:
|
244 |
+
path = [self._strip_protocol(p) for p in path]
|
245 |
+
paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
|
246 |
+
for p in reversed(paths):
|
247 |
+
# If the expanded path doesn't exist, it is only because the expanded
|
248 |
+
# path was a directory that does not exist in self.pseudo_dirs. This
|
249 |
+
# is possible if you directly create files without making the
|
250 |
+
# directories first.
|
251 |
+
if not self.exists(p):
|
252 |
+
continue
|
253 |
+
if self.isfile(p):
|
254 |
+
self.rm_file(p)
|
255 |
+
else:
|
256 |
+
self.rmdir(p)
|
257 |
+
|
258 |
+
|
259 |
+
class MemoryFile(BytesIO):
|
260 |
+
"""A BytesIO which can't close and works as a context manager
|
261 |
+
|
262 |
+
Can initialise with data. Each path should only be active once at any moment.
|
263 |
+
|
264 |
+
No need to provide fs, path if auto-committing (default)
|
265 |
+
"""
|
266 |
+
|
267 |
+
def __init__(self, fs=None, path=None, data=None):
|
268 |
+
logger.debug("open file %s", path)
|
269 |
+
self.fs = fs
|
270 |
+
self.path = path
|
271 |
+
self.created = datetime.now(tz=timezone.utc)
|
272 |
+
self.modified = datetime.now(tz=timezone.utc)
|
273 |
+
if data:
|
274 |
+
super().__init__(data)
|
275 |
+
self.seek(0)
|
276 |
+
|
277 |
+
@property
|
278 |
+
def size(self):
|
279 |
+
return self.getbuffer().nbytes
|
280 |
+
|
281 |
+
def __enter__(self):
|
282 |
+
return self
|
283 |
+
|
284 |
+
def close(self):
|
285 |
+
pass
|
286 |
+
|
287 |
+
def discard(self):
|
288 |
+
pass
|
289 |
+
|
290 |
+
def commit(self):
|
291 |
+
self.fs.store[self.path] = self
|
292 |
+
self.modified = datetime.now(tz=timezone.utc)
|
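To make the behaviour of the in-memory filesystem above concrete, here is a minimal usage sketch; it is not part of the packaged file, and the paths are invented for illustration.

# Usage sketch for MemoryFileSystem (illustrative only; paths are made up).
import fsspec

fs = fsspec.filesystem("memory")                 # in-process, dict-backed store
fs.mkdir("/project/data")                        # registered as a pseudo-directory
fs.pipe_file("/project/data/a.bin", b"hello")    # stored as a MemoryFile
print(fs.ls("/project/data", detail=False))      # expect ['/project/data/a.bin']
print(fs.cat_file("/project/data/a.bin"))        # expect b'hello'
fs.rm("/project", recursive=True)                # removes the file, then the dirs
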
.venv/Lib/site-packages/fsspec/implementations/reference.py
ADDED
@@ -0,0 +1,1160 @@
1 |
+
import base64
|
2 |
+
import collections
|
3 |
+
import io
|
4 |
+
import itertools
|
5 |
+
import logging
|
6 |
+
import math
|
7 |
+
import os
|
8 |
+
from functools import lru_cache
|
9 |
+
from typing import TYPE_CHECKING
|
10 |
+
|
11 |
+
import fsspec.core
|
12 |
+
|
13 |
+
try:
|
14 |
+
import ujson as json
|
15 |
+
except ImportError:
|
16 |
+
if not TYPE_CHECKING:
|
17 |
+
import json
|
18 |
+
|
19 |
+
from ..asyn import AsyncFileSystem
|
20 |
+
from ..callbacks import DEFAULT_CALLBACK
|
21 |
+
from ..core import filesystem, open, split_protocol
|
22 |
+
from ..utils import isfilelike, merge_offset_ranges, other_paths
|
23 |
+
|
24 |
+
logger = logging.getLogger("fsspec.reference")
|
25 |
+
|
26 |
+
|
27 |
+
class ReferenceNotReachable(RuntimeError):
|
28 |
+
def __init__(self, reference, target, *args):
|
29 |
+
super().__init__(*args)
|
30 |
+
self.reference = reference
|
31 |
+
self.target = target
|
32 |
+
|
33 |
+
def __str__(self):
|
34 |
+
return f'Reference "{self.reference}" failed to fetch target {self.target}'
|
35 |
+
|
36 |
+
|
37 |
+
def _first(d):
|
38 |
+
return list(d.values())[0]
|
39 |
+
|
40 |
+
|
41 |
+
def _prot_in_references(path, references):
|
42 |
+
ref = references.get(path)
|
43 |
+
if isinstance(ref, (list, tuple)):
|
44 |
+
return split_protocol(ref[0])[0] if ref[0] else ref[0]
|
45 |
+
|
46 |
+
|
47 |
+
def _protocol_groups(paths, references):
|
48 |
+
if isinstance(paths, str):
|
49 |
+
return {_prot_in_references(paths, references): [paths]}
|
50 |
+
out = {}
|
51 |
+
for path in paths:
|
52 |
+
protocol = _prot_in_references(path, references)
|
53 |
+
out.setdefault(protocol, []).append(path)
|
54 |
+
return out
|
55 |
+
|
56 |
+
|
57 |
+
class RefsValuesView(collections.abc.ValuesView):
|
58 |
+
def __iter__(self):
|
59 |
+
for val in self._mapping.zmetadata.values():
|
60 |
+
yield json.dumps(val).encode()
|
61 |
+
yield from self._mapping._items.values()
|
62 |
+
for field in self._mapping.listdir():
|
63 |
+
chunk_sizes = self._mapping._get_chunk_sizes(field)
|
64 |
+
if len(chunk_sizes) == 0:
|
65 |
+
yield self._mapping[field + "/0"]
|
66 |
+
continue
|
67 |
+
yield from self._mapping._generate_all_records(field)
|
68 |
+
|
69 |
+
|
70 |
+
class RefsItemsView(collections.abc.ItemsView):
|
71 |
+
def __iter__(self):
|
72 |
+
return zip(self._mapping.keys(), self._mapping.values())
|
73 |
+
|
74 |
+
|
75 |
+
def ravel_multi_index(idx, sizes):
|
76 |
+
val = 0
|
77 |
+
mult = 1
|
78 |
+
for i, s in zip(idx[::-1], sizes[::-1]):
|
79 |
+
val += i * mult
|
80 |
+
mult *= s
|
81 |
+
return val
|
82 |
+
|
83 |
+
|
84 |
+
class LazyReferenceMapper(collections.abc.MutableMapping):
|
85 |
+
"""This interface can be used to read/write references from Parquet stores.
|
86 |
+
It is not intended for other types of references.
|
87 |
+
It can be used with Kerchunk's MultiZarrToZarr method to combine
|
88 |
+
references into a parquet store.
|
89 |
+
Examples of this use-case can be found here:
|
90 |
+
https://fsspec.github.io/kerchunk/advanced.html?highlight=parquet#parquet-storage"""
|
91 |
+
|
92 |
+
# import is class level to prevent numpy dep requirement for fsspec
|
93 |
+
@property
|
94 |
+
def np(self):
|
95 |
+
import numpy as np
|
96 |
+
|
97 |
+
return np
|
98 |
+
|
99 |
+
@property
|
100 |
+
def pd(self):
|
101 |
+
import pandas as pd
|
102 |
+
|
103 |
+
return pd
|
104 |
+
|
105 |
+
def __init__(
|
106 |
+
self, root, fs=None, out_root=None, cache_size=128, categorical_threshold=10
|
107 |
+
):
|
108 |
+
"""
|
109 |
+
|
110 |
+
This instance will be writable, storing changes in memory until full partitions
|
111 |
+
are accumulated or .flush() is called.
|
112 |
+
|
113 |
+
To create an empty lazy store, use .create()
|
114 |
+
|
115 |
+
Parameters
|
116 |
+
----------
|
117 |
+
root : str
|
118 |
+
Root of parquet store
|
119 |
+
fs : fsspec.AbstractFileSystem
|
120 |
+
fsspec filesystem object, default is local filesystem.
|
121 |
+
cache_size : int, default=128
|
122 |
+
Maximum size of LRU cache, where cache_size*record_size denotes
|
123 |
+
the total number of references that can be loaded in memory at once.
|
124 |
+
categorical_threshold : int
|
125 |
+
Encode urls as pandas.Categorical to reduce memory footprint if the ratio
|
126 |
+
of the number of unique urls to total number of refs for each variable
|
127 |
+
is greater than or equal to this number. (default 10)
|
128 |
+
"""
|
129 |
+
self.root = root
|
130 |
+
self.chunk_sizes = {}
|
131 |
+
self.out_root = out_root or self.root
|
132 |
+
self.cat_thresh = categorical_threshold
|
133 |
+
self.cache_size = cache_size
|
134 |
+
self.dirs = None
|
135 |
+
self.url = self.root + "/{field}/refs.{record}.parq"
|
136 |
+
# TODO: derive fs from `root`
|
137 |
+
self.fs = fsspec.filesystem("file") if fs is None else fs
|
138 |
+
|
139 |
+
def __getattr__(self, item):
|
140 |
+
if item in ("_items", "record_size", "zmetadata"):
|
141 |
+
self.setup()
|
142 |
+
# avoid possible recursion if setup fails somehow
|
143 |
+
return self.__dict__[item]
|
144 |
+
raise AttributeError(item)
|
145 |
+
|
146 |
+
def setup(self):
|
147 |
+
self._items = {}
|
148 |
+
self._items[".zmetadata"] = self.fs.cat_file(
|
149 |
+
"/".join([self.root, ".zmetadata"])
|
150 |
+
)
|
151 |
+
met = json.loads(self._items[".zmetadata"])
|
152 |
+
self.record_size = met["record_size"]
|
153 |
+
self.zmetadata = met["metadata"]
|
154 |
+
|
155 |
+
# Define function to open and decompress refs
|
156 |
+
@lru_cache(maxsize=self.cache_size)
|
157 |
+
def open_refs(field, record):
|
158 |
+
"""cached parquet file loader"""
|
159 |
+
path = self.url.format(field=field, record=record)
|
160 |
+
data = io.BytesIO(self.fs.cat_file(path))
|
161 |
+
df = self.pd.read_parquet(data, engine="fastparquet")
|
162 |
+
refs = {c: df[c].values for c in df.columns}
|
163 |
+
return refs
|
164 |
+
|
165 |
+
self.open_refs = open_refs
|
166 |
+
|
167 |
+
@staticmethod
|
168 |
+
def create(root, storage_options=None, fs=None, record_size=10000, **kwargs):
|
169 |
+
"""Make empty parquet reference set
|
170 |
+
|
171 |
+
First deletes the contents of the given directory, if it exists.
|
172 |
+
|
173 |
+
Parameters
|
174 |
+
----------
|
175 |
+
root: str
|
176 |
+
Directory to contain the output; will be created
|
177 |
+
storage_options: dict | None
|
178 |
+
For making the filesystem to use for writing is fs is None
|
179 |
+
fs: FileSystem | None
|
180 |
+
Filesystem for writing
|
181 |
+
record_size: int
|
182 |
+
Number of references per parquet file
|
183 |
+
kwargs: passed to __init__
|
184 |
+
|
185 |
+
Returns
|
186 |
+
-------
|
187 |
+
LazyReferenceMapper instance
|
188 |
+
"""
|
189 |
+
met = {"metadata": {}, "record_size": record_size}
|
190 |
+
if fs is None:
|
191 |
+
fs, root = fsspec.core.url_to_fs(root, **(storage_options or {}))
|
192 |
+
if fs.exists(root):
|
193 |
+
fs.rm(root, recursive=True)
|
194 |
+
fs.makedirs(root, exist_ok=True)
|
195 |
+
fs.pipe("/".join([root, ".zmetadata"]), json.dumps(met).encode())
|
196 |
+
return LazyReferenceMapper(root, fs, **kwargs)
|
197 |
+
|
198 |
+
def listdir(self, basename=True):
|
199 |
+
"""List top-level directories"""
|
200 |
+
# cache me?
|
201 |
+
if self.dirs is None:
|
202 |
+
dirs = [p.split("/", 1)[0] for p in self.zmetadata]
|
203 |
+
self.dirs = {p for p in dirs if p and not p.startswith(".")}
|
204 |
+
listing = self.dirs
|
205 |
+
if basename:
|
206 |
+
listing = [os.path.basename(path) for path in listing]
|
207 |
+
return listing
|
208 |
+
|
209 |
+
def ls(self, path="", detail=True):
|
210 |
+
"""Shortcut file listings"""
|
211 |
+
if not path:
|
212 |
+
dirnames = self.listdir()
|
213 |
+
others = set(
|
214 |
+
[".zmetadata"]
|
215 |
+
+ [name for name in self.zmetadata if "/" not in name]
|
216 |
+
+ [name for name in self._items if "/" not in name]
|
217 |
+
)
|
218 |
+
if detail is False:
|
219 |
+
others.update(dirnames)
|
220 |
+
return sorted(others)
|
221 |
+
dirinfo = [
|
222 |
+
{"name": name, "type": "directory", "size": 0} for name in dirnames
|
223 |
+
]
|
224 |
+
fileinfo = [
|
225 |
+
{
|
226 |
+
"name": name,
|
227 |
+
"type": "file",
|
228 |
+
"size": len(
|
229 |
+
json.dumps(self.zmetadata[name])
|
230 |
+
if name in self.zmetadata
|
231 |
+
else self._items[name]
|
232 |
+
),
|
233 |
+
}
|
234 |
+
for name in others
|
235 |
+
]
|
236 |
+
return sorted(dirinfo + fileinfo, key=lambda s: s["name"])
|
237 |
+
parts = path.split("/", 1)
|
238 |
+
if len(parts) > 1:
|
239 |
+
raise FileNotFoundError("Cannot list within directories right now")
|
240 |
+
field = parts[0]
|
241 |
+
others = set(
|
242 |
+
[name for name in self.zmetadata if name.startswith(f"{path}/")]
|
243 |
+
+ [name for name in self._items if name.startswith(f"{path}/")]
|
244 |
+
)
|
245 |
+
fileinfo = [
|
246 |
+
{
|
247 |
+
"name": name,
|
248 |
+
"type": "file",
|
249 |
+
"size": len(
|
250 |
+
json.dumps(self.zmetadata[name])
|
251 |
+
if name in self.zmetadata
|
252 |
+
else self._items[name]
|
253 |
+
),
|
254 |
+
}
|
255 |
+
for name in others
|
256 |
+
]
|
257 |
+
keys = self._keys_in_field(field)
|
258 |
+
|
259 |
+
if detail is False:
|
260 |
+
return list(others) + list(keys)
|
261 |
+
recs = self._generate_all_records(field)
|
262 |
+
recinfo = [
|
263 |
+
{"name": name, "type": "file", "size": rec[-1]}
|
264 |
+
for name, rec in zip(keys, recs)
|
265 |
+
if rec[0] # filters out path==None, deleted/missing
|
266 |
+
]
|
267 |
+
return fileinfo + recinfo
|
268 |
+
|
269 |
+
def _load_one_key(self, key):
|
270 |
+
"""Get the reference for one key
|
271 |
+
|
272 |
+
Returns bytes, one-element list or three-element list.
|
273 |
+
"""
|
274 |
+
if key in self._items:
|
275 |
+
return self._items[key]
|
276 |
+
elif key in self.zmetadata:
|
277 |
+
return json.dumps(self.zmetadata[key]).encode()
|
278 |
+
elif "/" not in key or self._is_meta(key):
|
279 |
+
raise KeyError(key)
|
280 |
+
field, sub_key = key.split("/")
|
281 |
+
record, ri, chunk_size = self._key_to_record(key)
|
282 |
+
maybe = self._items.get((field, record), {}).get(ri, False)
|
283 |
+
if maybe is None:
|
284 |
+
# explicitly deleted
|
285 |
+
raise KeyError
|
286 |
+
elif maybe:
|
287 |
+
return maybe
|
288 |
+
elif chunk_size == 0:
|
289 |
+
return b""
|
290 |
+
|
291 |
+
# Chunk keys can be loaded from row group and cached in LRU cache
|
292 |
+
try:
|
293 |
+
refs = self.open_refs(field, record)
|
294 |
+
except (ValueError, TypeError, FileNotFoundError):
|
295 |
+
raise KeyError(key)
|
296 |
+
columns = ["path", "offset", "size", "raw"]
|
297 |
+
selection = [refs[c][ri] if c in refs else None for c in columns]
|
298 |
+
raw = selection[-1]
|
299 |
+
if raw is not None:
|
300 |
+
return raw
|
301 |
+
if selection[0] is None:
|
302 |
+
raise KeyError("This reference does not exist or has been deleted")
|
303 |
+
if selection[1:3] == [0, 0]:
|
304 |
+
# URL only
|
305 |
+
return selection[:1]
|
306 |
+
# URL, offset, size
|
307 |
+
return selection[:3]
|
308 |
+
|
309 |
+
@lru_cache(4096)
|
310 |
+
def _key_to_record(self, key):
|
311 |
+
"""Details needed to construct a reference for one key"""
|
312 |
+
field, chunk = key.split("/")
|
313 |
+
chunk_sizes = self._get_chunk_sizes(field)
|
314 |
+
if len(chunk_sizes) == 0:
|
315 |
+
return 0, 0, 0
|
316 |
+
chunk_idx = [int(c) for c in chunk.split(".")]
|
317 |
+
chunk_number = ravel_multi_index(chunk_idx, chunk_sizes)
|
318 |
+
record = chunk_number // self.record_size
|
319 |
+
ri = chunk_number % self.record_size
|
320 |
+
return record, ri, len(chunk_sizes)
|
321 |
+
|
322 |
+
def _get_chunk_sizes(self, field):
|
323 |
+
"""The number of chunks along each axis for a given field"""
|
324 |
+
if field not in self.chunk_sizes:
|
325 |
+
zarray = self.zmetadata[f"{field}/.zarray"]
|
326 |
+
size_ratio = [
|
327 |
+
math.ceil(s / c) for s, c in zip(zarray["shape"], zarray["chunks"])
|
328 |
+
]
|
329 |
+
self.chunk_sizes[field] = size_ratio or [1]
|
330 |
+
return self.chunk_sizes[field]
|
331 |
+
|
332 |
+
def _generate_record(self, field, record):
|
333 |
+
"""The references for a given parquet file of a given field"""
|
334 |
+
refs = self.open_refs(field, record)
|
335 |
+
it = iter(zip(*refs.values()))
|
336 |
+
if len(refs) == 3:
|
337 |
+
# All urls
|
338 |
+
return (list(t) for t in it)
|
339 |
+
elif len(refs) == 1:
|
340 |
+
# All raws
|
341 |
+
return refs["raw"]
|
342 |
+
else:
|
343 |
+
# Mix of urls and raws
|
344 |
+
return (list(t[:3]) if not t[3] else t[3] for t in it)
|
345 |
+
|
346 |
+
def _generate_all_records(self, field):
|
347 |
+
"""Load all the references within a field by iterating over the parquet files"""
|
348 |
+
nrec = 1
|
349 |
+
for ch in self._get_chunk_sizes(field):
|
350 |
+
nrec *= ch
|
351 |
+
nrec = math.ceil(nrec / self.record_size)
|
352 |
+
for record in range(nrec):
|
353 |
+
yield from self._generate_record(field, record)
|
354 |
+
|
355 |
+
def values(self):
|
356 |
+
return RefsValuesView(self)
|
357 |
+
|
358 |
+
def items(self):
|
359 |
+
return RefsItemsView(self)
|
360 |
+
|
361 |
+
def __hash__(self):
|
362 |
+
return id(self)
|
363 |
+
|
364 |
+
def __getitem__(self, key):
|
365 |
+
return self._load_one_key(key)
|
366 |
+
|
367 |
+
def __setitem__(self, key, value):
|
368 |
+
if "/" in key and not self._is_meta(key):
|
369 |
+
field, chunk = key.split("/")
|
370 |
+
record, i, _ = self._key_to_record(key)
|
371 |
+
subdict = self._items.setdefault((field, record), {})
|
372 |
+
subdict[i] = value
|
373 |
+
if len(subdict) == self.record_size:
|
374 |
+
self.write(field, record)
|
375 |
+
else:
|
376 |
+
# metadata or top-level
|
377 |
+
self._items[key] = value
|
378 |
+
new_value = json.loads(
|
379 |
+
value.decode() if isinstance(value, bytes) else value
|
380 |
+
)
|
381 |
+
self.zmetadata[key] = {**self.zmetadata.get(key, {}), **new_value}
|
382 |
+
|
383 |
+
@staticmethod
|
384 |
+
def _is_meta(key):
|
385 |
+
return key.startswith(".z") or "/.z" in key
|
386 |
+
|
387 |
+
def __delitem__(self, key):
|
388 |
+
if key in self._items:
|
389 |
+
del self._items[key]
|
390 |
+
elif key in self.zmetadata:
|
391 |
+
del self.zmetadata[key]
|
392 |
+
else:
|
393 |
+
if "/" in key and not self._is_meta(key):
|
394 |
+
field, chunk = key.split("/")
|
395 |
+
record, i, _ = self._key_to_record(key)
|
396 |
+
subdict = self._items.setdefault((field, record), {})
|
397 |
+
subdict[i] = None
|
398 |
+
if len(subdict) == self.record_size:
|
399 |
+
self.write(field, record)
|
400 |
+
else:
|
401 |
+
# metadata or top-level
|
402 |
+
self._items[key] = None
|
403 |
+
|
404 |
+
def write(self, field, record, base_url=None, storage_options=None):
|
405 |
+
# extra requirements if writing
|
406 |
+
import kerchunk.df
|
407 |
+
import numpy as np
|
408 |
+
import pandas as pd
|
409 |
+
|
410 |
+
partition = self._items[(field, record)]
|
411 |
+
original = False
|
412 |
+
if len(partition) < self.record_size:
|
413 |
+
try:
|
414 |
+
original = self.open_refs(field, record)
|
415 |
+
except IOError:
|
416 |
+
pass
|
417 |
+
|
418 |
+
if original:
|
419 |
+
paths = original["path"]
|
420 |
+
offsets = original["offset"]
|
421 |
+
sizes = original["size"]
|
422 |
+
raws = original["raw"]
|
423 |
+
else:
|
424 |
+
paths = np.full(self.record_size, np.nan, dtype="O")
|
425 |
+
offsets = np.zeros(self.record_size, dtype="int64")
|
426 |
+
sizes = np.zeros(self.record_size, dtype="int64")
|
427 |
+
raws = np.full(self.record_size, np.nan, dtype="O")
|
428 |
+
for j, data in partition.items():
|
429 |
+
if isinstance(data, list):
|
430 |
+
if (
|
431 |
+
str(paths.dtype) == "category"
|
432 |
+
and data[0] not in paths.dtype.categories
|
433 |
+
):
|
434 |
+
paths = paths.add_categories(data[0])
|
435 |
+
paths[j] = data[0]
|
436 |
+
if len(data) > 1:
|
437 |
+
offsets[j] = data[1]
|
438 |
+
sizes[j] = data[2]
|
439 |
+
elif data is None:
|
440 |
+
# delete
|
441 |
+
paths[j] = None
|
442 |
+
offsets[j] = 0
|
443 |
+
sizes[j] = 0
|
444 |
+
raws[j] = None
|
445 |
+
else:
|
446 |
+
# this is the only call into kerchunk, could remove
|
447 |
+
raws[j] = kerchunk.df._proc_raw(data)
|
448 |
+
# TODO: only save needed columns
|
449 |
+
df = pd.DataFrame(
|
450 |
+
{
|
451 |
+
"path": paths,
|
452 |
+
"offset": offsets,
|
453 |
+
"size": sizes,
|
454 |
+
"raw": raws,
|
455 |
+
},
|
456 |
+
copy=False,
|
457 |
+
)
|
458 |
+
if df.path.count() / (df.path.nunique() or 1) > self.cat_thresh:
|
459 |
+
df["path"] = df["path"].astype("category")
|
460 |
+
object_encoding = {"raw": "bytes", "path": "utf8"}
|
461 |
+
has_nulls = ["path", "raw"]
|
462 |
+
|
463 |
+
fn = f"{base_url or self.out_root}/{field}/refs.{record}.parq"
|
464 |
+
self.fs.mkdirs(f"{base_url or self.out_root}/{field}", exist_ok=True)
|
465 |
+
df.to_parquet(
|
466 |
+
fn,
|
467 |
+
engine="fastparquet",
|
468 |
+
storage_options=storage_options
|
469 |
+
or getattr(self.fs, "storage_options", None),
|
470 |
+
compression="zstd",
|
471 |
+
index=False,
|
472 |
+
stats=False,
|
473 |
+
object_encoding=object_encoding,
|
474 |
+
has_nulls=has_nulls,
|
475 |
+
# **kwargs,
|
476 |
+
)
|
477 |
+
partition.clear()
|
478 |
+
self._items.pop((field, record))
|
479 |
+
|
480 |
+
def flush(self, base_url=None, storage_options=None):
|
481 |
+
"""Output any modified or deleted keys
|
482 |
+
|
483 |
+
Parameters
|
484 |
+
----------
|
485 |
+
base_url: str
|
486 |
+
Location of the output
|
487 |
+
"""
|
488 |
+
# write what we have so far and clear sub chunks
|
489 |
+
for thing in list(self._items):
|
490 |
+
if isinstance(thing, tuple):
|
491 |
+
field, record = thing
|
492 |
+
self.write(
|
493 |
+
field,
|
494 |
+
record,
|
495 |
+
base_url=base_url,
|
496 |
+
storage_options=storage_options,
|
497 |
+
)
|
498 |
+
|
499 |
+
# gather .zmetadata from self._items and write that too
|
500 |
+
for k in list(self._items):
|
501 |
+
if k != ".zmetadata" and ".z" in k:
|
502 |
+
self.zmetadata[k] = json.loads(self._items.pop(k))
|
503 |
+
met = {"metadata": self.zmetadata, "record_size": self.record_size}
|
504 |
+
self._items[".zmetadata"] = json.dumps(met).encode()
|
505 |
+
self.fs.pipe(
|
506 |
+
"/".join([base_url or self.out_root, ".zmetadata"]),
|
507 |
+
self._items[".zmetadata"],
|
508 |
+
)
|
509 |
+
|
510 |
+
# TODO: only clear those that we wrote to?
|
511 |
+
self.open_refs.cache_clear()
|
512 |
+
|
513 |
+
def __len__(self):
|
514 |
+
# Caveat: This counts expected references, not actual - but is fast
|
515 |
+
count = 0
|
516 |
+
for field in self.listdir():
|
517 |
+
if field.startswith("."):
|
518 |
+
count += 1
|
519 |
+
else:
|
520 |
+
count += math.prod(self._get_chunk_sizes(field))
|
521 |
+
count += len(self.zmetadata) # all metadata keys
|
522 |
+
# any other files not in reference partitions
|
523 |
+
count += sum(1 for _ in self._items if not isinstance(_, tuple))
|
524 |
+
return count
|
525 |
+
|
526 |
+
def __iter__(self):
|
527 |
+
# Caveat: returns only existing keys, so the number of these does not
|
528 |
+
# match len(self)
|
529 |
+
metas = set(self.zmetadata)
|
530 |
+
metas.update(self._items)
|
531 |
+
for bit in metas:
|
532 |
+
if isinstance(bit, str):
|
533 |
+
yield bit
|
534 |
+
for field in self.listdir():
|
535 |
+
for k in self._keys_in_field(field):
|
536 |
+
if k in self:
|
537 |
+
yield k
|
538 |
+
|
539 |
+
def __contains__(self, item):
|
540 |
+
try:
|
541 |
+
self._load_one_key(item)
|
542 |
+
return True
|
543 |
+
except KeyError:
|
544 |
+
return False
|
545 |
+
|
546 |
+
def _keys_in_field(self, field):
|
547 |
+
"""List key names in given field
|
548 |
+
|
549 |
+
Produces strings like "field/x.y" appropriate from the chunking of the array
|
550 |
+
"""
|
551 |
+
chunk_sizes = self._get_chunk_sizes(field)
|
552 |
+
if len(chunk_sizes) == 0:
|
553 |
+
yield field + "/0"
|
554 |
+
return
|
555 |
+
inds = itertools.product(*(range(i) for i in chunk_sizes))
|
556 |
+
for ind in inds:
|
557 |
+
yield field + "/" + ".".join([str(c) for c in ind])
|
558 |
+
|
559 |
+
|
560 |
+
class ReferenceFileSystem(AsyncFileSystem):
|
561 |
+
"""View byte ranges of some other file as a file system
|
562 |
+
Initial version: single file system target, which must support
|
563 |
+
async, and must allow start and end args in _cat_file. Later versions
|
564 |
+
may allow multiple arbitrary URLs for the targets.
|
565 |
+
This FileSystem is read-only. It is designed to be used with async
|
566 |
+
targets (for now). This FileSystem only allows whole-file access, no
|
567 |
+
``open``. We do not get original file details from the target FS.
|
568 |
+
Configuration is by passing a dict of references at init, or a URL to
|
569 |
+
a JSON file containing the same; this dict
|
570 |
+
can also contain concrete data for some set of paths.
|
571 |
+
Reference dict format:
|
572 |
+
{path0: bytes_data, path1: (target_url, offset, size)}
|
573 |
+
https://github.com/fsspec/kerchunk/blob/main/README.md
|
574 |
+
"""
|
575 |
+
|
576 |
+
protocol = "reference"
|
577 |
+
|
578 |
+
def __init__(
|
579 |
+
self,
|
580 |
+
fo,
|
581 |
+
target=None,
|
582 |
+
ref_storage_args=None,
|
583 |
+
target_protocol=None,
|
584 |
+
target_options=None,
|
585 |
+
remote_protocol=None,
|
586 |
+
remote_options=None,
|
587 |
+
fs=None,
|
588 |
+
template_overrides=None,
|
589 |
+
simple_templates=True,
|
590 |
+
max_gap=64_000,
|
591 |
+
max_block=256_000_000,
|
592 |
+
cache_size=128,
|
593 |
+
**kwargs,
|
594 |
+
):
|
595 |
+
"""
|
596 |
+
Parameters
|
597 |
+
----------
|
598 |
+
fo : dict or str
|
599 |
+
The set of references to use for this instance, with a structure as above.
|
600 |
+
If str referencing a JSON file, will use fsspec.open, in conjunction
|
601 |
+
with target_options and target_protocol to open and parse JSON at this
|
602 |
+
location. If a directory, then assume references are a set of parquet
|
603 |
+
files to be loaded lazily.
|
604 |
+
target : str
|
605 |
+
For any references having target_url as None, this is the default file
|
606 |
+
target to use
|
607 |
+
ref_storage_args : dict
|
608 |
+
If references is a str, use these kwargs for loading the JSON file.
|
609 |
+
Deprecated: use target_options instead.
|
610 |
+
target_protocol : str
|
611 |
+
Used for loading the reference file, if it is a path. If None, protocol
|
612 |
+
will be derived from the given path
|
613 |
+
target_options : dict
|
614 |
+
Extra FS options for loading the reference file ``fo``, if given as a path
|
615 |
+
remote_protocol : str
|
616 |
+
The protocol of the filesystem on which the references will be evaluated
|
617 |
+
(unless fs is provided). If not given, will be derived from the first
|
618 |
+
URL that has a protocol in the templates or in the references, in that
|
619 |
+
order.
|
620 |
+
remote_options : dict
|
621 |
+
kwargs to go with remote_protocol
|
622 |
+
fs : AbstractFileSystem | dict(str, (AbstractFileSystem | dict))
|
623 |
+
Directly provide a file system(s):
|
624 |
+
- a single filesystem instance
|
625 |
+
- a dict of protocol:filesystem, where each value is either a filesystem
|
626 |
+
instance, or a dict of kwargs that can be used to create in
|
627 |
+
instance for the given protocol
|
628 |
+
|
629 |
+
If this is given, remote_options and remote_protocol are ignored.
|
630 |
+
template_overrides : dict
|
631 |
+
Swap out any templates in the references file with these - useful for
|
632 |
+
testing.
|
633 |
+
simple_templates: bool
|
634 |
+
Whether templates can be processed with simple replace (True) or if
|
635 |
+
jinja is needed (False, much slower). All reference sets produced by
|
636 |
+
``kerchunk`` are simple in this sense, but the spec allows for complex.
|
637 |
+
max_gap, max_block: int
|
638 |
+
For merging multiple concurrent requests to the same remote file.
|
639 |
+
Neighboring byte ranges will only be merged when their
|
640 |
+
inter-range gap is <= ``max_gap``. Default is 64KB. Set to 0
|
641 |
+
to only merge when it requires no extra bytes. Pass a negative
|
642 |
+
number to disable merging, appropriate for local target files.
|
643 |
+
Neighboring byte ranges will only be merged when the size of
|
644 |
+
the aggregated range is <= ``max_block``. Default is 256MB.
|
645 |
+
cache_size : int
|
646 |
+
Maximum size of LRU cache, where cache_size*record_size denotes
|
647 |
+
the total number of references that can be loaded in memory at once.
|
648 |
+
Only used for lazily loaded references.
|
649 |
+
kwargs : passed to parent class
|
650 |
+
"""
|
651 |
+
super().__init__(**kwargs)
|
652 |
+
self.target = target
|
653 |
+
self.template_overrides = template_overrides
|
654 |
+
self.simple_templates = simple_templates
|
655 |
+
self.templates = {}
|
656 |
+
self.fss = {}
|
657 |
+
self._dircache = {}
|
658 |
+
self.max_gap = max_gap
|
659 |
+
self.max_block = max_block
|
660 |
+
if isinstance(fo, str):
|
661 |
+
dic = dict(
|
662 |
+
**(ref_storage_args or target_options or {}), protocol=target_protocol
|
663 |
+
)
|
664 |
+
ref_fs, fo2 = fsspec.core.url_to_fs(fo, **dic)
|
665 |
+
if ref_fs.isfile(fo2):
|
666 |
+
# text JSON
|
667 |
+
with fsspec.open(fo, "rb", **dic) as f:
|
668 |
+
logger.info("Read reference from URL %s", fo)
|
669 |
+
text = json.load(f)
|
670 |
+
self._process_references(text, template_overrides)
|
671 |
+
else:
|
672 |
+
# Lazy parquet refs
|
673 |
+
logger.info("Open lazy reference dict from URL %s", fo)
|
674 |
+
self.references = LazyReferenceMapper(
|
675 |
+
fo2,
|
676 |
+
fs=ref_fs,
|
677 |
+
cache_size=cache_size,
|
678 |
+
)
|
679 |
+
else:
|
680 |
+
# dictionaries
|
681 |
+
self._process_references(fo, template_overrides)
|
682 |
+
if isinstance(fs, dict):
|
683 |
+
self.fss = {
|
684 |
+
k: (
|
685 |
+
fsspec.filesystem(k.split(":", 1)[0], **opts)
|
686 |
+
if isinstance(opts, dict)
|
687 |
+
else opts
|
688 |
+
)
|
689 |
+
for k, opts in fs.items()
|
690 |
+
}
|
691 |
+
if None not in self.fss:
|
692 |
+
self.fss[None] = filesystem("file")
|
693 |
+
return
|
694 |
+
if fs is not None:
|
695 |
+
# single remote FS
|
696 |
+
remote_protocol = (
|
697 |
+
fs.protocol[0] if isinstance(fs.protocol, tuple) else fs.protocol
|
698 |
+
)
|
699 |
+
self.fss[remote_protocol] = fs
|
700 |
+
|
701 |
+
if remote_protocol is None:
|
702 |
+
# get single protocol from any templates
|
703 |
+
for ref in self.templates.values():
|
704 |
+
if callable(ref):
|
705 |
+
ref = ref()
|
706 |
+
protocol, _ = fsspec.core.split_protocol(ref)
|
707 |
+
if protocol and protocol not in self.fss:
|
708 |
+
fs = filesystem(protocol, **(remote_options or {}))
|
709 |
+
self.fss[protocol] = fs
|
710 |
+
if remote_protocol is None:
|
711 |
+
# get single protocol from references
|
712 |
+
# TODO: warning here, since this can be very expensive?
|
713 |
+
for ref in self.references.values():
|
714 |
+
if callable(ref):
|
715 |
+
ref = ref()
|
716 |
+
if isinstance(ref, list) and ref[0]:
|
717 |
+
protocol, _ = fsspec.core.split_protocol(ref[0])
|
718 |
+
if protocol not in self.fss:
|
719 |
+
fs = filesystem(protocol, **(remote_options or {}))
|
720 |
+
self.fss[protocol] = fs
|
721 |
+
# only use first remote URL
|
722 |
+
break
|
723 |
+
|
724 |
+
if remote_protocol and remote_protocol not in self.fss:
|
725 |
+
fs = filesystem(remote_protocol, **(remote_options or {}))
|
726 |
+
self.fss[remote_protocol] = fs
|
727 |
+
|
728 |
+
self.fss[None] = fs or filesystem("file") # default one
|
729 |
+
|
730 |
+
def _cat_common(self, path, start=None, end=None):
|
731 |
+
path = self._strip_protocol(path)
|
732 |
+
logger.debug(f"cat: {path}")
|
733 |
+
try:
|
734 |
+
part = self.references[path]
|
735 |
+
except KeyError:
|
736 |
+
raise FileNotFoundError(path)
|
737 |
+
if isinstance(part, str):
|
738 |
+
part = part.encode()
|
739 |
+
if isinstance(part, bytes):
|
740 |
+
logger.debug(f"Reference: {path}, type bytes")
|
741 |
+
if part.startswith(b"base64:"):
|
742 |
+
part = base64.b64decode(part[7:])
|
743 |
+
return part, None, None
|
744 |
+
|
745 |
+
if len(part) == 1:
|
746 |
+
logger.debug(f"Reference: {path}, whole file => {part}")
|
747 |
+
url = part[0]
|
748 |
+
start1, end1 = start, end
|
749 |
+
else:
|
750 |
+
url, start0, size = part
|
751 |
+
logger.debug(f"Reference: {path} => {url}, offset {start0}, size {size}")
|
752 |
+
end0 = start0 + size
|
753 |
+
|
754 |
+
if start is not None:
|
755 |
+
if start >= 0:
|
756 |
+
start1 = start0 + start
|
757 |
+
else:
|
758 |
+
start1 = end0 + start
|
759 |
+
else:
|
760 |
+
start1 = start0
|
761 |
+
if end is not None:
|
762 |
+
if end >= 0:
|
763 |
+
end1 = start0 + end
|
764 |
+
else:
|
765 |
+
end1 = end0 + end
|
766 |
+
else:
|
767 |
+
end1 = end0
|
768 |
+
if url is None:
|
769 |
+
url = self.target
|
770 |
+
return url, start1, end1
|
771 |
+
|
772 |
+
async def _cat_file(self, path, start=None, end=None, **kwargs):
|
773 |
+
part_or_url, start0, end0 = self._cat_common(path, start=start, end=end)
|
774 |
+
if isinstance(part_or_url, bytes):
|
775 |
+
return part_or_url[start:end]
|
776 |
+
protocol, _ = split_protocol(part_or_url)
|
777 |
+
try:
|
778 |
+
await self.fss[protocol]._cat_file(part_or_url, start=start, end=end)
|
779 |
+
except Exception as e:
|
780 |
+
raise ReferenceNotReachable(path, part_or_url) from e
|
781 |
+
|
782 |
+
def cat_file(self, path, start=None, end=None, **kwargs):
|
783 |
+
part_or_url, start0, end0 = self._cat_common(path, start=start, end=end)
|
784 |
+
if isinstance(part_or_url, bytes):
|
785 |
+
return part_or_url[start:end]
|
786 |
+
protocol, _ = split_protocol(part_or_url)
|
787 |
+
try:
|
788 |
+
return self.fss[protocol].cat_file(part_or_url, start=start0, end=end0)
|
789 |
+
except Exception as e:
|
790 |
+
raise ReferenceNotReachable(path, part_or_url) from e
|
791 |
+
|
792 |
+
def pipe_file(self, path, value, **_):
|
793 |
+
"""Temporarily add binary data or reference as a file"""
|
794 |
+
self.references[path] = value
|
795 |
+
|
796 |
+
async def _get_file(self, rpath, lpath, **kwargs):
|
797 |
+
if self.isdir(rpath):
|
798 |
+
return os.makedirs(lpath, exist_ok=True)
|
799 |
+
data = await self._cat_file(rpath)
|
800 |
+
with open(lpath, "wb") as f:
|
801 |
+
f.write(data)
|
802 |
+
|
803 |
+
def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, **kwargs):
|
804 |
+
if self.isdir(rpath):
|
805 |
+
return os.makedirs(lpath, exist_ok=True)
|
806 |
+
data = self.cat_file(rpath, **kwargs)
|
807 |
+
callback.set_size(len(data))
|
808 |
+
if isfilelike(lpath):
|
809 |
+
lpath.write(data)
|
810 |
+
else:
|
811 |
+
with open(lpath, "wb") as f:
|
812 |
+
f.write(data)
|
813 |
+
callback.absolute_update(len(data))
|
814 |
+
|
815 |
+
def get(self, rpath, lpath, recursive=False, **kwargs):
|
816 |
+
if recursive:
|
817 |
+
# trigger directory build
|
818 |
+
self.ls("")
|
819 |
+
rpath = self.expand_path(rpath, recursive=recursive)
|
820 |
+
fs = fsspec.filesystem("file", auto_mkdir=True)
|
821 |
+
targets = other_paths(rpath, lpath)
|
822 |
+
if recursive:
|
823 |
+
data = self.cat([r for r in rpath if not self.isdir(r)])
|
824 |
+
else:
|
825 |
+
data = self.cat(rpath)
|
826 |
+
for remote, local in zip(rpath, targets):
|
827 |
+
if remote in data:
|
828 |
+
fs.pipe_file(local, data[remote])
|
829 |
+
|
830 |
+
def cat(self, path, recursive=False, on_error="raise", **kwargs):
|
831 |
+
if isinstance(path, str) and recursive:
|
832 |
+
raise NotImplementedError
|
833 |
+
if isinstance(path, list) and (recursive or any("*" in p for p in path)):
|
834 |
+
raise NotImplementedError
|
835 |
+
# TODO: if references is lazy, pre-fetch all paths in batch before access
|
836 |
+
proto_dict = _protocol_groups(path, self.references)
|
837 |
+
out = {}
|
838 |
+
for proto, paths in proto_dict.items():
|
839 |
+
fs = self.fss[proto]
|
840 |
+
urls, starts, ends, valid_paths = [], [], [], []
|
841 |
+
for p in paths:
|
842 |
+
# find references or label not-found. Early exit if any not
|
843 |
+
# found and on_error is "raise"
|
844 |
+
try:
|
845 |
+
u, s, e = self._cat_common(p)
|
846 |
+
except FileNotFoundError as err:
|
847 |
+
if on_error == "raise":
|
848 |
+
raise
|
849 |
+
if on_error != "omit":
|
850 |
+
out[p] = err
|
851 |
+
else:
|
852 |
+
urls.append(u)
|
853 |
+
starts.append(s)
|
854 |
+
ends.append(e)
|
855 |
+
valid_paths.append(p)
|
856 |
+
|
857 |
+
# process references into form for merging
|
858 |
+
urls2 = []
|
859 |
+
starts2 = []
|
860 |
+
ends2 = []
|
861 |
+
paths2 = []
|
862 |
+
whole_files = set()
|
863 |
+
for u, s, e, p in zip(urls, starts, ends, valid_paths):
|
864 |
+
if isinstance(u, bytes):
|
865 |
+
# data
|
866 |
+
out[p] = u
|
867 |
+
elif s is None:
|
868 |
+
# whole file - limits are None, None, but no further
|
869 |
+
# entries take for this file
|
870 |
+
whole_files.add(u)
|
871 |
+
urls2.append(u)
|
872 |
+
starts2.append(s)
|
873 |
+
ends2.append(e)
|
874 |
+
paths2.append(p)
|
875 |
+
for u, s, e, p in zip(urls, starts, ends, valid_paths):
|
876 |
+
# second run to account for files that are to be loaded whole
|
877 |
+
if s is not None and u not in whole_files:
|
878 |
+
urls2.append(u)
|
879 |
+
starts2.append(s)
|
880 |
+
ends2.append(e)
|
881 |
+
paths2.append(p)
|
882 |
+
|
883 |
+
# merge and fetch consolidated ranges
|
884 |
+
new_paths, new_starts, new_ends = merge_offset_ranges(
|
885 |
+
list(urls2),
|
886 |
+
list(starts2),
|
887 |
+
list(ends2),
|
888 |
+
sort=True,
|
889 |
+
max_gap=self.max_gap,
|
890 |
+
max_block=self.max_block,
|
891 |
+
)
|
892 |
+
bytes_out = fs.cat_ranges(new_paths, new_starts, new_ends)
|
893 |
+
|
894 |
+
# unbundle from merged bytes - simple approach
|
895 |
+
for u, s, e, p in zip(urls, starts, ends, valid_paths):
|
896 |
+
if p in out:
|
897 |
+
continue # was bytes, already handled
|
898 |
+
for np, ns, ne, b in zip(new_paths, new_starts, new_ends, bytes_out):
|
899 |
+
if np == u and (ns is None or ne is None):
|
900 |
+
if isinstance(b, Exception):
|
901 |
+
out[p] = b
|
902 |
+
else:
|
903 |
+
out[p] = b[s:e]
|
904 |
+
elif np == u and s >= ns and e <= ne:
|
905 |
+
if isinstance(b, Exception):
|
906 |
+
out[p] = b
|
907 |
+
else:
|
908 |
+
out[p] = b[s - ns : (e - ne) or None]
|
909 |
+
|
910 |
+
for k, v in out.copy().items():
|
911 |
+
# these were valid references, but fetch failed, so transform exc
|
912 |
+
if isinstance(v, Exception) and k in self.references:
|
913 |
+
ex = out[k]
|
914 |
+
new_ex = ReferenceNotReachable(k, self.references[k])
|
915 |
+
new_ex.__cause__ = ex
|
916 |
+
if on_error == "raise":
|
917 |
+
raise new_ex
|
918 |
+
elif on_error != "omit":
|
919 |
+
out[k] = new_ex
|
920 |
+
|
921 |
+
if len(out) == 1 and isinstance(path, str) and "*" not in path:
|
922 |
+
return _first(out)
|
923 |
+
return out
|
924 |
+
|
925 |
+
def _process_references(self, references, template_overrides=None):
|
926 |
+
vers = references.get("version", None)
|
927 |
+
if vers is None:
|
928 |
+
self._process_references0(references)
|
929 |
+
elif vers == 1:
|
930 |
+
self._process_references1(references, template_overrides=template_overrides)
|
931 |
+
else:
|
932 |
+
raise ValueError(f"Unknown reference spec version: {vers}")
|
933 |
+
# TODO: we make dircache by iterating over all entries, but for Spec >= 1,
|
934 |
+
# can replace with programmatic. Is it even needed for mapper interface?
|
935 |
+
|
936 |
+
def _process_references0(self, references):
|
937 |
+
"""Make reference dict for Spec Version 0"""
|
938 |
+
self.references = references
|
939 |
+
|
940 |
+
def _process_references1(self, references, template_overrides=None):
|
941 |
+
if not self.simple_templates or self.templates:
|
942 |
+
import jinja2
|
943 |
+
self.references = {}
|
944 |
+
self._process_templates(references.get("templates", {}))
|
945 |
+
|
946 |
+
@lru_cache(1000)
|
947 |
+
def _render_jinja(u):
|
948 |
+
return jinja2.Template(u).render(**self.templates)
|
949 |
+
|
950 |
+
for k, v in references.get("refs", {}).items():
|
951 |
+
if isinstance(v, str):
|
952 |
+
if v.startswith("base64:"):
|
953 |
+
self.references[k] = base64.b64decode(v[7:])
|
954 |
+
self.references[k] = v
|
955 |
+
elif self.templates:
|
956 |
+
u = v[0]
|
957 |
+
if "{{" in u:
|
958 |
+
if self.simple_templates:
|
959 |
+
u = (
|
960 |
+
u.replace("{{", "{")
|
961 |
+
.replace("}}", "}")
|
962 |
+
.format(**self.templates)
|
963 |
+
)
|
964 |
+
else:
|
965 |
+
u = _render_jinja(u)
|
966 |
+
self.references[k] = [u] if len(v) == 1 else [u, v[1], v[2]]
|
967 |
+
else:
|
968 |
+
self.references[k] = v
|
969 |
+
self.references.update(self._process_gen(references.get("gen", [])))
|
970 |
+
|
971 |
+
def _process_templates(self, tmp):
|
972 |
+
self.templates = {}
|
973 |
+
if self.template_overrides is not None:
|
974 |
+
tmp.update(self.template_overrides)
|
975 |
+
for k, v in tmp.items():
|
976 |
+
if "{{" in v:
|
977 |
+
import jinja2
|
978 |
+
|
979 |
+
self.templates[k] = lambda temp=v, **kwargs: jinja2.Template(
|
980 |
+
temp
|
981 |
+
).render(**kwargs)
|
982 |
+
else:
|
983 |
+
self.templates[k] = v
|
984 |
+
|
985 |
+
def _process_gen(self, gens):
|
986 |
+
out = {}
|
987 |
+
for gen in gens:
|
988 |
+
dimension = {
|
989 |
+
k: v
|
990 |
+
if isinstance(v, list)
|
991 |
+
else range(v.get("start", 0), v["stop"], v.get("step", 1))
|
992 |
+
for k, v in gen["dimensions"].items()
|
993 |
+
}
|
994 |
+
products = (
|
995 |
+
dict(zip(dimension.keys(), values))
|
996 |
+
for values in itertools.product(*dimension.values())
|
997 |
+
)
|
998 |
+
for pr in products:
|
999 |
+
import jinja2
|
1000 |
+
|
1001 |
+
key = jinja2.Template(gen["key"]).render(**pr, **self.templates)
|
1002 |
+
url = jinja2.Template(gen["url"]).render(**pr, **self.templates)
|
1003 |
+
if ("offset" in gen) and ("length" in gen):
|
1004 |
+
offset = int(
|
1005 |
+
jinja2.Template(gen["offset"]).render(**pr, **self.templates)
|
1006 |
+
)
|
1007 |
+
length = int(
|
1008 |
+
jinja2.Template(gen["length"]).render(**pr, **self.templates)
|
1009 |
+
)
|
1010 |
+
out[key] = [url, offset, length]
|
1011 |
+
elif ("offset" in gen) ^ ("length" in gen):
|
1012 |
+
raise ValueError(
|
1013 |
+
"Both 'offset' and 'length' are required for a "
|
1014 |
+
"reference generator entry if either is provided."
|
1015 |
+
)
|
1016 |
+
else:
|
1017 |
+
out[key] = [url]
|
1018 |
+
return out
|
1019 |
+
|
1020 |
+
def _dircache_from_items(self):
|
1021 |
+
self.dircache = {"": []}
|
1022 |
+
it = self.references.items()
|
1023 |
+
for path, part in it:
|
1024 |
+
if isinstance(part, (bytes, str)):
|
1025 |
+
size = len(part)
|
1026 |
+
elif len(part) == 1:
|
1027 |
+
size = None
|
1028 |
+
else:
|
1029 |
+
_, _, size = part
|
1030 |
+
par = path.rsplit("/", 1)[0] if "/" in path else ""
|
1031 |
+
par0 = par
|
1032 |
+
subdirs = [par0]
|
1033 |
+
while par0 and par0 not in self.dircache:
|
1034 |
+
# collect parent directories
|
1035 |
+
par0 = self._parent(par0)
|
1036 |
+
subdirs.append(par0)
|
1037 |
+
|
1038 |
+
subdirs = subdirs[::-1]
|
1039 |
+
for parent, child in zip(subdirs, subdirs[1:]):
|
1040 |
+
# register newly discovered directories
|
1041 |
+
assert child not in self.dircache
|
1042 |
+
assert parent in self.dircache
|
1043 |
+
self.dircache[parent].append(
|
1044 |
+
{"name": child, "type": "directory", "size": 0}
|
1045 |
+
)
|
1046 |
+
self.dircache[child] = []
|
1047 |
+
|
1048 |
+
self.dircache[par].append({"name": path, "type": "file", "size": size})
|
1049 |
+
|
1050 |
+
def _open(self, path, mode="rb", block_size=None, cache_options=None, **kwargs):
|
1051 |
+
data = self.cat_file(path) # load whole chunk into memory
|
1052 |
+
return io.BytesIO(data)
|
1053 |
+
|
1054 |
+
def ls(self, path, detail=True, **kwargs):
|
1055 |
+
path = self._strip_protocol(path)
|
1056 |
+
if isinstance(self.references, LazyReferenceMapper):
|
1057 |
+
try:
|
1058 |
+
return self.references.ls(path, detail)
|
1059 |
+
except KeyError:
|
1060 |
+
pass
|
1061 |
+
raise FileNotFoundError(f"'{path}' is not a known key")
|
1062 |
+
if not self.dircache:
|
1063 |
+
self._dircache_from_items()
|
1064 |
+
out = self._ls_from_cache(path)
|
1065 |
+
if out is None:
|
1066 |
+
raise FileNotFoundError(path)
|
1067 |
+
if detail:
|
1068 |
+
return out
|
1069 |
+
return [o["name"] for o in out]
|
1070 |
+
|
1071 |
+
def exists(self, path, **kwargs): # overwrite auto-sync version
|
1072 |
+
return self.isdir(path) or self.isfile(path)
|
1073 |
+
|
1074 |
+
def isdir(self, path): # overwrite auto-sync version
|
1075 |
+
if self.dircache:
|
1076 |
+
return path in self.dircache
|
1077 |
+
elif isinstance(self.references, LazyReferenceMapper):
|
1078 |
+
return path in self.references.listdir("")
|
1079 |
+
else:
|
1080 |
+
# this may be faster than building dircache for single calls, but
|
1081 |
+
# by looping will be slow for many calls; could cache it?
|
1082 |
+
return any(_.startswith(f"{path}/") for _ in self.references)
|
1083 |
+
|
1084 |
+
def isfile(self, path): # overwrite auto-sync version
|
1085 |
+
return path in self.references
|
1086 |
+
|
1087 |
+
async def _ls(self, path, detail=True, **kwargs): # calls fast sync code
|
1088 |
+
return self.ls(path, detail, **kwargs)
|
1089 |
+
|
1090 |
+
def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
|
1091 |
+
if withdirs:
|
1092 |
+
return super().find(
|
1093 |
+
path, maxdepth=maxdepth, withdirs=withdirs, detail=detail, **kwargs
|
1094 |
+
)
|
1095 |
+
if path:
|
1096 |
+
path = self._strip_protocol(path)
|
1097 |
+
r = sorted(k for k in self.references if k.startswith(path))
|
1098 |
+
else:
|
1099 |
+
r = sorted(self.references)
|
1100 |
+
if detail:
|
1101 |
+
if not self.dircache:
|
1102 |
+
self._dircache_from_items()
|
1103 |
+
return {k: self._ls_from_cache(k)[0] for k in r}
|
1104 |
+
else:
|
1105 |
+
return r
|
1106 |
+
|
1107 |
+
def info(self, path, **kwargs):
|
1108 |
+
out = self.references.get(path)
|
1109 |
+
if out is not None:
|
1110 |
+
if isinstance(out, (str, bytes)):
|
1111 |
+
# decode base64 here
|
1112 |
+
return {"name": path, "type": "file", "size": len(out)}
|
1113 |
+
elif len(out) > 1:
|
1114 |
+
return {"name": path, "type": "file", "size": out[2]}
|
1115 |
+
else:
|
1116 |
+
out0 = [{"name": path, "type": "file", "size": None}]
|
1117 |
+
else:
|
1118 |
+
out = self.ls(path, True)
|
1119 |
+
out0 = [o for o in out if o["name"] == path]
|
1120 |
+
if not out0:
|
1121 |
+
return {"name": path, "type": "directory", "size": 0}
|
1122 |
+
if out0[0]["size"] is None:
|
1123 |
+
# if this is a whole remote file, update size using remote FS
|
1124 |
+
prot, _ = split_protocol(self.references[path][0])
|
1125 |
+
out0[0]["size"] = self.fss[prot].size(self.references[path][0])
|
1126 |
+
return out0[0]
|
1127 |
+
|
1128 |
+
async def _info(self, path, **kwargs): # calls fast sync code
|
1129 |
+
return self.info(path)
|
1130 |
+
|
1131 |
+
async def _rm_file(self, path, **kwargs):
|
1132 |
+
self.references.pop(
|
1133 |
+
path, None
|
1134 |
+
) # ignores FileNotFound, just as well for directories
|
1135 |
+
self.dircache.clear() # this is a bit heavy handed
|
1136 |
+
|
1137 |
+
async def _pipe_file(self, path, data):
|
1138 |
+
# can be str or bytes
|
1139 |
+
self.references[path] = data
|
1140 |
+
self.dircache.clear() # this is a bit heavy handed
|
1141 |
+
|
1142 |
+
async def _put_file(self, lpath, rpath, **kwargs):
|
1143 |
+
# puts binary
|
1144 |
+
with open(lpath, "rb") as f:
|
1145 |
+
self.references[rpath] = f.read()
|
1146 |
+
self.dircache.clear() # this is a bit heavy handed
|
1147 |
+
|
1148 |
+
def save_json(self, url, **storage_options):
|
1149 |
+
"""Write modified references into new location"""
|
1150 |
+
out = {}
|
1151 |
+
for k, v in self.references.items():
|
1152 |
+
if isinstance(v, bytes):
|
1153 |
+
try:
|
1154 |
+
out[k] = v.decode("ascii")
|
1155 |
+
except UnicodeDecodeError:
|
1156 |
+
out[k] = (b"base64:" + base64.b64encode(v)).decode()
|
1157 |
+
else:
|
1158 |
+
out[k] = v
|
1159 |
+
with fsspec.open(url, "wb", **storage_options) as f:
|
1160 |
+
f.write(json.dumps({"version": 1, "refs": out}).encode())
|
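The ReferenceFileSystem added above can be exercised with a small in-memory reference dict; the following sketch is not part of the packaged file and the target URL is invented.

# Usage sketch for ReferenceFileSystem (illustrative only; the URL is a placeholder).
import fsspec

refs = {
    "raw": b"inline bytes stored directly in the reference dict",
    "a/b": ["https://example.com/data.bin", 0, 100],  # (target, offset, size)
}
fs = fsspec.filesystem("reference", fo=refs, remote_protocol="https")
print(fs.cat_file("raw"))        # served straight from the dict, no network access
# fs.cat_file("a/b") would fetch bytes 0-99 of the target via the https filesystem
print(fs.ls("", detail=False))   # expect the implied directory 'a' plus 'raw'
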
.venv/Lib/site-packages/fsspec/implementations/sftp.py
ADDED
@@ -0,0 +1,180 @@
1 |
+
import datetime
|
2 |
+
import logging
|
3 |
+
import os
|
4 |
+
import types
|
5 |
+
import uuid
|
6 |
+
from stat import S_ISDIR, S_ISLNK
|
7 |
+
|
8 |
+
import paramiko
|
9 |
+
|
10 |
+
from .. import AbstractFileSystem
|
11 |
+
from ..utils import infer_storage_options
|
12 |
+
|
13 |
+
logger = logging.getLogger("fsspec.sftp")
|
14 |
+
|
15 |
+
|
16 |
+
class SFTPFileSystem(AbstractFileSystem):
|
17 |
+
"""Files over SFTP/SSH
|
18 |
+
|
19 |
+
Peer-to-peer filesystem over SSH using paramiko.
|
20 |
+
|
21 |
+
    Note: if using this with the ``open`` or ``open_files``, with full URLs,
    there is no way to tell if a path is relative, so all paths are assumed
    to be absolute.
    """

    protocol = "sftp", "ssh"

    def __init__(self, host, **ssh_kwargs):
        """

        Parameters
        ----------
        host: str
            Hostname or IP as a string
        temppath: str
            Location on the server to put files, when within a transaction
        ssh_kwargs: dict
            Parameters passed on to connection. See details in
            https://docs.paramiko.org/en/3.3/api/client.html#paramiko.client.SSHClient.connect
            May include port, username, password...
        """
        if self._cached:
            return
        super().__init__(**ssh_kwargs)
        self.temppath = ssh_kwargs.pop("temppath", "/tmp")  # remote temp directory
        self.host = host
        self.ssh_kwargs = ssh_kwargs
        self._connect()

    def _connect(self):
        logger.debug("Connecting to SFTP server %s", self.host)
        self.client = paramiko.SSHClient()
        self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        self.client.connect(self.host, **self.ssh_kwargs)
        self.ftp = self.client.open_sftp()

    @classmethod
    def _strip_protocol(cls, path):
        return infer_storage_options(path)["path"]

    @staticmethod
    def _get_kwargs_from_urls(urlpath):
        out = infer_storage_options(urlpath)
        out.pop("path", None)
        out.pop("protocol", None)
        return out

    def mkdir(self, path, create_parents=True, mode=511):
        logger.debug("Creating folder %s", path)
        if self.exists(path):
            raise FileExistsError(f"File exists: {path}")

        if create_parents:
            self.makedirs(path)
        else:
            self.ftp.mkdir(path, mode)

    def makedirs(self, path, exist_ok=False, mode=511):
        if self.exists(path) and not exist_ok:
            raise FileExistsError(f"File exists: {path}")

        parts = path.split("/")
        new_path = "/" if path[:1] == "/" else ""

        for part in parts:
            if part:
                new_path = f"{new_path}/{part}" if new_path else part
                if not self.exists(new_path):
                    self.ftp.mkdir(new_path, mode)

    def rmdir(self, path):
        logger.debug("Removing folder %s", path)
        self.ftp.rmdir(path)

    def info(self, path):
        stat = self._decode_stat(self.ftp.stat(path))
        stat["name"] = path
        return stat

    @staticmethod
    def _decode_stat(stat, parent_path=None):
        if S_ISDIR(stat.st_mode):
            t = "directory"
        elif S_ISLNK(stat.st_mode):
            t = "link"
        else:
            t = "file"
        out = {
            "name": "",
            "size": stat.st_size,
            "type": t,
            "uid": stat.st_uid,
            "gid": stat.st_gid,
            "time": datetime.datetime.fromtimestamp(
                stat.st_atime, tz=datetime.timezone.utc
            ),
            "mtime": datetime.datetime.fromtimestamp(
                stat.st_mtime, tz=datetime.timezone.utc
            ),
        }
        if parent_path:
            out["name"] = "/".join([parent_path.rstrip("/"), stat.filename])
        return out

    def ls(self, path, detail=False):
        logger.debug("Listing folder %s", path)
        stats = [self._decode_stat(stat, path) for stat in self.ftp.listdir_iter(path)]
        if detail:
            return stats
        else:
            paths = [stat["name"] for stat in stats]
            return sorted(paths)

    def put(self, lpath, rpath, callback=None, **kwargs):
        logger.debug("Put file %s into %s", lpath, rpath)
        self.ftp.put(lpath, rpath)

    def get_file(self, rpath, lpath, **kwargs):
        if self.isdir(rpath):
            os.makedirs(lpath, exist_ok=True)
        else:
            self.ftp.get(self._strip_protocol(rpath), lpath)

    def _open(self, path, mode="rb", block_size=None, **kwargs):
        """
        block_size: int or None
            If 0, no buffering, if 1, line buffering, if >1, buffer that many
            bytes, if None use default from paramiko.
        """
        logger.debug("Opening file %s", path)
        if kwargs.get("autocommit", True) is False:
            # writes to temporary file, move on commit
            path2 = "/".join([self.temppath, str(uuid.uuid4())])
            f = self.ftp.open(path2, mode, bufsize=block_size if block_size else -1)
            f.temppath = path2
            f.targetpath = path
            f.fs = self
            f.commit = types.MethodType(commit_a_file, f)
            f.discard = types.MethodType(discard_a_file, f)
        else:
            f = self.ftp.open(path, mode, bufsize=block_size if block_size else -1)
        return f

    def _rm(self, path):
        if self.isdir(path):
            self.ftp.rmdir(path)
        else:
            self.ftp.remove(path)

    def mv(self, old, new):
        logger.debug("Renaming %s into %s", old, new)
        self.ftp.posix_rename(old, new)


def commit_a_file(self):
    self.fs.mv(self.temppath, self.targetpath)


def discard_a_file(self):
    self.fs._rm(self.temppath)
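For orientation, a minimal usage sketch for the SFTP filesystem shown above (this is not part of the packaged file; the host name, credentials and paths are made-up placeholders):

import fsspec

# "sftp" is one of the registered protocol names of SFTPFileSystem;
# the extra keyword arguments are forwarded to paramiko's SSHClient.connect().
fs = fsspec.filesystem("sftp", host="sftp.example.com", username="demo", password="demo")
print(fs.ls("/upload", detail=False))
with fs.open("/upload/report.txt", "rb") as f:
    print(f.read())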
.venv/Lib/site-packages/fsspec/implementations/smb.py
ADDED
@@ -0,0 +1,324 @@
"""
This module contains SMBFileSystem class responsible for handling access to
Windows Samba network shares by using package smbprotocol
"""

import datetime
import uuid
from stat import S_ISDIR, S_ISLNK

import smbclient

from .. import AbstractFileSystem
from ..utils import infer_storage_options

# ! pylint: disable=bad-continuation


class SMBFileSystem(AbstractFileSystem):
    """Allow reading and writing to Windows and Samba network shares.

    When using `fsspec.open()` for getting a file-like object the URI
    should be specified as this format:
    ``smb://workgroup;user:password@server:port/share/folder/file.csv``.

    Example::

        >>> import fsspec
        >>> with fsspec.open(
        ...     'smb://myuser:mypassword@myhost.com/' 'share/folder/file.csv'
        ... ) as smbfile:
        ...     df = pd.read_csv(smbfile, sep='|', header=None)

    Note that you need to pass in a valid hostname or IP address for the host
    component of the URL. Do not use the Windows/NetBIOS machine name for the
    host component.

    The first component of the path in the URL points to the name of the shared
    folder. Subsequent path components will point to the directory/folder/file.

    The URL components ``workgroup`` , ``user``, ``password`` and ``port`` may be
    optional.

    .. note::

        For working this source require `smbprotocol`_ to be installed, e.g.::

            $ pip install smbprotocol
            # or
            # pip install smbprotocol[kerberos]

    .. _smbprotocol: https://github.com/jborean93/smbprotocol#requirements

    Note: if using this with the ``open`` or ``open_files``, with full URLs,
    there is no way to tell if a path is relative, so all paths are assumed
    to be absolute.
    """

    protocol = "smb"

    # pylint: disable=too-many-arguments
    def __init__(
        self,
        host,
        port=None,
        username=None,
        password=None,
        timeout=60,
        encrypt=None,
        share_access=None,
        **kwargs,
    ):
        """
        You can use _get_kwargs_from_urls to get some kwargs from
        a reasonable SMB url.

        Authentication will be anonymous or integrated if username/password are not
        given.

        Parameters
        ----------
        host: str
            The remote server name/ip to connect to
        port: int or None
            Port to connect with. Usually 445, sometimes 139.
        username: str or None
            Username to connect with. Required if Kerberos auth is not being used.
        password: str or None
            User's password on the server, if using username
        timeout: int
            Connection timeout in seconds
        encrypt: bool
            Whether to force encryption or not, once this has been set to True
            the session cannot be changed back to False.
        share_access: str or None
            Specifies the default access applied to file open operations
            performed with this file system object.
            This affects whether other processes can concurrently open a handle
            to the same file.

            - None (the default): exclusively locks the file until closed.
            - 'r': Allow other handles to be opened with read access.
            - 'w': Allow other handles to be opened with write access.
            - 'd': Allow other handles to be opened with delete access.
        """
        super().__init__(**kwargs)
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.timeout = timeout
        self.encrypt = encrypt
        self.temppath = kwargs.pop("temppath", "")
        self.share_access = share_access
        self._connect()

    @property
    def _port(self):
        return 445 if self.port is None else self.port

    def _connect(self):
        smbclient.register_session(
            self.host,
            username=self.username,
            password=self.password,
            port=self._port,
            encrypt=self.encrypt,
            connection_timeout=self.timeout,
        )

    @classmethod
    def _strip_protocol(cls, path):
        return infer_storage_options(path)["path"]

    @staticmethod
    def _get_kwargs_from_urls(path):
        # smb://workgroup;user:password@host:port/share/folder/file.csv
        out = infer_storage_options(path)
        out.pop("path", None)
        out.pop("protocol", None)
        return out

    def mkdir(self, path, create_parents=True, **kwargs):
        wpath = _as_unc_path(self.host, path)
        if create_parents:
            smbclient.makedirs(wpath, exist_ok=False, port=self._port, **kwargs)
        else:
            smbclient.mkdir(wpath, port=self._port, **kwargs)

    def makedirs(self, path, exist_ok=False):
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            smbclient.makedirs(wpath, exist_ok=exist_ok, port=self._port)

    def rmdir(self, path):
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            smbclient.rmdir(wpath, port=self._port)

    def info(self, path, **kwargs):
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port, **kwargs)
        if S_ISDIR(stats.st_mode):
            stype = "directory"
        elif S_ISLNK(stats.st_mode):
            stype = "link"
        else:
            stype = "file"
        res = {
            "name": path + "/" if stype == "directory" else path,
            "size": stats.st_size,
            "type": stype,
            "uid": stats.st_uid,
            "gid": stats.st_gid,
            "time": stats.st_atime,
            "mtime": stats.st_mtime,
        }
        return res

    def created(self, path):
        """Return the created timestamp of a file as a datetime.datetime"""
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port)
        return datetime.datetime.fromtimestamp(stats.st_ctime, tz=datetime.timezone.utc)

    def modified(self, path):
        """Return the modified timestamp of a file as a datetime.datetime"""
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port)
        return datetime.datetime.fromtimestamp(stats.st_mtime, tz=datetime.timezone.utc)

    def ls(self, path, detail=True, **kwargs):
        unc = _as_unc_path(self.host, path)
        listed = smbclient.listdir(unc, port=self._port, **kwargs)
        dirs = ["/".join([path.rstrip("/"), p]) for p in listed]
        if detail:
            dirs = [self.info(d) for d in dirs]
        return dirs

    # pylint: disable=too-many-arguments
    def _open(
        self,
        path,
        mode="rb",
        block_size=-1,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        """
        block_size: int or None
            If 0, no buffering, 1, line buffering, >1, buffer that many bytes

        Notes
        -----
        By specifying 'share_access' in 'kwargs' it is possible to override the
        default shared access setting applied in the constructor of this object.
        """
        bls = block_size if block_size is not None and block_size >= 0 else -1
        wpath = _as_unc_path(self.host, path)
        share_access = kwargs.pop("share_access", self.share_access)
        if "w" in mode and autocommit is False:
            temp = _as_temp_path(self.host, path, self.temppath)
            return SMBFileOpener(
                wpath, temp, mode, port=self._port, block_size=bls, **kwargs
            )
        return smbclient.open_file(
            wpath,
            mode,
            buffering=bls,
            share_access=share_access,
            port=self._port,
            **kwargs,
        )

    def copy(self, path1, path2, **kwargs):
        """Copy within two locations in the same filesystem"""
        wpath1 = _as_unc_path(self.host, path1)
        wpath2 = _as_unc_path(self.host, path2)
        smbclient.copyfile(wpath1, wpath2, port=self._port, **kwargs)

    def _rm(self, path):
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            stats = smbclient.stat(wpath, port=self._port)
            if S_ISDIR(stats.st_mode):
                smbclient.rmdir(wpath, port=self._port)
            else:
                smbclient.remove(wpath, port=self._port)

    def mv(self, path1, path2, recursive=None, maxdepth=None, **kwargs):
        wpath1 = _as_unc_path(self.host, path1)
        wpath2 = _as_unc_path(self.host, path2)
        smbclient.rename(wpath1, wpath2, port=self._port, **kwargs)


def _as_unc_path(host, path):
    rpath = path.replace("/", "\\")
    unc = f"\\\\{host}{rpath}"
    return unc


def _as_temp_path(host, path, temppath):
    share = path.split("/")[1]
    temp_file = f"/{share}{temppath}/{uuid.uuid4()}"
    unc = _as_unc_path(host, temp_file)
    return unc


def _share_has_path(path):
    parts = path.count("/")
    if path.endswith("/"):
        return parts > 2
    return parts > 1


class SMBFileOpener:
    """writes to remote temporary file, move on commit"""

    def __init__(self, path, temp, mode, port=445, block_size=-1, **kwargs):
        self.path = path
        self.temp = temp
        self.mode = mode
        self.block_size = block_size
        self.kwargs = kwargs
        self.smbfile = None
        self._incontext = False
        self.port = port
        self._open()

    def _open(self):
        if self.smbfile is None or self.smbfile.closed:
            self.smbfile = smbclient.open_file(
                self.temp,
                self.mode,
                port=self.port,
                buffering=self.block_size,
                **self.kwargs,
            )

    def commit(self):
        """Move temp file to definitive on success."""
        # TODO: use transaction support in SMB protocol
        smbclient.replace(self.temp, self.path, port=self.port)

    def discard(self):
        """Remove the temp file on failure."""
        smbclient.remove(self.temp, port=self.port)

    def __fspath__(self):
        return self.path

    def __iter__(self):
        return self.smbfile.__iter__()

    def __getattr__(self, item):
        return getattr(self.smbfile, item)

    def __enter__(self):
        self._incontext = True
        return self.smbfile.__enter__()

    def __exit__(self, exc_type, exc_value, traceback):
        self._incontext = False
        self.smbfile.__exit__(exc_type, exc_value, traceback)
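A minimal, hypothetical usage sketch for the SMBFileSystem class above (the server name, credentials and share path are placeholders, not taken from the file):

from fsspec.implementations.smb import SMBFileSystem

# share_access="r" lets other processes keep read handles open on the same files
fs = SMBFileSystem("fileserver.local", username="myuser", password="secret", share_access="r")
print(fs.ls("/share/folder"))
with fs.open("/share/folder/file.csv", "rb") as f:
    header = f.read(1024)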
.venv/Lib/site-packages/fsspec/implementations/tar.py
ADDED
@@ -0,0 +1,124 @@
import logging
import tarfile

import fsspec
from fsspec.archive import AbstractArchiveFileSystem
from fsspec.compression import compr
from fsspec.utils import infer_compression

typemap = {b"0": "file", b"5": "directory"}

logger = logging.getLogger("tar")


class TarFileSystem(AbstractArchiveFileSystem):
    """Compressed Tar archives as a file-system (read-only)

    Supports the following formats:
    tar.gz, tar.bz2, tar.xz
    """

    root_marker = ""
    protocol = "tar"
    cachable = False

    def __init__(
        self,
        fo="",
        index_store=None,
        target_options=None,
        target_protocol=None,
        compression=None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        target_options = target_options or {}

        if isinstance(fo, str):
            self.of = fsspec.open(fo, protocol=target_protocol, **target_options)
            fo = self.of.open()  # keep the reference

        # Try to infer compression.
        if compression is None:
            name = None

            # Try different ways to get hold of the filename. `fo` might either
            # be a `fsspec.LocalFileOpener`, an `io.BufferedReader` or an
            # `fsspec.AbstractFileSystem` instance.
            try:
                # Amended io.BufferedReader or similar.
                # This uses a "protocol extension" where original filenames are
                # propagated to archive-like filesystems in order to let them
                # infer the right compression appropriately.
                if hasattr(fo, "original"):
                    name = fo.original

                # fsspec.LocalFileOpener
                elif hasattr(fo, "path"):
                    name = fo.path

                # io.BufferedReader
                elif hasattr(fo, "name"):
                    name = fo.name

                # fsspec.AbstractFileSystem
                elif hasattr(fo, "info"):
                    name = fo.info()["name"]

            except Exception as ex:
                logger.warning(
                    f"Unable to determine file name, not inferring compression: {ex}"
                )

            if name is not None:
                compression = infer_compression(name)
                logger.info(f"Inferred compression {compression} from file name {name}")

        if compression is not None:
            # TODO: tarfile already implements compression with modes like "'r:gz'",
            # but then would seek to offset in the file work?
            fo = compr[compression](fo)

        self._fo_ref = fo
        self.fo = fo  # the whole instance is a context
        self.tar = tarfile.TarFile(fileobj=self.fo)
        self.dir_cache = None

        self.index_store = index_store
        self.index = None
        self._index()

    def _index(self):
        # TODO: load and set saved index, if exists
        out = {}
        for ti in self.tar:
            info = ti.get_info()
            info["type"] = typemap.get(info["type"], "file")
            name = ti.get_info()["name"].rstrip("/")
            out[name] = (info, ti.offset_data)

        self.index = out
        # TODO: save index to self.index_store here, if set

    def _get_dirs(self):
        if self.dir_cache is not None:
            return

        # This enables ls to get directories as children as well as files
        self.dir_cache = {
            dirname: {"name": dirname, "size": 0, "type": "directory"}
            for dirname in self._all_dirnames(self.tar.getnames())
        }
        for member in self.tar.getmembers():
            info = member.get_info()
            info["name"] = info["name"].rstrip("/")
            info["type"] = typemap.get(info["type"], "file")
            self.dir_cache[info["name"]] = info

    def _open(self, path, mode="rb", **kwargs):
        if mode != "rb":
            raise ValueError("Read-only filesystem implementation")
        details, offset = self.index[path]
        if details["type"] != "file":
            raise ValueError("Can only handle regular files")
        return self.tar.extractfile(path)
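A small usage sketch for the read-only TarFileSystem above (the archive name and member path are placeholders; compression would be inferred from the ".tar.gz" suffix as the constructor describes):

import fsspec

fs = fsspec.filesystem("tar", fo="archive.tar.gz")
print(fs.ls("/"))  # top-level members of the archive
with fs.open("data/table.csv", "rb") as f:  # a member path inside the archive
    print(f.read(100))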
.venv/Lib/site-packages/fsspec/implementations/webhdfs.py
ADDED
@@ -0,0 +1,486 @@
# https://hadoop.apache.org/docs/r1.0.4/webhdfs.html

import logging
import os
import secrets
import shutil
import tempfile
import uuid
from contextlib import suppress
from urllib.parse import quote

import requests

from ..spec import AbstractBufferedFile, AbstractFileSystem
from ..utils import infer_storage_options, tokenize

logger = logging.getLogger("webhdfs")


class WebHDFS(AbstractFileSystem):
    """
    Interface to HDFS over HTTP using the WebHDFS API. Supports also HttpFS gateways.

    Four auth mechanisms are supported:

    insecure: no auth is done, and the user is assumed to be whoever they
        say they are (parameter ``user``), or a predefined value such as
        "dr.who" if not given
    spnego: when kerberos authentication is enabled, auth is negotiated by
        requests_kerberos https://github.com/requests/requests-kerberos .
        This establishes a session based on existing kinit login and/or
        specified principal/password; parameters are passed with ``kerb_kwargs``
    token: uses an existing Hadoop delegation token from another secured
        service. Indeed, this client can also generate such tokens when
        not insecure. Note that tokens expire, but can be renewed (by a
        previously specified user) and may allow for proxying.
    basic-auth: used when both parameter ``user`` and parameter ``password``
        are provided.

    """

    tempdir = str(tempfile.gettempdir())
    protocol = "webhdfs", "webHDFS"

    def __init__(
        self,
        host,
        port=50070,
        kerberos=False,
        token=None,
        user=None,
        password=None,
        proxy_to=None,
        kerb_kwargs=None,
        data_proxy=None,
        use_https=False,
        session_cert=None,
        session_verify=True,
        **kwargs,
    ):
        """
        Parameters
        ----------
        host: str
            Name-node address
        port: int
            Port for webHDFS
        kerberos: bool
            Whether to authenticate with kerberos for this connection
        token: str or None
            If given, use this token on every call to authenticate. A user
            and user-proxy may be encoded in the token and should not be also
            given
        user: str or None
            If given, assert the user name to connect with
        password: str or None
            If given, assert the password to use for basic auth. If password
            is provided, user must be provided also
        proxy_to: str or None
            If given, the user has the authority to proxy, and this value is
            the user in who's name actions are taken
        kerb_kwargs: dict
            Any extra arguments for HTTPKerberosAuth, see
            `<https://github.com/requests/requests-kerberos/blob/master/requests_kerberos/kerberos_.py>`_
        data_proxy: dict, callable or None
            If given, map data-node addresses. This can be necessary if the
            HDFS cluster is behind a proxy, running on Docker or otherwise has
            a mismatch between the host-names given by the name-node and the
            address by which to refer to them from the client. If a dict,
            maps host names ``host->data_proxy[host]``; if a callable, full
            URLs are passed, and function must conform to
            ``url->data_proxy(url)``.
        use_https: bool
            Whether to connect to the Name-node using HTTPS instead of HTTP
        session_cert: str or Tuple[str, str] or None
            Path to a certificate file, or tuple of (cert, key) files to use
            for the requests.Session
        session_verify: str, bool or None
            Path to a certificate file to use for verifying the requests.Session.
        kwargs
        """
        if self._cached:
            return
        super().__init__(**kwargs)
        self.url = (
            f"{'https' if use_https else 'http'}://{host}:{port}/webhdfs/v1"  # noqa
        )
        self.kerb = kerberos
        self.kerb_kwargs = kerb_kwargs or {}
        self.pars = {}
        self.proxy = data_proxy or {}
        if token is not None:
            if user is not None or proxy_to is not None:
                raise ValueError(
                    "If passing a delegation token, must not set "
                    "user or proxy_to, as these are encoded in the"
                    " token"
                )
            self.pars["delegation"] = token
        self.user = user
        self.password = password

        if password is not None:
            if user is None:
                raise ValueError(
                    "If passing a password, the user must also be"
                    "set in order to set up the basic-auth"
                )
        else:
            if user is not None:
                self.pars["user.name"] = user

        if proxy_to is not None:
            self.pars["doas"] = proxy_to
        if kerberos and user is not None:
            raise ValueError(
                "If using Kerberos auth, do not specify the "
                "user, this is handled by kinit."
            )

        self.session_cert = session_cert
        self.session_verify = session_verify

        self._connect()

        self._fsid = f"webhdfs_{tokenize(host, port)}"

    @property
    def fsid(self):
        return self._fsid

    def _connect(self):
        self.session = requests.Session()

        if self.session_cert:
            self.session.cert = self.session_cert

        self.session.verify = self.session_verify

        if self.kerb:
            from requests_kerberos import HTTPKerberosAuth

            self.session.auth = HTTPKerberosAuth(**self.kerb_kwargs)

        if self.user is not None and self.password is not None:
            from requests.auth import HTTPBasicAuth

            self.session.auth = HTTPBasicAuth(self.user, self.password)

    def _call(self, op, method="get", path=None, data=None, redirect=True, **kwargs):
        url = self._apply_proxy(self.url + quote(path or "", safe="/="))
        args = kwargs.copy()
        args.update(self.pars)
        args["op"] = op.upper()
        logger.debug("sending %s with %s", url, method)
        out = self.session.request(
            method=method.upper(),
            url=url,
            params=args,
            data=data,
            allow_redirects=redirect,
        )
        if out.status_code in [400, 401, 403, 404, 500]:
            try:
                err = out.json()
                msg = err["RemoteException"]["message"]
                exp = err["RemoteException"]["exception"]
            except (ValueError, KeyError):
                pass
            else:
                if exp in ["IllegalArgumentException", "UnsupportedOperationException"]:
                    raise ValueError(msg)
                elif exp in ["SecurityException", "AccessControlException"]:
                    raise PermissionError(msg)
                elif exp in ["FileNotFoundException"]:
                    raise FileNotFoundError(msg)
                else:
                    raise RuntimeError(msg)
        out.raise_for_status()
        return out

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        replication=None,
        permissions=None,
        **kwargs,
    ):
        """

        Parameters
        ----------
        path: str
            File location
        mode: str
            'rb', 'wb', etc.
        block_size: int
            Client buffer size for read-ahead or write buffer
        autocommit: bool
            If False, writes to temporary file that only gets put in final
            location upon commit
        replication: int
            Number of copies of file on the cluster, write mode only
        permissions: str or int
            posix permissions, write mode only
        kwargs

        Returns
        -------
        WebHDFile instance
        """
        block_size = block_size or self.blocksize
        return WebHDFile(
            self,
            path,
            mode=mode,
            block_size=block_size,
            tempdir=self.tempdir,
            autocommit=autocommit,
            replication=replication,
            permissions=permissions,
        )

    @staticmethod
    def _process_info(info):
        info["type"] = info["type"].lower()
        info["size"] = info["length"]
        return info

    @classmethod
    def _strip_protocol(cls, path):
        return infer_storage_options(path)["path"]

    @staticmethod
    def _get_kwargs_from_urls(urlpath):
        out = infer_storage_options(urlpath)
        out.pop("path", None)
        out.pop("protocol", None)
        if "username" in out:
            out["user"] = out.pop("username")
        return out

    def info(self, path):
        out = self._call("GETFILESTATUS", path=path)
        info = out.json()["FileStatus"]
        info["name"] = path
        return self._process_info(info)

    def ls(self, path, detail=False):
        out = self._call("LISTSTATUS", path=path)
        infos = out.json()["FileStatuses"]["FileStatus"]
        for info in infos:
            self._process_info(info)
            info["name"] = path.rstrip("/") + "/" + info["pathSuffix"]
        if detail:
            return sorted(infos, key=lambda i: i["name"])
        else:
            return sorted(info["name"] for info in infos)

    def content_summary(self, path):
        """Total numbers of files, directories and bytes under path"""
        out = self._call("GETCONTENTSUMMARY", path=path)
        return out.json()["ContentSummary"]

    def ukey(self, path):
        """Checksum info of file, giving method and result"""
        out = self._call("GETFILECHECKSUM", path=path, redirect=False)
        if "Location" in out.headers:
            location = self._apply_proxy(out.headers["Location"])
            out2 = self.session.get(location)
            out2.raise_for_status()
            return out2.json()["FileChecksum"]
        else:
            out.raise_for_status()
            return out.json()["FileChecksum"]

    def home_directory(self):
        """Get user's home directory"""
        out = self._call("GETHOMEDIRECTORY")
        return out.json()["Path"]

    def get_delegation_token(self, renewer=None):
        """Retrieve token which can give the same authority to other uses

        Parameters
        ----------
        renewer: str or None
            User who may use this token; if None, will be current user
        """
        if renewer:
            out = self._call("GETDELEGATIONTOKEN", renewer=renewer)
        else:
            out = self._call("GETDELEGATIONTOKEN")
        t = out.json()["Token"]
        if t is None:
            raise ValueError("No token available for this user/security context")
        return t["urlString"]

    def renew_delegation_token(self, token):
        """Make token live longer. Returns new expiry time"""
        out = self._call("RENEWDELEGATIONTOKEN", method="put", token=token)
        return out.json()["long"]

    def cancel_delegation_token(self, token):
        """Stop the token from being useful"""
        self._call("CANCELDELEGATIONTOKEN", method="put", token=token)

    def chmod(self, path, mod):
        """Set the permission at path

        Parameters
        ----------
        path: str
            location to set (file or directory)
        mod: str or int
            posix epresentation or permission, give as oct string, e.g, '777'
            or 0o777
        """
        self._call("SETPERMISSION", method="put", path=path, permission=mod)

    def chown(self, path, owner=None, group=None):
        """Change owning user and/or group"""
        kwargs = {}
        if owner is not None:
            kwargs["owner"] = owner
        if group is not None:
            kwargs["group"] = group
        self._call("SETOWNER", method="put", path=path, **kwargs)

    def set_replication(self, path, replication):
        """
        Set file replication factor

        Parameters
        ----------
        path: str
            File location (not for directories)
        replication: int
            Number of copies of file on the cluster. Should be smaller than
            number of data nodes; normally 3 on most systems.
        """
        self._call("SETREPLICATION", path=path, method="put", replication=replication)

    def mkdir(self, path, **kwargs):
        self._call("MKDIRS", method="put", path=path)

    def makedirs(self, path, exist_ok=False):
        if exist_ok is False and self.exists(path):
            raise FileExistsError(path)
        self.mkdir(path)

    def mv(self, path1, path2, **kwargs):
        self._call("RENAME", method="put", path=path1, destination=path2)

    def rm(self, path, recursive=False, **kwargs):
        self._call(
            "DELETE",
            method="delete",
            path=path,
            recursive="true" if recursive else "false",
        )

    def rm_file(self, path, **kwargs):
        self.rm(path)

    def cp_file(self, lpath, rpath, **kwargs):
        with self.open(lpath) as lstream:
            tmp_fname = "/".join([self._parent(rpath), f".tmp.{secrets.token_hex(16)}"])
            # Perform an atomic copy (stream to a temporary file and
            # move it to the actual destination).
            try:
                with self.open(tmp_fname, "wb") as rstream:
                    shutil.copyfileobj(lstream, rstream)
                self.mv(tmp_fname, rpath)
            except BaseException:  # noqa
                with suppress(FileNotFoundError):
                    self.rm(tmp_fname)
                raise

    def _apply_proxy(self, location):
        if self.proxy and callable(self.proxy):
            location = self.proxy(location)
        elif self.proxy:
            # as a dict
            for k, v in self.proxy.items():
                location = location.replace(k, v, 1)
        return location


class WebHDFile(AbstractBufferedFile):
    """A file living in HDFS over webHDFS"""

    def __init__(self, fs, path, **kwargs):
        super().__init__(fs, path, **kwargs)
        kwargs = kwargs.copy()
        if kwargs.get("permissions", None) is None:
            kwargs.pop("permissions", None)
        if kwargs.get("replication", None) is None:
            kwargs.pop("replication", None)
        self.permissions = kwargs.pop("permissions", 511)
        tempdir = kwargs.pop("tempdir")
        if kwargs.pop("autocommit", False) is False:
            self.target = self.path
            self.path = os.path.join(tempdir, str(uuid.uuid4()))

    def _upload_chunk(self, final=False):
        """Write one part of a multi-block file upload

        Parameters
        ==========
        final: bool
            This is the last block, so should complete file, if
            self.autocommit is True.
        """
        out = self.fs.session.post(
            self.location,
            data=self.buffer.getvalue(),
            headers={"content-type": "application/octet-stream"},
        )
        out.raise_for_status()
        return True

    def _initiate_upload(self):
        """Create remote file/upload"""
        kwargs = self.kwargs.copy()
        if "a" in self.mode:
            op, method = "APPEND", "POST"
        else:
            op, method = "CREATE", "PUT"
            kwargs["overwrite"] = "true"
        out = self.fs._call(op, method, self.path, redirect=False, **kwargs)
        location = self.fs._apply_proxy(out.headers["Location"])
        if "w" in self.mode:
            # create empty file to append to
            out2 = self.fs.session.put(
                location, headers={"content-type": "application/octet-stream"}
            )
            out2.raise_for_status()
            # after creating empty file, change location to append to
            out2 = self.fs._call("APPEND", "POST", self.path, redirect=False, **kwargs)
            self.location = self.fs._apply_proxy(out2.headers["Location"])

    def _fetch_range(self, start, end):
        start = max(start, 0)
        end = min(self.size, end)
        if start >= end or start >= self.size:
            return b""
        out = self.fs._call(
            "OPEN", path=self.path, offset=start, length=end - start, redirect=False
        )
        out.raise_for_status()
        if "Location" in out.headers:
            location = out.headers["Location"]
            out2 = self.fs.session.get(self.fs._apply_proxy(location))
            return out2.content
        else:
            return out.content

    def commit(self):
        self.fs.mv(self.path, self.target)

    def discard(self):
        self.fs.rm(self.path)
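A hedged usage sketch for the WebHDFS client above (the name-node address, port and paths are invented for illustration; port 9870 is common on recent Hadoop, while the constructor defaults to 50070):

from fsspec.implementations.webhdfs import WebHDFS

fs = WebHDFS(host="namenode.example.com", port=9870, user="hdfs")
print(fs.ls("/user/hdfs"))
with fs.open("/user/hdfs/example.csv", "rb") as f:
    print(f.read(100))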
.venv/Lib/site-packages/fsspec/implementations/zip.py
ADDED
@@ -0,0 +1,133 @@
import zipfile

import fsspec
from fsspec.archive import AbstractArchiveFileSystem


class ZipFileSystem(AbstractArchiveFileSystem):
    """Read/Write contents of ZIP archive as a file-system

    Keeps file object open while instance lives.

    This class is pickleable, but not necessarily thread-safe
    """

    root_marker = ""
    protocol = "zip"
    cachable = False

    def __init__(
        self,
        fo="",
        mode="r",
        target_protocol=None,
        target_options=None,
        compression=zipfile.ZIP_STORED,
        allowZip64=True,
        compresslevel=None,
        **kwargs,
    ):
        """
        Parameters
        ----------
        fo: str or file-like
            Contains ZIP, and must exist. If a str, will fetch file using
            :meth:`~fsspec.open_files`, which must return one file exactly.
        mode: str
            Accept: "r", "w", "a"
        target_protocol: str (optional)
            If ``fo`` is a string, this value can be used to override the
            FS protocol inferred from a URL
        target_options: dict (optional)
            Kwargs passed when instantiating the target FS, if ``fo`` is
            a string.
        compression, allowZip64, compresslevel: passed to ZipFile
            Only relevant when creating a ZIP
        """
        super().__init__(self, **kwargs)
        if mode not in set("rwa"):
            raise ValueError(f"mode '{mode}' no understood")
        self.mode = mode
        if isinstance(fo, str):
            if mode == "a":
                m = "r+b"
            else:
                m = mode + "b"
            fo = fsspec.open(
                fo, mode=m, protocol=target_protocol, **(target_options or {})
            )
        self.of = fo
        self.fo = fo.__enter__()  # the whole instance is a context
        self.zip = zipfile.ZipFile(
            self.fo,
            mode=mode,
            compression=compression,
            allowZip64=allowZip64,
            compresslevel=compresslevel,
        )
        self.dir_cache = None

    @classmethod
    def _strip_protocol(cls, path):
        # zip file paths are always relative to the archive root
        return super()._strip_protocol(path).lstrip("/")

    def __del__(self):
        if hasattr(self, "zip"):
            self.close()
            del self.zip

    def close(self):
        """Commits any write changes to the file. Done on ``del`` too."""
        self.zip.close()

    def _get_dirs(self):
        if self.dir_cache is None or self.mode in set("wa"):
            # when writing, dir_cache is always in the ZipFile's attributes,
            # not read from the file.
            files = self.zip.infolist()
            self.dir_cache = {
                dirname.rstrip("/"): {
                    "name": dirname.rstrip("/"),
                    "size": 0,
                    "type": "directory",
                }
                for dirname in self._all_dirnames(self.zip.namelist())
            }
            for z in files:
                f = {s: getattr(z, s, None) for s in zipfile.ZipInfo.__slots__}
                f.update(
                    {
                        "name": z.filename.rstrip("/"),
                        "size": z.file_size,
                        "type": ("directory" if z.is_dir() else "file"),
                    }
                )
                self.dir_cache[f["name"]] = f

    def pipe_file(self, path, value, **kwargs):
        # override upstream, because we know the exact file size in this case
        self.zip.writestr(path, value, **kwargs)

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        path = self._strip_protocol(path)
        if "r" in mode and self.mode in set("wa"):
            if self.exists(path):
                raise OSError("ZipFS can only be open for reading or writing, not both")
            raise FileNotFoundError(path)
        if "r" in self.mode and "w" in mode:
            raise OSError("ZipFS can only be open for reading or writing, not both")
        out = self.zip.open(path, mode.strip("b"))
        if "r" in mode:
            info = self.info(path)
            out.size = info["size"]
            out.name = info["name"]
        return out
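A minimal sketch of writing and then re-reading an archive with the ZipFileSystem above (the archive and member names are placeholders):

from fsspec.implementations.zip import ZipFileSystem

fs = ZipFileSystem("example.zip", mode="w")
fs.pipe_file("data/hello.txt", b"hello zip")  # writes one member directly
fs.close()                                    # flush and close the archive

fs = ZipFileSystem("example.zip")             # reopen read-only
with fs.open("data/hello.txt", "rb") as f:
    print(f.read())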
.venv/Lib/site-packages/fsspec/tests/abstract/__init__.py
ADDED
@@ -0,0 +1,287 @@
import os
from hashlib import md5

import pytest

from fsspec.implementations.local import LocalFileSystem
from fsspec.tests.abstract.copy import AbstractCopyTests  # noqa
from fsspec.tests.abstract.get import AbstractGetTests  # noqa
from fsspec.tests.abstract.put import AbstractPutTests  # noqa


class BaseAbstractFixtures:
    """
    Abstract base class containing fixtures that are used by but never need to
    be overridden in derived filesystem-specific classes to run the abstract
    tests on such filesystems.
    """

    @pytest.fixture
    def fs_bulk_operations_scenario_0(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used for many cp/get/put tests.

        Cleans up at the end of each test it which it is used.
        """
        source = self._bulk_operations_scenario_0(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_glob_edge_cases_files(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used for glob edge cases cp/get/put tests.

        Cleans up at the end of each test it which it is used.
        """
        source = self._glob_edge_cases_files(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_dir_and_file_with_same_name_prefix(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used to check cp/get/put on directory
        and file with the same name prefixes.

        Cleans up at the end of each test it which it is used.
        """
        source = self._dir_and_file_with_same_name_prefix(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_10_files_with_hashed_names(self, fs, fs_join, fs_path):
        """
        Scenario on remote filesystem that is used to check cp/get/put files order
        when source and destination are lists.

        Cleans up at the end of each test it which it is used.
        """
        source = self._10_files_with_hashed_names(fs, fs_join, fs_path)
        yield source
        fs.rm(source, recursive=True)

    @pytest.fixture
    def fs_target(self, fs, fs_join, fs_path):
        """
        Return name of remote directory that does not yet exist to copy into.

        Cleans up at the end of each test it which it is used.
        """
        target = fs_join(fs_path, "target")
        yield target
        if fs.exists(target):
            fs.rm(target, recursive=True)

    @pytest.fixture
    def local_bulk_operations_scenario_0(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used for many cp/get/put tests.

        Cleans up at the end of each test it which it is used.
        """
        source = self._bulk_operations_scenario_0(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_glob_edge_cases_files(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used for glob edge cases cp/get/put tests.

        Cleans up at the end of each test it which it is used.
        """
        source = self._glob_edge_cases_files(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_dir_and_file_with_same_name_prefix(
        self, local_fs, local_join, local_path
    ):
        """
        Scenario on local filesystem that is used to check cp/get/put on directory
        and file with the same name prefixes.

        Cleans up at the end of each test it which it is used.
        """
        source = self._dir_and_file_with_same_name_prefix(
            local_fs, local_join, local_path
        )
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_10_files_with_hashed_names(self, local_fs, local_join, local_path):
        """
        Scenario on local filesystem that is used to check cp/get/put files order
        when source and destination are lists.

        Cleans up at the end of each test it which it is used.
        """
        source = self._10_files_with_hashed_names(local_fs, local_join, local_path)
        yield source
        local_fs.rm(source, recursive=True)

    @pytest.fixture
    def local_target(self, local_fs, local_join, local_path):
        """
        Return name of local directory that does not yet exist to copy into.

        Cleans up at the end of each test it which it is used.
        """
        target = local_join(local_path, "target")
        yield target
        if local_fs.exists(target):
            local_fs.rm(target, recursive=True)

    def _glob_edge_cases_files(self, some_fs, some_join, some_path):
        """
        Scenario that is used for glob edge cases cp/get/put tests.
        Creates the following directory and file structure:

        📁 source
        ├── 📄 file1
        ├── 📄 file2
        ├── 📁 subdir0
        │   ├── 📄 subfile1
        │   ├── 📄 subfile2
        │   └── 📁 nesteddir
        │       └── 📄 nestedfile
        └── 📁 subdir1
            ├── 📄 subfile1
            ├── 📄 subfile2
            └── 📁 nesteddir
                └── 📄 nestedfile
        """
        source = some_join(some_path, "source")
        some_fs.touch(some_join(source, "file1"))
        some_fs.touch(some_join(source, "file2"))

        for subdir_idx in range(2):
            subdir = some_join(source, f"subdir{subdir_idx}")
            nesteddir = some_join(subdir, "nesteddir")
            some_fs.makedirs(nesteddir)
            some_fs.touch(some_join(subdir, "subfile1"))
            some_fs.touch(some_join(subdir, "subfile2"))
            some_fs.touch(some_join(nesteddir, "nestedfile"))

        return source

    def _bulk_operations_scenario_0(self, some_fs, some_join, some_path):
        """
        Scenario that is used for many cp/get/put tests. Creates the following
        directory and file structure:

        📁 source
        ├── 📄 file1
        ├── 📄 file2
        └── 📁 subdir
            ├── 📄 subfile1
            ├── 📄 subfile2
            └── 📁 nesteddir
                └── 📄 nestedfile
        """
        source = some_join(some_path, "source")
        subdir = some_join(source, "subdir")
        nesteddir = some_join(subdir, "nesteddir")
        some_fs.makedirs(nesteddir)
        some_fs.touch(some_join(source, "file1"))
        some_fs.touch(some_join(source, "file2"))
        some_fs.touch(some_join(subdir, "subfile1"))
        some_fs.touch(some_join(subdir, "subfile2"))
        some_fs.touch(some_join(nesteddir, "nestedfile"))
        return source

    def _dir_and_file_with_same_name_prefix(self, some_fs, some_join, some_path):
        """
        Scenario that is used to check cp/get/put on directory and file with
        the same name prefixes. Creates the following directory and file structure:

        📁 source
        ├── 📄 subdir.txt
        └── 📁 subdir
            └── 📄 subfile.txt
        """
        source = some_join(some_path, "source")
        subdir = some_join(source, "subdir")
        file = some_join(source, "subdir.txt")
        subfile = some_join(subdir, "subfile.txt")
        some_fs.makedirs(subdir)
        some_fs.touch(file)
        some_fs.touch(subfile)
        return source

    def _10_files_with_hashed_names(self, some_fs, some_join, some_path):
        """
        Scenario that is used to check cp/get/put files order when source and
        destination are lists. Creates the following directory and file structure:

        📁 source
        └── 📄 {hashed([0-9])}.txt
        """
        source = some_join(some_path, "source")
        for i in range(10):
            hashed_i = md5(str(i).encode("utf-8")).hexdigest()
            path = some_join(source, f"{hashed_i}.txt")
            some_fs.pipe(path=path, value=f"{i}".encode("utf-8"))
        return source


class AbstractFixtures(BaseAbstractFixtures):
    """
    Abstract base class containing fixtures that may be overridden in derived
    filesystem-specific classes to run the abstract tests on such filesystems.

    For any particular filesystem some of these fixtures must be overridden,
    such as ``fs`` and ``fs_path``, and others may be overridden if the
    default functions here are not appropriate, such as ``fs_join``.
    """

    @pytest.fixture
    def fs(self):
        raise NotImplementedError("This function must be overridden in derived classes")

    @pytest.fixture
    def fs_join(self):
        """
        Return a function that joins its arguments together into a path.

        Most fsspec implementations join paths in a platform-dependent way,
        but some will override this to always use a forward slash.
        """
        return os.path.join

    @pytest.fixture
    def fs_path(self):
        raise NotImplementedError("This function must be overridden in derived classes")

    @pytest.fixture(scope="class")
    def local_fs(self):
        # Maybe need an option for auto_mkdir=False? This is only relevant
        # for certain implementations.
        return LocalFileSystem(auto_mkdir=True)

    @pytest.fixture
    def local_join(self):
        """
        Return a function that joins its arguments together into a path, on
        the local filesystem.
        """
        return os.path.join

    @pytest.fixture
    def local_path(self, tmpdir):
        return tmpdir

    @pytest.fixture
    def supports_empty_directories(self):
        """
        Return whether this implementation supports empty directories.
        """
        return True

    @pytest.fixture
    def fs_sanitize_path(self):
        return lambda x: x
.venv/Lib/site-packages/fsspec/tests/abstract/common.py
ADDED
@@ -0,0 +1,175 @@
GLOB_EDGE_CASES_TESTS = {
    "argnames": ("path", "recursive", "maxdepth", "expected"),
    "argvalues": [
        ("fil?1", False, None, ["file1"]),
        ("fil?1", True, None, ["file1"]),
        ("file[1-2]", False, None, ["file1", "file2"]),
        ("file[1-2]", True, None, ["file1", "file2"]),
        ("*", False, None, ["file1", "file2"]),
        ("*", True, None, [
            "file1", "file2",
            "subdir0/subfile1", "subdir0/subfile2", "subdir0/nesteddir/nestedfile",
            "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
        ]),
        ("*", True, 1, ["file1", "file2"]),
        ("*", True, 2, [
            "file1", "file2",
            "subdir0/subfile1", "subdir0/subfile2",
            "subdir1/subfile1", "subdir1/subfile2",
        ]),
        ("*1", False, None, ["file1"]),
        ("*1", True, None, [
            "file1",
            "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
        ]),
        ("*1", True, 2, ["file1", "subdir1/subfile1", "subdir1/subfile2"]),
        ("**", False, None, [
            "file1", "file2",
            "subdir0/subfile1", "subdir0/subfile2", "subdir0/nesteddir/nestedfile",
            "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
        ]),
        ("**", True, None, [
            "file1", "file2",
            "subdir0/subfile1", "subdir0/subfile2", "subdir0/nesteddir/nestedfile",
            "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
        ]),
        ("**", True, 1, ["file1", "file2"]),
        ("**", True, 2, [
            "file1", "file2",
            "subdir0/subfile1", "subdir0/subfile2", "subdir0/nesteddir/nestedfile",
            "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
        ]),
        ("**", False, 2, [
            "file1", "file2",
            "subdir0/subfile1", "subdir0/subfile2",
            "subdir1/subfile1", "subdir1/subfile2",
        ]),
        ("**/*1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
        ("**/*1", True, None, [
            "file1",
            "subdir0/subfile1",
            "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
        ]),
        ("**/*1", True, 1, ["file1"]),
        ("**/*1", True, 2, [
            "file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2",
        ]),
        ("**/*1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
        ("**/subdir0", False, None, []),
        ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
        ("**/subdir0/nested*", False, 2, []),
        ("**/subdir0/nested*", True, 2, ["nestedfile"]),
        ("subdir[1-2]", False, None, []),
        ("subdir[1-2]", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
        ("subdir[1-2]", True, 2, ["subfile1", "subfile2"]),
        ("subdir[0-1]", False, None, []),
        ("subdir[0-1]", True, None, [
            "subdir0/subfile1", "subdir0/subfile2", "subdir0/nesteddir/nestedfile",
            "subdir1/subfile1", "subdir1/subfile2", "subdir1/nesteddir/nestedfile",
        ]),
        ("subdir[0-1]/*fil[e]*", False, None, [
            "subdir0/subfile1", "subdir0/subfile2",
            "subdir1/subfile1", "subdir1/subfile2",
        ]),
        ("subdir[0-1]/*fil[e]*", True, None, [
            "subdir0/subfile1", "subdir0/subfile2",
            "subdir1/subfile1", "subdir1/subfile2",
        ]),
    ],
}
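GLOB_EDGE_CASES_TESTS is shaped so it can be splatted straight into pytest.mark.parametrize, which is how copy.py, get.py and put.py below consume it. A minimal standalone sketch of that pattern (the test function name here is made up for illustration):

import pytest

from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS


@pytest.mark.parametrize(
    GLOB_EDGE_CASES_TESTS["argnames"], GLOB_EDGE_CASES_TESTS["argvalues"]
)
def test_glob_case_shape(path, recursive, maxdepth, expected):
    # Each case is: a glob pattern relative to the scenario root, the
    # recursive/maxdepth arguments to pass to cp/get/put, and the relative
    # paths expected at the destination afterwards.
    assert isinstance(path, str)
    assert isinstance(recursive, bool)
    assert maxdepth is None or maxdepth >= 1
    assert all(isinstance(p, str) for p in expected)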
.venv/Lib/site-packages/fsspec/tests/abstract/copy.py
ADDED
@@ -0,0 +1,557 @@
from hashlib import md5
from itertools import product

import pytest

from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS


class AbstractCopyTests:
    def test_copy_file_to_existing_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target, supports_empty_directories
    ):
        # Copy scenario 1a
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            fs.touch(fs_join(target, "dummy"))
        assert fs.isdir(target)

        target_file2 = fs_join(target, "file2")
        target_subfile1 = fs_join(target, "subfile1")

        # Copy from source directory
        fs.cp(fs_join(source, "file2"), target)
        assert fs.isfile(target_file2)

        # Copy from sub directory
        fs.cp(fs_join(source, "subdir", "subfile1"), target)
        assert fs.isfile(target_subfile1)

        # Remove copied files
        fs.rm([target_file2, target_subfile1])
        assert not fs.exists(target_file2)
        assert not fs.exists(target_subfile1)

        # Repeat with trailing slash on target
        fs.cp(fs_join(source, "file2"), target + "/")
        assert fs.isdir(target)
        assert fs.isfile(target_file2)

        fs.cp(fs_join(source, "subdir", "subfile1"), target + "/")
        assert fs.isfile(target_subfile1)

    def test_copy_file_to_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        # Copy scenario 1b
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        fs.cp(
            fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
        )  # Note trailing slash
        assert fs.isdir(target)
        assert fs.isdir(fs_join(target, "newdir"))
        assert fs.isfile(fs_join(target, "newdir", "subfile1"))

    def test_copy_file_to_file_in_existing_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target, supports_empty_directories
    ):
        # Copy scenario 1c
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            fs.touch(fs_join(target, "dummy"))
        assert fs.isdir(target)

        fs.cp(fs_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
        assert fs.isfile(fs_join(target, "newfile"))

    def test_copy_file_to_file_in_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        # Copy scenario 1d
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        fs.cp(
            fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir", "newfile")
        )
        assert fs.isdir(fs_join(target, "newdir"))
        assert fs.isfile(fs_join(target, "newdir", "newfile"))

    def test_copy_directory_to_existing_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target, supports_empty_directories
    ):
        # Copy scenario 1e
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            dummy = fs_join(target, "dummy")
            fs.touch(dummy)
        assert fs.isdir(target)

        for source_slash, target_slash in zip([False, True], [False, True]):
            s = fs_join(source, "subdir")
            if source_slash:
                s += "/"
            t = target + "/" if target_slash else target

            # Without recursive does nothing
            fs.cp(s, t)
            assert fs.ls(target, detail=False) == (
                [] if supports_empty_directories else [dummy]
            )

            # With recursive
            fs.cp(s, t, recursive=True)
            if source_slash:
                assert fs.isfile(fs_join(target, "subfile1"))
                assert fs.isfile(fs_join(target, "subfile2"))
                assert fs.isdir(fs_join(target, "nesteddir"))
                assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
                assert not fs.exists(fs_join(target, "subdir"))

                fs.rm(
                    [
                        fs_join(target, "subfile1"),
                        fs_join(target, "subfile2"),
                        fs_join(target, "nesteddir"),
                    ],
                    recursive=True,
                )
            else:
                assert fs.isdir(fs_join(target, "subdir"))
                assert fs.isfile(fs_join(target, "subdir", "subfile1"))
                assert fs.isfile(fs_join(target, "subdir", "subfile2"))
                assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
                assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))

                fs.rm(fs_join(target, "subdir"), recursive=True)
            assert fs.ls(target, detail=False) == (
                [] if supports_empty_directories else [dummy]
            )

            # Limit recursive by maxdepth
            fs.cp(s, t, recursive=True, maxdepth=1)
            if source_slash:
                assert fs.isfile(fs_join(target, "subfile1"))
                assert fs.isfile(fs_join(target, "subfile2"))
                assert not fs.exists(fs_join(target, "nesteddir"))
                assert not fs.exists(fs_join(target, "subdir"))

                fs.rm(
                    [
                        fs_join(target, "subfile1"),
                        fs_join(target, "subfile2"),
                    ],
                    recursive=True,
                )
            else:
                assert fs.isdir(fs_join(target, "subdir"))
                assert fs.isfile(fs_join(target, "subdir", "subfile1"))
                assert fs.isfile(fs_join(target, "subdir", "subfile2"))
                assert not fs.exists(fs_join(target, "subdir", "nesteddir"))

                fs.rm(fs_join(target, "subdir"), recursive=True)
            assert fs.ls(target, detail=False) == (
                [] if supports_empty_directories else [dummy]
            )

    def test_copy_directory_to_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target, supports_empty_directories
    ):
        # Copy scenario 1f
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        for source_slash, target_slash in zip([False, True], [False, True]):
            s = fs_join(source, "subdir")
            if source_slash:
                s += "/"
            t = fs_join(target, "newdir")
            if target_slash:
                t += "/"

            # Without recursive does nothing
            fs.cp(s, t)
            if supports_empty_directories:
                assert fs.ls(target) == []
            else:
                with pytest.raises(FileNotFoundError):
                    fs.ls(target)

            # With recursive
            fs.cp(s, t, recursive=True)
            assert fs.isdir(fs_join(target, "newdir"))
            assert fs.isfile(fs_join(target, "newdir", "subfile1"))
            assert fs.isfile(fs_join(target, "newdir", "subfile2"))
            assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
            assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
            assert not fs.exists(fs_join(target, "subdir"))

            fs.rm(fs_join(target, "newdir"), recursive=True)
            assert not fs.exists(fs_join(target, "newdir"))

            # Limit recursive by maxdepth
            fs.cp(s, t, recursive=True, maxdepth=1)
            assert fs.isdir(fs_join(target, "newdir"))
            assert fs.isfile(fs_join(target, "newdir", "subfile1"))
            assert fs.isfile(fs_join(target, "newdir", "subfile2"))
            assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
            assert not fs.exists(fs_join(target, "subdir"))

            fs.rm(fs_join(target, "newdir"), recursive=True)
            assert not fs.exists(fs_join(target, "newdir"))

    def test_copy_glob_to_existing_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target, supports_empty_directories
    ):
        # Copy scenario 1g
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            dummy = fs_join(target, "dummy")
            fs.touch(dummy)
        assert fs.isdir(target)

        for target_slash in [False, True]:
            t = target + "/" if target_slash else target

            # Without recursive
            fs.cp(fs_join(source, "subdir", "*"), t)
            assert fs.isfile(fs_join(target, "subfile1"))
            assert fs.isfile(fs_join(target, "subfile2"))
            assert not fs.isdir(fs_join(target, "nesteddir"))
            assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
            assert not fs.exists(fs_join(target, "subdir"))

            fs.rm(
                [fs_join(target, "subfile1"), fs_join(target, "subfile2")],
                recursive=True,
            )
            assert fs.ls(target, detail=False) == (
                [] if supports_empty_directories else [dummy]
            )

            # With recursive
            for glob, recursive in zip(["*", "**"], [True, False]):
                fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
                assert fs.isfile(fs_join(target, "subfile1"))
                assert fs.isfile(fs_join(target, "subfile2"))
                assert fs.isdir(fs_join(target, "nesteddir"))
                assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
                assert not fs.exists(fs_join(target, "subdir"))

                fs.rm(
                    [
                        fs_join(target, "subfile1"),
                        fs_join(target, "subfile2"),
                        fs_join(target, "nesteddir"),
                    ],
                    recursive=True,
                )
                assert fs.ls(target, detail=False) == (
                    [] if supports_empty_directories else [dummy]
                )

                # Limit recursive by maxdepth
                fs.cp(
                    fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
                )
                assert fs.isfile(fs_join(target, "subfile1"))
                assert fs.isfile(fs_join(target, "subfile2"))
                assert not fs.exists(fs_join(target, "nesteddir"))
                assert not fs.exists(fs_join(target, "subdir"))

                fs.rm(
                    [fs_join(target, "subfile1"), fs_join(target, "subfile2")],
                    recursive=True,
                )
                assert fs.ls(target, detail=False) == (
                    [] if supports_empty_directories else [dummy]
                )

    def test_copy_glob_to_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        # Copy scenario 1h
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        for target_slash in [False, True]:
            t = fs_join(target, "newdir")
            if target_slash:
                t += "/"

            # Without recursive
            fs.cp(fs_join(source, "subdir", "*"), t)
            assert fs.isdir(fs_join(target, "newdir"))
            assert fs.isfile(fs_join(target, "newdir", "subfile1"))
            assert fs.isfile(fs_join(target, "newdir", "subfile2"))
            assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
            assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
            assert not fs.exists(fs_join(target, "subdir"))
            assert not fs.exists(fs_join(target, "newdir", "subdir"))

            fs.rm(fs_join(target, "newdir"), recursive=True)
            assert not fs.exists(fs_join(target, "newdir"))

            # With recursive
            for glob, recursive in zip(["*", "**"], [True, False]):
                fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
                assert fs.isdir(fs_join(target, "newdir"))
                assert fs.isfile(fs_join(target, "newdir", "subfile1"))
                assert fs.isfile(fs_join(target, "newdir", "subfile2"))
                assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
                assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
                assert not fs.exists(fs_join(target, "subdir"))
                assert not fs.exists(fs_join(target, "newdir", "subdir"))

                fs.rm(fs_join(target, "newdir"), recursive=True)
                assert not fs.exists(fs_join(target, "newdir"))

                # Limit recursive by maxdepth
                fs.cp(
                    fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
                )
                assert fs.isdir(fs_join(target, "newdir"))
                assert fs.isfile(fs_join(target, "newdir", "subfile1"))
                assert fs.isfile(fs_join(target, "newdir", "subfile2"))
                assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
                assert not fs.exists(fs_join(target, "subdir"))
                assert not fs.exists(fs_join(target, "newdir", "subdir"))

                fs.rm(fs_join(target, "newdir"), recursive=True)
                assert not fs.exists(fs_join(target, "newdir"))

    @pytest.mark.parametrize(
        GLOB_EDGE_CASES_TESTS["argnames"],
        GLOB_EDGE_CASES_TESTS["argvalues"],
    )
    def test_copy_glob_edge_cases(
        self, path, recursive, maxdepth, expected, fs, fs_join, fs_glob_edge_cases_files, fs_target, fs_sanitize_path
    ):
        # Copy scenario 1g
        source = fs_glob_edge_cases_files

        target = fs_target

        for new_dir, target_slash in product([True, False], [True, False]):
            fs.mkdir(target)

            t = fs_join(target, "newdir") if new_dir else target
            t = t + "/" if target_slash else t

            fs.copy(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)

            output = fs.find(target)
            if new_dir:
                prefixed_expected = [
                    fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
                ]
            else:
                prefixed_expected = [
                    fs_sanitize_path(fs_join(target, p)) for p in expected
                ]
            assert sorted(output) == sorted(prefixed_expected)

            try:
                fs.rm(target, recursive=True)
            except FileNotFoundError:
                pass

    def test_copy_list_of_files_to_existing_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target, supports_empty_directories
    ):
        # Copy scenario 2a
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)
        if not supports_empty_directories:
            # Force target directory to exist by adding a dummy file
            dummy = fs_join(target, "dummy")
            fs.touch(dummy)
        assert fs.isdir(target)

        source_files = [
            fs_join(source, "file1"),
            fs_join(source, "file2"),
            fs_join(source, "subdir", "subfile1"),
        ]

        for target_slash in [False, True]:
            t = target + "/" if target_slash else target

            fs.cp(source_files, t)
            assert fs.isfile(fs_join(target, "file1"))
            assert fs.isfile(fs_join(target, "file2"))
            assert fs.isfile(fs_join(target, "subfile1"))

            fs.rm(
                [
                    fs_join(target, "file1"),
                    fs_join(target, "file2"),
                    fs_join(target, "subfile1"),
                ],
                recursive=True,
            )
            assert fs.ls(target, detail=False) == (
                [] if supports_empty_directories else [dummy]
            )

    def test_copy_list_of_files_to_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        # Copy scenario 2b
        source = fs_bulk_operations_scenario_0

        target = fs_target
        fs.mkdir(target)

        source_files = [
            fs_join(source, "file1"),
            fs_join(source, "file2"),
            fs_join(source, "subdir", "subfile1"),
        ]

        fs.cp(source_files, fs_join(target, "newdir") + "/")  # Note trailing slash
        assert fs.isdir(fs_join(target, "newdir"))
        assert fs.isfile(fs_join(target, "newdir", "file1"))
        assert fs.isfile(fs_join(target, "newdir", "file2"))
        assert fs.isfile(fs_join(target, "newdir", "subfile1"))

    def test_copy_two_files_new_directory(
        self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
    ):
        # This is a duplicate of test_copy_list_of_files_to_new_directory and
        # can eventually be removed.
        source = fs_bulk_operations_scenario_0

        target = fs_target
        assert not fs.exists(target)
        fs.cp([fs_join(source, "file1"), fs_join(source, "file2")], target)

        assert fs.isdir(target)
        assert fs.isfile(fs_join(target, "file1"))
        assert fs.isfile(fs_join(target, "file2"))

    def test_copy_directory_without_files_with_same_name_prefix(
        self, fs, fs_join, fs_target, fs_dir_and_file_with_same_name_prefix, supports_empty_directories
    ):
        # Create the test dirs
        source = fs_dir_and_file_with_same_name_prefix
        target = fs_target

        # Test without glob
        fs.cp(fs_join(source, "subdir"), target, recursive=True)

        assert fs.isfile(fs_join(target, "subfile.txt"))
        assert not fs.isfile(fs_join(target, "subdir.txt"))

        fs.rm([fs_join(target, "subfile.txt")])
        if supports_empty_directories:
            assert fs.ls(target) == []
        else:
            assert not fs.exists(target)

        # Test with glob
        fs.cp(fs_join(source, "subdir*"), target, recursive=True)

        assert fs.isdir(fs_join(target, "subdir"))
        assert fs.isfile(fs_join(target, "subdir", "subfile.txt"))
        assert fs.isfile(fs_join(target, "subdir.txt"))

    def test_copy_with_source_and_destination_as_list(
        self, fs, fs_target, fs_join, fs_10_files_with_hashed_names
    ):
        # Create the test dir
        source = fs_10_files_with_hashed_names
        target = fs_target

        # Create list of files for source and destination
        source_files = []
        destination_files = []
        for i in range(10):
            hashed_i = md5(str(i).encode("utf-8")).hexdigest()
            source_files.append(fs_join(source, f"{hashed_i}.txt"))
            destination_files.append(fs_join(target, f"{hashed_i}.txt"))

        # Copy and assert order was kept
        fs.copy(path1=source_files, path2=destination_files)

        for i in range(10):
            file_content = fs.cat(destination_files[i]).decode("utf-8")
            assert file_content == str(i)
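With a fixture class such as the MemoryFixtures sketch shown after the __init__.py listing above, running this entire class against a concrete backend is just a mixin; the test-class name below is illustrative, not from this diff:

from fsspec.tests.abstract.copy import AbstractCopyTests


class TestMemoryCopy(MemoryFixtures, AbstractCopyTests):
    # pytest collects every test_copy_* method from AbstractCopyTests and runs
    # it with the fixtures supplied by MemoryFixtures (the sketch above).
    pass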
.venv/Lib/site-packages/fsspec/tests/abstract/get.py
ADDED
@@ -0,0 +1,587 @@
1 |
+
from hashlib import md5
|
2 |
+
from itertools import product
|
3 |
+
|
4 |
+
import pytest
|
5 |
+
|
6 |
+
from fsspec.implementations.local import make_path_posix
|
7 |
+
from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
|
8 |
+
|
9 |
+
|
10 |
+
class AbstractGetTests:
|
11 |
+
def test_get_file_to_existing_directory(
|
12 |
+
self,
|
13 |
+
fs,
|
14 |
+
fs_join,
|
15 |
+
fs_bulk_operations_scenario_0,
|
16 |
+
local_fs,
|
17 |
+
local_join,
|
18 |
+
local_target,
|
19 |
+
):
|
20 |
+
# Copy scenario 1a
|
21 |
+
source = fs_bulk_operations_scenario_0
|
22 |
+
|
23 |
+
target = local_target
|
24 |
+
local_fs.mkdir(target)
|
25 |
+
assert local_fs.isdir(target)
|
26 |
+
|
27 |
+
target_file2 = local_join(target, "file2")
|
28 |
+
target_subfile1 = local_join(target, "subfile1")
|
29 |
+
|
30 |
+
# Copy from source directory
|
31 |
+
fs.get(fs_join(source, "file2"), target)
|
32 |
+
assert local_fs.isfile(target_file2)
|
33 |
+
|
34 |
+
# Copy from sub directory
|
35 |
+
fs.get(fs_join(source, "subdir", "subfile1"), target)
|
36 |
+
assert local_fs.isfile(target_subfile1)
|
37 |
+
|
38 |
+
# Remove copied files
|
39 |
+
local_fs.rm([target_file2, target_subfile1])
|
40 |
+
assert not local_fs.exists(target_file2)
|
41 |
+
assert not local_fs.exists(target_subfile1)
|
42 |
+
|
43 |
+
# Repeat with trailing slash on target
|
44 |
+
fs.get(fs_join(source, "file2"), target + "/")
|
45 |
+
assert local_fs.isdir(target)
|
46 |
+
assert local_fs.isfile(target_file2)
|
47 |
+
|
48 |
+
fs.get(fs_join(source, "subdir", "subfile1"), target + "/")
|
49 |
+
assert local_fs.isfile(target_subfile1)
|
50 |
+
|
51 |
+
def test_get_file_to_new_directory(
|
52 |
+
self,
|
53 |
+
fs,
|
54 |
+
fs_join,
|
55 |
+
fs_bulk_operations_scenario_0,
|
56 |
+
local_fs,
|
57 |
+
local_join,
|
58 |
+
local_target,
|
59 |
+
):
|
60 |
+
# Copy scenario 1b
|
61 |
+
source = fs_bulk_operations_scenario_0
|
62 |
+
|
63 |
+
target = local_target
|
64 |
+
local_fs.mkdir(target)
|
65 |
+
|
66 |
+
fs.get(
|
67 |
+
fs_join(source, "subdir", "subfile1"), local_join(target, "newdir/")
|
68 |
+
) # Note trailing slash
|
69 |
+
|
70 |
+
assert local_fs.isdir(target)
|
71 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
72 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
73 |
+
|
74 |
+
def test_get_file_to_file_in_existing_directory(
|
75 |
+
self,
|
76 |
+
fs,
|
77 |
+
fs_join,
|
78 |
+
fs_bulk_operations_scenario_0,
|
79 |
+
local_fs,
|
80 |
+
local_join,
|
81 |
+
local_target,
|
82 |
+
):
|
83 |
+
# Copy scenario 1c
|
84 |
+
source = fs_bulk_operations_scenario_0
|
85 |
+
|
86 |
+
target = local_target
|
87 |
+
local_fs.mkdir(target)
|
88 |
+
|
89 |
+
fs.get(fs_join(source, "subdir", "subfile1"), local_join(target, "newfile"))
|
90 |
+
assert local_fs.isfile(local_join(target, "newfile"))
|
91 |
+
|
92 |
+
def test_get_file_to_file_in_new_directory(
|
93 |
+
self,
|
94 |
+
fs,
|
95 |
+
fs_join,
|
96 |
+
fs_bulk_operations_scenario_0,
|
97 |
+
local_fs,
|
98 |
+
local_join,
|
99 |
+
local_target,
|
100 |
+
):
|
101 |
+
# Copy scenario 1d
|
102 |
+
source = fs_bulk_operations_scenario_0
|
103 |
+
|
104 |
+
target = local_target
|
105 |
+
local_fs.mkdir(target)
|
106 |
+
|
107 |
+
fs.get(
|
108 |
+
fs_join(source, "subdir", "subfile1"),
|
109 |
+
local_join(target, "newdir", "newfile"),
|
110 |
+
)
|
111 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
112 |
+
assert local_fs.isfile(local_join(target, "newdir", "newfile"))
|
113 |
+
|
114 |
+
def test_get_directory_to_existing_directory(
|
115 |
+
self,
|
116 |
+
fs,
|
117 |
+
fs_join,
|
118 |
+
fs_bulk_operations_scenario_0,
|
119 |
+
local_fs,
|
120 |
+
local_join,
|
121 |
+
local_target,
|
122 |
+
):
|
123 |
+
# Copy scenario 1e
|
124 |
+
source = fs_bulk_operations_scenario_0
|
125 |
+
|
126 |
+
target = local_target
|
127 |
+
local_fs.mkdir(target)
|
128 |
+
assert local_fs.isdir(target)
|
129 |
+
|
130 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
131 |
+
s = fs_join(source, "subdir")
|
132 |
+
if source_slash:
|
133 |
+
s += "/"
|
134 |
+
t = target + "/" if target_slash else target
|
135 |
+
|
136 |
+
# Without recursive does nothing
|
137 |
+
fs.get(s, t)
|
138 |
+
assert local_fs.ls(target) == []
|
139 |
+
|
140 |
+
# With recursive
|
141 |
+
fs.get(s, t, recursive=True)
|
142 |
+
if source_slash:
|
143 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
144 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
145 |
+
assert local_fs.isdir(local_join(target, "nesteddir"))
|
146 |
+
assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
|
147 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
148 |
+
|
149 |
+
local_fs.rm(
|
150 |
+
[
|
151 |
+
local_join(target, "subfile1"),
|
152 |
+
local_join(target, "subfile2"),
|
153 |
+
local_join(target, "nesteddir"),
|
154 |
+
],
|
155 |
+
recursive=True,
|
156 |
+
)
|
157 |
+
else:
|
158 |
+
assert local_fs.isdir(local_join(target, "subdir"))
|
159 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
|
160 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
|
161 |
+
assert local_fs.isdir(local_join(target, "subdir", "nesteddir"))
|
162 |
+
assert local_fs.isfile(
|
163 |
+
local_join(target, "subdir", "nesteddir", "nestedfile")
|
164 |
+
)
|
165 |
+
|
166 |
+
local_fs.rm(local_join(target, "subdir"), recursive=True)
|
167 |
+
assert local_fs.ls(target) == []
|
168 |
+
|
169 |
+
# Limit recursive by maxdepth
|
170 |
+
fs.get(s, t, recursive=True, maxdepth=1)
|
171 |
+
if source_slash:
|
172 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
173 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
174 |
+
assert not local_fs.exists(local_join(target, "nesteddir"))
|
175 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
176 |
+
|
177 |
+
local_fs.rm(
|
178 |
+
[
|
179 |
+
local_join(target, "subfile1"),
|
180 |
+
local_join(target, "subfile2"),
|
181 |
+
],
|
182 |
+
recursive=True,
|
183 |
+
)
|
184 |
+
else:
|
185 |
+
assert local_fs.isdir(local_join(target, "subdir"))
|
186 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
|
187 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
|
188 |
+
assert not local_fs.exists(local_join(target, "subdir", "nesteddir"))
|
189 |
+
|
190 |
+
local_fs.rm(local_join(target, "subdir"), recursive=True)
|
191 |
+
assert local_fs.ls(target) == []
|
192 |
+
|
193 |
+
def test_get_directory_to_new_directory(
|
194 |
+
self,
|
195 |
+
fs,
|
196 |
+
fs_join,
|
197 |
+
fs_bulk_operations_scenario_0,
|
198 |
+
local_fs,
|
199 |
+
local_join,
|
200 |
+
local_target,
|
201 |
+
):
|
202 |
+
# Copy scenario 1f
|
203 |
+
source = fs_bulk_operations_scenario_0
|
204 |
+
|
205 |
+
target = local_target
|
206 |
+
local_fs.mkdir(target)
|
207 |
+
|
208 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
209 |
+
s = fs_join(source, "subdir")
|
210 |
+
if source_slash:
|
211 |
+
s += "/"
|
212 |
+
t = local_join(target, "newdir")
|
213 |
+
if target_slash:
|
214 |
+
t += "/"
|
215 |
+
|
216 |
+
# Without recursive does nothing
|
217 |
+
fs.get(s, t)
|
218 |
+
assert local_fs.ls(target) == []
|
219 |
+
|
220 |
+
# With recursive
|
221 |
+
fs.get(s, t, recursive=True)
|
222 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
223 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
224 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
225 |
+
assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
|
226 |
+
assert local_fs.isfile(
|
227 |
+
local_join(target, "newdir", "nesteddir", "nestedfile")
|
228 |
+
)
|
229 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
230 |
+
|
231 |
+
local_fs.rm(local_join(target, "newdir"), recursive=True)
|
232 |
+
assert local_fs.ls(target) == []
|
233 |
+
|
234 |
+
# Limit recursive by maxdepth
|
235 |
+
fs.get(s, t, recursive=True, maxdepth=1)
|
236 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
237 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
238 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
239 |
+
assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
|
240 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
241 |
+
|
242 |
+
local_fs.rm(local_join(target, "newdir"), recursive=True)
|
243 |
+
assert not local_fs.exists(local_join(target, "newdir"))
|
244 |
+
|
245 |
+
def test_get_glob_to_existing_directory(
|
246 |
+
self,
|
247 |
+
fs,
|
248 |
+
fs_join,
|
249 |
+
fs_bulk_operations_scenario_0,
|
250 |
+
local_fs,
|
251 |
+
local_join,
|
252 |
+
local_target,
|
253 |
+
):
|
254 |
+
# Copy scenario 1g
|
255 |
+
source = fs_bulk_operations_scenario_0
|
256 |
+
|
257 |
+
target = local_target
|
258 |
+
local_fs.mkdir(target)
|
259 |
+
|
260 |
+
for target_slash in [False, True]:
|
261 |
+
t = target + "/" if target_slash else target
|
262 |
+
|
263 |
+
# Without recursive
|
264 |
+
fs.get(fs_join(source, "subdir", "*"), t)
|
265 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
266 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
267 |
+
assert not local_fs.isdir(local_join(target, "nesteddir"))
|
268 |
+
assert not local_fs.exists(local_join(target, "nesteddir", "nestedfile"))
|
269 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
270 |
+
|
271 |
+
local_fs.rm(
|
272 |
+
[
|
273 |
+
local_join(target, "subfile1"),
|
274 |
+
local_join(target, "subfile2"),
|
275 |
+
],
|
276 |
+
recursive=True,
|
277 |
+
)
|
278 |
+
assert local_fs.ls(target) == []
|
279 |
+
|
280 |
+
# With recursive
|
281 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
282 |
+
fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
|
283 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
284 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
285 |
+
assert local_fs.isdir(local_join(target, "nesteddir"))
|
286 |
+
assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
|
287 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
288 |
+
|
289 |
+
local_fs.rm(
|
290 |
+
[
|
291 |
+
local_join(target, "subfile1"),
|
292 |
+
local_join(target, "subfile2"),
|
293 |
+
local_join(target, "nesteddir"),
|
294 |
+
],
|
295 |
+
recursive=True,
|
296 |
+
)
|
297 |
+
assert local_fs.ls(target) == []
|
298 |
+
|
299 |
+
# Limit recursive by maxdepth
|
300 |
+
fs.get(
|
301 |
+
fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
|
302 |
+
)
|
303 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
304 |
+
assert local_fs.isfile(local_join(target, "subfile2"))
|
305 |
+
assert not local_fs.exists(local_join(target, "nesteddir"))
|
306 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
307 |
+
|
308 |
+
local_fs.rm(
|
309 |
+
[
|
310 |
+
local_join(target, "subfile1"),
|
311 |
+
local_join(target, "subfile2"),
|
312 |
+
],
|
313 |
+
recursive=True,
|
314 |
+
)
|
315 |
+
assert local_fs.ls(target) == []
|
316 |
+
|
317 |
+
def test_get_glob_to_new_directory(
|
318 |
+
self,
|
319 |
+
fs,
|
320 |
+
fs_join,
|
321 |
+
fs_bulk_operations_scenario_0,
|
322 |
+
local_fs,
|
323 |
+
local_join,
|
324 |
+
local_target,
|
325 |
+
):
|
326 |
+
# Copy scenario 1h
|
327 |
+
source = fs_bulk_operations_scenario_0
|
328 |
+
|
329 |
+
target = local_target
|
330 |
+
local_fs.mkdir(target)
|
331 |
+
|
332 |
+
for target_slash in [False, True]:
|
333 |
+
t = fs_join(target, "newdir")
|
334 |
+
if target_slash:
|
335 |
+
t += "/"
|
336 |
+
|
337 |
+
# Without recursive
|
338 |
+
fs.get(fs_join(source, "subdir", "*"), t)
|
339 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
340 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
341 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
342 |
+
assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
|
343 |
+
assert not local_fs.exists(
|
344 |
+
local_join(target, "newdir", "nesteddir", "nestedfile")
|
345 |
+
)
|
346 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
347 |
+
assert not local_fs.exists(local_join(target, "newdir", "subdir"))
|
348 |
+
|
349 |
+
local_fs.rm(local_join(target, "newdir"), recursive=True)
|
350 |
+
assert local_fs.ls(target) == []
|
351 |
+
|
352 |
+
# With recursive
|
353 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
354 |
+
fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
|
355 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
356 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
357 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
358 |
+
assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
|
359 |
+
assert local_fs.isfile(
|
360 |
+
local_join(target, "newdir", "nesteddir", "nestedfile")
|
361 |
+
)
|
362 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
363 |
+
assert not local_fs.exists(local_join(target, "newdir", "subdir"))
|
364 |
+
|
365 |
+
local_fs.rm(local_join(target, "newdir"), recursive=True)
|
366 |
+
assert not local_fs.exists(local_join(target, "newdir"))
|
367 |
+
|
368 |
+
# Limit recursive by maxdepth
|
369 |
+
fs.get(
|
370 |
+
fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
|
371 |
+
)
|
372 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
373 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
374 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
|
375 |
+
assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
|
376 |
+
assert not local_fs.exists(local_join(target, "subdir"))
|
377 |
+
assert not local_fs.exists(local_join(target, "newdir", "subdir"))
|
378 |
+
|
379 |
+
local_fs.rm(local_fs.ls(target, detail=False), recursive=True)
|
380 |
+
assert not local_fs.exists(local_join(target, "newdir"))
|
381 |
+
|
382 |
+
@pytest.mark.parametrize(
|
383 |
+
GLOB_EDGE_CASES_TESTS["argnames"],
|
384 |
+
GLOB_EDGE_CASES_TESTS["argvalues"],
|
385 |
+
)
|
386 |
+
def test_get_glob_edge_cases(
|
387 |
+
self,
|
388 |
+
path,
|
389 |
+
recursive,
|
390 |
+
maxdepth,
|
391 |
+
expected,
|
392 |
+
fs,
|
393 |
+
fs_join,
|
394 |
+
fs_glob_edge_cases_files,
|
395 |
+
local_fs,
|
396 |
+
local_join,
|
397 |
+
local_target,
|
398 |
+
):
|
399 |
+
# Copy scenario 1g
|
400 |
+
source = fs_glob_edge_cases_files
|
401 |
+
|
402 |
+
target = local_target
|
403 |
+
|
404 |
+
for new_dir, target_slash in product([True, False], [True, False]):
|
405 |
+
local_fs.mkdir(target)
|
406 |
+
|
407 |
+
t = local_join(target, "newdir") if new_dir else target
|
408 |
+
t = t + "/" if target_slash else t
|
409 |
+
|
410 |
+
fs.get(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
|
411 |
+
|
412 |
+
output = local_fs.find(target)
|
413 |
+
if new_dir:
|
414 |
+
prefixed_expected = [
|
415 |
+
make_path_posix(local_join(target, "newdir", p)) for p in expected
|
416 |
+
]
|
417 |
+
else:
|
418 |
+
prefixed_expected = [
|
419 |
+
make_path_posix(local_join(target, p)) for p in expected
|
420 |
+
]
|
421 |
+
assert sorted(output) == sorted(prefixed_expected)
|
422 |
+
|
423 |
+
try:
|
424 |
+
local_fs.rm(target, recursive=True)
|
425 |
+
except FileNotFoundError:
|
426 |
+
pass
|
427 |
+
|
428 |
+
def test_get_list_of_files_to_existing_directory(
|
429 |
+
self,
|
430 |
+
fs,
|
431 |
+
fs_join,
|
432 |
+
fs_bulk_operations_scenario_0,
|
433 |
+
local_fs,
|
434 |
+
local_join,
|
435 |
+
local_target,
|
436 |
+
):
|
437 |
+
# Copy scenario 2a
|
438 |
+
source = fs_bulk_operations_scenario_0
|
439 |
+
|
440 |
+
target = local_target
|
441 |
+
local_fs.mkdir(target)
|
442 |
+
|
443 |
+
source_files = [
|
444 |
+
fs_join(source, "file1"),
|
445 |
+
fs_join(source, "file2"),
|
446 |
+
fs_join(source, "subdir", "subfile1"),
|
447 |
+
]
|
448 |
+
|
449 |
+
for target_slash in [False, True]:
|
450 |
+
t = target + "/" if target_slash else target
|
451 |
+
|
452 |
+
fs.get(source_files, t)
|
453 |
+
assert local_fs.isfile(local_join(target, "file1"))
|
454 |
+
assert local_fs.isfile(local_join(target, "file2"))
|
455 |
+
assert local_fs.isfile(local_join(target, "subfile1"))
|
456 |
+
|
457 |
+
local_fs.rm(
|
458 |
+
[
|
459 |
+
local_join(target, "file1"),
|
460 |
+
local_join(target, "file2"),
|
461 |
+
local_join(target, "subfile1"),
|
462 |
+
],
|
463 |
+
recursive=True,
|
464 |
+
)
|
465 |
+
assert local_fs.ls(target) == []
|
466 |
+
|
467 |
+
def test_get_list_of_files_to_new_directory(
|
468 |
+
self,
|
469 |
+
fs,
|
470 |
+
fs_join,
|
471 |
+
fs_bulk_operations_scenario_0,
|
472 |
+
local_fs,
|
473 |
+
local_join,
|
474 |
+
local_target,
|
475 |
+
):
|
476 |
+
# Copy scenario 2b
|
477 |
+
source = fs_bulk_operations_scenario_0
|
478 |
+
|
479 |
+
target = local_target
|
480 |
+
local_fs.mkdir(target)
|
481 |
+
|
482 |
+
source_files = [
|
483 |
+
fs_join(source, "file1"),
|
484 |
+
fs_join(source, "file2"),
|
485 |
+
fs_join(source, "subdir", "subfile1"),
|
486 |
+
]
|
487 |
+
|
488 |
+
fs.get(source_files, local_join(target, "newdir") + "/") # Note trailing slash
|
489 |
+
assert local_fs.isdir(local_join(target, "newdir"))
|
490 |
+
assert local_fs.isfile(local_join(target, "newdir", "file1"))
|
491 |
+
assert local_fs.isfile(local_join(target, "newdir", "file2"))
|
492 |
+
assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
|
493 |
+
|
494 |
+
def test_get_directory_recursive(
|
495 |
+
self, fs, fs_join, fs_path, local_fs, local_join, local_target
|
496 |
+
):
|
497 |
+
# https://github.com/fsspec/filesystem_spec/issues/1062
|
498 |
+
# Recursive cp/get/put of source directory into non-existent target directory.
|
499 |
+
src = fs_join(fs_path, "src")
|
500 |
+
src_file = fs_join(src, "file")
|
501 |
+
fs.mkdir(src)
|
502 |
+
fs.touch(src_file)
|
503 |
+
|
504 |
+
target = local_target
|
505 |
+
|
506 |
+
# get without slash
|
507 |
+
assert not local_fs.exists(target)
|
508 |
+
for loop in range(2):
|
509 |
+
fs.get(src, target, recursive=True)
|
510 |
+
assert local_fs.isdir(target)
|
511 |
+
|
512 |
+
if loop == 0:
|
513 |
+
assert local_fs.isfile(local_join(target, "file"))
|
514 |
+
assert not local_fs.exists(local_join(target, "src"))
|
515 |
+
else:
|
516 |
+
assert local_fs.isfile(local_join(target, "file"))
|
517 |
+
assert local_fs.isdir(local_join(target, "src"))
|
518 |
+
assert local_fs.isfile(local_join(target, "src", "file"))
|
519 |
+
|
520 |
+
local_fs.rm(target, recursive=True)
|
521 |
+
|
522 |
+
# get with slash
|
523 |
+
assert not local_fs.exists(target)
|
524 |
+
for loop in range(2):
|
525 |
+
fs.get(src + "/", target, recursive=True)
|
526 |
+
assert local_fs.isdir(target)
|
527 |
+
assert local_fs.isfile(local_join(target, "file"))
|
528 |
+
assert not local_fs.exists(local_join(target, "src"))
|
529 |
+
|
530 |
+
def test_get_directory_without_files_with_same_name_prefix(
|
531 |
+
self,
|
532 |
+
fs,
|
533 |
+
fs_join,
|
534 |
+
local_fs,
|
535 |
+
local_join,
|
536 |
+
local_target,
|
537 |
+
fs_dir_and_file_with_same_name_prefix,
|
538 |
+
):
|
539 |
+
# Create the test dirs
|
540 |
+
source = fs_dir_and_file_with_same_name_prefix
|
541 |
+
target = local_target
|
542 |
+
|
543 |
+
# Test without glob
|
544 |
+
fs.get(fs_join(source, "subdir"), target, recursive=True)
|
545 |
+
|
546 |
+
assert local_fs.isfile(local_join(target, "subfile.txt"))
|
547 |
+
assert not local_fs.isfile(local_join(target, "subdir.txt"))
|
548 |
+
|
549 |
+
local_fs.rm([local_join(target, "subfile.txt")])
|
550 |
+
assert local_fs.ls(target) == []
|
551 |
+
|
552 |
+
# Test with glob
|
553 |
+
fs.get(fs_join(source, "subdir*"), target, recursive=True)
|
554 |
+
|
555 |
+
assert local_fs.isdir(local_join(target, "subdir"))
|
556 |
+
assert local_fs.isfile(local_join(target, "subdir", "subfile.txt"))
|
557 |
+
assert local_fs.isfile(local_join(target, "subdir.txt"))
|
558 |
+
|
559 |
+
def test_get_with_source_and_destination_as_list(
|
560 |
+
self,
|
561 |
+
fs,
|
562 |
+
fs_join,
|
563 |
+
local_fs,
|
564 |
+
local_join,
|
565 |
+
local_target,
|
566 |
+
fs_10_files_with_hashed_names,
|
567 |
+
):
|
568 |
+
# Create the test dir
|
569 |
+
source = fs_10_files_with_hashed_names
|
570 |
+
target = local_target
|
571 |
+
|
572 |
+
# Create list of files for source and destination
|
573 |
+
source_files = []
|
574 |
+
destination_files = []
|
575 |
+
for i in range(10):
|
576 |
+
hashed_i = md5(str(i).encode("utf-8")).hexdigest()
|
577 |
+
source_files.append(fs_join(source, f"{hashed_i}.txt"))
|
578 |
+
destination_files.append(
|
579 |
+
make_path_posix(local_join(target, f"{hashed_i}.txt"))
|
580 |
+
)
|
581 |
+
|
582 |
+
# Copy and assert order was kept
|
583 |
+
fs.get(rpath=source_files, lpath=destination_files)
|
584 |
+
|
585 |
+
for i in range(10):
|
586 |
+
file_content = local_fs.cat(destination_files[i]).decode("utf-8")
|
587 |
+
assert file_content == str(i)
|
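get.py differs from copy.py mainly in that the destination is the local filesystem, so expected paths are normalized with make_path_posix before comparison (LocalFileSystem reports forward-slash paths even on Windows). A small standalone illustration of that normalization step, under the assumption that a temporary directory is an acceptable stand-in for the local_target fixture:

import os
import tempfile

from fsspec.implementations.local import LocalFileSystem, make_path_posix

local_fs = LocalFileSystem(auto_mkdir=True)

with tempfile.TemporaryDirectory() as tmp:
    # auto_mkdir=True lets touch() create the intermediate "newdir" directory.
    local_fs.touch(os.path.join(tmp, "newdir", "subfile1"))

    found = local_fs.find(tmp)  # LocalFileSystem returns posix-style paths
    expected = [make_path_posix(os.path.join(tmp, "newdir", "subfile1"))]
    assert sorted(found) == sorted(expected)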
.venv/Lib/site-packages/fsspec/tests/abstract/put.py
ADDED
@@ -0,0 +1,591 @@
1 |
+
from hashlib import md5
|
2 |
+
from itertools import product
|
3 |
+
|
4 |
+
import pytest
|
5 |
+
|
6 |
+
from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
|
7 |
+
|
8 |
+
|
9 |
+
class AbstractPutTests:
|
10 |
+
def test_put_file_to_existing_directory(
|
11 |
+
self,
|
12 |
+
fs,
|
13 |
+
fs_join,
|
14 |
+
fs_target,
|
15 |
+
local_join,
|
16 |
+
local_bulk_operations_scenario_0,
|
17 |
+
supports_empty_directories,
|
18 |
+
):
|
19 |
+
# Copy scenario 1a
|
20 |
+
source = local_bulk_operations_scenario_0
|
21 |
+
|
22 |
+
target = fs_target
|
23 |
+
fs.mkdir(target)
|
24 |
+
if not supports_empty_directories:
|
25 |
+
# Force target directory to exist by adding a dummy file
|
26 |
+
fs.touch(fs_join(target, "dummy"))
|
27 |
+
assert fs.isdir(target)
|
28 |
+
|
29 |
+
target_file2 = fs_join(target, "file2")
|
30 |
+
target_subfile1 = fs_join(target, "subfile1")
|
31 |
+
|
32 |
+
# Copy from source directory
|
33 |
+
fs.put(local_join(source, "file2"), target)
|
34 |
+
assert fs.isfile(target_file2)
|
35 |
+
|
36 |
+
# Copy from sub directory
|
37 |
+
fs.put(local_join(source, "subdir", "subfile1"), target)
|
38 |
+
assert fs.isfile(target_subfile1)
|
39 |
+
|
40 |
+
# Remove copied files
|
41 |
+
fs.rm([target_file2, target_subfile1])
|
42 |
+
assert not fs.exists(target_file2)
|
43 |
+
assert not fs.exists(target_subfile1)
|
44 |
+
|
45 |
+
# Repeat with trailing slash on target
|
46 |
+
fs.put(local_join(source, "file2"), target + "/")
|
47 |
+
assert fs.isdir(target)
|
48 |
+
assert fs.isfile(target_file2)
|
49 |
+
|
50 |
+
fs.put(local_join(source, "subdir", "subfile1"), target + "/")
|
51 |
+
assert fs.isfile(target_subfile1)
|
52 |
+
|
53 |
+
def test_put_file_to_new_directory(
|
54 |
+
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
55 |
+
):
|
56 |
+
# Copy scenario 1b
|
57 |
+
source = local_bulk_operations_scenario_0
|
58 |
+
|
59 |
+
target = fs_target
|
60 |
+
fs.mkdir(target)
|
61 |
+
|
62 |
+
fs.put(
|
63 |
+
local_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
|
64 |
+
) # Note trailing slash
|
65 |
+
assert fs.isdir(target)
|
66 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
67 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
68 |
+
|
69 |
+
def test_put_file_to_file_in_existing_directory(
|
70 |
+
self,
|
71 |
+
fs,
|
72 |
+
fs_join,
|
73 |
+
fs_target,
|
74 |
+
local_join,
|
75 |
+
supports_empty_directories,
|
76 |
+
local_bulk_operations_scenario_0,
|
77 |
+
):
|
78 |
+
# Copy scenario 1c
|
79 |
+
source = local_bulk_operations_scenario_0
|
80 |
+
|
81 |
+
target = fs_target
|
82 |
+
fs.mkdir(target)
|
83 |
+
if not supports_empty_directories:
|
84 |
+
# Force target directory to exist by adding a dummy file
|
85 |
+
fs.touch(fs_join(target, "dummy"))
|
86 |
+
assert fs.isdir(target)
|
87 |
+
|
88 |
+
fs.put(local_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
|
89 |
+
assert fs.isfile(fs_join(target, "newfile"))
|
90 |
+
|
91 |
+
def test_put_file_to_file_in_new_directory(
|
92 |
+
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
93 |
+
):
|
94 |
+
# Copy scenario 1d
|
95 |
+
source = local_bulk_operations_scenario_0
|
96 |
+
|
97 |
+
target = fs_target
|
98 |
+
fs.mkdir(target)
|
99 |
+
|
100 |
+
fs.put(
|
101 |
+
local_join(source, "subdir", "subfile1"),
|
102 |
+
fs_join(target, "newdir", "newfile"),
|
103 |
+
)
|
104 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
105 |
+
assert fs.isfile(fs_join(target, "newdir", "newfile"))
|
106 |
+
|
107 |
+
def test_put_directory_to_existing_directory(
|
108 |
+
self,
|
109 |
+
fs,
|
110 |
+
fs_join,
|
111 |
+
fs_target,
|
112 |
+
local_bulk_operations_scenario_0,
|
113 |
+
supports_empty_directories,
|
114 |
+
):
|
115 |
+
# Copy scenario 1e
|
116 |
+
source = local_bulk_operations_scenario_0
|
117 |
+
|
118 |
+
target = fs_target
|
119 |
+
fs.mkdir(target)
|
120 |
+
if not supports_empty_directories:
|
121 |
+
# Force target directory to exist by adding a dummy file
|
122 |
+
dummy = fs_join(target, "dummy")
|
123 |
+
fs.touch(dummy)
|
124 |
+
assert fs.isdir(target)
|
125 |
+
|
126 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
127 |
+
s = fs_join(source, "subdir")
|
128 |
+
if source_slash:
|
129 |
+
s += "/"
|
130 |
+
t = target + "/" if target_slash else target
|
131 |
+
|
132 |
+
# Without recursive does nothing
|
133 |
+
fs.put(s, t)
|
134 |
+
assert fs.ls(target, detail=False) == (
|
135 |
+
[] if supports_empty_directories else [dummy]
|
136 |
+
)
|
137 |
+
|
138 |
+
# With recursive
|
139 |
+
fs.put(s, t, recursive=True)
|
140 |
+
if source_slash:
|
141 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
142 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
143 |
+
assert fs.isdir(fs_join(target, "nesteddir"))
|
144 |
+
assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
|
145 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
146 |
+
|
147 |
+
fs.rm(
|
148 |
+
[
|
149 |
+
fs_join(target, "subfile1"),
|
150 |
+
fs_join(target, "subfile2"),
|
151 |
+
fs_join(target, "nesteddir"),
|
152 |
+
],
|
153 |
+
recursive=True,
|
154 |
+
)
|
155 |
+
else:
|
156 |
+
assert fs.isdir(fs_join(target, "subdir"))
|
157 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile1"))
|
158 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile2"))
|
159 |
+
assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
|
160 |
+
assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
|
161 |
+
|
162 |
+
fs.rm(fs_join(target, "subdir"), recursive=True)
|
163 |
+
assert fs.ls(target, detail=False) == (
|
164 |
+
[] if supports_empty_directories else [dummy]
|
165 |
+
)
|
166 |
+
|
167 |
+
# Limit recursive by maxdepth
|
168 |
+
fs.put(s, t, recursive=True, maxdepth=1)
|
169 |
+
if source_slash:
|
170 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
171 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
172 |
+
assert not fs.exists(fs_join(target, "nesteddir"))
|
173 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
174 |
+
|
175 |
+
fs.rm(
|
176 |
+
[
|
177 |
+
fs_join(target, "subfile1"),
|
178 |
+
fs_join(target, "subfile2"),
|
179 |
+
],
|
180 |
+
recursive=True,
|
181 |
+
)
|
182 |
+
else:
|
183 |
+
assert fs.isdir(fs_join(target, "subdir"))
|
184 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile1"))
|
185 |
+
assert fs.isfile(fs_join(target, "subdir", "subfile2"))
|
186 |
+
assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
|
187 |
+
|
188 |
+
fs.rm(fs_join(target, "subdir"), recursive=True)
|
189 |
+
assert fs.ls(target, detail=False) == (
|
190 |
+
[] if supports_empty_directories else [dummy]
|
191 |
+
)
|
192 |
+
|
193 |
+
def test_put_directory_to_new_directory(
|
194 |
+
self,
|
195 |
+
fs,
|
196 |
+
fs_join,
|
197 |
+
fs_target,
|
198 |
+
local_bulk_operations_scenario_0,
|
199 |
+
supports_empty_directories,
|
200 |
+
):
|
201 |
+
# Copy scenario 1f
|
202 |
+
source = local_bulk_operations_scenario_0
|
203 |
+
|
204 |
+
target = fs_target
|
205 |
+
fs.mkdir(target)
|
206 |
+
|
207 |
+
for source_slash, target_slash in zip([False, True], [False, True]):
|
208 |
+
s = fs_join(source, "subdir")
|
209 |
+
if source_slash:
|
210 |
+
s += "/"
|
211 |
+
t = fs_join(target, "newdir")
|
212 |
+
if target_slash:
|
213 |
+
t += "/"
|
214 |
+
|
215 |
+
# Without recursive does nothing
|
216 |
+
fs.put(s, t)
|
217 |
+
if supports_empty_directories:
|
218 |
+
assert fs.ls(target) == []
|
219 |
+
else:
|
220 |
+
with pytest.raises(FileNotFoundError):
|
221 |
+
fs.ls(target)
|
222 |
+
|
223 |
+
# With recursive
|
224 |
+
fs.put(s, t, recursive=True)
|
225 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
226 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
227 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
228 |
+
assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
|
229 |
+
assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
230 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
231 |
+
|
232 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
233 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
234 |
+
|
235 |
+
# Limit recursive by maxdepth
|
236 |
+
fs.put(s, t, recursive=True, maxdepth=1)
|
237 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
238 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
239 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
240 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
241 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
242 |
+
|
243 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
244 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
245 |
+
|
246 |
+
def test_put_glob_to_existing_directory(
|
247 |
+
self,
|
248 |
+
fs,
|
249 |
+
fs_join,
|
250 |
+
fs_target,
|
251 |
+
local_join,
|
252 |
+
supports_empty_directories,
|
253 |
+
local_bulk_operations_scenario_0,
|
254 |
+
):
|
255 |
+
# Copy scenario 1g
|
256 |
+
source = local_bulk_operations_scenario_0
|
257 |
+
|
258 |
+
target = fs_target
|
259 |
+
fs.mkdir(target)
|
260 |
+
if not supports_empty_directories:
|
261 |
+
# Force target directory to exist by adding a dummy file
|
262 |
+
dummy = fs_join(target, "dummy")
|
263 |
+
fs.touch(dummy)
|
264 |
+
assert fs.isdir(target)
|
265 |
+
|
266 |
+
for target_slash in [False, True]:
|
267 |
+
t = target + "/" if target_slash else target
|
268 |
+
|
269 |
+
# Without recursive
|
270 |
+
fs.put(local_join(source, "subdir", "*"), t)
|
271 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
272 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
273 |
+
assert not fs.isdir(fs_join(target, "nesteddir"))
|
274 |
+
assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
|
275 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
276 |
+
|
277 |
+
fs.rm(
|
278 |
+
[
|
279 |
+
fs_join(target, "subfile1"),
|
280 |
+
fs_join(target, "subfile2"),
|
281 |
+
],
|
282 |
+
recursive=True,
|
283 |
+
)
|
284 |
+
assert fs.ls(target, detail=False) == (
|
285 |
+
[] if supports_empty_directories else [dummy]
|
286 |
+
)
|
287 |
+
|
288 |
+
# With recursive
|
289 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
290 |
+
fs.put(local_join(source, "subdir", glob), t, recursive=recursive)
|
291 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
292 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
293 |
+
assert fs.isdir(fs_join(target, "nesteddir"))
|
294 |
+
assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
|
295 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
296 |
+
|
297 |
+
fs.rm(
|
298 |
+
[
|
299 |
+
fs_join(target, "subfile1"),
|
300 |
+
fs_join(target, "subfile2"),
|
301 |
+
fs_join(target, "nesteddir"),
|
302 |
+
],
|
303 |
+
recursive=True,
|
304 |
+
)
|
305 |
+
assert fs.ls(target, detail=False) == (
|
306 |
+
[] if supports_empty_directories else [dummy]
|
307 |
+
)
|
308 |
+
|
309 |
+
# Limit recursive by maxdepth
|
310 |
+
fs.put(
|
311 |
+
local_join(source, "subdir", glob),
|
312 |
+
t,
|
313 |
+
recursive=recursive,
|
314 |
+
maxdepth=1,
|
315 |
+
)
|
316 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
317 |
+
assert fs.isfile(fs_join(target, "subfile2"))
|
318 |
+
assert not fs.exists(fs_join(target, "nesteddir"))
|
319 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
320 |
+
|
321 |
+
fs.rm(
|
322 |
+
[
|
323 |
+
fs_join(target, "subfile1"),
|
324 |
+
fs_join(target, "subfile2"),
|
325 |
+
],
|
326 |
+
recursive=True,
|
327 |
+
)
|
328 |
+
assert fs.ls(target, detail=False) == (
|
329 |
+
[] if supports_empty_directories else [dummy]
|
330 |
+
)
|
331 |
+
|
332 |
+
def test_put_glob_to_new_directory(
|
333 |
+
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
334 |
+
):
|
335 |
+
# Copy scenario 1h
|
336 |
+
source = local_bulk_operations_scenario_0
|
337 |
+
|
338 |
+
target = fs_target
|
339 |
+
fs.mkdir(target)
|
340 |
+
|
341 |
+
for target_slash in [False, True]:
|
342 |
+
t = fs_join(target, "newdir")
|
343 |
+
if target_slash:
|
344 |
+
t += "/"
|
345 |
+
|
346 |
+
# Without recursive
|
347 |
+
fs.put(local_join(source, "subdir", "*"), t)
|
348 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
349 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
350 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
351 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
352 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
353 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
354 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
355 |
+
|
356 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
357 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
358 |
+
|
359 |
+
# With recursive
|
360 |
+
for glob, recursive in zip(["*", "**"], [True, False]):
|
361 |
+
fs.put(local_join(source, "subdir", glob), t, recursive=recursive)
|
362 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
363 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
364 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
365 |
+
assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
|
366 |
+
assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
|
367 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
368 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
369 |
+
|
370 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
371 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
372 |
+
|
373 |
+
# Limit recursive by maxdepth
|
374 |
+
fs.put(
|
375 |
+
local_join(source, "subdir", glob),
|
376 |
+
t,
|
377 |
+
recursive=recursive,
|
378 |
+
maxdepth=1,
|
379 |
+
)
|
380 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
381 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
382 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile2"))
|
383 |
+
assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
|
384 |
+
assert not fs.exists(fs_join(target, "subdir"))
|
385 |
+
assert not fs.exists(fs_join(target, "newdir", "subdir"))
|
386 |
+
|
387 |
+
fs.rm(fs_join(target, "newdir"), recursive=True)
|
388 |
+
assert not fs.exists(fs_join(target, "newdir"))
|
389 |
+
|
390 |
+
@pytest.mark.parametrize(
|
391 |
+
GLOB_EDGE_CASES_TESTS["argnames"],
|
392 |
+
GLOB_EDGE_CASES_TESTS["argvalues"],
|
393 |
+
)
|
394 |
+
def test_put_glob_edge_cases(
|
395 |
+
self,
|
396 |
+
path,
|
397 |
+
recursive,
|
398 |
+
maxdepth,
|
399 |
+
expected,
|
400 |
+
fs,
|
401 |
+
fs_join,
|
402 |
+
fs_target,
|
403 |
+
local_glob_edge_cases_files,
|
404 |
+
local_join,
|
405 |
+
fs_sanitize_path,
|
406 |
+
):
|
407 |
+
# Copy scenario 1g
|
408 |
+
source = local_glob_edge_cases_files
|
409 |
+
|
410 |
+
target = fs_target
|
411 |
+
|
412 |
+
for new_dir, target_slash in product([True, False], [True, False]):
|
413 |
+
fs.mkdir(target)
|
414 |
+
|
415 |
+
t = fs_join(target, "newdir") if new_dir else target
|
416 |
+
t = t + "/" if target_slash else t
|
417 |
+
|
418 |
+
fs.put(local_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
|
419 |
+
|
420 |
+
output = fs.find(target)
|
421 |
+
if new_dir:
|
422 |
+
prefixed_expected = [
|
423 |
+
fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
|
424 |
+
]
|
425 |
+
else:
|
426 |
+
prefixed_expected = [
|
427 |
+
fs_sanitize_path(fs_join(target, p)) for p in expected
|
428 |
+
]
|
429 |
+
assert sorted(output) == sorted(prefixed_expected)
|
430 |
+
|
431 |
+
try:
|
432 |
+
fs.rm(target, recursive=True)
|
433 |
+
except FileNotFoundError:
|
434 |
+
pass
|
435 |
+
|
436 |
+
def test_put_list_of_files_to_existing_directory(
|
437 |
+
self,
|
438 |
+
fs,
|
439 |
+
fs_join,
|
440 |
+
fs_target,
|
441 |
+
local_join,
|
442 |
+
local_bulk_operations_scenario_0,
|
443 |
+
supports_empty_directories,
|
444 |
+
):
|
445 |
+
# Copy scenario 2a
|
446 |
+
source = local_bulk_operations_scenario_0
|
447 |
+
|
448 |
+
target = fs_target
|
449 |
+
fs.mkdir(target)
|
450 |
+
if not supports_empty_directories:
|
451 |
+
# Force target directory to exist by adding a dummy file
|
452 |
+
dummy = fs_join(target, "dummy")
|
453 |
+
fs.touch(dummy)
|
454 |
+
assert fs.isdir(target)
|
455 |
+
|
456 |
+
source_files = [
|
457 |
+
local_join(source, "file1"),
|
458 |
+
local_join(source, "file2"),
|
459 |
+
local_join(source, "subdir", "subfile1"),
|
460 |
+
]
|
461 |
+
|
462 |
+
for target_slash in [False, True]:
|
463 |
+
t = target + "/" if target_slash else target
|
464 |
+
|
465 |
+
fs.put(source_files, t)
|
466 |
+
assert fs.isfile(fs_join(target, "file1"))
|
467 |
+
assert fs.isfile(fs_join(target, "file2"))
|
468 |
+
assert fs.isfile(fs_join(target, "subfile1"))
|
469 |
+
|
470 |
+
fs.rm(
|
471 |
+
[
|
472 |
+
fs_join(target, "file1"),
|
473 |
+
fs_join(target, "file2"),
|
474 |
+
fs_join(target, "subfile1"),
|
475 |
+
],
|
476 |
+
recursive=True,
|
477 |
+
)
|
478 |
+
assert fs.ls(target, detail=False) == (
|
479 |
+
[] if supports_empty_directories else [dummy]
|
480 |
+
)
|
481 |
+
|
482 |
+
def test_put_list_of_files_to_new_directory(
|
483 |
+
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
484 |
+
):
|
485 |
+
# Copy scenario 2b
|
486 |
+
source = local_bulk_operations_scenario_0
|
487 |
+
|
488 |
+
target = fs_target
|
489 |
+
fs.mkdir(target)
|
490 |
+
|
491 |
+
source_files = [
|
492 |
+
local_join(source, "file1"),
|
493 |
+
local_join(source, "file2"),
|
494 |
+
local_join(source, "subdir", "subfile1"),
|
495 |
+
]
|
496 |
+
|
497 |
+
fs.put(source_files, fs_join(target, "newdir") + "/") # Note trailing slash
|
498 |
+
assert fs.isdir(fs_join(target, "newdir"))
|
499 |
+
assert fs.isfile(fs_join(target, "newdir", "file1"))
|
500 |
+
assert fs.isfile(fs_join(target, "newdir", "file2"))
|
501 |
+
assert fs.isfile(fs_join(target, "newdir", "subfile1"))
|
502 |
+
|
503 |
+
def test_put_directory_recursive(
|
504 |
+
self, fs, fs_join, fs_target, local_fs, local_join, local_path
|
505 |
+
):
|
506 |
+
# https://github.com/fsspec/filesystem_spec/issues/1062
|
507 |
+
# Recursive cp/get/put of source directory into non-existent target directory.
|
508 |
+
src = local_join(local_path, "src")
|
509 |
+
src_file = local_join(src, "file")
|
510 |
+
local_fs.mkdir(src)
|
511 |
+
local_fs.touch(src_file)
|
512 |
+
|
513 |
+
target = fs_target
|
514 |
+
|
515 |
+
# put without slash
|
516 |
+
assert not fs.exists(target)
|
517 |
+
for loop in range(2):
|
518 |
+
fs.put(src, target, recursive=True)
|
519 |
+
assert fs.isdir(target)
|
520 |
+
|
521 |
+
if loop == 0:
|
522 |
+
assert fs.isfile(fs_join(target, "file"))
|
523 |
+
assert not fs.exists(fs_join(target, "src"))
|
524 |
+
else:
|
525 |
+
assert fs.isfile(fs_join(target, "file"))
|
526 |
+
assert fs.isdir(fs_join(target, "src"))
|
527 |
+
assert fs.isfile(fs_join(target, "src", "file"))
|
528 |
+
|
529 |
+
fs.rm(target, recursive=True)
|
530 |
+
|
531 |
+
# put with slash
|
532 |
+
assert not fs.exists(target)
|
533 |
+
for loop in range(2):
|
534 |
+
fs.put(src + "/", target, recursive=True)
|
535 |
+
assert fs.isdir(target)
|
536 |
+
assert fs.isfile(fs_join(target, "file"))
|
537 |
+
assert not fs.exists(fs_join(target, "src"))
|
538 |
+
|
539 |
+
def test_put_directory_without_files_with_same_name_prefix(
|
540 |
+
self,
|
541 |
+
fs,
|
542 |
+
fs_join,
|
543 |
+
fs_target,
|
544 |
+
local_join,
|
545 |
+
local_dir_and_file_with_same_name_prefix,
|
546 |
+
supports_empty_directories,
|
547 |
+
):
|
548 |
+
# Create the test dirs
|
549 |
+
source = local_dir_and_file_with_same_name_prefix
|
550 |
+
target = fs_target
|
551 |
+
|
552 |
+
# Test without glob
|
553 |
+
fs.put(local_join(source, "subdir"), fs_target, recursive=True)
|
554 |
+
|
555 |
+
assert fs.isfile(fs_join(fs_target, "subfile.txt"))
|
556 |
+
assert not fs.isfile(fs_join(fs_target, "subdir.txt"))
|
557 |
+
|
558 |
+
fs.rm([fs_join(target, "subfile.txt")])
|
559 |
+
if supports_empty_directories:
|
560 |
+
assert fs.ls(target) == []
|
561 |
+
else:
|
562 |
+
assert not fs.exists(target)
|
563 |
+
|
564 |
+
# Test with glob
|
565 |
+
fs.put(local_join(source, "subdir*"), fs_target, recursive=True)
|
566 |
+
|
567 |
+
assert fs.isdir(fs_join(fs_target, "subdir"))
|
568 |
+
assert fs.isfile(fs_join(fs_target, "subdir", "subfile.txt"))
|
569 |
+
assert fs.isfile(fs_join(fs_target, "subdir.txt"))
|
570 |
+
|
571 |
+
def test_copy_with_source_and_destination_as_list(
|
572 |
+
self, fs, fs_target, fs_join, local_join, local_10_files_with_hashed_names
|
573 |
+
):
|
574 |
+
# Create the test dir
|
575 |
+
source = local_10_files_with_hashed_names
|
576 |
+
target = fs_target
|
577 |
+
|
578 |
+
# Create list of files for source and destination
|
579 |
+
source_files = []
|
580 |
+
destination_files = []
|
581 |
+
for i in range(10):
|
582 |
+
hashed_i = md5(str(i).encode("utf-8")).hexdigest()
|
583 |
+
source_files.append(local_join(source, f"{hashed_i}.txt"))
|
584 |
+
destination_files.append(fs_join(target, f"{hashed_i}.txt"))
|
585 |
+
|
586 |
+
# Copy and assert order was kept
|
587 |
+
fs.put(lpath=source_files, rpath=destination_files)
|
588 |
+
|
589 |
+
for i in range(10):
|
590 |
+
file_content = fs.cat(destination_files[i]).decode("utf-8")
|
591 |
+
assert file_content == str(i)
|
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
+uv
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 Paul O'Leary McCann
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/LICENSE.mecab
ADDED
@@ -0,0 +1,29 @@
+Copyright (c) 2001-2008, Taku Kudo
+Copyright (c) 2004-2008, Nippon Telegraph and Telephone Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are
+permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above
+   copyright notice, this list of conditions and the
+   following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the
+   following disclaimer in the documentation and/or other
+   materials provided with the distribution.
+
+ * Neither the name of the Nippon Telegraph and Telegraph Corporation
+   nor the names of its contributors may be used to endorse or
+   promote products derived from this software without specific
+   prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/METADATA
ADDED
@@ -0,0 +1,157 @@
+Metadata-Version: 2.1
+Name: fugashi
+Version: 1.4.0
+Summary: A Cython MeCab wrapper for fast, pythonic Japanese tokenization.
+Home-page: https://github.com/polm/fugashi
+Author: Paul O'Leary McCann
+Author-email: [email protected]
+License: MIT
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Natural Language :: Japanese
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+License-File: LICENSE.mecab
+Provides-Extra: unidic
+Requires-Dist: unidic; extra == "unidic"
+Provides-Extra: unidic-lite
+Requires-Dist: unidic-lite; extra == "unidic-lite"
+
+[](https://fugashi.streamlit.app)
+[](https://pypi.org/project/fugashi/)
+
+[](https://pypi.org/project/fugashi/)
+
+
+# fugashi
+
+<img src="https://github.com/polm/fugashi/raw/master/fugashi.png" width=125 height=125 alt="fugashi by Irasutoya" />
+
+fugashi is a Cython wrapper for [MeCab](https://taku910.github.io/mecab/), a
+Japanese tokenizer and morphological analysis tool. Wheels are provided for
+Linux, OSX (Intel), and Win64, and UniDic is [easy to install](#installing-a-dictionary).
+
+**issueを英語で書く必要はありません。**
+
+Check out the [interactive demo][], see the [blog post](https://www.dampfkraft.com/nlp/fugashi.html) for background
+on why fugashi exists and some of the design decisions, or see [this
+guide][guide] for a basic introduction to Japanese tokenization.
+
+[guide]: https://www.dampfkraft.com/nlp/how-to-tokenize-japanese.html
+[interactive demo]: https://fugashi.streamlit.app
+
+If you are on a platform for which wheels are not provided, you'll need to
+install MeCab first. It's recommended you install [from
+source](https://github.com/taku910/mecab). If you need to build from source on
+Windows, [@chezou's fork](https://github.com/chezou/mecab) is recommended; see
+[issue #44](https://github.com/polm/fugashi/issues/44#issuecomment-954426115)
+for an explanation of the problems with the official repo.
+
+Known platforms without wheels:
+
+- musl-based distros like alpine [#77](https://github.com/polm/fugashi/issues/77)
+- PowerPC
+- Windows 32bit
+
+## Usage
+
+```python
+from fugashi import Tagger
+
+tagger = Tagger('-Owakati')
+text = "麩菓子は、麩を主材料とした日本の菓子。"
+tagger.parse(text)
+# => '麩 菓子 は 、 麩 を 主材 料 と し た 日本 の 菓子 。'
+for word in tagger(text):
+    print(word, word.feature.lemma, word.pos, sep='\t')
+    # "feature" is the Unidic feature data as a named tuple
+```
+
+## Installing a Dictionary
+
+fugashi requires a dictionary. [UniDic](https://unidic.ninjal.ac.jp/) is
+recommended, and two easy-to-install versions are provided.
+
+- [unidic-lite](https://github.com/polm/unidic-lite), a slightly modified version 2.1.2 of Unidic (from 2013) that's relatively small
+- [unidic](https://github.com/polm/unidic-py), the latest UniDic 3.1.0, which is 770MB on disk and requires a separate download step
+
+If you just want to make sure things work you can start with `unidic-lite`, but
+for more serious processing `unidic` is recommended. For production use you'll
+generally want to generate your own dictionary too; for details see the [MeCab
+documentation](https://taku910.github.io/mecab/learn.html).
+
+To get either of these dictionaries, you can install them directly using `pip`
+or do the below:
+
+```sh
+pip install 'fugashi[unidic-lite]'
+
+# The full version of UniDic requires a separate download step
+pip install 'fugashi[unidic]'
+python -m unidic download
+```
+
+For more information on the different MeCab dictionaries available, see [this article](https://www.dampfkraft.com/nlp/japanese-tokenizer-dictionaries.html).
+
+## Dictionary Use
+
+fugashi is written with the assumption you'll use Unidic to process Japanese,
+but it supports arbitrary dictionaries.
+
+If you're using a dictionary besides Unidic you can use the GenericTagger like this:
+
+```python
+from fugashi import GenericTagger
+tagger = GenericTagger()
+
+# parse can be used as normal
+tagger.parse('something')
+# features from the dictionary can be accessed by field numbers
+for word in tagger(text):
+    print(word.surface, word.feature[0])
+```
+
+You can also create a dictionary wrapper to get feature information as a named tuple.
+
+```python
+from fugashi import GenericTagger, create_feature_wrapper
+CustomFeatures = create_feature_wrapper('CustomFeatures', 'alpha beta gamma')
+tagger = GenericTagger(wrapper=CustomFeatures)
+for word in tagger.parseToNodeList(text):
+    print(word.surface, word.feature.alpha)
+```
+
+## Citation
+
+If you use fugashi in research, it would be appreciated if you cite this paper. You can read it at [the ACL Anthology](https://www.aclweb.org/anthology/2020.nlposs-1.7/) or [on Arxiv](https://arxiv.org/abs/2010.06858).
+
+    @inproceedings{mccann-2020-fugashi,
+        title = "fugashi, a Tool for Tokenizing {J}apanese in Python",
+        author = "McCann, Paul",
+        booktitle = "Proceedings of Second Workshop for NLP Open Source Software (NLP-OSS)",
+        month = nov,
+        year = "2020",
+        address = "Online",
+        publisher = "Association for Computational Linguistics",
+        url = "https://www.aclweb.org/anthology/2020.nlposs-1.7",
+        pages = "44--51",
+        abstract = "Recent years have seen an increase in the number of large-scale multilingual NLP projects. However, even in such projects, languages with special processing requirements are often excluded. One such language is Japanese. Japanese is written without spaces, tokenization is non-trivial, and while high quality open source tokenizers exist they can be hard to use and lack English documentation. This paper introduces fugashi, a MeCab wrapper for Python, and gives an introduction to tokenizing Japanese.",
+    }
+
+## Alternatives
+
+If you have a problem with fugashi feel free to open an issue. However, there
+are some cases where it might be better to use a different library.
+
+- If you don't want to deal with installing MeCab at all, try [SudachiPy](https://github.com/WorksApplications/sudachi.rs).
+- If you need to work with Korean, try [pymecab-ko](https://github.com/NoUnique/pymecab-ko) or [KoNLPy](https://konlpy.org/en/latest/).
+
+## License and Copyright Notice
+
+fugashi is released under the terms of the [MIT license](./LICENSE). Please
+copy it far and wide.
+
+fugashi is a wrapper for MeCab, and fugashi wheels include MeCab binaries.
+MeCab is copyrighted free software by Taku Kudo `<[email protected]>` and Nippon
+Telegraph and Telephone Corporation, and is redistributed under the [BSD
+License](./LICENSE.mecab).
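Editor's note (not part of the vendored METADATA above): for quick reference, here is a minimal sketch that combines the Tagger usage shown in that README with the `unidic-lite` extra. It assumes `pip install 'fugashi[unidic-lite]'` has already been run so that `Tagger()` can find a dictionary.

```python
# Minimal sketch based on the fugashi README above; assumes the unidic-lite
# extra is installed (pip install 'fugashi[unidic-lite]').
from fugashi import Tagger

tagger = Tagger()  # picks up the installed UniDic dictionary by default
text = "麩菓子は、麩を主材料とした日本の菓子。"

# Each word exposes its surface form and UniDic features, as in the README.
for word in tagger(text):
    print(word.surface, word.feature.lemma, word.pos, sep="\t")
```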
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/RECORD
ADDED
@@ -0,0 +1,16 @@
+..\..\Scripts\fugashi-build-dict.exe,sha256=LZb2-amxDd5IoFQbVAzDDziSB4U-xnLpaTEW1UJqCRU,41432
+..\..\Scripts\fugashi-info.exe,sha256=gy05XrnUBSd9AORdNnNmHG6DkudXGw03U4uul0n6NTo,41420
+..\..\Scripts\fugashi.exe,sha256=dgp3IR-hWxA25GVyVKrbnMck3KTU7emSHsucry_rwZY,41420
+..\..\lib\site-packages\fugashi\libmecab.dll,sha256=2N3AeRQ3zoxKGHrnxpovaDzkI2g7el7P2hxM70NsHKs,1910784
+fugashi-1.4.0.dist-info/LICENSE,sha256=2vfu3p70KKWeqFRofnatHm5flYb_aZjXy2GJqHiQRvk,1097
+fugashi-1.4.0.dist-info/LICENSE.mecab,sha256=Pb-TvC2ag2gCYgej6C7fwu67r-83z1cBIU9C_dP4pxk,1631
+fugashi-1.4.0.dist-info/METADATA,sha256=lPJ1OXNya8_ikeo7cUopng_cDpk8Np9LOdULri2-X1g,7059
+fugashi-1.4.0.dist-info/RECORD,,
+fugashi-1.4.0.dist-info/WHEEL,sha256=zq3MnTB53_Huh0eFGROKhLNn5cmUbG6gUFCG6-LWXTY,99
+fugashi-1.4.0.dist-info/entry_points.txt,sha256=jV282mMQTVkhqOVFTdm_ZQ03pJndByW2JtrSa_a2Wms,121
+fugashi-1.4.0.dist-info/top_level.txt,sha256=1CQTgPUFi4hjTQg2nHdIR-oH6EfyXtpLhiUglCmuOoM,8
+fugashi-1.4.0.dist-info\INSTALLER,sha256=5hhM4Q4mYTT9z6QB6PGpUAW81PGNFrYrdXMj4oM_6ak,2
+fugashi-1.4.0.dist-info\REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+fugashi/__init__.py,sha256=KW98SIOE-cAtzst_n-YNtEhYznwWLTX6tm_5XJJlOPA,26
+fugashi/cli.py,sha256=wwLj3Nkl1Dtx1SjDeAAaYB3KWsRp5PALqmhdvKN4ZAk,1553
+fugashi/fugashi.cp39-win_amd64.pyd,sha256=XRyL_8gC8WWR6OLV-mdqnFtQHJ387AqiLnT6aiQzVag,112640
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/REQUESTED
ADDED
File without changes
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/WHEEL
ADDED
@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (75.3.0)
+Root-Is-Purelib: false
+Tag: cp39-cp39-win_amd64
+
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/entry_points.txt
ADDED
@@ -0,0 +1,4 @@
+[console_scripts]
+fugashi = fugashi.cli:main
+fugashi-build-dict = fugashi.cli:build_dict
+fugashi-info = fugashi.cli:info
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+fugashi
.venv/Lib/site-packages/fugashi/__init__.py
ADDED
@@ -0,0 +1,2 @@
+from .fugashi import *
+
.venv/Lib/site-packages/fugashi/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (203 Bytes).
.venv/Lib/site-packages/fugashi/cli.py
ADDED
@@ -0,0 +1,47 @@
+from fugashi import GenericTagger, Tagger, build_dictionary
+import sys
+import fileinput
+
+def main():
+    """
+    This is a simple wrapper for fugashi so you can test it from the command line.
+    Like the mecab binary, it treats each line of stdin as one sentence. You can
+    pass tagger arguments here too.
+    """
+    args = ' '.join(sys.argv[1:])
+
+    # This should work if you specify a different dictionary,
+    # but it should also work with the pip unidic.
+    # Try the GenericTagger and then try the Unidic tagger.
+    try:
+        tagger = GenericTagger(args, quiet=True)
+    except RuntimeError:
+        tagger = Tagger(args)
+
+    for line in fileinput.input([]):
+        print(tagger.parse(line.strip()))
+
+def info():
+    """Print configuration info."""
+    args = ' '.join(sys.argv[1:])
+    try:
+        tagger = GenericTagger(args, quiet=True)
+    except RuntimeError:
+        tagger = Tagger(args)
+    #TODO get the fugashi version here too
+    print("Fugashi dictionary info:")
+    print("-----")
+    for di in tagger.dictionary_info:
+        for field in 'version size charset filename'.split():
+            print( (field + ':').ljust(10), di[field])
+    print('-----')
+
+def build_dict():
+    """EXPERIMENTAL A wrapper for MeCab's user dictionary building command.
+
+    This also defaults to utf8.
+    """
+    # TODO simplify using pip-installed dictionaries as base
+    args = sys.argv[0] + " -f utf8 -t utf8 " + ' '.join(sys.argv[1:])
+    print(args)
+    build_dictionary(args)
.venv/Lib/site-packages/fugashi/fugashi.cp39-win_amd64.pyd
ADDED
Binary file (113 kB).
.venv/Lib/site-packages/functorch/_C.cp39-win_amd64.pyd
ADDED
Binary file (322 kB).
.venv/Lib/site-packages/functorch/__init__.py
ADDED
@@ -0,0 +1,39 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+import torch
+from torch._functorch.deprecated import (
+    combine_state_for_ensemble,
+    functionalize,
+    grad,
+    grad_and_value,
+    hessian,
+    jacfwd,
+    jacrev,
+    jvp,
+    make_functional,
+    make_functional_with_buffers,
+    vjp,
+    vmap,
+)
+
+# utilities. Maybe these should go in their own namespace in the future?
+from torch._functorch.make_functional import (
+    FunctionalModule,
+    FunctionalModuleWithBuffers,
+)
+
+# Was never documented
+from torch._functorch.python_key import make_fx
+
+
+# Top-level APIs. Please think carefully before adding something to the
+# top-level namespace:
+# - private helper functions should go into torch._functorch
+# - very experimental things should go into functorch.experimental
+# - compilation related things should go into functorch.compile
+
+
+__version__ = torch.__version__
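Editor's note (not part of the vendored file above): the deprecated top-level aliases re-exported there, such as `grad` and `vmap`, can still be composed as in the upstream functorch examples. A minimal sketch, assuming a torch build where these aliases still forward to `torch._functorch`:

```python
# Minimal sketch using the deprecated top-level functorch aliases re-exported above.
import torch
from functorch import grad, vmap

def f(x):
    return torch.sin(x).sum()

x = torch.randn(3)
print(grad(f)(x))             # elementwise cos(x), the gradient of sin(x).sum()
print(vmap(torch.square)(x))  # batched square over the leading dimension
```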
.venv/Lib/site-packages/functorch/_src/make_functional/__init__.py
ADDED
@@ -0,0 +1,4 @@
+# This file has moved to under torch/_functorch. It is not public API.
+# If you are not a PyTorch developer and you are relying on the following
+# imports, please file an issue.
+from torch._functorch.make_functional import _swap_state
.venv/Lib/site-packages/functorch/_src/vmap/__init__.py
ADDED
@@ -0,0 +1,16 @@
+# This file has moved to under torch/_functorch. It is not public API.
+# If you are not a PyTorch developer and you are relying on the following
+# imports, please file an issue.
+from torch._functorch.vmap import (
+    _add_batch_dim,
+    _broadcast_to_and_flatten,
+    _create_batched_inputs,
+    _get_name,
+    _process_batched_inputs,
+    _remove_batch_dim,
+    _unwrap_batched,
+    _validate_and_get_batch_size,
+    Tensor,
+    tree_flatten,
+    tree_unflatten,
+)
.venv/Lib/site-packages/functorch/compile/__init__.py
ADDED
@@ -0,0 +1,30 @@
+from torch._functorch import config
+from torch._functorch.aot_autograd import (
+    aot_function,
+    aot_module,
+    aot_module_simplified,
+    compiled_function,
+    compiled_module,
+    get_aot_compilation_context,
+    get_aot_graph_name,
+    get_graph_being_compiled,
+    make_boxed_compiler,
+    make_boxed_func,
+)
+from torch._functorch.compilers import (
+    debug_compile,
+    default_decompositions,
+    draw_graph_compile,
+    memory_efficient_fusion,
+    nnc_jit,
+    nop,
+    print_compile,
+    ts_compile,
+)
+from torch._functorch.fx_minifier import minifier
+from torch._functorch.partitioners import (
+    default_partition,
+    draw_graph,
+    min_cut_rematerialization_partition,
+)
+from torch._functorch.python_key import pythonkey_decompose
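Editor's note (not part of the vendored file above): a minimal sketch of the AOT Autograd entry points re-exported there. It assumes they behave as in upstream `torch._functorch`; `nop` is the pass-through compiler from the same module, and a real setup would swap in `ts_compile` or a custom FX-graph compiler.

```python
# Minimal sketch of the AOT Autograd entry points re-exported above.
# `nop` is the pass-through compiler; this only traces and replays the graphs.
import torch
from functorch.compile import aot_function, nop

def f(x):
    return torch.sin(x).sum()

aot_f = aot_function(f, fw_compiler=nop)
x = torch.randn(4, requires_grad=True)
aot_f(x).backward()   # backward also runs through the (pass-through) compiled graph
print(x.grad)
```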
.venv/Lib/site-packages/functorch/dim/batch_tensor.py
ADDED
@@ -0,0 +1,26 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from contextlib import contextmanager
+
+from torch._C._functorch import _vmap_add_layers, _vmap_remove_layers
+
+
+_enabled = False
+
+
+@contextmanager
+def _enable_layers(dims):
+    global _enabled
+    assert not _enabled
+    input = sorted((d._level, d.size) for d in dims if not isinstance(d, int))
+    n = len(input)
+    try:
+        _vmap_add_layers(input)
+        _enabled = True
+        yield
+    finally:
+        _enabled = False
+        _vmap_remove_layers(n)
.venv/Lib/site-packages/functorch/dim/delayed_mul_tensor.py
ADDED
@@ -0,0 +1,77 @@
1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
2 |
+
# All rights reserved.
|
3 |
+
#
|
4 |
+
# This source code is licensed under the BSD-style license found in the
|
5 |
+
# LICENSE file in the root directory of this source tree.
|
6 |
+
import torch
|
7 |
+
|
8 |
+
from . import _Tensor, Tensor
|
9 |
+
from .reference import _dims, _enable_layers, llist, ltuple
|
10 |
+
|
11 |
+
|
12 |
+
class DelayedMulTensor(_Tensor):
|
13 |
+
def __init__(self, lhs, rhs):
|
14 |
+
self._lhs, self._rhs = lhs, rhs
|
15 |
+
self._data = None
|
16 |
+
self._levels_data = None
|
17 |
+
self._has_device = lhs._has_device or rhs._has_device
|
18 |
+
self._batchtensor_data = None
|
19 |
+
self._tensor_data = None
|
20 |
+
|
21 |
+
@property
|
22 |
+
def _levels(self):
|
23 |
+
if self._levels_data is None:
|
24 |
+
levels = llist(self._lhs._levels)
|
25 |
+
for l in self._rhs._levels:
|
26 |
+
if l not in levels:
|
27 |
+
levels.append(l)
|
28 |
+
self._levels_data = ltuple(levels)
|
29 |
+
return self._levels_data
|
30 |
+
|
31 |
+
@property
|
32 |
+
def _batchtensor(self):
|
33 |
+
if self._batchtensor_data is None:
|
34 |
+
with _enable_layers(self._levels):
|
35 |
+
print("bt multiply fallback")
|
36 |
+
self._batchtensor_data = self._lhs._batchtensor * self._rhs._batchtensor
|
37 |
+
return self._batchtensor_data
|
38 |
+
|
39 |
+
@property
|
40 |
+
def _tensor(self):
|
41 |
+
if self._tensor_data is None:
|
42 |
+
self._tensor_data = Tensor.from_batched(
|
43 |
+
self._batchtensor, self._has_device
|
44 |
+
)._tensor
|
45 |
+
return self._tensor_data
|
46 |
+
|
47 |
+
@property
|
48 |
+
def ndim(self):
|
49 |
+
return self._batchtensor.ndim
|
50 |
+
|
51 |
+
@property
|
52 |
+
def dims(self):
|
53 |
+
return ltuple(super().dims)
|
54 |
+
|
55 |
+
def sum(self, dim):
|
56 |
+
dims = _dims(dim, 0, False, False)
|
57 |
+
n = ord("a")
|
58 |
+
all_levels = self._levels
|
59 |
+
|
60 |
+
def to_char(d):
|
61 |
+
return chr(n + all_levels.index(d))
|
62 |
+
|
63 |
+
plhs, levelslhs = self._lhs._tensor, self._lhs._levels
|
64 |
+
prhs, levelsrhs = self._rhs._tensor, self._rhs._levels
|
65 |
+
new_dims = tuple(d for d in self.dims if d not in dims)
|
66 |
+
new_levels = [l for l in self._levels if l not in dims]
|
67 |
+
fmt = "".join(
|
68 |
+
[
|
69 |
+
*(to_char(d) for d in levelslhs),
|
70 |
+
",",
|
71 |
+
*(to_char(d) for d in levelsrhs),
|
72 |
+
"->",
|
73 |
+
*(to_char(d) for d in new_levels),
|
74 |
+
]
|
75 |
+
)
|
76 |
+
result_data = torch.einsum(fmt, (plhs, prhs))
|
77 |
+
return Tensor.from_positional(result_data, new_levels, True)
|
.venv/Lib/site-packages/functorch/dim/dim.py
ADDED
@@ -0,0 +1,121 @@
1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
2 |
+
# All rights reserved.
|
3 |
+
#
|
4 |
+
# This source code is licensed under the BSD-style license found in the
|
5 |
+
# LICENSE file in the root directory of this source tree.
|
6 |
+
import dis
|
7 |
+
import inspect
|
8 |
+
from dataclasses import dataclass
|
9 |
+
from typing import Union
|
10 |
+
|
11 |
+
from . import DimList
|
12 |
+
|
13 |
+
|
14 |
+
_vmap_levels = []
|
15 |
+
|
16 |
+
|
17 |
+
@dataclass
|
18 |
+
class LevelInfo:
|
19 |
+
level: int
|
20 |
+
alive: bool = True
|
21 |
+
|
22 |
+
|
23 |
+
class Dim:
|
24 |
+
def __init__(self, name: str, size: Union[None, int] = None):
|
25 |
+
self.name = name
|
26 |
+
self._size = None
|
27 |
+
self._vmap_level = None
|
28 |
+
if size is not None:
|
29 |
+
self.size = size
|
30 |
+
|
31 |
+
def __del__(self):
|
32 |
+
if self._vmap_level is not None:
|
33 |
+
_vmap_active_levels[self._vmap_stack].alive = False # noqa: F821
|
34 |
+
while (
|
35 |
+
not _vmap_levels[-1].alive
|
36 |
+
and current_level() == _vmap_levels[-1].level # noqa: F821
|
37 |
+
):
|
38 |
+
_vmap_decrement_nesting() # noqa: F821
|
39 |
+
_vmap_levels.pop()
|
40 |
+
|
41 |
+
@property
|
42 |
+
def size(self):
|
43 |
+
assert self.is_bound
|
44 |
+
return self._size
|
45 |
+
|
46 |
+
@size.setter
|
47 |
+
def size(self, size: int):
|
48 |
+
from . import DimensionBindError
|
49 |
+
|
50 |
+
if self._size is None:
|
51 |
+
self._size = size
|
52 |
+
self._vmap_level = _vmap_increment_nesting(size, "same") # noqa: F821
|
53 |
+
self._vmap_stack = len(_vmap_levels)
|
54 |
+
_vmap_levels.append(LevelInfo(self._vmap_level))
|
55 |
+
|
56 |
+
elif self._size != size:
|
57 |
+
raise DimensionBindError(
|
58 |
+
f"Dim '{self}' previously bound to a dimension of size {self._size} cannot bind to a dimension of size {size}"
|
59 |
+
)
|
60 |
+
|
61 |
+
@property
|
62 |
+
def is_bound(self):
|
63 |
+
return self._size is not None
|
64 |
+
|
65 |
+
def __repr__(self):
|
66 |
+
return self.name
|
67 |
+
|
68 |
+
|
69 |
+
def extract_name(inst):
|
70 |
+
assert inst.opname == "STORE_FAST" or inst.opname == "STORE_NAME"
|
71 |
+
return inst.argval
|
72 |
+
|
73 |
+
|
74 |
+
_cache = {}
|
75 |
+
|
76 |
+
|
77 |
+
def dims(lists=0):
|
78 |
+
frame = inspect.currentframe()
|
79 |
+
assert frame is not None
|
80 |
+
calling_frame = frame.f_back
|
81 |
+
assert calling_frame is not None
|
82 |
+
code, lasti = calling_frame.f_code, calling_frame.f_lasti
|
83 |
+
key = (code, lasti)
|
84 |
+
if key not in _cache:
|
85 |
+
first = lasti // 2 + 1
|
86 |
+
instructions = list(dis.get_instructions(calling_frame.f_code))
|
87 |
+
unpack = instructions[first]
|
88 |
+
|
89 |
+
if unpack.opname == "STORE_FAST" or unpack.opname == "STORE_NAME":
|
90 |
+
# just a single dim, not a list
|
91 |
+
name = unpack.argval
|
92 |
+
ctor = Dim if lists == 0 else DimList
|
93 |
+
_cache[key] = lambda: ctor(name=name)
|
94 |
+
else:
|
95 |
+
assert unpack.opname == "UNPACK_SEQUENCE"
|
96 |
+
ndims = unpack.argval
|
97 |
+
names = tuple(
|
98 |
+
extract_name(instructions[first + 1 + i]) for i in range(ndims)
|
99 |
+
)
|
100 |
+
first_list = len(names) - lists
|
101 |
+
_cache[key] = lambda: tuple(
|
102 |
+
Dim(n) if i < first_list else DimList(name=n)
|
103 |
+
for i, n in enumerate(names)
|
104 |
+
)
|
105 |
+
return _cache[key]()
|
106 |
+
|
107 |
+
|
108 |
+
def _dim_set(positional, arg):
|
109 |
+
def convert(a):
|
110 |
+
if isinstance(a, Dim):
|
111 |
+
return a
|
112 |
+
else:
|
113 |
+
assert isinstance(a, int)
|
114 |
+
return positional[a]
|
115 |
+
|
116 |
+
if arg is None:
|
117 |
+
return positional
|
118 |
+
elif not isinstance(arg, (Dim, int)):
|
119 |
+
return tuple(convert(a) for a in arg)
|
120 |
+
else:
|
121 |
+
return (convert(arg),)
|
.venv/Lib/site-packages/functorch/dim/magic_trace.py
ADDED
@@ -0,0 +1,42 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+import os
+import signal
+import subprocess
+from contextlib import contextmanager
+
+
+@contextmanager
+def magic_trace(output="trace.fxt", magic_trace_cache="/tmp/magic-trace"):
+    pid = os.getpid()
+    if not os.path.exists(magic_trace_cache):
+        print(f"Downloading magic_trace to: {magic_trace_cache}")
+        subprocess.run(
+            [
+                "wget",
+                "-O",
+                magic_trace_cache,
+                "-q",
+                "https://github.com/janestreet/magic-trace/releases/download/v1.0.2/magic-trace",
+            ]
+        )
+        subprocess.run(["chmod", "+x", magic_trace_cache])
+    args = [magic_trace_cache, "attach", "-pid", str(pid), "-o", output]
+    p = subprocess.Popen(args, stderr=subprocess.PIPE, encoding="utf-8")
+    while True:
+        x = p.stderr.readline()
+        print(x)
+        if "Attached" in x:
+            break
+    try:
+        yield
+    finally:
+        p.send_signal(signal.SIGINT)
+        r = p.wait()
+        print(p.stderr.read())
+        p.stderr.close()
+        if r != 0:
+            raise ValueError(f"magic_trace exited abnormally: {r}")
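Editor's note (not part of the vendored file above): a minimal usage sketch for the `magic_trace` context manager defined there. It assumes a Linux host where the downloaded magic-trace binary is allowed to attach a profiler to the current process; the trace is written to `trace.fxt` by default.

```python
# Minimal sketch: profile a hot loop with the magic_trace context manager above.
# Assumes a Linux host with permission to attach to the current process.
import torch
from functorch.dim.magic_trace import magic_trace

def hot_loop():
    x = torch.randn(1024, 1024)
    for _ in range(100):
        x = x @ x.T  # repeated matmuls to give the profiler something to see
    return x

with magic_trace(output="trace.fxt"):
    hot_loop()
```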
.venv/Lib/site-packages/functorch/dim/op_properties.py
ADDED
@@ -0,0 +1,312 @@
1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
2 |
+
# All rights reserved.
|
3 |
+
#
|
4 |
+
# This source code is licensed under the BSD-style license found in the
|
5 |
+
# LICENSE file in the root directory of this source tree.
|
6 |
+
import torch
|
7 |
+
|
8 |
+
|
9 |
+
# pointwise operators can go through a faster pathway
|
10 |
+
|
11 |
+
tensor_magic_methods = ["add", ""]
|
12 |
+
pointwise_magic_methods_with_reverse = (
|
13 |
+
"add",
|
14 |
+
"sub",
|
15 |
+
"mul",
|
16 |
+
"floordiv",
|
17 |
+
"div",
|
18 |
+
"truediv",
|
19 |
+
"mod",
|
20 |
+
"pow",
|
21 |
+
"lshift",
|
22 |
+
"rshift",
|
23 |
+
"and",
|
24 |
+
"or",
|
25 |
+
"xor",
|
26 |
+
)
|
27 |
+
pointwise_magic_methods = (
|
28 |
+
*(x for m in pointwise_magic_methods_with_reverse for x in (m, "r" + m)),
|
29 |
+
"eq",
|
30 |
+
"gt",
|
31 |
+
"le",
|
32 |
+
"lt",
|
33 |
+
"ge",
|
34 |
+
"gt",
|
35 |
+
"ne",
|
36 |
+
"neg",
|
37 |
+
"pos",
|
38 |
+
"abs",
|
39 |
+
"invert",
|
40 |
+
"iadd",
|
41 |
+
"isub",
|
42 |
+
"imul",
|
43 |
+
"ifloordiv",
|
44 |
+
"idiv",
|
45 |
+
"itruediv",
|
46 |
+
"imod",
|
47 |
+
"ipow",
|
48 |
+
"ilshift",
|
49 |
+
"irshift",
|
50 |
+
"iand",
|
51 |
+
"ior",
|
52 |
+
"ixor",
|
53 |
+
"int",
|
54 |
+
"long",
|
55 |
+
"float",
|
56 |
+
"complex",
|
57 |
+
)
|
58 |
+
|
59 |
+
pointwise_methods = (*(f"__{m}__" for m in pointwise_magic_methods),)
|
60 |
+
|
61 |
+
pointwise = (
|
62 |
+
*(getattr(torch.Tensor, m) for m in pointwise_methods),
|
63 |
+
torch.nn.functional.dropout,
|
64 |
+
torch.where,
|
65 |
+
torch.Tensor.abs,
|
66 |
+
torch.abs,
|
67 |
+
torch.Tensor.acos,
|
68 |
+
torch.acos,
|
69 |
+
torch.Tensor.acosh,
|
70 |
+
torch.acosh,
|
71 |
+
torch.Tensor.add,
|
72 |
+
torch.add,
|
73 |
+
torch.Tensor.addcdiv,
|
74 |
+
torch.addcdiv,
|
75 |
+
torch.Tensor.addcmul,
|
76 |
+
torch.addcmul,
|
77 |
+
torch.Tensor.addr,
|
78 |
+
torch.addr,
|
79 |
+
torch.Tensor.angle,
|
80 |
+
torch.angle,
|
81 |
+
torch.Tensor.asin,
|
82 |
+
torch.asin,
|
83 |
+
torch.Tensor.asinh,
|
84 |
+
torch.asinh,
|
85 |
+
torch.Tensor.atan,
|
86 |
+
torch.atan,
|
87 |
+
torch.Tensor.atan2,
|
88 |
+
torch.atan2,
|
89 |
+
torch.Tensor.atanh,
|
90 |
+
torch.atanh,
|
91 |
+
torch.Tensor.bitwise_and,
|
92 |
+
torch.bitwise_and,
|
93 |
+
torch.Tensor.bitwise_left_shift,
|
94 |
+
torch.bitwise_left_shift,
|
95 |
+
torch.Tensor.bitwise_not,
|
96 |
+
torch.bitwise_not,
|
97 |
+
torch.Tensor.bitwise_or,
|
98 |
+
torch.bitwise_or,
|
99 |
+
torch.Tensor.bitwise_right_shift,
|
100 |
+
torch.bitwise_right_shift,
|
101 |
+
torch.Tensor.bitwise_xor,
|
102 |
+
torch.bitwise_xor,
|
103 |
+
torch.Tensor.ceil,
|
104 |
+
torch.ceil,
|
105 |
+
torch.celu,
|
106 |
+
torch.nn.functional.celu,
|
107 |
+
torch.Tensor.clamp,
|
108 |
+
torch.clamp,
|
109 |
+
torch.Tensor.clamp_max,
|
110 |
+
torch.clamp_max,
|
111 |
+
torch.Tensor.clamp_min,
|
112 |
+
torch.clamp_min,
|
113 |
+
torch.Tensor.copysign,
|
114 |
+
torch.copysign,
|
115 |
+
torch.Tensor.cos,
|
116 |
+
torch.cos,
|
117 |
+
torch.Tensor.cosh,
|
118 |
+
torch.cosh,
|
119 |
+
torch.Tensor.deg2rad,
|
120 |
+
torch.deg2rad,
|
121 |
+
torch.Tensor.digamma,
|
122 |
+
torch.digamma,
|
123 |
+
torch.Tensor.div,
|
124 |
+
torch.div,
|
125 |
+
torch.dropout,
|
126 |
+
torch.nn.functional.dropout,
|
127 |
+
torch.nn.functional.elu,
|
128 |
+
torch.Tensor.eq,
|
129 |
+
torch.eq,
|
130 |
+
torch.Tensor.erf,
|
131 |
+
torch.erf,
|
132 |
+
torch.Tensor.erfc,
|
133 |
+
torch.erfc,
|
134 |
+
torch.Tensor.erfinv,
|
135 |
+
torch.erfinv,
|
136 |
+
torch.Tensor.exp,
|
137 |
+
torch.exp,
|
138 |
+
torch.Tensor.exp2,
|
139 |
+
torch.exp2,
|
140 |
+
torch.Tensor.expm1,
|
141 |
+
torch.expm1,
|
142 |
+
torch.feature_dropout,
|
143 |
+
torch.Tensor.float_power,
|
144 |
+
torch.float_power,
|
145 |
+
torch.Tensor.floor,
|
146 |
+
torch.floor,
|
147 |
+
torch.Tensor.floor_divide,
|
148 |
+
torch.floor_divide,
|
149 |
+
torch.Tensor.fmod,
|
150 |
+
torch.fmod,
|
151 |
+
torch.Tensor.frac,
|
152 |
+
torch.frac,
|
153 |
+
torch.Tensor.frexp,
|
154 |
+
torch.frexp,
|
155 |
+
torch.Tensor.gcd,
|
156 |
+
torch.gcd,
|
157 |
+
torch.Tensor.ge,
|
158 |
+
torch.ge,
|
159 |
+
torch.nn.functional.gelu,
|
160 |
+
torch.nn.functional.glu,
|
161 |
+
torch.Tensor.gt,
|
162 |
+
torch.gt,
|
163 |
+
torch.Tensor.hardshrink,
|
164 |
+
torch.hardshrink,
|
165 |
+
torch.nn.functional.hardshrink,
|
166 |
+
torch.nn.functional.hardsigmoid,
|
167 |
+
torch.nn.functional.hardswish,
|
168 |
+
torch.nn.functional.hardtanh,
|
169 |
+
torch.Tensor.heaviside,
|
170 |
+
torch.heaviside,
|
171 |
+
torch.Tensor.hypot,
|
172 |
+
torch.hypot,
|
173 |
+
torch.Tensor.i0,
|
174 |
+
torch.i0,
|
175 |
+
torch.Tensor.igamma,
|
176 |
+
torch.igamma,
|
177 |
+
torch.Tensor.igammac,
|
178 |
+
torch.igammac,
|
179 |
+
torch.Tensor.isclose,
|
180 |
+
torch.isclose,
|
181 |
+
torch.Tensor.isfinite,
|
182 |
+
torch.isfinite,
|
183 |
+
torch.Tensor.isinf,
|
184 |
+
torch.isinf,
|
185 |
+
torch.Tensor.isnan,
|
186 |
+
torch.isnan,
|
187 |
+
torch.Tensor.isneginf,
|
188 |
+
torch.isneginf,
|
189 |
+
torch.Tensor.isposinf,
|
190 |
+
torch.isposinf,
|
191 |
+
torch.Tensor.isreal,
|
192 |
+
torch.isreal,
|
193 |
+
torch.Tensor.kron,
|
194 |
+
torch.kron,
|
195 |
+
torch.Tensor.lcm,
|
196 |
+
torch.lcm,
|
197 |
+
torch.Tensor.ldexp,
|
198 |
+
torch.ldexp,
|
199 |
+
torch.Tensor.le,
|
200 |
+
torch.le,
|
201 |
+
torch.nn.functional.leaky_relu,
|
202 |
+
torch.Tensor.lerp,
|
203 |
+
torch.lerp,
|
204 |
+
torch.Tensor.lgamma,
|
205 |
+
torch.lgamma,
|
206 |
+
torch.Tensor.log,
|
207 |
+
torch.log,
|
208 |
+
torch.Tensor.log10,
|
209 |
+
torch.log10,
|
210 |
+
torch.Tensor.log1p,
|
211 |
+
torch.log1p,
|
212 |
+
torch.Tensor.log2,
|
213 |
+
torch.log2,
|
214 |
+
torch.nn.functional.logsigmoid,
|
215 |
+
torch.Tensor.logical_and,
|
216 |
+
torch.logical_and,
|
217 |
+
torch.Tensor.logical_not,
|
218 |
+
torch.logical_not,
|
219 |
+
torch.Tensor.logical_or,
|
220 |
+
torch.logical_or,
|
221 |
+
torch.Tensor.logical_xor,
|
222 |
+
torch.logical_xor,
|
223 |
+
torch.Tensor.logit,
|
224 |
+
torch.logit,
|
225 |
+
torch.Tensor.lt,
|
226 |
+
torch.lt,
|
227 |
+
torch.Tensor.maximum,
|
228 |
+
torch.maximum,
|
229 |
+
torch.Tensor.minimum,
|
230 |
+
torch.minimum,
|
231 |
+
torch.nn.functional.mish,
|
232 |
+
torch.Tensor.mvlgamma,
|
233 |
+
torch.mvlgamma,
|
234 |
+
torch.Tensor.nan_to_num,
|
235 |
+
torch.nan_to_num,
|
236 |
+
torch.Tensor.ne,
|
237 |
+
torch.ne,
|
238 |
+
torch.Tensor.neg,
|
239 |
+
torch.neg,
|
240 |
+
torch.Tensor.nextafter,
|
241 |
+
torch.nextafter,
|
242 |
+
torch.Tensor.outer,
|
243 |
+
torch.outer,
|
244 |
+
torch.polar,
|
245 |
+
torch.Tensor.polygamma,
|
246 |
+
torch.polygamma,
|
247 |
+
torch.Tensor.positive,
|
248 |
+
torch.positive,
|
249 |
+
torch.Tensor.pow,
|
250 |
+
torch.pow,
|
251 |
+
torch.Tensor.prelu,
|
252 |
+
torch.prelu,
|
253 |
+
torch.nn.functional.prelu,
|
254 |
+
torch.Tensor.rad2deg,
|
255 |
+
torch.rad2deg,
|
256 |
+
torch.Tensor.reciprocal,
|
257 |
+
torch.reciprocal,
|
258 |
+
torch.Tensor.relu,
|
259 |
+
torch.relu,
|
260 |
+
torch.nn.functional.relu,
|
261 |
+
torch.nn.functional.relu6,
|
262 |
+
torch.Tensor.remainder,
|
263 |
+
torch.remainder,
|
264 |
+
torch.Tensor.round,
|
265 |
+
torch.round,
|
266 |
+
torch.rrelu,
|
267 |
+
torch.nn.functional.rrelu,
|
268 |
+
torch.Tensor.rsqrt,
|
269 |
+
torch.rsqrt,
|
270 |
+
torch.rsub,
|
271 |
+
torch.selu,
|
272 |
+
torch.nn.functional.selu,
|
273 |
+
torch.Tensor.sgn,
|
274 |
+
torch.sgn,
|
275 |
+
torch.Tensor.sigmoid,
|
276 |
+
torch.sigmoid,
|
277 |
+
torch.nn.functional.sigmoid,
|
278 |
+
torch.Tensor.sign,
|
279 |
+
torch.sign,
|
280 |
+
torch.Tensor.signbit,
|
281 |
+
torch.signbit,
|
282 |
+
torch.nn.functional.silu,
|
283 |
+
torch.Tensor.sin,
|
284 |
+
torch.sin,
|
285 |
+
torch.Tensor.sinc,
|
286 |
+
torch.sinc,
|
287 |
+
torch.Tensor.sinh,
|
288 |
+
torch.sinh,
|
289 |
+
torch.nn.functional.softplus,
|
290 |
+
torch.nn.functional.softshrink,
|
291 |
+
torch.Tensor.sqrt,
|
292 |
+
torch.sqrt,
|
293 |
+
torch.Tensor.square,
|
294 |
+
torch.square,
|
295 |
+
torch.Tensor.sub,
|
296 |
+
torch.sub,
|
297 |
+
torch.Tensor.tan,
|
298 |
+
torch.tan,
|
299 |
+
torch.Tensor.tanh,
|
300 |
+
torch.tanh,
|
301 |
+
torch.nn.functional.tanh,
|
302 |
+
torch.threshold,
|
303 |
+
torch.nn.functional.threshold,
|
304 |
+
torch.trapz,
|
305 |
+
torch.Tensor.true_divide,
|
306 |
+
torch.true_divide,
|
307 |
+
torch.Tensor.trunc,
|
308 |
+
torch.trunc,
|
309 |
+
torch.Tensor.xlogy,
|
310 |
+
torch.xlogy,
|
311 |
+
torch.rand_like,
|
312 |
+
)
|
.venv/Lib/site-packages/functorch/dim/reference.py
ADDED
@@ -0,0 +1,645 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
2 |
+
# All rights reserved.
|
3 |
+
#
|
4 |
+
# This source code is licensed under the BSD-style license found in the
|
5 |
+
# LICENSE file in the root directory of this source tree.
|
6 |
+
|
7 |
+
# reference python implementations for C ops
|
8 |
+
import torch
|
9 |
+
from functorch._C import dim as _C
|
10 |
+
|
11 |
+
from . import op_properties
|
12 |
+
from .batch_tensor import _enable_layers
|
13 |
+
from .tree_map import tree_flatten, tree_map
|
14 |
+
|
15 |
+
|
16 |
+
DimList = _C.DimList
|
17 |
+
import operator
|
18 |
+
from functools import reduce
|
19 |
+
|
20 |
+
|
21 |
+
# use dict to avoid writing C++ bindings for set
|
22 |
+
pointwise = set(op_properties.pointwise)
|
23 |
+
|
24 |
+
|
25 |
+
def prod(x):
|
26 |
+
return reduce(operator.mul, x, 1)
|
27 |
+
|
28 |
+
|
29 |
+
def _wrap_dim(d, N, keepdim):
|
30 |
+
from . import Dim
|
31 |
+
|
32 |
+
if isinstance(d, Dim):
|
33 |
+
assert not keepdim, "cannot preserve first-class dimensions with keepdim=True"
|
34 |
+
return d
|
35 |
+
elif d >= 0:
|
36 |
+
return d - N
|
37 |
+
else:
|
38 |
+
return d
|
39 |
+
|
40 |
+
|
41 |
+
def _dims(d, N, keepdim, single_dim):
|
42 |
+
from . import Dim
|
43 |
+
|
44 |
+
if isinstance(d, (Dim, int)):
|
45 |
+
return ltuple((_wrap_dim(d, N, keepdim),))
|
46 |
+
assert not single_dim, f"expected a single dimension or int but found: {d}"
|
47 |
+
return ltuple(_wrap_dim(x, N, keepdim) for x in d)
|
48 |
+
|
49 |
+
|
50 |
+
def _bind_dims_to_size(lhs_size, rhs, lhs_debug):
|
51 |
+
from . import DimensionMismatchError
|
52 |
+
|
53 |
+
not_bound = tuple((i, r) for i, r in enumerate(rhs) if not r.is_bound)
|
54 |
+
if len(not_bound) == 1:
|
55 |
+
idx, d = not_bound[0]
|
56 |
+
rhs_so_far = prod(r.size for r in rhs if r.is_bound)
|
57 |
+
if lhs_size % rhs_so_far != 0:
|
58 |
+
rhs_s = tuple("?" if not r.is_bound else str(r.size) for r in rhs)
|
59 |
+
raise DimensionMismatchError(
|
60 |
+
f"inferred dimension does not evenly fit into larger dimension: {lhs_size} vs {rhs_s}"
|
61 |
+
)
|
62 |
+
new_size = lhs_size // rhs_so_far
|
63 |
+
d.size = new_size
|
64 |
+
elif len(not_bound) > 1:
|
65 |
+
rhs_s = tuple("?" if not r.is_bound else str(r.size) for r in rhs)
|
66 |
+
raise DimensionMismatchError(
|
67 |
+
f"cannot infer the size of two dimensions at once: {rhs} with sizes {rhs_s}"
|
68 |
+
)
|
69 |
+
else:
|
70 |
+
rhs_size = prod(r.size for r in rhs)
|
71 |
+
if lhs_size != rhs_size:
|
72 |
+
raise DimensionMismatchError(
|
73 |
+
f"Dimension sizes to do not match ({lhs_size} != {rhs_size}) when matching {lhs_debug} to {rhs}"
|
74 |
+
)
|
75 |
+
|
76 |
+
|
77 |
+
def _tensor_levels(inp):
|
78 |
+
from . import _Tensor
|
79 |
+
|
80 |
+
if isinstance(inp, _Tensor):
|
81 |
+
return inp._tensor, llist(inp._levels), inp._has_device
|
82 |
+
else:
|
83 |
+
return inp, llist(range(-inp.ndim, 0)), True
|
84 |
+
|
85 |
+
|
86 |
+
def _match_levels(v, from_levels, to_levels):
|
87 |
+
view = []
|
88 |
+
permute = []
|
89 |
+
requires_view = False
|
90 |
+
size = v.size()
|
91 |
+
for t in to_levels:
|
92 |
+
try:
|
93 |
+
idx = from_levels.index(t)
|
94 |
+
permute.append(idx)
|
95 |
+
view.append(size[idx])
|
96 |
+
except ValueError:
|
97 |
+
view.append(1)
|
98 |
+
requires_view = True
|
99 |
+
if permute != list(range(len(permute))):
|
100 |
+
v = v.permute(*permute)
|
101 |
+
if requires_view:
|
102 |
+
v = v.view(*view)
|
103 |
+
return v
|
104 |
+
|
105 |
+
|
106 |
+
# make a single dimension positional but do not permute it,
|
107 |
+
# used to do multi-tensor operators where the dim being acted on
|
108 |
+
# should not physically move if possible
|
109 |
+
def _positional_no_permute(self, dim, expand_dim=False):
|
110 |
+
from . import Tensor
|
111 |
+
|
112 |
+
ptensor, levels = self._tensor, llist(self._levels)
|
113 |
+
try:
|
114 |
+
idx = levels.index(dim)
|
115 |
+
except ValueError:
|
116 |
+
if not expand_dim:
|
117 |
+
raise
|
118 |
+
idx = 0
|
119 |
+
ptensor = ptensor.expand(dim.size, *ptensor.size())
|
120 |
+
levels.insert(0, 0)
|
121 |
+
idx_batched = 0
|
122 |
+
for i in range(idx):
|
123 |
+
if isinstance(levels[i], int):
|
124 |
+
levels[i] -= 1
|
125 |
+
idx_batched += 1
|
126 |
+
levels[idx] = -idx_batched - 1
|
127 |
+
return Tensor.from_positional(ptensor, levels, self._has_device), idx_batched
|
128 |
+
|
129 |
+
|
130 |
+
def seq(a, b):
|
131 |
+
from . import Dim
|
132 |
+
|
133 |
+
if isinstance(a, Dim) != isinstance(b, Dim):
|
134 |
+
return False
|
135 |
+
if isinstance(a, Dim):
|
136 |
+
return a is b
|
137 |
+
else:
|
138 |
+
return a == b
|
139 |
+
|
140 |
+
|
141 |
+
class isin:
|
142 |
+
def __contains__(self, item):
|
143 |
+
for x in self:
|
144 |
+
if seq(item, x):
|
145 |
+
return True
|
146 |
+
return False
|
147 |
+
|
148 |
+
def index(self, item):
|
149 |
+
for i, x in enumerate(self):
|
150 |
+
if seq(item, x):
|
151 |
+
return i
|
152 |
+
raise ValueError
|
153 |
+
|
154 |
+
|
155 |
+
class llist(isin, list):
|
156 |
+
pass
|
157 |
+
|
158 |
+
|
159 |
+
class ltuple(isin, tuple):
|
160 |
+
pass
|
161 |
+
|
162 |
+
|
163 |
+
empty_dict = {}
|
164 |
+
|
165 |
+
|
166 |
+
@classmethod
|
167 |
+
def __torch_function__(self, orig, cls, args, kwargs=empty_dict):
|
168 |
+
from . import _Tensor, Tensor, TensorLike
|
169 |
+
from .delayed_mul_tensor import DelayedMulTensor
|
170 |
+
|
171 |
+
if orig is torch.Tensor.__mul__:
|
172 |
+
lhs, rhs = args
|
173 |
+
if (
|
174 |
+
isinstance(lhs, _Tensor)
|
175 |
+
and isinstance(rhs, _Tensor)
|
176 |
+
and lhs.ndim == 0
|
177 |
+
and rhs.ndim == 0
|
178 |
+
):
|
179 |
+
return DelayedMulTensor(lhs, rhs)
|
180 |
+
all_dims = llist()
|
181 |
+
flat_args, unflatten = tree_flatten((args, kwargs))
|
182 |
+
device_holding_tensor = None
|
183 |
+
for f in flat_args:
|
184 |
+
if isinstance(f, _Tensor):
|
185 |
+
if f._has_device:
|
186 |
+
device_holding_tensor = f._batchtensor
|
187 |
+
for d in f.dims:
|
188 |
+
if d not in all_dims:
|
189 |
+
all_dims.append(d)
|
190 |
+
|
191 |
+
def unwrap(t):
|
192 |
+
if isinstance(t, _Tensor):
|
193 |
+
r = t._batchtensor
|
194 |
+
if device_holding_tensor is not None and not t._has_device:
|
195 |
+
r = r.to(device=device_holding_tensor.device)
|
196 |
+
return r
|
197 |
+
return t
|
198 |
+
|
199 |
+
if orig in pointwise:
|
200 |
+
result_levels = llist()
|
201 |
+
arg_levels = llist()
|
202 |
+
to_expand = []
|
203 |
+
for i, f in enumerate(flat_args):
|
204 |
+
if isinstance(f, TensorLike):
|
205 |
+
ptensor, levels, _ = _tensor_levels(f)
|
206 |
+
if (
|
207 |
+
isinstance(f, _Tensor)
|
208 |
+
and not f._has_device
|
209 |
+
and device_holding_tensor is not None
|
210 |
+
):
|
211 |
+
ptensor = ptensor.to(device=device_holding_tensor.device)
|
212 |
+
flat_args[i] = ptensor
|
213 |
+
for l in levels:
|
214 |
+
if l not in result_levels:
|
215 |
+
result_levels.append(l)
|
216 |
+
to_expand.append((i, levels))
|
217 |
+
|
218 |
+
for i, levels in to_expand:
|
219 |
+
flat_args[i] = _match_levels(flat_args[i], levels, result_levels)
|
220 |
+
args, kwargs = unflatten(flat_args)
|
221 |
+
result = orig(*args, **kwargs)
|
222 |
+
|
223 |
+
def wrap(t):
|
224 |
+
if isinstance(t, TensorLike):
|
225 |
+
return Tensor.from_positional(
|
226 |
+
t, result_levels, device_holding_tensor is not None
|
227 |
+
)
|
228 |
+
return t
|
229 |
+
|
230 |
+
return tree_map(wrap, result)
|
231 |
+
else:
|
232 |
+
|
233 |
+
def wrap(t):
|
234 |
+
if isinstance(t, TensorLike):
|
235 |
+
return Tensor.from_batched(t, device_holding_tensor is not None)
|
236 |
+
return t
|
237 |
+
|
238 |
+
with _enable_layers(all_dims):
|
239 |
+
print(f"batch_tensor for {orig}")
|
240 |
+
args, kwargs = unflatten(unwrap(f) for f in flat_args)
|
241 |
+
result = orig(*args, **kwargs)
|
242 |
+
# print("END", orig)
|
243 |
+
return tree_map(wrap, result)
|
244 |
+
|
245 |
+
|
246 |
+
def positional(self, *dims):
|
247 |
+
from . import Dim, DimensionBindError, Tensor
|
248 |
+
|
249 |
+
ptensor, levels = self._tensor, llist(self._levels)
|
250 |
+
flat_dims = llist()
|
251 |
+
view = []
|
252 |
+
needs_view = False
|
253 |
+
ndim = self.ndim
|
254 |
+
for d in dims:
|
255 |
+
if isinstance(d, DimList):
|
256 |
+
flat_dims.extend(d)
|
257 |
+
view.extend(e.size for e in d)
|
258 |
+
elif isinstance(d, Dim):
|
259 |
+
flat_dims.append(d)
|
260 |
+
view.append(d.size)
|
261 |
+
elif isinstance(d, int):
|
262 |
+
d = _wrap_dim(d, ndim, False)
|
263 |
+
flat_dims.append(d)
|
264 |
+
view.append(ptensor.size(d))
|
265 |
+
else:
|
266 |
+
flat_dims.extend(d)
|
267 |
+
view.append(prod(e.size for e in d))
|
268 |
+
needs_view = True
|
269 |
+
|
270 |
+
permute = list(range(len(levels)))
|
271 |
+
nflat = len(flat_dims)
|
272 |
+
for i, d in enumerate(flat_dims):
|
273 |
+
try:
|
274 |
+
idx = levels.index(d)
|
275 |
+
except ValueError as e:
|
276 |
+
raise DimensionBindError(
|
277 |
+
f"tensor of dimensions {self.dims} does not contain dim {d}"
|
278 |
+
) from e
|
279 |
+
p = permute[idx]
|
280 |
+
del levels[idx]
|
281 |
+
del permute[idx]
|
282 |
+
levels.insert(i, 0)
|
283 |
+
permute.insert(i, p)
|
284 |
+
ptensor = ptensor.permute(*permute)
|
285 |
+
seen = 0
|
286 |
+
for i in range(len(levels) - 1, -1, -1):
|
287 |
+
if isinstance(levels[i], int):
|
288 |
+
seen += 1
|
289 |
+
levels[i] = -seen
|
290 |
+
result = Tensor.from_positional(ptensor, levels, self._has_device)
|
291 |
+
if needs_view:
|
292 |
+
result = result.reshape(*view, *result.size()[len(flat_dims) :])
|
293 |
+
return result
|
294 |
+
|
295 |
+
|
296 |
+
def _contains_dim(input):
|
297 |
+
from . import Dim
|
298 |
+
|
299 |
+
for i in input:
|
300 |
+
if isinstance(i, Dim):
|
301 |
+
return True
|
302 |
+
|
303 |
+
|
304 |
+
def expand(self, *sizes):
|
305 |
+
if not _contains_dim(sizes):
|
306 |
+
return self.__torch_function__(torch.Tensor.expand, None, (self, *sizes))
|
307 |
+
dims = sizes
|
308 |
+
sizes = [d.size for d in dims] + [-1] * self.ndim
|
309 |
+
self = self.expand(*sizes)
|
310 |
+
return self[dims]
|
311 |
+
|
312 |
+
|
313 |
+
_not_present = object()
|
314 |
+
|
315 |
+
|
316 |
+
def _getarg(name, offset, args, kwargs, default):
|
317 |
+
if len(args) > offset:
|
318 |
+
return args[offset]
|
319 |
+
return kwargs.get(name, default)
|
320 |
+
|
321 |
+
|
322 |
+
def _patcharg(name, offset, args, kwargs, value):
|
323 |
+
if len(args) > offset:
|
324 |
+
args[offset] = value
|
325 |
+
else:
|
326 |
+
kwargs[name] = value
|
327 |
+
|
328 |
+
|
329 |
+
def _wrap(
|
330 |
+
orig, dim_offset=0, keepdim_offset=1, dim_name="dim", single_dim=False, reduce=True
|
331 |
+
):
|
332 |
+
from . import Dim, Tensor, TensorLike
|
333 |
+
|
334 |
+
def fn(self, *args, **kwargs):
|
335 |
+
dim = _getarg(dim_name, dim_offset, args, kwargs, _not_present)
|
336 |
+
if dim is _not_present or (single_dim and not isinstance(dim, Dim)):
|
337 |
+
with _enable_layers(self.dims):
|
338 |
+
print(f"dim fallback batch_tensor for {orig}")
|
339 |
+
return Tensor.from_batched(
|
340 |
+
orig(self._batchtensor, *args, **kwargs), self._has_device
|
341 |
+
)
|
342 |
+
keepdim = (
|
343 |
+
_getarg("keepdim", keepdim_offset, args, kwargs, False) if reduce else False
|
344 |
+
)
|
345 |
+
t, levels = self._tensor, llist(self._levels)
|
346 |
+
dims = _dims(dim, self._batchtensor.ndim, keepdim, single_dim)
|
347 |
+
dim_indices = tuple(levels.index(d) for d in dims)
|
348 |
+
if reduce and not keepdim:
|
349 |
+
new_levels = [l for i, l in enumerate(levels) if i not in dim_indices]
|
350 |
+
else:
|
351 |
+
new_levels = levels
|
352 |
+
|
353 |
+
if len(dim_indices) == 1:
|
354 |
+
dim_indices = dim_indices[
|
355 |
+
0
|
356 |
+
] # so that dims that really only take a single argument work...
|
357 |
+
args = list(args)
|
358 |
+
_patcharg(dim_name, dim_offset, args, kwargs, dim_indices)
|
359 |
+
|
360 |
+
def wrap(t):
|
361 |
+
if isinstance(t, TensorLike):
|
362 |
+
return Tensor.from_positional(t, new_levels, self._has_device)
|
363 |
+
return t
|
364 |
+
|
365 |
+
with _enable_layers(new_levels):
|
366 |
+
print(f"dim used batch_tensor for {orig}")
|
367 |
+
r = orig(t, *args, **kwargs)
|
368 |
+
return tree_map(wrap, r)
|
369 |
+
|
370 |
+
return fn
|
371 |
+
|
372 |
+
|
373 |
+
def _def(name, *args, **kwargs):
|
374 |
+
from . import _Tensor
|
375 |
+
|
376 |
+
orig = getattr(torch.Tensor, name)
|
377 |
+
setattr(_Tensor, name, _wrap(orig, *args, **kwargs))
|
378 |
+
|
379 |
+
|
380 |
+
no_slice = slice(None)
|
381 |
+
|
382 |
+
_orig_getitem = torch.Tensor.__getitem__
|
383 |
+
|
384 |
+
|
385 |
+
class dim_tracker:
|
386 |
+
def __init__(self) -> None:
|
387 |
+
self.dims = llist()
|
388 |
+
self.count = []
|
389 |
+
|
390 |
+
def record(self, d):
|
391 |
+
if d not in self.dims:
|
392 |
+
self.dims.append(d)
|
393 |
+
self.count.append(1)
|
394 |
+
|
395 |
+
def __getitem__(self, d):
|
396 |
+
return self.count[self.dims.index(d)]
|
397 |
+
|
398 |
+
|
399 |
+
def t__getitem__(self, input):
|
400 |
+
from . import _Tensor, Dim, DimensionBindError, DimList, Tensor, TensorLike
|
401 |
+
|
402 |
+
# * bail to original example if we have a single non-Dim tensor, or a non-tensor
|
403 |
+
# * locate ... or an unbound tensor list, and determine its size, bind dim list
|
404 |
+
# (remember that None does not count to the total dim count)
|
405 |
+
# * bind simple dims and dim-packs to their sizes, count the number of uses of each dim,
|
406 |
+
# produce the re-view if needed
|
407 |
+
# * for each single-use dim index, replace with no_slice and mark that it will be added
|
408 |
+
# (keep track of whether we have to call super)
|
409 |
+
# * call super if needed
|
410 |
+
# * if we have dims to bind, bind them (it will help if we eliminated ... and None before)
|
411 |
+
# this handles bool indexing handling, as well as some other simple cases.
|
412 |
+
|
413 |
+
is_simple = (
|
414 |
+
not isinstance(input, Dim)
|
415 |
+
and not isinstance(input, (tuple, list))
|
416 |
+
and
|
417 |
+
# WAR for functorch bug where zero time tensors in getitem are not handled correctly.
|
418 |
+
not (isinstance(input, TensorLike) and input.ndim == 0)
|
419 |
+
)
|
420 |
+
|
421 |
+
if is_simple:
|
422 |
+
if isinstance(self, _Tensor):
|
423 |
+
return _Tensor.__torch_function__(_orig_getitem, None, (self, input))
|
424 |
+
else:
|
425 |
+
return _orig_getitem(self, input)
|
426 |
+
|
427 |
+
# can further optimize this case
|
428 |
+
if not isinstance(input, tuple):
|
429 |
+
input = [input]
|
430 |
+
else:
|
431 |
+
input = list(input)
|
432 |
+
|
433 |
+
dims_indexed = 0
|
434 |
+
expanding_object = None
|
435 |
+
dimlists = []
|
436 |
+
for i, s in enumerate(input):
|
437 |
+
if s is ... or isinstance(s, DimList) and not s.is_bound:
|
438 |
+
if expanding_object is not None:
|
439 |
+
msg = (
|
440 |
+
"at most one ... or unbound dimension list can exist in indexing list but"
|
441 |
+
f" found 2 at offsets {i} and {expanding_object}"
|
442 |
+
)
|
443 |
+
raise DimensionBindError(msg)
|
444 |
+
expanding_object = i
|
445 |
+
|
446 |
+
if isinstance(s, DimList):
|
447 |
+
dims_indexed += len(s) if s.is_bound else 0
|
448 |
+
dimlists.append(i)
|
449 |
+
elif s is not None and s is not ...:
|
450 |
+
dims_indexed += 1
|
451 |
+
|
452 |
+
ndim = self.ndim
|
453 |
+
if dims_indexed > ndim:
|
454 |
+
raise IndexError(
|
455 |
+
f"at least {dims_indexed} indices were supplied but the tensor only has {ndim} dimensions."
|
456 |
+
)
|
457 |
+
if expanding_object is not None:
|
458 |
+
expanding_ndims = ndim - dims_indexed
|
459 |
+
obj = input[expanding_object]
|
460 |
+
if obj is ...:
|
461 |
+
input[expanding_object : expanding_object + 1] = [
|
462 |
+
no_slice
|
463 |
+
] * expanding_ndims
|
464 |
+
else:
|
465 |
+
obj.bind_len(expanding_ndims)
|
466 |
+
# flatten the dimslists into the indexing
|
467 |
+
for i in reversed(dimlists):
|
468 |
+
input[i : i + 1] = input[i]
|
469 |
+
dims_indexed = 0
|
470 |
+
requires_view = False
|
471 |
+
size = self.size()
|
472 |
+
view_sizes = []
|
473 |
+
dims_seen = dim_tracker()
|
474 |
+
|
475 |
+
def add_dims(t):
|
476 |
+
if not isinstance(t, _Tensor):
|
477 |
+
return
|
478 |
+
for d in t.dims:
|
479 |
+
dims_seen.record(d)
|
480 |
+
|
481 |
+
add_dims(self)
|
482 |
+
dim_packs = []
|
483 |
+
for i, idx in enumerate(input):
|
484 |
+
if idx is None:
|
485 |
+
input[i] = no_slice
|
486 |
+
view_sizes.append(1)
|
487 |
+
requires_view = True
|
488 |
+
else:
|
489 |
+
sz = size[dims_indexed]
|
490 |
+
if isinstance(idx, Dim):
|
491 |
+
idx.size = sz
|
492 |
+
dims_seen.record(idx)
|
493 |
+
view_sizes.append(sz)
|
494 |
+
elif isinstance(idx, (tuple, list)) and idx and isinstance(idx[0], Dim):
|
495 |
+
for d in idx:
|
496 |
+
dims_seen.record(idx)
|
497 |
+
_bind_dims_to_size(sz, idx, f"offset {i}")
|
498 |
+
view_sizes.extend(d.size for d in idx)
|
499 |
+
requires_view = True
|
500 |
+
dim_packs.append(i)
|
501 |
+
else:
|
502 |
+
add_dims(idx)
|
503 |
+
view_sizes.append(sz)
|
504 |
+
dims_indexed += 1
|
505 |
+
if requires_view:
|
506 |
+
self = self.view(*view_sizes)
|
507 |
+
for i in reversed(dim_packs):
|
508 |
+
input[i : i + 1] = input[i]
|
509 |
+
|
510 |
+
# currenty:
|
511 |
+
# input is flat, containing either Dim, or Tensor, or something valid for standard indexing
|
512 |
+
# self may have first-class dims as well.
|
513 |
+
|
514 |
+
# to index:
|
515 |
+
# drop the first class dims from self, they just become direct indices of their positions
|
516 |
+
|
517 |
+
# figure out the dimensions of the indexing tensors: union of all the dims in the tensors in the index.
|
518 |
+
# these dimensions will appear and need to be bound at the first place tensor occures
|
519 |
+
|
520 |
+
if isinstance(self, _Tensor):
|
521 |
+
ptensor_self, levels = self._tensor, list(self._levels)
|
522 |
+
# indices to ptensor rather than self which has first-class dimensions
|
523 |
+
input_it = iter(input)
|
524 |
+
flat_inputs = [next(input_it) if isinstance(l, int) else l for l in levels]
|
525 |
+
has_device = self._has_device
|
526 |
+
to_pad = 0
|
527 |
+
else:
|
528 |
+
ptensor_self, flat_inputs = self, input
|
529 |
+
to_pad = ptensor_self.ndim - len(flat_inputs)
|
530 |
+
has_device = True
|
531 |
+
|
532 |
+
result_levels = []
|
533 |
+
index_levels = []
|
534 |
+
tensor_insert_point = None
|
535 |
+
to_expand = {}
|
536 |
+
requires_getindex = False
|
537 |
+
for i, inp in enumerate(flat_inputs):
|
538 |
+
if isinstance(inp, Dim) and dims_seen[inp] == 1:
|
539 |
+
flat_inputs[i] = no_slice
|
540 |
+
result_levels.append(inp)
|
541 |
+
elif isinstance(inp, TensorLike):
|
542 |
+
requires_getindex = True
|
543 |
+
if tensor_insert_point is None:
|
544 |
+
tensor_insert_point = len(result_levels)
|
545 |
+
ptensor, levels, _ = _tensor_levels(inp)
|
546 |
+
to_expand[i] = levels
|
547 |
+
flat_inputs[i] = ptensor
|
548 |
+
for l in levels:
|
549 |
+
if l not in index_levels:
|
550 |
+
index_levels.append(l)
|
551 |
+
else:
|
552 |
+
requires_getindex = True
|
553 |
+
result_levels.append(0)
|
554 |
+
|
555 |
+
if tensor_insert_point is not None:
|
556 |
+
result_levels[tensor_insert_point:tensor_insert_point] = index_levels
|
557 |
+
|
558 |
+
for i, levels in to_expand.items():
|
559 |
+
flat_inputs[i] = _match_levels(flat_inputs[i], levels, index_levels)
|
560 |
+
|
561 |
+
if requires_getindex:
|
562 |
+
result = _orig_getitem(ptensor_self, flat_inputs)
|
563 |
+
else:
|
564 |
+
result = ptensor_self
|
565 |
+
|
566 |
+
next_positional = -1
|
567 |
+
if to_pad > 0:
|
568 |
+
result_levels.extend([0] * to_pad)
|
569 |
+
for i, r in enumerate(reversed(result_levels)):
|
570 |
+
if isinstance(r, int):
|
571 |
+
result_levels[-1 - i] = next_positional
|
572 |
+
next_positional -= 1
|
573 |
+
|
574 |
+
return Tensor.from_positional(result, result_levels, has_device)
|
575 |
+
|
576 |
+
|
577 |
+
# XXX - dim is optional and can be the outer-most dimension...
|
578 |
+
def stack(tensors, new_dim, dim=0, out=None):
|
579 |
+
if isinstance(dim, int):
|
580 |
+
return torch.stack(tensors, dim, out).index(dim, new_dim)
|
581 |
+
index = None
|
582 |
+
if out is not None:
|
583 |
+
out, index = _positional_no_permute(out, dim, expand_dim=True)
|
584 |
+
ptensors = []
|
585 |
+
for t in tensors:
|
586 |
+
pt, pi = _positional_no_permute(t, dim, expand_dim=True)
|
587 |
+
if index is not None and pi != index:
|
588 |
+
pt = pt.move_dim(pi, index)
|
589 |
+
else:
|
590 |
+
index = pi
|
591 |
+
ptensors.append(pt)
|
592 |
+
pr = torch.stack(ptensors, index, out=out)
|
593 |
+
return pr.index((index, index + 1), (new_dim, dim))
|
594 |
+
|
595 |
+
|
596 |
+
_orig_split = torch.Tensor.split
|
597 |
+
|
598 |
+
|
599 |
+
def split(self, split_size_or_sections, dim=0):
|
600 |
+
from . import _Tensor, Dim
|
601 |
+
|
602 |
+
if isinstance(split_size_or_sections, int) or any(
|
603 |
+
isinstance(t, int) for t in split_size_or_sections
|
604 |
+
):
|
605 |
+
if isinstance(dim, Dim):
|
606 |
+
raise ValueError(
|
607 |
+
"when dim is specified as a Dim object, split sizes must also be dimensions."
|
608 |
+
)
|
609 |
+
return _orig_split(self, split_size_or_sections, dim=dim)
|
610 |
+
|
611 |
+
if isinstance(dim, Dim):
|
612 |
+
assert isinstance(self, _Tensor), f"Tensor does not have dimension {dim}"
|
613 |
+
self, dim = _positional_no_permute(self, dim)
|
614 |
+
|
615 |
+
size = self.size(dim)
|
616 |
+
total_bound_size = 0
|
617 |
+
unbound = []
|
618 |
+
sizes = []
|
619 |
+
for i, d in enumerate(split_size_or_sections):
|
620 |
+
if d.is_bound:
|
621 |
+
sizes.append(d.size)
|
622 |
+
total_bound_size += d.size
|
623 |
+
else:
|
624 |
+
sizes.append(0)
|
625 |
+
unbound.append(i)
|
626 |
+
|
627 |
+
if unbound:
|
628 |
+
assert (
|
629 |
+
total_bound_size <= size
|
630 |
+
), f"result dimensions are larger than original: {total_bound_size} vs {size} ({split_size_or_sections})"
|
631 |
+
remaining_size = size - total_bound_size
|
632 |
+
chunk_size = -(-remaining_size // len(unbound))
|
633 |
+
for u in unbound:
|
634 |
+
sz = min(chunk_size, remaining_size)
|
635 |
+
split_size_or_sections[u].size = sz
|
636 |
+
sizes[u] = sz
|
637 |
+
remaining_size -= sz
|
638 |
+
else:
|
639 |
+
assert (
|
640 |
+
total_bound_size == size
|
641 |
+
), f"result dimensions do not match original: {total_bound_size} vs {size} ({split_size_or_sections})"
|
642 |
+
return tuple(
|
643 |
+
t.index(dim, d)
|
644 |
+
for d, t in zip(split_size_or_sections, _orig_split(self, sizes, dim=dim))
|
645 |
+
)
|
.venv/Lib/site-packages/functorch/dim/tree_map.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
2 |
+
# All rights reserved.
|
3 |
+
#
|
4 |
+
# This source code is licensed under the BSD-style license found in the
|
5 |
+
# LICENSE file in the root directory of this source tree.
|
6 |
+
|
7 |
+
from functorch._C import dim
|
8 |
+
|
9 |
+
|
10 |
+
tree_flatten = dim.tree_flatten
|
11 |
+
|
12 |
+
|
13 |
+
def tree_map(fn, tree):
|
14 |
+
vs, unflatten = tree_flatten(tree)
|
15 |
+
return unflatten(fn(v) for v in vs)
|
.venv/Lib/site-packages/functorch/dim/wrap_type.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
2 |
+
# All rights reserved.
|
3 |
+
#
|
4 |
+
# This source code is licensed under the BSD-style license found in the
|
5 |
+
# LICENSE file in the root directory of this source tree.
|
6 |
+
|
7 |
+
from types import (
|
8 |
+
BuiltinMethodType,
|
9 |
+
FunctionType,
|
10 |
+
GetSetDescriptorType,
|
11 |
+
MethodDescriptorType,
|
12 |
+
WrapperDescriptorType,
|
13 |
+
)
|
14 |
+
|
15 |
+
from functorch._C import dim as _C
|
16 |
+
|
17 |
+
|
18 |
+
_wrap_method = _C._wrap_method
|
19 |
+
|
20 |
+
FUNC_TYPES = (
|
21 |
+
FunctionType,
|
22 |
+
MethodDescriptorType,
|
23 |
+
BuiltinMethodType,
|
24 |
+
WrapperDescriptorType,
|
25 |
+
)
|
26 |
+
PROPERTY_TYPES = (GetSetDescriptorType, property)
|
27 |
+
|
28 |
+
|
29 |
+
def _py_wrap_method(orig, __torch_function__):
|
30 |
+
def impl(*args, **kwargs):
|
31 |
+
return __torch_function__(orig, None, args, kwargs)
|
32 |
+
|
33 |
+
return impl
|
34 |
+
|
35 |
+
|
36 |
+
def wrap_type(use_c, to_patch, pattern, __torch_function__):
|
37 |
+
if use_c:
|
38 |
+
wrap_method = _wrap_method
|
39 |
+
else:
|
40 |
+
wrap_method = _py_wrap_method
|
41 |
+
|
42 |
+
all = {}
|
43 |
+
for t in reversed(pattern.mro()[:-1]): # skip object
|
44 |
+
all.update(t.__dict__)
|
45 |
+
|
46 |
+
def wrap_attr(orig):
|
47 |
+
return property(wrap_method(orig.__get__, __torch_function__))
|
48 |
+
|
49 |
+
for name, obj in all.items():
|
50 |
+
if name in (
|
51 |
+
"__dict__",
|
52 |
+
"__new__",
|
53 |
+
"__init__",
|
54 |
+
"__repr__",
|
55 |
+
"__weakref__",
|
56 |
+
"__doc__",
|
57 |
+
"__module__",
|
58 |
+
"__dir__",
|
59 |
+
):
|
60 |
+
continue
|
61 |
+
|
62 |
+
# skip things that have been overloaded
|
63 |
+
# things that come from object like `__eq__` still need to be patched, however.
|
64 |
+
if hasattr(to_patch, name) and getattr(to_patch, name) is not getattr(
|
65 |
+
object, name, None
|
66 |
+
):
|
67 |
+
continue
|
68 |
+
|
69 |
+
if isinstance(obj, FUNC_TYPES):
|
70 |
+
setattr(to_patch, name, wrap_method(obj, __torch_function__))
|
71 |
+
elif isinstance(obj, PROPERTY_TYPES):
|
72 |
+
setattr(to_patch, name, wrap_attr(obj))
|
.venv/Lib/site-packages/huggingface_hub/__init__.py
ADDED
@@ -0,0 +1,1002 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2020 The HuggingFace Team. All rights reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
# ***********
|
16 |
+
# `huggingface_hub` init has 2 modes:
|
17 |
+
# - Normal usage:
|
18 |
+
# If imported to use it, all modules and functions are lazy-loaded. This means
|
19 |
+
# they exist at top level in module but are imported only the first time they are
|
20 |
+
# used. This way, `from huggingface_hub import something` will import `something`
|
21 |
+
# quickly without the hassle of importing all the features from `huggingface_hub`.
|
22 |
+
# - Static check:
|
23 |
+
# If statically analyzed, all modules and functions are loaded normally. This way
|
24 |
+
# static typing check works properly as well as autocomplete in text editors and
|
25 |
+
# IDEs.
|
26 |
+
#
|
27 |
+
# The static model imports are done inside the `if TYPE_CHECKING:` statement at
|
28 |
+
# the bottom of this file. Since module/functions imports are duplicated, it is
|
29 |
+
# mandatory to make sure to add them twice when adding one. This is checked in the
|
30 |
+
# `make quality` command.
|
31 |
+
#
|
32 |
+
# To update the static imports, please run the following command and commit the changes.
|
33 |
+
# ```
|
34 |
+
# # Use script
|
35 |
+
# python utils/check_static_imports.py --update-file
|
36 |
+
#
|
37 |
+
# # Or run style on codebase
|
38 |
+
# make style
|
39 |
+
# ```
|
40 |
+
#
|
41 |
+
# ***********
|
42 |
+
# Lazy loader vendored from https://github.com/scientific-python/lazy_loader
|
43 |
+
import importlib
|
44 |
+
import os
|
45 |
+
import sys
|
46 |
+
from typing import TYPE_CHECKING
|
47 |
+
|
48 |
+
|
49 |
+
__version__ = "0.26.5"
|
50 |
+
|
51 |
+
# Alphabetical order of definitions is ensured in tests
|
52 |
+
# WARNING: any comment added in this dictionary definition will be lost when
|
53 |
+
# re-generating the file !
|
54 |
+
_SUBMOD_ATTRS = {
|
55 |
+
"_commit_scheduler": [
|
56 |
+
"CommitScheduler",
|
57 |
+
],
|
58 |
+
"_inference_endpoints": [
|
59 |
+
"InferenceEndpoint",
|
60 |
+
"InferenceEndpointError",
|
61 |
+
"InferenceEndpointStatus",
|
62 |
+
"InferenceEndpointTimeoutError",
|
63 |
+
"InferenceEndpointType",
|
64 |
+
],
|
65 |
+
"_login": [
|
66 |
+
"auth_list",
|
67 |
+
"auth_switch",
|
68 |
+
"interpreter_login",
|
69 |
+
"login",
|
70 |
+
"logout",
|
71 |
+
"notebook_login",
|
72 |
+
],
|
73 |
+
"_multi_commits": [
|
74 |
+
"MultiCommitException",
|
75 |
+
"plan_multi_commits",
|
76 |
+
],
|
77 |
+
"_snapshot_download": [
|
78 |
+
"snapshot_download",
|
79 |
+
],
|
80 |
+
"_space_api": [
|
81 |
+
"SpaceHardware",
|
82 |
+
"SpaceRuntime",
|
83 |
+
"SpaceStage",
|
84 |
+
"SpaceStorage",
|
85 |
+
"SpaceVariable",
|
86 |
+
],
|
87 |
+
"_tensorboard_logger": [
|
88 |
+
"HFSummaryWriter",
|
89 |
+
],
|
90 |
+
"_webhooks_payload": [
|
91 |
+
"WebhookPayload",
|
92 |
+
"WebhookPayloadComment",
|
93 |
+
"WebhookPayloadDiscussion",
|
94 |
+
"WebhookPayloadDiscussionChanges",
|
95 |
+
"WebhookPayloadEvent",
|
96 |
+
"WebhookPayloadMovedTo",
|
97 |
+
"WebhookPayloadRepo",
|
98 |
+
"WebhookPayloadUrl",
|
99 |
+
"WebhookPayloadWebhook",
|
100 |
+
],
|
101 |
+
"_webhooks_server": [
|
102 |
+
"WebhooksServer",
|
103 |
+
"webhook_endpoint",
|
104 |
+
],
|
105 |
+
"community": [
|
106 |
+
"Discussion",
|
107 |
+
"DiscussionComment",
|
108 |
+
"DiscussionCommit",
|
109 |
+
"DiscussionEvent",
|
110 |
+
"DiscussionStatusChange",
|
111 |
+
"DiscussionTitleChange",
|
112 |
+
"DiscussionWithDetails",
|
113 |
+
],
|
114 |
+
"constants": [
|
115 |
+
"CONFIG_NAME",
|
116 |
+
"FLAX_WEIGHTS_NAME",
|
117 |
+
"HUGGINGFACE_CO_URL_HOME",
|
118 |
+
"HUGGINGFACE_CO_URL_TEMPLATE",
|
119 |
+
"PYTORCH_WEIGHTS_NAME",
|
120 |
+
"REPO_TYPE_DATASET",
|
121 |
+
"REPO_TYPE_MODEL",
|
122 |
+
"REPO_TYPE_SPACE",
|
123 |
+
"TF2_WEIGHTS_NAME",
|
124 |
+
"TF_WEIGHTS_NAME",
|
125 |
+
],
|
126 |
+
"fastai_utils": [
|
127 |
+
"_save_pretrained_fastai",
|
128 |
+
"from_pretrained_fastai",
|
129 |
+
"push_to_hub_fastai",
|
130 |
+
],
|
131 |
+
"file_download": [
|
132 |
+
"HfFileMetadata",
|
133 |
+
"_CACHED_NO_EXIST",
|
134 |
+
"get_hf_file_metadata",
|
135 |
+
"hf_hub_download",
|
136 |
+
"hf_hub_url",
|
137 |
+
"try_to_load_from_cache",
|
138 |
+
],
|
139 |
+
"hf_api": [
|
140 |
+
"Collection",
|
141 |
+
"CollectionItem",
|
142 |
+
"CommitInfo",
|
143 |
+
"CommitOperation",
|
144 |
+
"CommitOperationAdd",
|
145 |
+
"CommitOperationCopy",
|
146 |
+
"CommitOperationDelete",
|
147 |
+
"DatasetInfo",
|
148 |
+
"GitCommitInfo",
|
149 |
+
"GitRefInfo",
|
150 |
+
"GitRefs",
|
151 |
+
"HfApi",
|
152 |
+
"ModelInfo",
|
153 |
+
"RepoUrl",
|
154 |
+
"SpaceInfo",
|
155 |
+
"User",
|
156 |
+
"UserLikes",
|
157 |
+
"WebhookInfo",
|
158 |
+
"WebhookWatchedItem",
|
159 |
+
"accept_access_request",
|
160 |
+
"add_collection_item",
|
161 |
+
"add_space_secret",
|
162 |
+
"add_space_variable",
|
163 |
+
"auth_check",
|
164 |
+
"cancel_access_request",
|
165 |
+
"change_discussion_status",
|
166 |
+
"comment_discussion",
|
167 |
+
"create_branch",
|
168 |
+
"create_collection",
|
169 |
+
"create_commit",
|
170 |
+
"create_commits_on_pr",
|
171 |
+
"create_discussion",
|
172 |
+
"create_inference_endpoint",
|
173 |
+
"create_pull_request",
|
174 |
+
"create_repo",
|
175 |
+
"create_tag",
|
176 |
+
"create_webhook",
|
177 |
+
"dataset_info",
|
178 |
+
"delete_branch",
|
179 |
+
"delete_collection",
|
180 |
+
"delete_collection_item",
|
181 |
+
"delete_file",
|
182 |
+
"delete_folder",
|
183 |
+
"delete_inference_endpoint",
|
184 |
+
"delete_repo",
|
185 |
+
"delete_space_secret",
|
186 |
+
"delete_space_storage",
|
187 |
+
"delete_space_variable",
|
188 |
+
"delete_tag",
|
189 |
+
"delete_webhook",
|
190 |
+
"disable_webhook",
|
191 |
+
"duplicate_space",
|
192 |
+
"edit_discussion_comment",
|
193 |
+
"enable_webhook",
|
194 |
+
"file_exists",
|
195 |
+
"get_collection",
|
196 |
+
"get_dataset_tags",
|
197 |
+
"get_discussion_details",
|
198 |
+
"get_full_repo_name",
|
199 |
+
"get_inference_endpoint",
|
200 |
+
"get_model_tags",
|
201 |
+
"get_paths_info",
|
202 |
+
"get_repo_discussions",
|
203 |
+
"get_safetensors_metadata",
|
204 |
+
"get_space_runtime",
|
205 |
+
"get_space_variables",
|
206 |
+
"get_token_permission",
|
207 |
+
"get_user_overview",
|
208 |
+
"get_webhook",
|
209 |
+
"grant_access",
|
210 |
+
"like",
|
211 |
+
"list_accepted_access_requests",
|
212 |
+
"list_collections",
|
213 |
+
"list_datasets",
|
214 |
+
"list_inference_endpoints",
|
215 |
+
"list_liked_repos",
|
216 |
+
"list_metrics",
|
217 |
+
"list_models",
|
218 |
+
"list_organization_members",
|
219 |
+
"list_papers",
|
220 |
+
"list_pending_access_requests",
|
221 |
+
"list_rejected_access_requests",
|
222 |
+
"list_repo_commits",
|
223 |
+
"list_repo_files",
|
224 |
+
"list_repo_likers",
|
225 |
+
"list_repo_refs",
|
226 |
+
"list_repo_tree",
|
227 |
+
"list_spaces",
|
228 |
+
"list_user_followers",
|
229 |
+
"list_user_following",
|
230 |
+
"list_webhooks",
|
231 |
+
"merge_pull_request",
|
232 |
+
"model_info",
|
233 |
+
"move_repo",
|
234 |
+
"paper_info",
|
235 |
+
"parse_safetensors_file_metadata",
|
236 |
+
"pause_inference_endpoint",
|
237 |
+
"pause_space",
|
238 |
+
"preupload_lfs_files",
|
239 |
+
"reject_access_request",
|
240 |
+
"rename_discussion",
|
241 |
+
"repo_exists",
|
242 |
+
"repo_info",
|
243 |
+
"repo_type_and_id_from_hf_id",
|
244 |
+
"request_space_hardware",
|
245 |
+
"request_space_storage",
|
246 |
+
"restart_space",
|
247 |
+
"resume_inference_endpoint",
|
248 |
+
"revision_exists",
|
249 |
+
"run_as_future",
|
250 |
+
"scale_to_zero_inference_endpoint",
|
251 |
+
"set_space_sleep_time",
|
252 |
+
"space_info",
|
253 |
+
"super_squash_history",
|
254 |
+
"unlike",
|
255 |
+
"update_collection_item",
|
256 |
+
"update_collection_metadata",
|
257 |
+
"update_inference_endpoint",
|
258 |
+
"update_repo_settings",
|
259 |
+
"update_repo_visibility",
|
260 |
+
"update_webhook",
|
261 |
+
"upload_file",
|
262 |
+
"upload_folder",
|
263 |
+
"upload_large_folder",
|
264 |
+
"whoami",
|
265 |
+
],
|
266 |
+
"hf_file_system": [
|
267 |
+
"HfFileSystem",
|
268 |
+
"HfFileSystemFile",
|
269 |
+
"HfFileSystemResolvedPath",
|
270 |
+
"HfFileSystemStreamFile",
|
271 |
+
],
|
272 |
+
"hub_mixin": [
|
273 |
+
"ModelHubMixin",
|
274 |
+
"PyTorchModelHubMixin",
|
275 |
+
],
|
276 |
+
"inference._client": [
|
277 |
+
"InferenceClient",
|
278 |
+
"InferenceTimeoutError",
|
279 |
+
],
|
280 |
+
"inference._generated._async_client": [
|
281 |
+
"AsyncInferenceClient",
|
282 |
+
],
|
283 |
+
"inference._generated.types": [
|
284 |
+
"AudioClassificationInput",
|
285 |
+
"AudioClassificationOutputElement",
|
286 |
+
"AudioClassificationOutputTransform",
|
287 |
+
"AudioClassificationParameters",
|
288 |
+
"AudioToAudioInput",
|
289 |
+
"AudioToAudioOutputElement",
|
290 |
+
"AutomaticSpeechRecognitionEarlyStoppingEnum",
|
291 |
+
"AutomaticSpeechRecognitionGenerationParameters",
|
292 |
+
"AutomaticSpeechRecognitionInput",
|
293 |
+
"AutomaticSpeechRecognitionOutput",
|
294 |
+
"AutomaticSpeechRecognitionOutputChunk",
|
295 |
+
"AutomaticSpeechRecognitionParameters",
|
296 |
+
"ChatCompletionInput",
|
297 |
+
"ChatCompletionInputFunctionDefinition",
|
298 |
+
"ChatCompletionInputFunctionName",
|
299 |
+
"ChatCompletionInputGrammarType",
|
300 |
+
"ChatCompletionInputMessage",
|
301 |
+
"ChatCompletionInputMessageChunk",
|
302 |
+
"ChatCompletionInputStreamOptions",
|
303 |
+
"ChatCompletionInputToolType",
|
304 |
+
"ChatCompletionInputURL",
|
305 |
+
"ChatCompletionOutput",
|
306 |
+
"ChatCompletionOutputComplete",
|
307 |
+
"ChatCompletionOutputFunctionDefinition",
|
308 |
+
"ChatCompletionOutputLogprob",
|
309 |
+
"ChatCompletionOutputLogprobs",
|
310 |
+
"ChatCompletionOutputMessage",
|
311 |
+
"ChatCompletionOutputToolCall",
|
312 |
+
"ChatCompletionOutputTopLogprob",
|
313 |
+
"ChatCompletionOutputUsage",
|
314 |
+
"ChatCompletionStreamOutput",
|
315 |
+
"ChatCompletionStreamOutputChoice",
|
316 |
+
"ChatCompletionStreamOutputDelta",
|
317 |
+
"ChatCompletionStreamOutputDeltaToolCall",
|
318 |
+
"ChatCompletionStreamOutputFunction",
|
319 |
+
"ChatCompletionStreamOutputLogprob",
|
320 |
+
"ChatCompletionStreamOutputLogprobs",
|
321 |
+
"ChatCompletionStreamOutputTopLogprob",
|
322 |
+
"ChatCompletionStreamOutputUsage",
|
323 |
+
"DepthEstimationInput",
|
324 |
+
"DepthEstimationOutput",
|
325 |
+
"DocumentQuestionAnsweringInput",
|
326 |
+
"DocumentQuestionAnsweringInputData",
|
327 |
+
"DocumentQuestionAnsweringOutputElement",
|
328 |
+
"DocumentQuestionAnsweringParameters",
|
329 |
+
"FeatureExtractionInput",
|
330 |
+
"FillMaskInput",
|
331 |
+
"FillMaskOutputElement",
|
332 |
+
"FillMaskParameters",
|
333 |
+
"ImageClassificationInput",
|
334 |
+
"ImageClassificationOutputElement",
|
335 |
+
"ImageClassificationOutputTransform",
|
336 |
+
"ImageClassificationParameters",
|
337 |
+
"ImageSegmentationInput",
|
338 |
+
"ImageSegmentationOutputElement",
|
339 |
+
"ImageSegmentationParameters",
|
340 |
+
"ImageToImageInput",
|
341 |
+
"ImageToImageOutput",
|
342 |
+
"ImageToImageParameters",
|
343 |
+
"ImageToImageTargetSize",
|
344 |
+
"ImageToTextEarlyStoppingEnum",
|
345 |
+
"ImageToTextGenerationParameters",
|
346 |
+
"ImageToTextInput",
|
347 |
+
"ImageToTextOutput",
|
348 |
+
"ImageToTextParameters",
|
349 |
+
"ObjectDetectionBoundingBox",
|
350 |
+
"ObjectDetectionInput",
|
351 |
+
"ObjectDetectionOutputElement",
|
352 |
+
"ObjectDetectionParameters",
|
353 |
+
"QuestionAnsweringInput",
|
354 |
+
"QuestionAnsweringInputData",
|
355 |
+
"QuestionAnsweringOutputElement",
|
356 |
+
"QuestionAnsweringParameters",
|
357 |
+
"SentenceSimilarityInput",
|
358 |
+
"SentenceSimilarityInputData",
|
359 |
+
"SummarizationInput",
|
360 |
+
"SummarizationOutput",
|
361 |
+
"SummarizationParameters",
|
362 |
+
"TableQuestionAnsweringInput",
|
363 |
+
"TableQuestionAnsweringInputData",
|
364 |
+
"TableQuestionAnsweringOutputElement",
|
365 |
+
"Text2TextGenerationInput",
|
366 |
+
"Text2TextGenerationOutput",
|
367 |
+
"Text2TextGenerationParameters",
|
368 |
+
"TextClassificationInput",
|
369 |
+
"TextClassificationOutputElement",
|
370 |
+
"TextClassificationOutputTransform",
|
371 |
+
"TextClassificationParameters",
|
372 |
+
"TextGenerationInput",
|
373 |
+
"TextGenerationInputGenerateParameters",
|
374 |
+
"TextGenerationInputGrammarType",
|
375 |
+
"TextGenerationOutput",
|
376 |
+
"TextGenerationOutputBestOfSequence",
|
377 |
+
"TextGenerationOutputDetails",
|
378 |
+
"TextGenerationOutputPrefillToken",
|
379 |
+
"TextGenerationOutputToken",
|
380 |
+
"TextGenerationStreamOutput",
|
381 |
+
"TextGenerationStreamOutputStreamDetails",
|
382 |
+
"TextGenerationStreamOutputToken",
|
383 |
+
"TextToAudioEarlyStoppingEnum",
|
384 |
+
"TextToAudioGenerationParameters",
|
385 |
+
"TextToAudioInput",
|
386 |
+
"TextToAudioOutput",
|
387 |
+
"TextToAudioParameters",
|
388 |
+
"TextToImageInput",
|
389 |
+
"TextToImageOutput",
|
390 |
+
"TextToImageParameters",
|
391 |
+
"TextToImageTargetSize",
|
392 |
+
"TextToSpeechEarlyStoppingEnum",
|
393 |
+
"TextToSpeechGenerationParameters",
|
394 |
+
"TextToSpeechInput",
|
395 |
+
"TextToSpeechOutput",
|
396 |
+
"TextToSpeechParameters",
|
397 |
+
"TokenClassificationInput",
|
398 |
+
"TokenClassificationOutputElement",
|
399 |
+
"TokenClassificationParameters",
|
400 |
+
"ToolElement",
|
401 |
+
"TranslationInput",
|
402 |
+
"TranslationOutput",
|
403 |
+
"TranslationParameters",
|
404 |
+
"VideoClassificationInput",
|
405 |
+
"VideoClassificationOutputElement",
|
406 |
+
"VideoClassificationOutputTransform",
|
407 |
+
"VideoClassificationParameters",
|
408 |
+
"VisualQuestionAnsweringInput",
|
409 |
+
"VisualQuestionAnsweringInputData",
|
410 |
+
"VisualQuestionAnsweringOutputElement",
|
411 |
+
"VisualQuestionAnsweringParameters",
|
412 |
+
"ZeroShotClassificationInput",
|
413 |
+
"ZeroShotClassificationInputData",
|
414 |
+
"ZeroShotClassificationOutputElement",
|
415 |
+
"ZeroShotClassificationParameters",
|
416 |
+
"ZeroShotImageClassificationInput",
|
417 |
+
"ZeroShotImageClassificationInputData",
|
418 |
+
"ZeroShotImageClassificationOutputElement",
|
419 |
+
"ZeroShotImageClassificationParameters",
|
420 |
+
"ZeroShotObjectDetectionBoundingBox",
|
421 |
+
"ZeroShotObjectDetectionInput",
|
422 |
+
"ZeroShotObjectDetectionInputData",
|
423 |
+
"ZeroShotObjectDetectionOutputElement",
|
424 |
+
],
|
425 |
+
"inference_api": [
|
426 |
+
"InferenceApi",
|
427 |
+
],
|
428 |
+
"keras_mixin": [
|
429 |
+
"KerasModelHubMixin",
|
430 |
+
"from_pretrained_keras",
|
431 |
+
"push_to_hub_keras",
|
432 |
+
"save_pretrained_keras",
|
433 |
+
],
|
434 |
+
"repocard": [
|
435 |
+
"DatasetCard",
|
436 |
+
"ModelCard",
|
437 |
+
"RepoCard",
|
438 |
+
"SpaceCard",
|
439 |
+
"metadata_eval_result",
|
440 |
+
"metadata_load",
|
441 |
+
"metadata_save",
|
442 |
+
"metadata_update",
|
443 |
+
],
|
444 |
+
"repocard_data": [
|
445 |
+
"CardData",
|
446 |
+
"DatasetCardData",
|
447 |
+
"EvalResult",
|
448 |
+
"ModelCardData",
|
449 |
+
"SpaceCardData",
|
450 |
+
],
|
451 |
+
"repository": [
|
452 |
+
"Repository",
|
453 |
+
],
|
454 |
+
"serialization": [
|
455 |
+
"StateDictSplit",
|
456 |
+
"get_tf_storage_size",
|
457 |
+
"get_torch_storage_id",
|
458 |
+
"get_torch_storage_size",
|
459 |
+
"save_torch_model",
|
460 |
+
"save_torch_state_dict",
|
461 |
+
"split_state_dict_into_shards_factory",
|
462 |
+
"split_tf_state_dict_into_shards",
|
463 |
+
"split_torch_state_dict_into_shards",
|
464 |
+
],
|
465 |
+
"utils": [
|
466 |
+
"CacheNotFound",
|
467 |
+
"CachedFileInfo",
|
468 |
+
"CachedRepoInfo",
|
469 |
+
"CachedRevisionInfo",
|
470 |
+
"CorruptedCacheException",
|
471 |
+
"DeleteCacheStrategy",
|
472 |
+
"HFCacheInfo",
|
473 |
+
"HfFolder",
|
474 |
+
"cached_assets_path",
|
475 |
+
"configure_http_backend",
|
476 |
+
"dump_environment_info",
|
477 |
+
"get_session",
|
478 |
+
"get_token",
|
479 |
+
"logging",
|
480 |
+
"scan_cache_dir",
|
481 |
+
],
|
482 |
+
}
|
483 |
+
|
484 |
+
|
485 |
+
def _attach(package_name, submodules=None, submod_attrs=None):
|
486 |
+
"""Attach lazily loaded submodules, functions, or other attributes.
|
487 |
+
|
488 |
+
Typically, modules import submodules and attributes as follows:
|
489 |
+
|
490 |
+
```py
|
491 |
+
import mysubmodule
|
492 |
+
import anothersubmodule
|
493 |
+
|
494 |
+
from .foo import someattr
|
495 |
+
```
|
496 |
+
|
497 |
+
The idea is to replace a package's `__getattr__`, `__dir__`, and
|
498 |
+
`__all__`, such that all imports work exactly the way they would
|
499 |
+
with normal imports, except that the import occurs upon first use.
|
500 |
+
|
501 |
+
The typical way to call this function, replacing the above imports, is:
|
502 |
+
|
503 |
+
```python
|
504 |
+
__getattr__, __dir__, __all__ = lazy.attach(
|
505 |
+
__name__,
|
506 |
+
['mysubmodule', 'anothersubmodule'],
|
507 |
+
{'foo': ['someattr']}
|
508 |
+
)
|
509 |
+
```
|
510 |
+
This functionality requires Python 3.7 or higher.
|
511 |
+
|
512 |
+
Args:
|
513 |
+
package_name (`str`):
|
514 |
+
Typically use `__name__`.
|
515 |
+
submodules (`set`):
|
516 |
+
List of submodules to attach.
|
517 |
+
submod_attrs (`dict`):
|
518 |
+
Dictionary of submodule -> list of attributes / functions.
|
519 |
+
These attributes are imported as they are used.
|
520 |
+
|
521 |
+
Returns:
|
522 |
+
__getattr__, __dir__, __all__
|
523 |
+
|
524 |
+
"""
|
525 |
+
if submod_attrs is None:
|
526 |
+
submod_attrs = {}
|
527 |
+
|
528 |
+
if submodules is None:
|
529 |
+
submodules = set()
|
530 |
+
else:
|
531 |
+
submodules = set(submodules)
|
532 |
+
|
533 |
+
attr_to_modules = {attr: mod for mod, attrs in submod_attrs.items() for attr in attrs}
|
534 |
+
|
535 |
+
__all__ = list(submodules | attr_to_modules.keys())
|
536 |
+
|
537 |
+
def __getattr__(name):
|
538 |
+
if name in submodules:
|
539 |
+
try:
|
540 |
+
return importlib.import_module(f"{package_name}.{name}")
|
541 |
+
except Exception as e:
|
542 |
+
print(f"Error importing {package_name}.{name}: {e}")
|
543 |
+
raise
|
544 |
+
elif name in attr_to_modules:
|
545 |
+
submod_path = f"{package_name}.{attr_to_modules[name]}"
|
546 |
+
try:
|
547 |
+
submod = importlib.import_module(submod_path)
|
548 |
+
except Exception as e:
|
549 |
+
print(f"Error importing {submod_path}: {e}")
|
550 |
+
raise
|
551 |
+
attr = getattr(submod, name)
|
552 |
+
|
553 |
+
# If the attribute lives in a file (module) with the same
|
554 |
+
# name as the attribute, ensure that the attribute and *not*
|
555 |
+
# the module is accessible on the package.
|
556 |
+
if name == attr_to_modules[name]:
|
557 |
+
pkg = sys.modules[package_name]
|
558 |
+
pkg.__dict__[name] = attr
|
559 |
+
|
560 |
+
return attr
|
561 |
+
else:
|
562 |
+
raise AttributeError(f"No {package_name} attribute {name}")
|
563 |
+
|
564 |
+
def __dir__():
|
565 |
+
return __all__
|
566 |
+
|
567 |
+
return __getattr__, __dir__, list(__all__)
|
568 |
+
|
569 |
+
|
570 |
+
__getattr__, __dir__, __all__ = _attach(__name__, submodules=[], submod_attrs=_SUBMOD_ATTRS)
|
571 |
+
|
572 |
+
if os.environ.get("EAGER_IMPORT", ""):
|
573 |
+
for attr in __all__:
|
574 |
+
__getattr__(attr)
|
575 |
+
|
576 |
+
# WARNING: any content below this statement is generated automatically. Any manual edit
|
577 |
+
# will be lost when re-generating this file !
|
578 |
+
#
|
579 |
+
# To update the static imports, please run the following command and commit the changes.
|
580 |
+
# ```
|
581 |
+
# # Use script
|
+# python utils/check_static_imports.py --update-file
+#
+# # Or run style on codebase
+# make style
+# ```
+if TYPE_CHECKING:  # pragma: no cover
+    from ._commit_scheduler import CommitScheduler  # noqa: F401
+    from ._inference_endpoints import (
+        InferenceEndpoint,  # noqa: F401
+        InferenceEndpointError,  # noqa: F401
+        InferenceEndpointStatus,  # noqa: F401
+        InferenceEndpointTimeoutError,  # noqa: F401
+        InferenceEndpointType,  # noqa: F401
+    )
+    from ._login import (
+        auth_list,  # noqa: F401
+        auth_switch,  # noqa: F401
+        interpreter_login,  # noqa: F401
+        login,  # noqa: F401
+        logout,  # noqa: F401
+        notebook_login,  # noqa: F401
+    )
+    from ._multi_commits import (
+        MultiCommitException,  # noqa: F401
+        plan_multi_commits,  # noqa: F401
+    )
+    from ._snapshot_download import snapshot_download  # noqa: F401
+    from ._space_api import (
+        SpaceHardware,  # noqa: F401
+        SpaceRuntime,  # noqa: F401
+        SpaceStage,  # noqa: F401
+        SpaceStorage,  # noqa: F401
+        SpaceVariable,  # noqa: F401
+    )
+    from ._tensorboard_logger import HFSummaryWriter  # noqa: F401
+    from ._webhooks_payload import (
+        WebhookPayload,  # noqa: F401
+        WebhookPayloadComment,  # noqa: F401
+        WebhookPayloadDiscussion,  # noqa: F401
+        WebhookPayloadDiscussionChanges,  # noqa: F401
+        WebhookPayloadEvent,  # noqa: F401
+        WebhookPayloadMovedTo,  # noqa: F401
+        WebhookPayloadRepo,  # noqa: F401
+        WebhookPayloadUrl,  # noqa: F401
+        WebhookPayloadWebhook,  # noqa: F401
+    )
+    from ._webhooks_server import (
+        WebhooksServer,  # noqa: F401
+        webhook_endpoint,  # noqa: F401
+    )
+    from .community import (
+        Discussion,  # noqa: F401
+        DiscussionComment,  # noqa: F401
+        DiscussionCommit,  # noqa: F401
+        DiscussionEvent,  # noqa: F401
+        DiscussionStatusChange,  # noqa: F401
+        DiscussionTitleChange,  # noqa: F401
+        DiscussionWithDetails,  # noqa: F401
+    )
+    from .constants import (
+        CONFIG_NAME,  # noqa: F401
+        FLAX_WEIGHTS_NAME,  # noqa: F401
+        HUGGINGFACE_CO_URL_HOME,  # noqa: F401
+        HUGGINGFACE_CO_URL_TEMPLATE,  # noqa: F401
+        PYTORCH_WEIGHTS_NAME,  # noqa: F401
+        REPO_TYPE_DATASET,  # noqa: F401
+        REPO_TYPE_MODEL,  # noqa: F401
+        REPO_TYPE_SPACE,  # noqa: F401
+        TF2_WEIGHTS_NAME,  # noqa: F401
+        TF_WEIGHTS_NAME,  # noqa: F401
+    )
+    from .fastai_utils import (
+        _save_pretrained_fastai,  # noqa: F401
+        from_pretrained_fastai,  # noqa: F401
+        push_to_hub_fastai,  # noqa: F401
+    )
+    from .file_download import (
+        _CACHED_NO_EXIST,  # noqa: F401
+        HfFileMetadata,  # noqa: F401
+        get_hf_file_metadata,  # noqa: F401
+        hf_hub_download,  # noqa: F401
+        hf_hub_url,  # noqa: F401
+        try_to_load_from_cache,  # noqa: F401
+    )
+    from .hf_api import (
+        Collection,  # noqa: F401
+        CollectionItem,  # noqa: F401
+        CommitInfo,  # noqa: F401
+        CommitOperation,  # noqa: F401
+        CommitOperationAdd,  # noqa: F401
+        CommitOperationCopy,  # noqa: F401
+        CommitOperationDelete,  # noqa: F401
+        DatasetInfo,  # noqa: F401
+        GitCommitInfo,  # noqa: F401
+        GitRefInfo,  # noqa: F401
+        GitRefs,  # noqa: F401
+        HfApi,  # noqa: F401
+        ModelInfo,  # noqa: F401
+        RepoUrl,  # noqa: F401
+        SpaceInfo,  # noqa: F401
+        User,  # noqa: F401
+        UserLikes,  # noqa: F401
+        WebhookInfo,  # noqa: F401
+        WebhookWatchedItem,  # noqa: F401
+        accept_access_request,  # noqa: F401
+        add_collection_item,  # noqa: F401
+        add_space_secret,  # noqa: F401
+        add_space_variable,  # noqa: F401
+        auth_check,  # noqa: F401
+        cancel_access_request,  # noqa: F401
+        change_discussion_status,  # noqa: F401
+        comment_discussion,  # noqa: F401
+        create_branch,  # noqa: F401
+        create_collection,  # noqa: F401
+        create_commit,  # noqa: F401
+        create_commits_on_pr,  # noqa: F401
+        create_discussion,  # noqa: F401
+        create_inference_endpoint,  # noqa: F401
+        create_pull_request,  # noqa: F401
+        create_repo,  # noqa: F401
+        create_tag,  # noqa: F401
+        create_webhook,  # noqa: F401
+        dataset_info,  # noqa: F401
+        delete_branch,  # noqa: F401
+        delete_collection,  # noqa: F401
+        delete_collection_item,  # noqa: F401
+        delete_file,  # noqa: F401
+        delete_folder,  # noqa: F401
+        delete_inference_endpoint,  # noqa: F401
+        delete_repo,  # noqa: F401
+        delete_space_secret,  # noqa: F401
+        delete_space_storage,  # noqa: F401
+        delete_space_variable,  # noqa: F401
+        delete_tag,  # noqa: F401
+        delete_webhook,  # noqa: F401
+        disable_webhook,  # noqa: F401
+        duplicate_space,  # noqa: F401
+        edit_discussion_comment,  # noqa: F401
+        enable_webhook,  # noqa: F401
+        file_exists,  # noqa: F401
+        get_collection,  # noqa: F401
+        get_dataset_tags,  # noqa: F401
+        get_discussion_details,  # noqa: F401
+        get_full_repo_name,  # noqa: F401
+        get_inference_endpoint,  # noqa: F401
+        get_model_tags,  # noqa: F401
+        get_paths_info,  # noqa: F401
+        get_repo_discussions,  # noqa: F401
+        get_safetensors_metadata,  # noqa: F401
+        get_space_runtime,  # noqa: F401
+        get_space_variables,  # noqa: F401
+        get_token_permission,  # noqa: F401
+        get_user_overview,  # noqa: F401
+        get_webhook,  # noqa: F401
+        grant_access,  # noqa: F401
+        like,  # noqa: F401
+        list_accepted_access_requests,  # noqa: F401
+        list_collections,  # noqa: F401
+        list_datasets,  # noqa: F401
+        list_inference_endpoints,  # noqa: F401
+        list_liked_repos,  # noqa: F401
+        list_metrics,  # noqa: F401
+        list_models,  # noqa: F401
+        list_organization_members,  # noqa: F401
+        list_papers,  # noqa: F401
+        list_pending_access_requests,  # noqa: F401
+        list_rejected_access_requests,  # noqa: F401
+        list_repo_commits,  # noqa: F401
+        list_repo_files,  # noqa: F401
+        list_repo_likers,  # noqa: F401
+        list_repo_refs,  # noqa: F401
+        list_repo_tree,  # noqa: F401
+        list_spaces,  # noqa: F401
+        list_user_followers,  # noqa: F401
+        list_user_following,  # noqa: F401
+        list_webhooks,  # noqa: F401
+        merge_pull_request,  # noqa: F401
+        model_info,  # noqa: F401
+        move_repo,  # noqa: F401
+        paper_info,  # noqa: F401
+        parse_safetensors_file_metadata,  # noqa: F401
+        pause_inference_endpoint,  # noqa: F401
+        pause_space,  # noqa: F401
+        preupload_lfs_files,  # noqa: F401
+        reject_access_request,  # noqa: F401
+        rename_discussion,  # noqa: F401
+        repo_exists,  # noqa: F401
+        repo_info,  # noqa: F401
+        repo_type_and_id_from_hf_id,  # noqa: F401
+        request_space_hardware,  # noqa: F401
+        request_space_storage,  # noqa: F401
+        restart_space,  # noqa: F401
+        resume_inference_endpoint,  # noqa: F401
+        revision_exists,  # noqa: F401
+        run_as_future,  # noqa: F401
+        scale_to_zero_inference_endpoint,  # noqa: F401
+        set_space_sleep_time,  # noqa: F401
+        space_info,  # noqa: F401
+        super_squash_history,  # noqa: F401
+        unlike,  # noqa: F401
+        update_collection_item,  # noqa: F401
+        update_collection_metadata,  # noqa: F401
+        update_inference_endpoint,  # noqa: F401
+        update_repo_settings,  # noqa: F401
+        update_repo_visibility,  # noqa: F401
+        update_webhook,  # noqa: F401
+        upload_file,  # noqa: F401
+        upload_folder,  # noqa: F401
+        upload_large_folder,  # noqa: F401
+        whoami,  # noqa: F401
+    )
+    from .hf_file_system import (
+        HfFileSystem,  # noqa: F401
+        HfFileSystemFile,  # noqa: F401
+        HfFileSystemResolvedPath,  # noqa: F401
+        HfFileSystemStreamFile,  # noqa: F401
+    )
+    from .hub_mixin import (
+        ModelHubMixin,  # noqa: F401
+        PyTorchModelHubMixin,  # noqa: F401
+    )
+    from .inference._client import (
+        InferenceClient,  # noqa: F401
+        InferenceTimeoutError,  # noqa: F401
+    )
+    from .inference._generated._async_client import AsyncInferenceClient  # noqa: F401
+    from .inference._generated.types import (
+        AudioClassificationInput,  # noqa: F401
+        AudioClassificationOutputElement,  # noqa: F401
+        AudioClassificationOutputTransform,  # noqa: F401
+        AudioClassificationParameters,  # noqa: F401
+        AudioToAudioInput,  # noqa: F401
+        AudioToAudioOutputElement,  # noqa: F401
+        AutomaticSpeechRecognitionEarlyStoppingEnum,  # noqa: F401
+        AutomaticSpeechRecognitionGenerationParameters,  # noqa: F401
+        AutomaticSpeechRecognitionInput,  # noqa: F401
+        AutomaticSpeechRecognitionOutput,  # noqa: F401
+        AutomaticSpeechRecognitionOutputChunk,  # noqa: F401
+        AutomaticSpeechRecognitionParameters,  # noqa: F401
+        ChatCompletionInput,  # noqa: F401
+        ChatCompletionInputFunctionDefinition,  # noqa: F401
+        ChatCompletionInputFunctionName,  # noqa: F401
+        ChatCompletionInputGrammarType,  # noqa: F401
+        ChatCompletionInputMessage,  # noqa: F401
+        ChatCompletionInputMessageChunk,  # noqa: F401
+        ChatCompletionInputStreamOptions,  # noqa: F401
+        ChatCompletionInputToolType,  # noqa: F401
+        ChatCompletionInputURL,  # noqa: F401
+        ChatCompletionOutput,  # noqa: F401
+        ChatCompletionOutputComplete,  # noqa: F401
+        ChatCompletionOutputFunctionDefinition,  # noqa: F401
+        ChatCompletionOutputLogprob,  # noqa: F401
+        ChatCompletionOutputLogprobs,  # noqa: F401
+        ChatCompletionOutputMessage,  # noqa: F401
+        ChatCompletionOutputToolCall,  # noqa: F401
+        ChatCompletionOutputTopLogprob,  # noqa: F401
+        ChatCompletionOutputUsage,  # noqa: F401
+        ChatCompletionStreamOutput,  # noqa: F401
+        ChatCompletionStreamOutputChoice,  # noqa: F401
+        ChatCompletionStreamOutputDelta,  # noqa: F401
+        ChatCompletionStreamOutputDeltaToolCall,  # noqa: F401
+        ChatCompletionStreamOutputFunction,  # noqa: F401
+        ChatCompletionStreamOutputLogprob,  # noqa: F401
+        ChatCompletionStreamOutputLogprobs,  # noqa: F401
+        ChatCompletionStreamOutputTopLogprob,  # noqa: F401
+        ChatCompletionStreamOutputUsage,  # noqa: F401
+        DepthEstimationInput,  # noqa: F401
+        DepthEstimationOutput,  # noqa: F401
+        DocumentQuestionAnsweringInput,  # noqa: F401
+        DocumentQuestionAnsweringInputData,  # noqa: F401
+        DocumentQuestionAnsweringOutputElement,  # noqa: F401
+        DocumentQuestionAnsweringParameters,  # noqa: F401
+        FeatureExtractionInput,  # noqa: F401
+        FillMaskInput,  # noqa: F401
+        FillMaskOutputElement,  # noqa: F401
+        FillMaskParameters,  # noqa: F401
+        ImageClassificationInput,  # noqa: F401
+        ImageClassificationOutputElement,  # noqa: F401
+        ImageClassificationOutputTransform,  # noqa: F401
+        ImageClassificationParameters,  # noqa: F401
+        ImageSegmentationInput,  # noqa: F401
+        ImageSegmentationOutputElement,  # noqa: F401
+        ImageSegmentationParameters,  # noqa: F401
+        ImageToImageInput,  # noqa: F401
+        ImageToImageOutput,  # noqa: F401
+        ImageToImageParameters,  # noqa: F401
+        ImageToImageTargetSize,  # noqa: F401
+        ImageToTextEarlyStoppingEnum,  # noqa: F401
+        ImageToTextGenerationParameters,  # noqa: F401
+        ImageToTextInput,  # noqa: F401
+        ImageToTextOutput,  # noqa: F401
+        ImageToTextParameters,  # noqa: F401
+        ObjectDetectionBoundingBox,  # noqa: F401
+        ObjectDetectionInput,  # noqa: F401
+        ObjectDetectionOutputElement,  # noqa: F401
+        ObjectDetectionParameters,  # noqa: F401
+        QuestionAnsweringInput,  # noqa: F401
+        QuestionAnsweringInputData,  # noqa: F401
+        QuestionAnsweringOutputElement,  # noqa: F401
+        QuestionAnsweringParameters,  # noqa: F401
+        SentenceSimilarityInput,  # noqa: F401
+        SentenceSimilarityInputData,  # noqa: F401
+        SummarizationInput,  # noqa: F401
+        SummarizationOutput,  # noqa: F401
+        SummarizationParameters,  # noqa: F401
+        TableQuestionAnsweringInput,  # noqa: F401
+        TableQuestionAnsweringInputData,  # noqa: F401
+        TableQuestionAnsweringOutputElement,  # noqa: F401
+        Text2TextGenerationInput,  # noqa: F401
+        Text2TextGenerationOutput,  # noqa: F401
+        Text2TextGenerationParameters,  # noqa: F401
+        TextClassificationInput,  # noqa: F401
+        TextClassificationOutputElement,  # noqa: F401
+        TextClassificationOutputTransform,  # noqa: F401
+        TextClassificationParameters,  # noqa: F401
+        TextGenerationInput,  # noqa: F401
+        TextGenerationInputGenerateParameters,  # noqa: F401
+        TextGenerationInputGrammarType,  # noqa: F401
+        TextGenerationOutput,  # noqa: F401
+        TextGenerationOutputBestOfSequence,  # noqa: F401
+        TextGenerationOutputDetails,  # noqa: F401
+        TextGenerationOutputPrefillToken,  # noqa: F401
+        TextGenerationOutputToken,  # noqa: F401
+        TextGenerationStreamOutput,  # noqa: F401
+        TextGenerationStreamOutputStreamDetails,  # noqa: F401
+        TextGenerationStreamOutputToken,  # noqa: F401
+        TextToAudioEarlyStoppingEnum,  # noqa: F401
+        TextToAudioGenerationParameters,  # noqa: F401
+        TextToAudioInput,  # noqa: F401
+        TextToAudioOutput,  # noqa: F401
+        TextToAudioParameters,  # noqa: F401
+        TextToImageInput,  # noqa: F401
+        TextToImageOutput,  # noqa: F401
+        TextToImageParameters,  # noqa: F401
+        TextToImageTargetSize,  # noqa: F401
+        TextToSpeechEarlyStoppingEnum,  # noqa: F401
+        TextToSpeechGenerationParameters,  # noqa: F401
+        TextToSpeechInput,  # noqa: F401
+        TextToSpeechOutput,  # noqa: F401
+        TextToSpeechParameters,  # noqa: F401
+        TokenClassificationInput,  # noqa: F401
+        TokenClassificationOutputElement,  # noqa: F401
+        TokenClassificationParameters,  # noqa: F401
+        ToolElement,  # noqa: F401
+        TranslationInput,  # noqa: F401
+        TranslationOutput,  # noqa: F401
+        TranslationParameters,  # noqa: F401
+        VideoClassificationInput,  # noqa: F401
+        VideoClassificationOutputElement,  # noqa: F401
+        VideoClassificationOutputTransform,  # noqa: F401
+        VideoClassificationParameters,  # noqa: F401
+        VisualQuestionAnsweringInput,  # noqa: F401
+        VisualQuestionAnsweringInputData,  # noqa: F401
+        VisualQuestionAnsweringOutputElement,  # noqa: F401
+        VisualQuestionAnsweringParameters,  # noqa: F401
+        ZeroShotClassificationInput,  # noqa: F401
+        ZeroShotClassificationInputData,  # noqa: F401
+        ZeroShotClassificationOutputElement,  # noqa: F401
+        ZeroShotClassificationParameters,  # noqa: F401
+        ZeroShotImageClassificationInput,  # noqa: F401
+        ZeroShotImageClassificationInputData,  # noqa: F401
+        ZeroShotImageClassificationOutputElement,  # noqa: F401
+        ZeroShotImageClassificationParameters,  # noqa: F401
+        ZeroShotObjectDetectionBoundingBox,  # noqa: F401
+        ZeroShotObjectDetectionInput,  # noqa: F401
+        ZeroShotObjectDetectionInputData,  # noqa: F401
+        ZeroShotObjectDetectionOutputElement,  # noqa: F401
+    )
+    from .inference_api import InferenceApi  # noqa: F401
+    from .keras_mixin import (
+        KerasModelHubMixin,  # noqa: F401
+        from_pretrained_keras,  # noqa: F401
+        push_to_hub_keras,  # noqa: F401
+        save_pretrained_keras,  # noqa: F401
+    )
+    from .repocard import (
+        DatasetCard,  # noqa: F401
+        ModelCard,  # noqa: F401
+        RepoCard,  # noqa: F401
+        SpaceCard,  # noqa: F401
+        metadata_eval_result,  # noqa: F401
+        metadata_load,  # noqa: F401
+        metadata_save,  # noqa: F401
+        metadata_update,  # noqa: F401
+    )
+    from .repocard_data import (
+        CardData,  # noqa: F401
+        DatasetCardData,  # noqa: F401
+        EvalResult,  # noqa: F401
+        ModelCardData,  # noqa: F401
+        SpaceCardData,  # noqa: F401
+    )
+    from .repository import Repository  # noqa: F401
+    from .serialization import (
+        StateDictSplit,  # noqa: F401
+        get_tf_storage_size,  # noqa: F401
+        get_torch_storage_id,  # noqa: F401
+        get_torch_storage_size,  # noqa: F401
+        save_torch_model,  # noqa: F401
+        save_torch_state_dict,  # noqa: F401
+        split_state_dict_into_shards_factory,  # noqa: F401
+        split_tf_state_dict_into_shards,  # noqa: F401
+        split_torch_state_dict_into_shards,  # noqa: F401
+    )
+    from .utils import (
+        CachedFileInfo,  # noqa: F401
+        CachedRepoInfo,  # noqa: F401
+        CachedRevisionInfo,  # noqa: F401
+        CacheNotFound,  # noqa: F401
+        CorruptedCacheException,  # noqa: F401
+        DeleteCacheStrategy,  # noqa: F401
+        HFCacheInfo,  # noqa: F401
+        HfFolder,  # noqa: F401
+        cached_assets_path,  # noqa: F401
+        configure_http_backend,  # noqa: F401
+        dump_environment_info,  # noqa: F401
+        get_session,  # noqa: F401
+        get_token,  # noqa: F401
+        logging,  # noqa: F401
+        scan_cache_dir,  # noqa: F401
+    )
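The `if TYPE_CHECKING:` block above only matters for type checkers and IDEs; the comment preceding it notes that it is regenerated with `python utils/check_static_imports.py --update-file` (or `make style`) so it stays in sync with the names the package exposes lazily at runtime. As a rough illustration of the idea (a minimal sketch of the generic PEP 562 lazy-import pattern, not the library's exact implementation; the mapping and the two names below are reduced examples):

# __init__.py sketch -- illustrative only.
import importlib
from typing import TYPE_CHECKING

# Hypothetical mapping: public attribute name -> submodule that defines it.
_SUBMOD_ATTRS = {
    "hf_hub_download": "file_download",
    "HfApi": "hf_api",
}

def __getattr__(name: str):
    # PEP 562: import the defining submodule only on first attribute access,
    # so `import huggingface_hub` stays cheap.
    if name in _SUBMOD_ATTRS:
        module = importlib.import_module(f".{_SUBMOD_ATTRS[name]}", __name__)
        return getattr(module, name)
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

if TYPE_CHECKING:  # static mirror so type checkers and IDEs see the same names
    from .file_download import hf_hub_download  # noqa: F401
    from .hf_api import HfApi  # noqa: F401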