ayousanz committed

Commit f96afdc · verified · 1 Parent(s): 978c416

Add files using upload-large-folder tool

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/INSTALLER +1 -0
  2. .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/LICENSE +29 -0
  3. .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/METADATA +167 -0
  4. .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/RECORD +56 -0
  5. .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/REQUESTED +0 -0
  6. .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/WHEEL +5 -0
  7. .venv/Lib/site-packages/fsspec-2024.2.0.dist-info/top_level.txt +1 -0
  8. .venv/Lib/site-packages/fsspec/implementations/__init__.py +0 -0
  9. .venv/Lib/site-packages/fsspec/implementations/jupyter.py +124 -0
  10. .venv/Lib/site-packages/fsspec/implementations/libarchive.py +213 -0
  11. .venv/Lib/site-packages/fsspec/implementations/local.py +418 -0
  12. .venv/Lib/site-packages/fsspec/implementations/memory.py +292 -0
  13. .venv/Lib/site-packages/fsspec/implementations/reference.py +1160 -0
  14. .venv/Lib/site-packages/fsspec/implementations/sftp.py +180 -0
  15. .venv/Lib/site-packages/fsspec/implementations/smb.py +324 -0
  16. .venv/Lib/site-packages/fsspec/implementations/tar.py +124 -0
  17. .venv/Lib/site-packages/fsspec/implementations/webhdfs.py +486 -0
  18. .venv/Lib/site-packages/fsspec/implementations/zip.py +133 -0
  19. .venv/Lib/site-packages/fsspec/tests/abstract/__init__.py +287 -0
  20. .venv/Lib/site-packages/fsspec/tests/abstract/common.py +175 -0
  21. .venv/Lib/site-packages/fsspec/tests/abstract/copy.py +557 -0
  22. .venv/Lib/site-packages/fsspec/tests/abstract/get.py +587 -0
  23. .venv/Lib/site-packages/fsspec/tests/abstract/put.py +591 -0
  24. .venv/Lib/site-packages/fugashi-1.4.0.dist-info/INSTALLER +1 -0
  25. .venv/Lib/site-packages/fugashi-1.4.0.dist-info/LICENSE +21 -0
  26. .venv/Lib/site-packages/fugashi-1.4.0.dist-info/LICENSE.mecab +29 -0
  27. .venv/Lib/site-packages/fugashi-1.4.0.dist-info/METADATA +157 -0
  28. .venv/Lib/site-packages/fugashi-1.4.0.dist-info/RECORD +16 -0
  29. .venv/Lib/site-packages/fugashi-1.4.0.dist-info/REQUESTED +0 -0
  30. .venv/Lib/site-packages/fugashi-1.4.0.dist-info/WHEEL +5 -0
  31. .venv/Lib/site-packages/fugashi-1.4.0.dist-info/entry_points.txt +4 -0
  32. .venv/Lib/site-packages/fugashi-1.4.0.dist-info/top_level.txt +1 -0
  33. .venv/Lib/site-packages/fugashi/__init__.py +2 -0
  34. .venv/Lib/site-packages/fugashi/__pycache__/__init__.cpython-39.pyc +0 -0
  35. .venv/Lib/site-packages/fugashi/cli.py +47 -0
  36. .venv/Lib/site-packages/fugashi/fugashi.cp39-win_amd64.pyd +0 -0
  37. .venv/Lib/site-packages/functorch/_C.cp39-win_amd64.pyd +0 -0
  38. .venv/Lib/site-packages/functorch/__init__.py +39 -0
  39. .venv/Lib/site-packages/functorch/_src/make_functional/__init__.py +4 -0
  40. .venv/Lib/site-packages/functorch/_src/vmap/__init__.py +16 -0
  41. .venv/Lib/site-packages/functorch/compile/__init__.py +30 -0
  42. .venv/Lib/site-packages/functorch/dim/batch_tensor.py +26 -0
  43. .venv/Lib/site-packages/functorch/dim/delayed_mul_tensor.py +77 -0
  44. .venv/Lib/site-packages/functorch/dim/dim.py +121 -0
  45. .venv/Lib/site-packages/functorch/dim/magic_trace.py +42 -0
  46. .venv/Lib/site-packages/functorch/dim/op_properties.py +312 -0
  47. .venv/Lib/site-packages/functorch/dim/reference.py +645 -0
  48. .venv/Lib/site-packages/functorch/dim/tree_map.py +15 -0
  49. .venv/Lib/site-packages/functorch/dim/wrap_type.py +72 -0
  50. .venv/Lib/site-packages/huggingface_hub/__init__.py +1002 -0
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
+ uv
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/LICENSE ADDED
@@ -0,0 +1,29 @@
+ BSD 3-Clause License
+
+ Copyright (c) 2018, Martin Durant
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+ * Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/METADATA ADDED
@@ -0,0 +1,167 @@
+ Metadata-Version: 2.1
+ Name: fsspec
+ Version: 2024.2.0
+ Summary: File-system specification
+ Home-page: https://github.com/fsspec/filesystem_spec
+ Maintainer: Martin Durant
+ Maintainer-email: [email protected]
+ License: BSD
+ Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
+ Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
+ Keywords: file
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: BSD License
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Provides-Extra: abfs
+ Requires-Dist: adlfs ; extra == 'abfs'
+ Provides-Extra: adl
+ Requires-Dist: adlfs ; extra == 'adl'
+ Provides-Extra: arrow
+ Requires-Dist: pyarrow >=1 ; extra == 'arrow'
+ Provides-Extra: dask
+ Requires-Dist: dask ; extra == 'dask'
+ Requires-Dist: distributed ; extra == 'dask'
+ Provides-Extra: devel
+ Requires-Dist: pytest ; extra == 'devel'
+ Requires-Dist: pytest-cov ; extra == 'devel'
+ Provides-Extra: dropbox
+ Requires-Dist: dropboxdrivefs ; extra == 'dropbox'
+ Requires-Dist: requests ; extra == 'dropbox'
+ Requires-Dist: dropbox ; extra == 'dropbox'
+ Provides-Extra: entrypoints
+ Provides-Extra: full
+ Requires-Dist: adlfs ; extra == 'full'
+ Requires-Dist: aiohttp !=4.0.0a0,!=4.0.0a1 ; extra == 'full'
+ Requires-Dist: dask ; extra == 'full'
+ Requires-Dist: distributed ; extra == 'full'
+ Requires-Dist: dropbox ; extra == 'full'
+ Requires-Dist: dropboxdrivefs ; extra == 'full'
+ Requires-Dist: fusepy ; extra == 'full'
+ Requires-Dist: gcsfs ; extra == 'full'
+ Requires-Dist: libarchive-c ; extra == 'full'
+ Requires-Dist: ocifs ; extra == 'full'
+ Requires-Dist: panel ; extra == 'full'
+ Requires-Dist: paramiko ; extra == 'full'
+ Requires-Dist: pyarrow >=1 ; extra == 'full'
+ Requires-Dist: pygit2 ; extra == 'full'
+ Requires-Dist: requests ; extra == 'full'
+ Requires-Dist: s3fs ; extra == 'full'
+ Requires-Dist: smbprotocol ; extra == 'full'
+ Requires-Dist: tqdm ; extra == 'full'
+ Provides-Extra: fuse
+ Requires-Dist: fusepy ; extra == 'fuse'
+ Provides-Extra: gcs
+ Requires-Dist: gcsfs ; extra == 'gcs'
+ Provides-Extra: git
+ Requires-Dist: pygit2 ; extra == 'git'
+ Provides-Extra: github
+ Requires-Dist: requests ; extra == 'github'
+ Provides-Extra: gs
+ Requires-Dist: gcsfs ; extra == 'gs'
+ Provides-Extra: gui
+ Requires-Dist: panel ; extra == 'gui'
+ Provides-Extra: hdfs
+ Requires-Dist: pyarrow >=1 ; extra == 'hdfs'
+ Provides-Extra: http
+ Requires-Dist: aiohttp !=4.0.0a0,!=4.0.0a1 ; extra == 'http'
+ Provides-Extra: libarchive
+ Requires-Dist: libarchive-c ; extra == 'libarchive'
+ Provides-Extra: oci
+ Requires-Dist: ocifs ; extra == 'oci'
+ Provides-Extra: s3
+ Requires-Dist: s3fs ; extra == 's3'
+ Provides-Extra: sftp
+ Requires-Dist: paramiko ; extra == 'sftp'
+ Provides-Extra: smb
+ Requires-Dist: smbprotocol ; extra == 'smb'
+ Provides-Extra: ssh
+ Requires-Dist: paramiko ; extra == 'ssh'
+ Provides-Extra: tqdm
+ Requires-Dist: tqdm ; extra == 'tqdm'
+
+ # filesystem_spec
+
+ [![PyPI version](https://badge.fury.io/py/fsspec.svg)](https://pypi.python.org/pypi/fsspec/)
+ [![Anaconda-Server Badge](https://anaconda.org/conda-forge/fsspec/badges/version.svg)](https://anaconda.org/conda-forge/fsspec)
+ ![Build](https://github.com/fsspec/filesystem_spec/workflows/CI/badge.svg)
+ [![Docs](https://readthedocs.org/projects/filesystem-spec/badge/?version=latest)](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
+ [![PyPi downloads](https://img.shields.io/pypi/dm/fsspec?label=pypi%20downloads&style=flat)](https://pepy.tech/project/fsspec)
+
+ A specification for pythonic filesystems.
+
+ ## Install
+
+ ```bash
+ pip install fsspec
+ ```
+
+ would install the base fsspec. Various optionally supported features might require specification of custom
+ extra require, e.g. `pip install fsspec[ssh]` will install dependencies for `ssh` backends support.
+ Use `pip install fsspec[full]` for installation of all known extra dependencies.
+
+ Up-to-date package also provided through conda-forge distribution:
+
+ ```bash
+ conda install -c conda-forge fsspec
+ ```
+
+
+ ## Purpose
+
+ To produce a template or specification for a file-system interface, that specific implementations should follow,
+ so that applications making use of them can rely on a common behaviour and not have to worry about the specific
+ internal implementation decisions with any given backend. Many such implementations are included in this package,
+ or in sister projects such as `s3fs` and `gcsfs`.
+
+ In addition, if this is well-designed, then additional functionality, such as a key-value store or FUSE
+ mounting of the file-system implementation may be available for all implementations "for free".
+
+ ## Documentation
+
+ Please refer to [RTD](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
+
+ ## Develop
+
+ fsspec uses GitHub Actions for CI. Environment files can be found
+ in the "ci/" directory. Note that the main environment is called "py38",
+ but it is expected that the version of python installed be adjustable at
+ CI runtime. For local use, pick a version suitable for you.
+
+ ### Testing
+
+ Tests can be run in the dev environment, if activated, via ``pytest fsspec``.
+
+ The full fsspec suite requires a system-level docker, docker-compose, and fuse
+ installation. If only making changes to one backend implementation, it is
+ not generally necessary to run all tests locally.
+
+ It is expected that contributors ensure that any change to fsspec does not
+ cause issues or regressions for either other fsspec-related packages such
+ as gcsfs and s3fs, nor for downstream users of fsspec. The "downstream" CI
+ run and corresponding environment file run a set of tests from the dask
+ test suite, and very minimal tests against pandas and zarr from the
+ test_downstream.py module in this repo.
+
+ ### Code Formatting
+
+ fsspec uses [Black](https://black.readthedocs.io/en/stable) to ensure
+ a consistent code format throughout the project.
+ Run ``black fsspec`` from the root of the filesystem_spec repository to
+ auto-format your code. Additionally, many editors have plugins that will apply
+ ``black`` as you edit files. ``black`` is included in the ``tox`` environments.
+
+ Optionally, you may wish to setup [pre-commit hooks](https://pre-commit.com) to
+ automatically run ``black`` when you make a git commit.
+ Run ``pre-commit install --install-hooks`` from the root of the
+ filesystem_spec repository to setup pre-commit hooks. ``black`` will now be run
+ before you commit, reformatting any changed files. You can format without
+ committing via ``pre-commit run`` or skip these checks with ``git commit
+ --no-verify``.
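
For context, a minimal usage sketch of the unified interface the README above describes; the file name is a placeholder and only the base `fsspec` install is assumed (remote protocols such as `s3://` need the matching optional extra):

```python
# Hedged sketch of the core fsspec workflow; "example.txt" is a placeholder path.
import fsspec

# open() works the same way for local paths and for URLs like "s3://bucket/key"
with fsspec.open("example.txt", "wb") as f:
    f.write(b"hello fsspec")

fs = fsspec.filesystem("file")       # an AbstractFileSystem implementation
print(fs.cat_file("example.txt"))    # b'hello fsspec'
```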
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/RECORD ADDED
@@ -0,0 +1,56 @@
1
+ fsspec-2024.2.0.dist-info/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
2
+ fsspec-2024.2.0.dist-info/METADATA,sha256=uwzW1Braxnd_QGVI8W6J0KHi5KTiTJEm8YzSUdG-_Dc,6786
3
+ fsspec-2024.2.0.dist-info/RECORD,,
4
+ fsspec-2024.2.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
5
+ fsspec-2024.2.0.dist-info/top_level.txt,sha256=blt2pDrQDwN3Gklcw13CSPLQRd6aaOgJ8AxqrW395MI,7
6
+ fsspec-2024.2.0.dist-info\INSTALLER,sha256=5hhM4Q4mYTT9z6QB6PGpUAW81PGNFrYrdXMj4oM_6ak,2
7
+ fsspec-2024.2.0.dist-info\REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ fsspec/__init__.py,sha256=2kT62GfFK-AjgS-LgwSsCo_VA2IePvsyv8Ash5oiaFA,1982
9
+ fsspec/_version.py,sha256=onTKKWe4fXkBjQxbTwM82SUT0H3x4U17IYrciFAryaU,500
10
+ fsspec/archive.py,sha256=S__DzfZj-urAN3tp2W6jJ6YDiXG1fAl7FjvWUN73qIE,2386
11
+ fsspec/asyn.py,sha256=kJ45sFFya2lZsmu2v8CVc8ZPRs8AccEzAy6Jot2ylkU,36157
12
+ fsspec/caching.py,sha256=N45pzJdD4w5FOX_sxGvHWirggPNB66JTGP1HH6fpSck,28781
13
+ fsspec/callbacks.py,sha256=BDIwLzK6rr_0V5ch557fSzsivCElpdqhXr5dZ9Te-EE,9210
14
+ fsspec/compression.py,sha256=Yyd8FXw2rwWRtVoRVah_yguv-J7BUcBo4yDu6Qt52a0,4859
15
+ fsspec/config.py,sha256=LF4Zmu1vhJW7Je9Q-cwkRc3xP7Rhyy7Xnwj26Z6sv2g,4279
16
+ fsspec/conftest.py,sha256=fVfx-NLrH_OZS1TIpYNoPzM7efEcMoL62reHOdYeFCA,1245
17
+ fsspec/core.py,sha256=0yCj1Z5MhbSDIQiqFs49VORl9QaGwV6hp9bXdkIoPIo,22363
18
+ fsspec/dircache.py,sha256=YzogWJrhEastHU7vWz-cJiJ7sdtLXFXhEpInGKd4EcM,2717
19
+ fsspec/exceptions.py,sha256=xcS7LiRrQ748kvOB9mrUR14kpjNztrHgEkZWi9M-VaI,330
20
+ fsspec/fuse.py,sha256=66amOa6wdIbS0DMhhfAPUoOB37HPorfXD1izV0prmTY,10145
21
+ fsspec/generic.py,sha256=NuNaP66OaphwMbuLHRFBLda78TD81isa9O4ozJqbUv0,13455
22
+ fsspec/gui.py,sha256=XKoXZpUhRE7jOhRCJH4-jRbKhVu56aS8h9tecvPD3nc,13932
23
+ fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
+ fsspec/implementations/arrow.py,sha256=_7TLuV6ZzNlpmUU_v6ud56u2wadzsKmY5qugPBxgMEs,8649
25
+ fsspec/implementations/cache_mapper.py,sha256=iHgBA6gjzDJ7_mBboHFzpLTf55HP3UEwUOZ43xyUK4M,2429
26
+ fsspec/implementations/cache_metadata.py,sha256=ZvyA7Y3KK-5Ct4E5pELzD6mH_5T03XqaKVT96qYDADU,8576
27
+ fsspec/implementations/cached.py,sha256=LbbPbeUup07O0y7gXD_atFgajWM9p1vlDKu_BOyLfbo,30943
28
+ fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
29
+ fsspec/implementations/data.py,sha256=Oti0dKzyeadnVIedo3s8CADoh9bNM-96_6viTEYr4lo,1245
30
+ fsspec/implementations/dbfs.py,sha256=cix9OYUveuSOx5UO5uRUwNUkYqjzyY0fkKnca1kTgZ0,15014
31
+ fsspec/implementations/dirfs.py,sha256=inDIRSDPhI1_ud1MMBFrpZQ11VIAMJ_dZQtbE4V08Ng,11384
32
+ fsspec/implementations/ftp.py,sha256=rp6cTog8xqjDPlKdSLKcsyP7K593_ByMabxGbNSEpTo,11655
33
+ fsspec/implementations/git.py,sha256=vKGI-Vd5q4H2RrvhebkPc9NwlfkZ980OUGhebeCw-M0,4034
34
+ fsspec/implementations/github.py,sha256=0kIiKkeAaROuHgdWBHVQFrzJ2ZfoDgymCehL_kJXHYA,7565
35
+ fsspec/implementations/http.py,sha256=PkhfgUV3-T7fG2Jf-NLX9doH52snV5Wmw91uVA9k74M,29454
36
+ fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ibN4Pg,3811
37
+ fsspec/implementations/libarchive.py,sha256=5_I2DiLXwQ1JC8x-K7jXu-tBwhO9dj7tFLnb0bTnVMQ,7102
38
+ fsspec/implementations/local.py,sha256=nxiRKg9FAQHTQss9-ET8ZzDXPGhSOktgkxrg0ffMs2I,13454
39
+ fsspec/implementations/memory.py,sha256=2iU--pOV2KCTrS-d5K8VKSygh9MPk2D7NZ_C8lMMEIw,9701
40
+ fsspec/implementations/reference.py,sha256=0iGu8mscaQ3a5iTlRNByytQ3_-1Bj8__ARqVwyy4q2M,43871
41
+ fsspec/implementations/sftp.py,sha256=fMY9XZcmpjszQ2tCqO_TPaJesaeD_Dv7ptYzgUPGoO0,5631
42
+ fsspec/implementations/smb.py,sha256=k3RtzW97lJtYuw_QpP1rJRFnUBmSsw9twFjUCex0a5U,10591
43
+ fsspec/implementations/tar.py,sha256=dam78Tp_CozybNqCY2JYgGBS3Uc9FuJUAT9oB0lolOs,4111
44
+ fsspec/implementations/webhdfs.py,sha256=wqVfno7z0TY1HepaIvKTUUcl_bi5NkV6qWsST8t_s7Y,16745
45
+ fsspec/implementations/zip.py,sha256=JDX-3HOI15qUl6VTBsNPuDp5RVN6s2n3Bywd4mMu0T0,4347
46
+ fsspec/mapping.py,sha256=WFEXRWxujQwfzzkRP5tpdIE0265okAtlP97qFZGvV1k,8165
47
+ fsspec/parquet.py,sha256=qVxDhwc960SGOt5etcYAJxCr-7HQKP01687KpDR02Gw,19463
48
+ fsspec/registry.py,sha256=-dl7sh2tsfhMA2uxz5KQDsPFehQTgMJIbVjNq6QLoKU,11145
49
+ fsspec/spec.py,sha256=3t96RgizRN_slIuHXnuR0bXjVUfBS1TfuDrEua4oQvE,66277
50
+ fsspec/tests/abstract/__init__.py,sha256=i1wcFixV6QhOwdoB24c8oXjzobISNqiKVz9kl2DvAY8,10028
51
+ fsspec/tests/abstract/common.py,sha256=1GQwNo5AONzAnzZj0fWgn8NJPLXALehbsuGxS3FzWVU,4973
52
+ fsspec/tests/abstract/copy.py,sha256=gU5-d97U3RSde35Vp4RxPY4rWwL744HiSrJ8IBOp9-8,19967
53
+ fsspec/tests/abstract/get.py,sha256=vNR4HztvTR7Cj56AMo7_tx7TeYz1Jgr_2Wb8Lv-UiBY,20755
54
+ fsspec/tests/abstract/put.py,sha256=7aih17OKB_IZZh1Mkq1eBDIjobhtMQmI8x-Pw-S_aZk,21201
55
+ fsspec/transaction.py,sha256=jeexB-H6Aw_gN6Z7hoKKe6v8zizITq39-gyTgpipIKE,2251
56
+ fsspec/utils.py,sha256=_VX_0VwDtoAFSjMYrxvJvnPNX9FMoHO5BlFHXJ0bHFI,23053
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/REQUESTED ADDED
File without changes
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: bdist_wheel (0.42.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
.venv/Lib/site-packages/fsspec-2024.2.0.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ fsspec
.venv/Lib/site-packages/fsspec/implementations/__init__.py ADDED
File without changes
.venv/Lib/site-packages/fsspec/implementations/jupyter.py ADDED
@@ -0,0 +1,124 @@
+ import base64
+ import io
+ import re
+
+ import requests
+
+ import fsspec
+
+
+ class JupyterFileSystem(fsspec.AbstractFileSystem):
+     """View of the files as seen by a Jupyter server (notebook or lab)"""
+
+     protocol = ("jupyter", "jlab")
+
+     def __init__(self, url, tok=None, **kwargs):
+         """
+
+         Parameters
+         ----------
+         url : str
+             Base URL of the server, like "http://127.0.0.1:8888". May include
+             token in the string, which is given by the process when starting up
+         tok : str
+             If the token is obtained separately, can be given here
+         kwargs
+         """
+         if "?" in url:
+             if tok is None:
+                 try:
+                     tok = re.findall("token=([a-z0-9]+)", url)[0]
+                 except IndexError as e:
+                     raise ValueError("Could not determine token") from e
+             url = url.split("?", 1)[0]
+         self.url = url.rstrip("/") + "/api/contents"
+         self.session = requests.Session()
+         if tok:
+             self.session.headers["Authorization"] = f"token {tok}"
+
+         super().__init__(**kwargs)
+
+     def ls(self, path, detail=True, **kwargs):
+         path = self._strip_protocol(path)
+         r = self.session.get(f"{self.url}/{path}")
+         if r.status_code == 404:
+             return FileNotFoundError(path)
+         r.raise_for_status()
+         out = r.json()
+
+         if out["type"] == "directory":
+             out = out["content"]
+         else:
+             out = [out]
+         for o in out:
+             o["name"] = o.pop("path")
+             o.pop("content")
+             if o["type"] == "notebook":
+                 o["type"] = "file"
+         if detail:
+             return out
+         return [o["name"] for o in out]
+
+     def cat_file(self, path, start=None, end=None, **kwargs):
+         path = self._strip_protocol(path)
+         r = self.session.get(f"{self.url}/{path}")
+         if r.status_code == 404:
+             return FileNotFoundError(path)
+         r.raise_for_status()
+         out = r.json()
+         if out["format"] == "text":
+             # data should be binary
+             b = out["content"].encode()
+         else:
+             b = base64.b64decode(out["content"])
+         return b[start:end]
+
+     def pipe_file(self, path, value, **_):
+         path = self._strip_protocol(path)
+         json = {
+             "name": path.rsplit("/", 1)[-1],
+             "path": path,
+             "size": len(value),
+             "content": base64.b64encode(value).decode(),
+             "format": "base64",
+             "type": "file",
+         }
+         self.session.put(f"{self.url}/{path}", json=json)
+
+     def mkdir(self, path, create_parents=True, **kwargs):
+         path = self._strip_protocol(path)
+         if create_parents and "/" in path:
+             self.mkdir(path.rsplit("/", 1)[0], True)
+         json = {
+             "name": path.rsplit("/", 1)[-1],
+             "path": path,
+             "size": None,
+             "content": None,
+             "type": "directory",
+         }
+         self.session.put(f"{self.url}/{path}", json=json)
+
+     def _rm(self, path):
+         path = self._strip_protocol(path)
+         self.session.delete(f"{self.url}/{path}")
+
+     def _open(self, path, mode="rb", **kwargs):
+         path = self._strip_protocol(path)
+         if mode == "rb":
+             data = self.cat_file(path)
+             return io.BytesIO(data)
+         else:
+             return SimpleFileWriter(self, path, mode="wb")
+
+
+ class SimpleFileWriter(fsspec.spec.AbstractBufferedFile):
+     def _upload_chunk(self, final=False):
+         """Never uploads a chunk until file is done
+
+         Not suitable for large files
+         """
+         if final is False:
+             return False
+         self.buffer.seek(0)
+         data = self.buffer.read()
+         self.fs.pipe_file(self.path, data)
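
For context, a minimal usage sketch of the `JupyterFileSystem` added above; the server URL and token are placeholders for a Jupyter server that is assumed to be running locally:

```python
# Hedged sketch: replace the URL and "<token>" with a real running server's values.
import fsspec

fs = fsspec.filesystem("jupyter", url="http://127.0.0.1:8888", tok="<token>")
print(fs.ls("", detail=False))              # names at the server's root
fs.pipe_file("hello.txt", b"written via the contents API")
print(fs.cat_file("hello.txt"))
```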
.venv/Lib/site-packages/fsspec/implementations/libarchive.py ADDED
@@ -0,0 +1,213 @@
1
+ from contextlib import contextmanager
2
+ from ctypes import (
3
+ CFUNCTYPE,
4
+ POINTER,
5
+ c_int,
6
+ c_longlong,
7
+ c_void_p,
8
+ cast,
9
+ create_string_buffer,
10
+ )
11
+
12
+ import libarchive
13
+ import libarchive.ffi as ffi
14
+
15
+ from fsspec import open_files
16
+ from fsspec.archive import AbstractArchiveFileSystem
17
+ from fsspec.implementations.memory import MemoryFile
18
+ from fsspec.utils import DEFAULT_BLOCK_SIZE
19
+
20
+ # Libarchive requires seekable files or memory only for certain archive
21
+ # types. However, since we read the directory first to cache the contents
22
+ # and also allow random access to any file, the file-like object needs
23
+ # to be seekable no matter what.
24
+
25
+ # Seek call-backs (not provided in the libarchive python wrapper)
26
+ SEEK_CALLBACK = CFUNCTYPE(c_longlong, c_int, c_void_p, c_longlong, c_int)
27
+ read_set_seek_callback = ffi.ffi(
28
+ "read_set_seek_callback", [ffi.c_archive_p, SEEK_CALLBACK], c_int, ffi.check_int
29
+ )
30
+ new_api = hasattr(ffi, "NO_OPEN_CB")
31
+
32
+
33
+ @contextmanager
34
+ def custom_reader(file, format_name="all", filter_name="all", block_size=ffi.page_size):
35
+ """Read an archive from a seekable file-like object.
36
+
37
+ The `file` object must support the standard `readinto` and 'seek' methods.
38
+ """
39
+ buf = create_string_buffer(block_size)
40
+ buf_p = cast(buf, c_void_p)
41
+
42
+ def read_func(archive_p, context, ptrptr):
43
+ # readinto the buffer, returns number of bytes read
44
+ length = file.readinto(buf)
45
+ # write the address of the buffer into the pointer
46
+ ptrptr = cast(ptrptr, POINTER(c_void_p))
47
+ ptrptr[0] = buf_p
48
+ # tell libarchive how much data was written into the buffer
49
+ return length
50
+
51
+ def seek_func(archive_p, context, offset, whence):
52
+ file.seek(offset, whence)
53
+ # tell libarchvie the current position
54
+ return file.tell()
55
+
56
+ read_cb = ffi.READ_CALLBACK(read_func)
57
+ seek_cb = SEEK_CALLBACK(seek_func)
58
+
59
+ if new_api:
60
+ open_cb = ffi.NO_OPEN_CB
61
+ close_cb = ffi.NO_CLOSE_CB
62
+ else:
63
+ open_cb = libarchive.read.OPEN_CALLBACK(ffi.VOID_CB)
64
+ close_cb = libarchive.read.CLOSE_CALLBACK(ffi.VOID_CB)
65
+
66
+ with libarchive.read.new_archive_read(format_name, filter_name) as archive_p:
67
+ read_set_seek_callback(archive_p, seek_cb)
68
+ ffi.read_open(archive_p, None, open_cb, read_cb, close_cb)
69
+ yield libarchive.read.ArchiveRead(archive_p)
70
+
71
+
72
+ class LibArchiveFileSystem(AbstractArchiveFileSystem):
73
+ """Compressed archives as a file-system (read-only)
74
+
75
+ Supports the following formats:
76
+ tar, pax , cpio, ISO9660, zip, mtree, shar, ar, raw, xar, lha/lzh, rar
77
+ Microsoft CAB, 7-Zip, WARC
78
+
79
+ See the libarchive documentation for further restrictions.
80
+ https://www.libarchive.org/
81
+
82
+ Keeps file object open while instance lives. It only works in seekable
83
+ file-like objects. In case the filesystem does not support this kind of
84
+ file object, it is recommended to cache locally.
85
+
86
+ This class is pickleable, but not necessarily thread-safe (depends on the
87
+ platform). See libarchive documentation for details.
88
+ """
89
+
90
+ root_marker = ""
91
+ protocol = "libarchive"
92
+ cachable = False
93
+
94
+ def __init__(
95
+ self,
96
+ fo="",
97
+ mode="r",
98
+ target_protocol=None,
99
+ target_options=None,
100
+ block_size=DEFAULT_BLOCK_SIZE,
101
+ **kwargs,
102
+ ):
103
+ """
104
+ Parameters
105
+ ----------
106
+ fo: str or file-like
107
+ Contains ZIP, and must exist. If a str, will fetch file using
108
+ :meth:`~fsspec.open_files`, which must return one file exactly.
109
+ mode: str
110
+ Currently, only 'r' accepted
111
+ target_protocol: str (optional)
112
+ If ``fo`` is a string, this value can be used to override the
113
+ FS protocol inferred from a URL
114
+ target_options: dict (optional)
115
+ Kwargs passed when instantiating the target FS, if ``fo`` is
116
+ a string.
117
+ """
118
+ super().__init__(self, **kwargs)
119
+ if mode != "r":
120
+ raise ValueError("Only read from archive files accepted")
121
+ if isinstance(fo, str):
122
+ files = open_files(fo, protocol=target_protocol, **(target_options or {}))
123
+ if len(files) != 1:
124
+ raise ValueError(
125
+ f'Path "{fo}" did not resolve to exactly one file: "{files}"'
126
+ )
127
+ fo = files[0]
128
+ self.of = fo
129
+ self.fo = fo.__enter__() # the whole instance is a context
130
+ self.block_size = block_size
131
+ self.dir_cache = None
132
+
133
+ @contextmanager
134
+ def _open_archive(self):
135
+ self.fo.seek(0)
136
+ with custom_reader(self.fo, block_size=self.block_size) as arc:
137
+ yield arc
138
+
139
+ @classmethod
140
+ def _strip_protocol(cls, path):
141
+ # file paths are always relative to the archive root
142
+ return super()._strip_protocol(path).lstrip("/")
143
+
144
+ def _get_dirs(self):
145
+ fields = {
146
+ "name": "pathname",
147
+ "size": "size",
148
+ "created": "ctime",
149
+ "mode": "mode",
150
+ "uid": "uid",
151
+ "gid": "gid",
152
+ "mtime": "mtime",
153
+ }
154
+
155
+ if self.dir_cache is not None:
156
+ return
157
+
158
+ self.dir_cache = {}
159
+ list_names = []
160
+ with self._open_archive() as arc:
161
+ for entry in arc:
162
+ if not entry.isdir and not entry.isfile:
163
+ # Skip symbolic links, fifo entries, etc.
164
+ continue
165
+ self.dir_cache.update(
166
+ {
167
+ dirname: {"name": dirname, "size": 0, "type": "directory"}
168
+ for dirname in self._all_dirnames(set(entry.name))
169
+ }
170
+ )
171
+ f = {key: getattr(entry, fields[key]) for key in fields}
172
+ f["type"] = "directory" if entry.isdir else "file"
173
+ list_names.append(entry.name)
174
+
175
+ self.dir_cache[f["name"]] = f
176
+ # libarchive does not seem to return an entry for the directories (at least
177
+ # not in all formats), so get the directories names from the files names
178
+ self.dir_cache.update(
179
+ {
180
+ dirname: {"name": dirname, "size": 0, "type": "directory"}
181
+ for dirname in self._all_dirnames(list_names)
182
+ }
183
+ )
184
+
185
+ def _open(
186
+ self,
187
+ path,
188
+ mode="rb",
189
+ block_size=None,
190
+ autocommit=True,
191
+ cache_options=None,
192
+ **kwargs,
193
+ ):
194
+ path = self._strip_protocol(path)
195
+ if mode != "rb":
196
+ raise NotImplementedError
197
+
198
+ data = bytes()
199
+ with self._open_archive() as arc:
200
+ for entry in arc:
201
+ if entry.pathname != path:
202
+ continue
203
+
204
+ if entry.size == 0:
205
+ # empty file, so there are no blocks
206
+ break
207
+
208
+ for block in entry.get_blocks(entry.size):
209
+ data = block
210
+ break
211
+ else:
212
+ raise ValueError
213
+ return MemoryFile(fs=self, path=path, data=data)
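
For context, a minimal usage sketch of the read-only `LibArchiveFileSystem` added above; it assumes the `libarchive-c` package is installed, and the archive name and member path are placeholders:

```python
# Hedged sketch: "example.zip" and "docs/readme.txt" stand in for an existing
# archive and one of its members.
import fsspec

fs = fsspec.filesystem("libarchive", fo="example.zip")
print(fs.ls("", detail=False))              # entries cached from the archive
with fs.open("docs/readme.txt", "rb") as f:
    print(f.read())
```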
.venv/Lib/site-packages/fsspec/implementations/local.py ADDED
@@ -0,0 +1,418 @@
1
+ import datetime
2
+ import io
3
+ import logging
4
+ import os
5
+ import os.path as osp
6
+ import re
7
+ import shutil
8
+ import stat
9
+ import tempfile
10
+
11
+ from fsspec import AbstractFileSystem
12
+ from fsspec.compression import compr
13
+ from fsspec.core import get_compression
14
+ from fsspec.utils import isfilelike, stringify_path
15
+
16
+ logger = logging.getLogger("fsspec.local")
17
+
18
+
19
+ class LocalFileSystem(AbstractFileSystem):
20
+ """Interface to files on local storage
21
+
22
+ Parameters
23
+ ----------
24
+ auto_mkdir: bool
25
+ Whether, when opening a file, the directory containing it should
26
+ be created (if it doesn't already exist). This is assumed by pyarrow
27
+ code.
28
+ """
29
+
30
+ root_marker = "/"
31
+ protocol = "file", "local"
32
+ local_file = True
33
+
34
+ def __init__(self, auto_mkdir=False, **kwargs):
35
+ super().__init__(**kwargs)
36
+ self.auto_mkdir = auto_mkdir
37
+
38
+ @property
39
+ def fsid(self):
40
+ return "local"
41
+
42
+ def mkdir(self, path, create_parents=True, **kwargs):
43
+ path = self._strip_protocol(path)
44
+ if self.exists(path):
45
+ raise FileExistsError(path)
46
+ if create_parents:
47
+ self.makedirs(path, exist_ok=True)
48
+ else:
49
+ os.mkdir(path, **kwargs)
50
+
51
+ def makedirs(self, path, exist_ok=False):
52
+ path = self._strip_protocol(path)
53
+ os.makedirs(path, exist_ok=exist_ok)
54
+
55
+ def rmdir(self, path):
56
+ path = self._strip_protocol(path)
57
+ os.rmdir(path)
58
+
59
+ def ls(self, path, detail=False, **kwargs):
60
+ path = self._strip_protocol(path)
61
+ info = self.info(path)
62
+ if info["type"] == "directory":
63
+ with os.scandir(path) as it:
64
+ infos = [self.info(f) for f in it]
65
+ else:
66
+ infos = [info]
67
+
68
+ if not detail:
69
+ return [i["name"] for i in infos]
70
+ return infos
71
+
72
+ def info(self, path, **kwargs):
73
+ if isinstance(path, os.DirEntry):
74
+ # scandir DirEntry
75
+ out = path.stat(follow_symlinks=False)
76
+ link = path.is_symlink()
77
+ if path.is_dir(follow_symlinks=False):
78
+ t = "directory"
79
+ elif path.is_file(follow_symlinks=False):
80
+ t = "file"
81
+ else:
82
+ t = "other"
83
+ path = self._strip_protocol(path.path)
84
+ else:
85
+ # str or path-like
86
+ path = self._strip_protocol(path)
87
+ out = os.stat(path, follow_symlinks=False)
88
+ link = stat.S_ISLNK(out.st_mode)
89
+ if link:
90
+ out = os.stat(path, follow_symlinks=True)
91
+ if stat.S_ISDIR(out.st_mode):
92
+ t = "directory"
93
+ elif stat.S_ISREG(out.st_mode):
94
+ t = "file"
95
+ else:
96
+ t = "other"
97
+ result = {
98
+ "name": path,
99
+ "size": out.st_size,
100
+ "type": t,
101
+ "created": out.st_ctime,
102
+ "islink": link,
103
+ }
104
+ for field in ["mode", "uid", "gid", "mtime", "ino", "nlink"]:
105
+ result[field] = getattr(out, f"st_{field}")
106
+ if result["islink"]:
107
+ result["destination"] = os.readlink(path)
108
+ try:
109
+ out2 = os.stat(path, follow_symlinks=True)
110
+ result["size"] = out2.st_size
111
+ except OSError:
112
+ result["size"] = 0
113
+ return result
114
+
115
+ def lexists(self, path, **kwargs):
116
+ return osp.lexists(path)
117
+
118
+ def cp_file(self, path1, path2, **kwargs):
119
+ path1 = self._strip_protocol(path1).rstrip("/")
120
+ path2 = self._strip_protocol(path2).rstrip("/")
121
+ if self.auto_mkdir:
122
+ self.makedirs(self._parent(path2), exist_ok=True)
123
+ if self.isfile(path1):
124
+ shutil.copyfile(path1, path2)
125
+ elif self.isdir(path1):
126
+ self.mkdirs(path2, exist_ok=True)
127
+ else:
128
+ raise FileNotFoundError(path1)
129
+
130
+ def get_file(self, path1, path2, callback=None, **kwargs):
131
+ if isfilelike(path2):
132
+ with open(path1, "rb") as f:
133
+ shutil.copyfileobj(f, path2)
134
+ else:
135
+ return self.cp_file(path1, path2, **kwargs)
136
+
137
+ def put_file(self, path1, path2, callback=None, **kwargs):
138
+ return self.cp_file(path1, path2, **kwargs)
139
+
140
+ def mv_file(self, path1, path2, **kwargs):
141
+ path1 = self._strip_protocol(path1).rstrip("/")
142
+ path2 = self._strip_protocol(path2).rstrip("/")
143
+ shutil.move(path1, path2)
144
+
145
+ def link(self, src, dst, **kwargs):
146
+ src = self._strip_protocol(src)
147
+ dst = self._strip_protocol(dst)
148
+ os.link(src, dst, **kwargs)
149
+
150
+ def symlink(self, src, dst, **kwargs):
151
+ src = self._strip_protocol(src)
152
+ dst = self._strip_protocol(dst)
153
+ os.symlink(src, dst, **kwargs)
154
+
155
+ def islink(self, path) -> bool:
156
+ return os.path.islink(self._strip_protocol(path))
157
+
158
+ def rm_file(self, path):
159
+ os.remove(self._strip_protocol(path))
160
+
161
+ def rm(self, path, recursive=False, maxdepth=None):
162
+ if not isinstance(path, list):
163
+ path = [path]
164
+
165
+ for p in path:
166
+ p = self._strip_protocol(p).rstrip("/")
167
+ if self.isdir(p):
168
+ if not recursive:
169
+ raise ValueError("Cannot delete directory, set recursive=True")
170
+ if osp.abspath(p) == os.getcwd():
171
+ raise ValueError("Cannot delete current working directory")
172
+ shutil.rmtree(p)
173
+ else:
174
+ os.remove(p)
175
+
176
+ def unstrip_protocol(self, name):
177
+ name = self._strip_protocol(name) # normalise for local/win/...
178
+ return f"file://{name}"
179
+
180
+ def _open(self, path, mode="rb", block_size=None, **kwargs):
181
+ path = self._strip_protocol(path)
182
+ if self.auto_mkdir and "w" in mode:
183
+ self.makedirs(self._parent(path), exist_ok=True)
184
+ return LocalFileOpener(path, mode, fs=self, **kwargs)
185
+
186
+ def touch(self, path, truncate=True, **kwargs):
187
+ path = self._strip_protocol(path)
188
+ if self.auto_mkdir:
189
+ self.makedirs(self._parent(path), exist_ok=True)
190
+ if self.exists(path):
191
+ os.utime(path, None)
192
+ else:
193
+ open(path, "a").close()
194
+ if truncate:
195
+ os.truncate(path, 0)
196
+
197
+ def created(self, path):
198
+ info = self.info(path=path)
199
+ return datetime.datetime.fromtimestamp(
200
+ info["created"], tz=datetime.timezone.utc
201
+ )
202
+
203
+ def modified(self, path):
204
+ info = self.info(path=path)
205
+ return datetime.datetime.fromtimestamp(info["mtime"], tz=datetime.timezone.utc)
206
+
207
+ @classmethod
208
+ def _parent(cls, path):
209
+ path = cls._strip_protocol(path).rstrip("/")
210
+ if "/" in path:
211
+ return path.rsplit("/", 1)[0]
212
+ else:
213
+ return cls.root_marker
214
+
215
+ @classmethod
216
+ def _strip_protocol(cls, path):
217
+ path = stringify_path(path)
218
+ if path.startswith("file://"):
219
+ path = path[7:]
220
+ elif path.startswith("file:"):
221
+ path = path[5:]
222
+ elif path.startswith("local://"):
223
+ path = path[8:]
224
+ elif path.startswith("local:"):
225
+ path = path[6:]
226
+ return make_path_posix(path).rstrip("/") or cls.root_marker
227
+
228
+ def _isfilestore(self):
229
+ # Inheriting from DaskFileSystem makes this False (S3, etc. were)
230
+ # the original motivation. But we are a posix-like file system.
231
+ # See https://github.com/dask/dask/issues/5526
232
+ return True
233
+
234
+ def chmod(self, path, mode):
235
+ path = stringify_path(path)
236
+ return os.chmod(path, mode)
237
+
238
+
239
+ def make_path_posix(path, sep=os.sep):
240
+ """Make path generic"""
241
+ if isinstance(path, (list, set, tuple)):
242
+ return type(path)(make_path_posix(p) for p in path)
243
+ if "~" in path:
244
+ path = osp.expanduser(path)
245
+ if sep == "/":
246
+ # most common fast case for posix
247
+ if path.startswith("/"):
248
+ return path
249
+ if path.startswith("./"):
250
+ path = path[2:]
251
+ return f"{os.getcwd()}/{path}"
252
+ if (
253
+ (sep not in path and "/" not in path)
254
+ or (sep == "/" and not path.startswith("/"))
255
+ or (sep == "\\" and ":" not in path and not path.startswith("\\\\"))
256
+ ):
257
+ # relative path like "path" or "rel\\path" (win) or rel/path"
258
+ if os.sep == "\\":
259
+ # abspath made some more '\\' separators
260
+ return make_path_posix(osp.abspath(path))
261
+ else:
262
+ return f"{os.getcwd()}/{path}"
263
+ if path.startswith("file://"):
264
+ path = path[7:]
265
+ if re.match("/[A-Za-z]:", path):
266
+ # for windows file URI like "file:///C:/folder/file"
267
+ # or "file:///C:\\dir\\file"
268
+ path = path[1:].replace("\\", "/").replace("//", "/")
269
+ if path.startswith("\\\\"):
270
+ # special case for windows UNC/DFS-style paths, do nothing,
271
+ # just flip the slashes around (case below does not work!)
272
+ return path.replace("\\", "/")
273
+ if re.match("[A-Za-z]:", path):
274
+ # windows full path like "C:\\local\\path"
275
+ return path.lstrip("\\").replace("\\", "/").replace("//", "/")
276
+ if path.startswith("\\"):
277
+ # windows network path like "\\server\\path"
278
+ return "/" + path.lstrip("\\").replace("\\", "/").replace("//", "/")
279
+ return path
280
+
281
+
282
+ def trailing_sep(path):
283
+ """Return True if the path ends with a path separator.
284
+
285
+ A forward slash is always considered a path separator, even on Operating
286
+ Systems that normally use a backslash.
287
+ """
288
+ # TODO: if all incoming paths were posix-compliant then separator would
289
+ # always be a forward slash, simplifying this function.
290
+ # See https://github.com/fsspec/filesystem_spec/pull/1250
291
+ return path.endswith(os.sep) or (os.altsep is not None and path.endswith(os.altsep))
292
+
293
+
294
+ class LocalFileOpener(io.IOBase):
295
+ def __init__(
296
+ self, path, mode, autocommit=True, fs=None, compression=None, **kwargs
297
+ ):
298
+ logger.debug("open file: %s", path)
299
+ self.path = path
300
+ self.mode = mode
301
+ self.fs = fs
302
+ self.f = None
303
+ self.autocommit = autocommit
304
+ self.compression = get_compression(path, compression)
305
+ self.blocksize = io.DEFAULT_BUFFER_SIZE
306
+ self._open()
307
+
308
+ def _open(self):
309
+ if self.f is None or self.f.closed:
310
+ if self.autocommit or "w" not in self.mode:
311
+ self.f = open(self.path, mode=self.mode)
312
+ if self.compression:
313
+ compress = compr[self.compression]
314
+ self.f = compress(self.f, mode=self.mode)
315
+ else:
316
+ # TODO: check if path is writable?
317
+ i, name = tempfile.mkstemp()
318
+ os.close(i) # we want normal open and normal buffered file
319
+ self.temp = name
320
+ self.f = open(name, mode=self.mode)
321
+ if "w" not in self.mode:
322
+ self.size = self.f.seek(0, 2)
323
+ self.f.seek(0)
324
+ self.f.size = self.size
325
+
326
+ def _fetch_range(self, start, end):
327
+ # probably only used by cached FS
328
+ if "r" not in self.mode:
329
+ raise ValueError
330
+ self._open()
331
+ self.f.seek(start)
332
+ return self.f.read(end - start)
333
+
334
+ def __setstate__(self, state):
335
+ self.f = None
336
+ loc = state.pop("loc", None)
337
+ self.__dict__.update(state)
338
+ if "r" in state["mode"]:
339
+ self.f = None
340
+ self._open()
341
+ self.f.seek(loc)
342
+
343
+ def __getstate__(self):
344
+ d = self.__dict__.copy()
345
+ d.pop("f")
346
+ if "r" in self.mode:
347
+ d["loc"] = self.f.tell()
348
+ else:
349
+ if not self.f.closed:
350
+ raise ValueError("Cannot serialise open write-mode local file")
351
+ return d
352
+
353
+ def commit(self):
354
+ if self.autocommit:
355
+ raise RuntimeError("Can only commit if not already set to autocommit")
356
+ shutil.move(self.temp, self.path)
357
+
358
+ def discard(self):
359
+ if self.autocommit:
360
+ raise RuntimeError("Cannot discard if set to autocommit")
361
+ os.remove(self.temp)
362
+
363
+ def readable(self) -> bool:
364
+ return True
365
+
366
+ def writable(self) -> bool:
367
+ return "r" not in self.mode
368
+
369
+ def read(self, *args, **kwargs):
370
+ return self.f.read(*args, **kwargs)
371
+
372
+ def write(self, *args, **kwargs):
373
+ return self.f.write(*args, **kwargs)
374
+
375
+ def tell(self, *args, **kwargs):
376
+ return self.f.tell(*args, **kwargs)
377
+
378
+ def seek(self, *args, **kwargs):
379
+ return self.f.seek(*args, **kwargs)
380
+
381
+ def seekable(self, *args, **kwargs):
382
+ return self.f.seekable(*args, **kwargs)
383
+
384
+ def readline(self, *args, **kwargs):
385
+ return self.f.readline(*args, **kwargs)
386
+
387
+ def readlines(self, *args, **kwargs):
388
+ return self.f.readlines(*args, **kwargs)
389
+
390
+ def close(self):
391
+ return self.f.close()
392
+
393
+ def truncate(self, size=None) -> int:
394
+ return self.f.truncate(size)
395
+
396
+ @property
397
+ def closed(self):
398
+ return self.f.closed
399
+
400
+ def fileno(self):
401
+ return self.raw.fileno()
402
+
403
+ def flush(self) -> None:
404
+ self.f.flush()
405
+
406
+ def __iter__(self):
407
+ return self.f.__iter__()
408
+
409
+ def __getattr__(self, item):
410
+ return getattr(self.f, item)
411
+
412
+ def __enter__(self):
413
+ self._incontext = True
414
+ return self
415
+
416
+ def __exit__(self, exc_type, exc_value, traceback):
417
+ self._incontext = False
418
+ self.f.__exit__(exc_type, exc_value, traceback)
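
For context, a minimal usage sketch of `LocalFileSystem` with `auto_mkdir`, as described in the class docstring above; the target path is a placeholder under a temporary directory:

```python
# Hedged sketch: with auto_mkdir=True, missing parent directories are created
# when a file is opened for writing.
import os
import tempfile

import fsspec

root = tempfile.mkdtemp()
fs = fsspec.filesystem("file", auto_mkdir=True)
target = os.path.join(root, "nested", "dirs", "data.bin")
with fs.open(target, "wb") as f:
    f.write(b"\x00\x01\x02")
print(fs.info(target)["size"])              # 3
```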
.venv/Lib/site-packages/fsspec/implementations/memory.py ADDED
@@ -0,0 +1,292 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from datetime import datetime, timezone
5
+ from errno import ENOTEMPTY
6
+ from io import BytesIO
7
+ from typing import Any, ClassVar
8
+
9
+ from fsspec import AbstractFileSystem
10
+
11
+ logger = logging.getLogger("fsspec.memoryfs")
12
+
13
+
14
+ class MemoryFileSystem(AbstractFileSystem):
15
+ """A filesystem based on a dict of BytesIO objects
16
+
17
+ This is a global filesystem so instances of this class all point to the same
18
+ in memory filesystem.
19
+ """
20
+
21
+ store: ClassVar[dict[str, Any]] = {} # global, do not overwrite!
22
+ pseudo_dirs = [""] # global, do not overwrite!
23
+ protocol = "memory"
24
+ root_marker = "/"
25
+
26
+ @classmethod
27
+ def _strip_protocol(cls, path):
28
+ if path.startswith("memory://"):
29
+ path = path[len("memory://") :]
30
+ if "::" in path or "://" in path:
31
+ return path.rstrip("/")
32
+ path = path.lstrip("/").rstrip("/")
33
+ return "/" + path if path else ""
34
+
35
+ def ls(self, path, detail=True, **kwargs):
36
+ path = self._strip_protocol(path)
37
+ if path in self.store:
38
+ # there is a key with this exact name
39
+ if not detail:
40
+ return [path]
41
+ return [
42
+ {
43
+ "name": path,
44
+ "size": self.store[path].size,
45
+ "type": "file",
46
+ "created": self.store[path].created.timestamp(),
47
+ }
48
+ ]
49
+ paths = set()
50
+ starter = path + "/"
51
+ out = []
52
+ for p2 in tuple(self.store):
53
+ if p2.startswith(starter):
54
+ if "/" not in p2[len(starter) :]:
55
+ # exact child
56
+ out.append(
57
+ {
58
+ "name": p2,
59
+ "size": self.store[p2].size,
60
+ "type": "file",
61
+ "created": self.store[p2].created.timestamp(),
62
+ }
63
+ )
64
+ elif len(p2) > len(starter):
65
+ # implied child directory
66
+ ppath = starter + p2[len(starter) :].split("/", 1)[0]
67
+ if ppath not in paths:
68
+ out = out or []
69
+ out.append(
70
+ {
71
+ "name": ppath,
72
+ "size": 0,
73
+ "type": "directory",
74
+ }
75
+ )
76
+ paths.add(ppath)
77
+ for p2 in self.pseudo_dirs:
78
+ if p2.startswith(starter):
79
+ if "/" not in p2[len(starter) :]:
80
+ # exact child pdir
81
+ if p2 not in paths:
82
+ out.append({"name": p2, "size": 0, "type": "directory"})
83
+ paths.add(p2)
84
+ else:
85
+ # directory implied by deeper pdir
86
+ ppath = starter + p2[len(starter) :].split("/", 1)[0]
87
+ if ppath not in paths:
88
+ out.append({"name": ppath, "size": 0, "type": "directory"})
89
+ paths.add(ppath)
90
+ if not out:
91
+ if path in self.pseudo_dirs:
92
+ # empty dir
93
+ return []
94
+ raise FileNotFoundError(path)
95
+ if detail:
96
+ return out
97
+ return sorted([f["name"] for f in out])
98
+
99
+ def mkdir(self, path, create_parents=True, **kwargs):
100
+ path = self._strip_protocol(path)
101
+ if path in self.store or path in self.pseudo_dirs:
102
+ raise FileExistsError(path)
103
+ if self._parent(path).strip("/") and self.isfile(self._parent(path)):
104
+ raise NotADirectoryError(self._parent(path))
105
+ if create_parents and self._parent(path).strip("/"):
106
+ try:
107
+ self.mkdir(self._parent(path), create_parents, **kwargs)
108
+ except FileExistsError:
109
+ pass
110
+ if path and path not in self.pseudo_dirs:
111
+ self.pseudo_dirs.append(path)
112
+
113
+ def makedirs(self, path, exist_ok=False):
114
+ try:
115
+ self.mkdir(path, create_parents=True)
116
+ except FileExistsError:
117
+ if not exist_ok:
118
+ raise
119
+
120
+ def pipe_file(self, path, value, **kwargs):
121
+ """Set the bytes of given file
122
+
123
+ Avoids copies of the data if possible
124
+ """
125
+ self.open(path, "wb", data=value)
126
+
127
+ def rmdir(self, path):
128
+ path = self._strip_protocol(path)
129
+ if path == "":
130
+ # silently avoid deleting FS root
131
+ return
132
+ if path in self.pseudo_dirs:
133
+ if not self.ls(path):
134
+ self.pseudo_dirs.remove(path)
135
+ else:
136
+ raise OSError(ENOTEMPTY, "Directory not empty", path)
137
+ else:
138
+ raise FileNotFoundError(path)
139
+
140
+ def info(self, path, **kwargs):
141
+ path = self._strip_protocol(path)
142
+ if path in self.pseudo_dirs or any(
143
+ p.startswith(path + "/") for p in list(self.store) + self.pseudo_dirs
144
+ ):
145
+ return {
146
+ "name": path,
147
+ "size": 0,
148
+ "type": "directory",
149
+ }
150
+ elif path in self.store:
151
+ filelike = self.store[path]
152
+ return {
153
+ "name": path,
154
+ "size": filelike.size,
155
+ "type": "file",
156
+ "created": getattr(filelike, "created", None),
157
+ }
158
+ else:
159
+ raise FileNotFoundError(path)
160
+
161
+ def _open(
162
+ self,
163
+ path,
164
+ mode="rb",
165
+ block_size=None,
166
+ autocommit=True,
167
+ cache_options=None,
168
+ **kwargs,
169
+ ):
170
+ path = self._strip_protocol(path)
171
+ if path in self.pseudo_dirs:
172
+ raise IsADirectoryError(path)
173
+ parent = path
174
+ while len(parent) > 1:
175
+ parent = self._parent(parent)
176
+ if self.isfile(parent):
177
+ raise FileExistsError(parent)
178
+ if mode in ["rb", "ab", "r+b"]:
179
+ if path in self.store:
180
+ f = self.store[path]
181
+ if mode == "ab":
182
+ # position at the end of file
183
+ f.seek(0, 2)
184
+ else:
185
+ # position at the beginning of file
186
+ f.seek(0)
187
+ return f
188
+ else:
189
+ raise FileNotFoundError(path)
190
+ elif mode == "wb":
191
+ m = MemoryFile(self, path, kwargs.get("data"))
192
+ if not self._intrans:
193
+ m.commit()
194
+ return m
195
+ else:
196
+ name = self.__class__.__name__
197
+ raise ValueError(f"unsupported file mode for {name}: {mode!r}")
198
+
199
+ def cp_file(self, path1, path2, **kwargs):
200
+ path1 = self._strip_protocol(path1)
201
+ path2 = self._strip_protocol(path2)
202
+ if self.isfile(path1):
203
+ self.store[path2] = MemoryFile(
204
+ self, path2, self.store[path1].getvalue()
205
+ ) # implicit copy
206
+ elif self.isdir(path1):
207
+ if path2 not in self.pseudo_dirs:
208
+ self.pseudo_dirs.append(path2)
209
+ else:
210
+ raise FileNotFoundError(path1)
211
+
212
+ def cat_file(self, path, start=None, end=None, **kwargs):
213
+ path = self._strip_protocol(path)
214
+ try:
215
+ return bytes(self.store[path].getbuffer()[start:end])
216
+ except KeyError:
217
+ raise FileNotFoundError(path)
218
+
219
+ def _rm(self, path):
220
+ path = self._strip_protocol(path)
221
+ try:
222
+ del self.store[path]
223
+ except KeyError as e:
224
+ raise FileNotFoundError(path) from e
225
+
226
+ def modified(self, path):
227
+ path = self._strip_protocol(path)
228
+ try:
229
+ return self.store[path].modified
230
+ except KeyError:
231
+ raise FileNotFoundError(path)
232
+
233
+ def created(self, path):
234
+ path = self._strip_protocol(path)
235
+ try:
236
+ return self.store[path].created
237
+ except KeyError:
238
+ raise FileNotFoundError(path)
239
+
240
+ def rm(self, path, recursive=False, maxdepth=None):
241
+ if isinstance(path, str):
242
+ path = self._strip_protocol(path)
243
+ else:
244
+ path = [self._strip_protocol(p) for p in path]
245
+ paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
246
+ for p in reversed(paths):
247
+ # If the expanded path doesn't exist, it is only because the expanded
248
+ # path was a directory that does not exist in self.pseudo_dirs. This
249
+ # is possible if you directly create files without making the
250
+ # directories first.
251
+ if not self.exists(p):
252
+ continue
253
+ if self.isfile(p):
254
+ self.rm_file(p)
255
+ else:
256
+ self.rmdir(p)
257
+
258
+
259
+ class MemoryFile(BytesIO):
260
+ """A BytesIO which can't close and works as a context manager
261
+
262
+ Can initialise with data. Each path should only be active once at any moment.
263
+
264
+ No need to provide fs, path if auto-committing (default)
265
+ """
266
+
267
+ def __init__(self, fs=None, path=None, data=None):
268
+ logger.debug("open file %s", path)
269
+ self.fs = fs
270
+ self.path = path
271
+ self.created = datetime.now(tz=timezone.utc)
272
+ self.modified = datetime.now(tz=timezone.utc)
273
+ if data:
274
+ super().__init__(data)
275
+ self.seek(0)
276
+
277
+ @property
278
+ def size(self):
279
+ return self.getbuffer().nbytes
280
+
281
+ def __enter__(self):
282
+ return self
283
+
284
+ def close(self):
285
+ pass
286
+
287
+ def discard(self):
288
+ pass
289
+
290
+ def commit(self):
291
+ self.fs.store[self.path] = self
292
+ self.modified = datetime.now(tz=timezone.utc)
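
For context, a minimal usage sketch of the global `MemoryFileSystem` added above; the path is just an illustrative key in the shared in-memory store:

```python
# Hedged sketch: every MemoryFileSystem instance shares the same class-level
# store, so this state is visible process-wide until removed.
import fsspec

fs = fsspec.filesystem("memory")
fs.pipe_file("/demo/hello.txt", b"hello from memory")
print(fs.ls("/demo", detail=False))         # ['/demo/hello.txt']
print(fs.cat_file("/demo/hello.txt"))       # b'hello from memory'
fs.rm("/demo", recursive=True)
```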
.venv/Lib/site-packages/fsspec/implementations/reference.py ADDED
@@ -0,0 +1,1160 @@
1
+ import base64
2
+ import collections
3
+ import io
4
+ import itertools
5
+ import logging
6
+ import math
7
+ import os
8
+ from functools import lru_cache
9
+ from typing import TYPE_CHECKING
10
+
11
+ import fsspec.core
12
+
13
+ try:
14
+ import ujson as json
15
+ except ImportError:
16
+ if not TYPE_CHECKING:
17
+ import json
18
+
19
+ from ..asyn import AsyncFileSystem
20
+ from ..callbacks import DEFAULT_CALLBACK
21
+ from ..core import filesystem, open, split_protocol
22
+ from ..utils import isfilelike, merge_offset_ranges, other_paths
23
+
24
+ logger = logging.getLogger("fsspec.reference")
25
+
26
+
27
+ class ReferenceNotReachable(RuntimeError):
28
+ def __init__(self, reference, target, *args):
29
+ super().__init__(*args)
30
+ self.reference = reference
31
+ self.target = target
32
+
33
+ def __str__(self):
34
+ return f'Reference "{self.reference}" failed to fetch target {self.target}'
35
+
36
+
37
+ def _first(d):
38
+ return list(d.values())[0]
39
+
40
+
41
+ def _prot_in_references(path, references):
42
+ ref = references.get(path)
43
+ if isinstance(ref, (list, tuple)):
44
+ return split_protocol(ref[0])[0] if ref[0] else ref[0]
45
+
46
+
47
+ def _protocol_groups(paths, references):
48
+ if isinstance(paths, str):
49
+ return {_prot_in_references(paths, references): [paths]}
50
+ out = {}
51
+ for path in paths:
52
+ protocol = _prot_in_references(path, references)
53
+ out.setdefault(protocol, []).append(path)
54
+ return out
55
+
56
+
57
+ class RefsValuesView(collections.abc.ValuesView):
58
+ def __iter__(self):
59
+ for val in self._mapping.zmetadata.values():
60
+ yield json.dumps(val).encode()
61
+ yield from self._mapping._items.values()
62
+ for field in self._mapping.listdir():
63
+ chunk_sizes = self._mapping._get_chunk_sizes(field)
64
+ if len(chunk_sizes) == 0:
65
+ yield self._mapping[field + "/0"]
66
+ continue
67
+ yield from self._mapping._generate_all_records(field)
68
+
69
+
70
+ class RefsItemsView(collections.abc.ItemsView):
71
+ def __iter__(self):
72
+ return zip(self._mapping.keys(), self._mapping.values())
73
+
74
+
75
+ def ravel_multi_index(idx, sizes):
76
+ val = 0
77
+ mult = 1
78
+ for i, s in zip(idx[::-1], sizes[::-1]):
79
+ val += i * mult
80
+ mult *= s
81
+ return val
82
+
83
+
84
+ class LazyReferenceMapper(collections.abc.MutableMapping):
85
+ """This interface can be used to read/write references from Parquet stores.
86
+ It is not intended for other types of references.
87
+ It can be used with Kerchunk's MultiZarrToZarr method to combine
88
+ references into a parquet store.
89
+ Examples of this use-case can be found here:
90
+ https://fsspec.github.io/kerchunk/advanced.html?highlight=parquet#parquet-storage"""
91
+
92
+ # import is class level to prevent numpy dep requirement for fsspec
93
+ @property
94
+ def np(self):
95
+ import numpy as np
96
+
97
+ return np
98
+
99
+ @property
100
+ def pd(self):
101
+ import pandas as pd
102
+
103
+ return pd
104
+
105
+ def __init__(
106
+ self, root, fs=None, out_root=None, cache_size=128, categorical_threshold=10
107
+ ):
108
+ """
109
+
110
+ This instance will be writable, storing changes in memory until full partitions
111
+ are accumulated or .flush() is called.
112
+
113
+ To create an empty lazy store, use .create()
114
+
115
+ Parameters
116
+ ----------
117
+ root : str
118
+ Root of parquet store
119
+ fs : fsspec.AbstractFileSystem
120
+ fsspec filesystem object, default is local filesystem.
121
+ cache_size : int, default=128
122
+ Maximum size of LRU cache, where cache_size*record_size denotes
123
+ the total number of references that can be loaded in memory at once.
124
+ categorical_threshold : int
125
+ Encode urls as pandas.Categorical to reduce memory footprint if the ratio
126
+ of the number of unique urls to total number of refs for each variable
127
+ is greater than or equal to this number. (default 10)
128
+ """
129
+ self.root = root
130
+ self.chunk_sizes = {}
131
+ self.out_root = out_root or self.root
132
+ self.cat_thresh = categorical_threshold
133
+ self.cache_size = cache_size
134
+ self.dirs = None
135
+ self.url = self.root + "/{field}/refs.{record}.parq"
136
+ # TODO: derive fs from `root`
137
+ self.fs = fsspec.filesystem("file") if fs is None else fs
138
+
139
+ def __getattr__(self, item):
140
+ if item in ("_items", "record_size", "zmetadata"):
141
+ self.setup()
142
+ # avoid possible recursion if setup fails somehow
143
+ return self.__dict__[item]
144
+ raise AttributeError(item)
145
+
146
+ def setup(self):
147
+ self._items = {}
148
+ self._items[".zmetadata"] = self.fs.cat_file(
149
+ "/".join([self.root, ".zmetadata"])
150
+ )
151
+ met = json.loads(self._items[".zmetadata"])
152
+ self.record_size = met["record_size"]
153
+ self.zmetadata = met["metadata"]
154
+
155
+ # Define function to open and decompress refs
156
+ @lru_cache(maxsize=self.cache_size)
157
+ def open_refs(field, record):
158
+ """cached parquet file loader"""
159
+ path = self.url.format(field=field, record=record)
160
+ data = io.BytesIO(self.fs.cat_file(path))
161
+ df = self.pd.read_parquet(data, engine="fastparquet")
162
+ refs = {c: df[c].values for c in df.columns}
163
+ return refs
164
+
165
+ self.open_refs = open_refs
166
+
167
+ @staticmethod
168
+ def create(root, storage_options=None, fs=None, record_size=10000, **kwargs):
169
+ """Make empty parquet reference set
170
+
171
+ First deletes the contents of the given directory, if it exists.
172
+
173
+ Parameters
174
+ ----------
175
+ root: str
176
+ Directory to contain the output; will be created
177
+ storage_options: dict | None
178
+ For making the filesystem to use for writing if fs is None
179
+ fs: FileSystem | None
180
+ Filesystem for writing
181
+ record_size: int
182
+ Number of references per parquet file
183
+ kwargs: passed to __init__
184
+
185
+ Returns
186
+ -------
187
+ LazyReferenceMapper instance
188
+ """
189
+ met = {"metadata": {}, "record_size": record_size}
190
+ if fs is None:
191
+ fs, root = fsspec.core.url_to_fs(root, **(storage_options or {}))
192
+ if fs.exists(root):
193
+ fs.rm(root, recursive=True)
194
+ fs.makedirs(root, exist_ok=True)
195
+ fs.pipe("/".join([root, ".zmetadata"]), json.dumps(met).encode())
196
+ return LazyReferenceMapper(root, fs, **kwargs)
197
+
198
+ def listdir(self, basename=True):
199
+ """List top-level directories"""
200
+ # cache me?
201
+ if self.dirs is None:
202
+ dirs = [p.split("/", 1)[0] for p in self.zmetadata]
203
+ self.dirs = {p for p in dirs if p and not p.startswith(".")}
204
+ listing = self.dirs
205
+ if basename:
206
+ listing = [os.path.basename(path) for path in listing]
207
+ return listing
208
+
209
+ def ls(self, path="", detail=True):
210
+ """Shortcut file listings"""
211
+ if not path:
212
+ dirnames = self.listdir()
213
+ others = set(
214
+ [".zmetadata"]
215
+ + [name for name in self.zmetadata if "/" not in name]
216
+ + [name for name in self._items if "/" not in name]
217
+ )
218
+ if detail is False:
219
+ others.update(dirnames)
220
+ return sorted(others)
221
+ dirinfo = [
222
+ {"name": name, "type": "directory", "size": 0} for name in dirnames
223
+ ]
224
+ fileinfo = [
225
+ {
226
+ "name": name,
227
+ "type": "file",
228
+ "size": len(
229
+ json.dumps(self.zmetadata[name])
230
+ if name in self.zmetadata
231
+ else self._items[name]
232
+ ),
233
+ }
234
+ for name in others
235
+ ]
236
+ return sorted(dirinfo + fileinfo, key=lambda s: s["name"])
237
+ parts = path.split("/", 1)
238
+ if len(parts) > 1:
239
+ raise FileNotFoundError("Cannot list within directories right now")
240
+ field = parts[0]
241
+ others = set(
242
+ [name for name in self.zmetadata if name.startswith(f"{path}/")]
243
+ + [name for name in self._items if name.startswith(f"{path}/")]
244
+ )
245
+ fileinfo = [
246
+ {
247
+ "name": name,
248
+ "type": "file",
249
+ "size": len(
250
+ json.dumps(self.zmetadata[name])
251
+ if name in self.zmetadata
252
+ else self._items[name]
253
+ ),
254
+ }
255
+ for name in others
256
+ ]
257
+ keys = self._keys_in_field(field)
258
+
259
+ if detail is False:
260
+ return list(others) + list(keys)
261
+ recs = self._generate_all_records(field)
262
+ recinfo = [
263
+ {"name": name, "type": "file", "size": rec[-1]}
264
+ for name, rec in zip(keys, recs)
265
+ if rec[0] # filters out path==None, deleted/missing
266
+ ]
267
+ return fileinfo + recinfo
268
+
269
+ def _load_one_key(self, key):
270
+ """Get the reference for one key
271
+
272
+ Returns bytes, one-element list or three-element list.
273
+ """
274
+ if key in self._items:
275
+ return self._items[key]
276
+ elif key in self.zmetadata:
277
+ return json.dumps(self.zmetadata[key]).encode()
278
+ elif "/" not in key or self._is_meta(key):
279
+ raise KeyError(key)
280
+ field, sub_key = key.split("/")
281
+ record, ri, chunk_size = self._key_to_record(key)
282
+ maybe = self._items.get((field, record), {}).get(ri, False)
283
+ if maybe is None:
284
+ # explicitly deleted
285
+ raise KeyError
286
+ elif maybe:
287
+ return maybe
288
+ elif chunk_size == 0:
289
+ return b""
290
+
291
+ # Chunk keys can be loaded from row group and cached in LRU cache
292
+ try:
293
+ refs = self.open_refs(field, record)
294
+ except (ValueError, TypeError, FileNotFoundError):
295
+ raise KeyError(key)
296
+ columns = ["path", "offset", "size", "raw"]
297
+ selection = [refs[c][ri] if c in refs else None for c in columns]
298
+ raw = selection[-1]
299
+ if raw is not None:
300
+ return raw
301
+ if selection[0] is None:
302
+ raise KeyError("This reference does not exist or has been deleted")
303
+ if selection[1:3] == [0, 0]:
304
+ # URL only
305
+ return selection[:1]
306
+ # URL, offset, size
307
+ return selection[:3]
308
+
309
+ @lru_cache(4096)
310
+ def _key_to_record(self, key):
311
+ """Details needed to construct a reference for one key"""
312
+ field, chunk = key.split("/")
313
+ chunk_sizes = self._get_chunk_sizes(field)
314
+ if len(chunk_sizes) == 0:
315
+ return 0, 0, 0
316
+ chunk_idx = [int(c) for c in chunk.split(".")]
317
+ chunk_number = ravel_multi_index(chunk_idx, chunk_sizes)
318
+ record = chunk_number // self.record_size
319
+ ri = chunk_number % self.record_size
320
+ return record, ri, len(chunk_sizes)
321
+
322
+ def _get_chunk_sizes(self, field):
323
+ """The number of chunks along each axis for a given field"""
324
+ if field not in self.chunk_sizes:
325
+ zarray = self.zmetadata[f"{field}/.zarray"]
326
+ size_ratio = [
327
+ math.ceil(s / c) for s, c in zip(zarray["shape"], zarray["chunks"])
328
+ ]
329
+ self.chunk_sizes[field] = size_ratio or [1]
330
+ return self.chunk_sizes[field]
331
+
332
+ def _generate_record(self, field, record):
333
+ """The references for a given parquet file of a given field"""
334
+ refs = self.open_refs(field, record)
335
+ it = iter(zip(*refs.values()))
336
+ if len(refs) == 3:
337
+ # All urls
338
+ return (list(t) for t in it)
339
+ elif len(refs) == 1:
340
+ # All raws
341
+ return refs["raw"]
342
+ else:
343
+ # Mix of urls and raws
344
+ return (list(t[:3]) if not t[3] else t[3] for t in it)
345
+
346
+ def _generate_all_records(self, field):
347
+ """Load all the references within a field by iterating over the parquet files"""
348
+ nrec = 1
349
+ for ch in self._get_chunk_sizes(field):
350
+ nrec *= ch
351
+ nrec = math.ceil(nrec / self.record_size)
352
+ for record in range(nrec):
353
+ yield from self._generate_record(field, record)
354
+
355
+ def values(self):
356
+ return RefsValuesView(self)
357
+
358
+ def items(self):
359
+ return RefsItemsView(self)
360
+
361
+ def __hash__(self):
362
+ return id(self)
363
+
364
+ def __getitem__(self, key):
365
+ return self._load_one_key(key)
366
+
367
+ def __setitem__(self, key, value):
368
+ if "/" in key and not self._is_meta(key):
369
+ field, chunk = key.split("/")
370
+ record, i, _ = self._key_to_record(key)
371
+ subdict = self._items.setdefault((field, record), {})
372
+ subdict[i] = value
373
+ if len(subdict) == self.record_size:
374
+ self.write(field, record)
375
+ else:
376
+ # metadata or top-level
377
+ self._items[key] = value
378
+ new_value = json.loads(
379
+ value.decode() if isinstance(value, bytes) else value
380
+ )
381
+ self.zmetadata[key] = {**self.zmetadata.get(key, {}), **new_value}
382
+
383
+ @staticmethod
384
+ def _is_meta(key):
385
+ return key.startswith(".z") or "/.z" in key
386
+
387
+ def __delitem__(self, key):
388
+ if key in self._items:
389
+ del self._items[key]
390
+ elif key in self.zmetadata:
391
+ del self.zmetadata[key]
392
+ else:
393
+ if "/" in key and not self._is_meta(key):
394
+ field, chunk = key.split("/")
395
+ record, i, _ = self._key_to_record(key)
396
+ subdict = self._items.setdefault((field, record), {})
397
+ subdict[i] = None
398
+ if len(subdict) == self.record_size:
399
+ self.write(field, record)
400
+ else:
401
+ # metadata or top-level
402
+ self._items[key] = None
403
+
404
+ def write(self, field, record, base_url=None, storage_options=None):
405
+ # extra requirements if writing
406
+ import kerchunk.df
407
+ import numpy as np
408
+ import pandas as pd
409
+
410
+ partition = self._items[(field, record)]
411
+ original = False
412
+ if len(partition) < self.record_size:
413
+ try:
414
+ original = self.open_refs(field, record)
415
+ except IOError:
416
+ pass
417
+
418
+ if original:
419
+ paths = original["path"]
420
+ offsets = original["offset"]
421
+ sizes = original["size"]
422
+ raws = original["raw"]
423
+ else:
424
+ paths = np.full(self.record_size, np.nan, dtype="O")
425
+ offsets = np.zeros(self.record_size, dtype="int64")
426
+ sizes = np.zeros(self.record_size, dtype="int64")
427
+ raws = np.full(self.record_size, np.nan, dtype="O")
428
+ for j, data in partition.items():
429
+ if isinstance(data, list):
430
+ if (
431
+ str(paths.dtype) == "category"
432
+ and data[0] not in paths.dtype.categories
433
+ ):
434
+ paths = paths.add_categories(data[0])
435
+ paths[j] = data[0]
436
+ if len(data) > 1:
437
+ offsets[j] = data[1]
438
+ sizes[j] = data[2]
439
+ elif data is None:
440
+ # delete
441
+ paths[j] = None
442
+ offsets[j] = 0
443
+ sizes[j] = 0
444
+ raws[j] = None
445
+ else:
446
+ # this is the only call into kerchunk, could remove
447
+ raws[j] = kerchunk.df._proc_raw(data)
448
+ # TODO: only save needed columns
449
+ df = pd.DataFrame(
450
+ {
451
+ "path": paths,
452
+ "offset": offsets,
453
+ "size": sizes,
454
+ "raw": raws,
455
+ },
456
+ copy=False,
457
+ )
458
+ if df.path.count() / (df.path.nunique() or 1) > self.cat_thresh:
459
+ df["path"] = df["path"].astype("category")
460
+ object_encoding = {"raw": "bytes", "path": "utf8"}
461
+ has_nulls = ["path", "raw"]
462
+
463
+ fn = f"{base_url or self.out_root}/{field}/refs.{record}.parq"
464
+ self.fs.mkdirs(f"{base_url or self.out_root}/{field}", exist_ok=True)
465
+ df.to_parquet(
466
+ fn,
467
+ engine="fastparquet",
468
+ storage_options=storage_options
469
+ or getattr(self.fs, "storage_options", None),
470
+ compression="zstd",
471
+ index=False,
472
+ stats=False,
473
+ object_encoding=object_encoding,
474
+ has_nulls=has_nulls,
475
+ # **kwargs,
476
+ )
477
+ partition.clear()
478
+ self._items.pop((field, record))
479
+
480
+ def flush(self, base_url=None, storage_options=None):
481
+ """Output any modified or deleted keys
482
+
483
+ Parameters
484
+ ----------
485
+ base_url: str
486
+ Location of the output
487
+ """
488
+ # write what we have so far and clear sub chunks
489
+ for thing in list(self._items):
490
+ if isinstance(thing, tuple):
491
+ field, record = thing
492
+ self.write(
493
+ field,
494
+ record,
495
+ base_url=base_url,
496
+ storage_options=storage_options,
497
+ )
498
+
499
+ # gather .zmetadata from self._items and write that too
500
+ for k in list(self._items):
501
+ if k != ".zmetadata" and ".z" in k:
502
+ self.zmetadata[k] = json.loads(self._items.pop(k))
503
+ met = {"metadata": self.zmetadata, "record_size": self.record_size}
504
+ self._items[".zmetadata"] = json.dumps(met).encode()
505
+ self.fs.pipe(
506
+ "/".join([base_url or self.out_root, ".zmetadata"]),
507
+ self._items[".zmetadata"],
508
+ )
509
+
510
+ # TODO: only clear those that we wrote to?
511
+ self.open_refs.cache_clear()
512
+
513
+ def __len__(self):
514
+ # Caveat: This counts expected references, not actual - but is fast
515
+ count = 0
516
+ for field in self.listdir():
517
+ if field.startswith("."):
518
+ count += 1
519
+ else:
520
+ count += math.prod(self._get_chunk_sizes(field))
521
+ count += len(self.zmetadata) # all metadata keys
522
+ # any other files not in reference partitions
523
+ count += sum(1 for _ in self._items if not isinstance(_, tuple))
524
+ return count
525
+
526
+ def __iter__(self):
527
+ # Caveat: returns only existing keys, so the number of these does not
528
+ # match len(self)
529
+ metas = set(self.zmetadata)
530
+ metas.update(self._items)
531
+ for bit in metas:
532
+ if isinstance(bit, str):
533
+ yield bit
534
+ for field in self.listdir():
535
+ for k in self._keys_in_field(field):
536
+ if k in self:
537
+ yield k
538
+
539
+ def __contains__(self, item):
540
+ try:
541
+ self._load_one_key(item)
542
+ return True
543
+ except KeyError:
544
+ return False
545
+
546
+ def _keys_in_field(self, field):
547
+ """List key names in given field
548
+
549
+ Produces strings like "field/x.y" appropriate from the chunking of the array
550
+ """
551
+ chunk_sizes = self._get_chunk_sizes(field)
552
+ if len(chunk_sizes) == 0:
553
+ yield field + "/0"
554
+ return
555
+ inds = itertools.product(*(range(i) for i in chunk_sizes))
556
+ for ind in inds:
557
+ yield field + "/" + ".".join([str(c) for c in ind])
558
+
559
+
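For orientation, a minimal write-side sketch of the LazyReferenceMapper defined above. It assumes fastparquet and kerchunk are installed, and every path and metadata value below is a made-up placeholder, not something taken from this repository:

from fsspec.implementations.reference import LazyReferenceMapper
import fsspec

# create an empty parquet-backed reference store (deletes "refs.parq" if it exists)
refs = LazyReferenceMapper.create("refs.parq", record_size=1000)

# metadata keys (anything containing ".z") are kept in .zmetadata ...
refs[".zgroup"] = b'{"zarr_format": 2}'
refs["x/.zarray"] = (
    b'{"shape": [4], "chunks": [2], "dtype": "<f8", "compressor": null,'
    b' "filters": null, "fill_value": null, "order": "C", "zarr_format": 2}'
)

# ... while chunk keys are buffered in memory and written to refs.<record>.parq
refs["x/0"] = ["/data/example.nc", 0, 100]    # [url, offset, size] - placeholder target
refs["x/1"] = ["/data/example.nc", 100, 100]
refs.flush()

# the resulting directory can then be handed to ReferenceFileSystem via `fo`
fs = fsspec.filesystem("reference", fo="refs.parq", remote_protocol="file")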
560
+ class ReferenceFileSystem(AsyncFileSystem):
561
+ """View byte ranges of some other file as a file system
562
+ Initial version: single file system target, which must support
563
+ async, and must allow start and end args in _cat_file. Later versions
564
+ may allow multiple arbitrary URLs for the targets.
565
+ This FileSystem is read-only. It is designed to be used with async
566
+ targets (for now). This FileSystem only allows whole-file access, no
567
+ ``open``. We do not get original file details from the target FS.
568
+ Configuration is by passing a dict of references at init, or a URL to
569
+ a JSON file containing the same; this dict
570
+ can also contain concrete data for some set of paths.
571
+ Reference dict format:
572
+ {path0: bytes_data, path1: (target_url, offset, size)}
573
+ https://github.com/fsspec/kerchunk/blob/main/README.md
574
+ """
575
+
576
+ protocol = "reference"
577
+
578
+ def __init__(
579
+ self,
580
+ fo,
581
+ target=None,
582
+ ref_storage_args=None,
583
+ target_protocol=None,
584
+ target_options=None,
585
+ remote_protocol=None,
586
+ remote_options=None,
587
+ fs=None,
588
+ template_overrides=None,
589
+ simple_templates=True,
590
+ max_gap=64_000,
591
+ max_block=256_000_000,
592
+ cache_size=128,
593
+ **kwargs,
594
+ ):
595
+ """
596
+ Parameters
597
+ ----------
598
+ fo : dict or str
599
+ The set of references to use for this instance, with a structure as above.
600
+ If str referencing a JSON file, will use fsspec.open, in conjunction
601
+ with target_options and target_protocol to open and parse JSON at this
602
+ location. If a directory, then assume references are a set of parquet
603
+ files to be loaded lazily.
604
+ target : str
605
+ For any references having target_url as None, this is the default file
606
+ target to use
607
+ ref_storage_args : dict
608
+ If references is a str, use these kwargs for loading the JSON file.
609
+ Deprecated: use target_options instead.
610
+ target_protocol : str
611
+ Used for loading the reference file, if it is a path. If None, protocol
612
+ will be derived from the given path
613
+ target_options : dict
614
+ Extra FS options for loading the reference file ``fo``, if given as a path
615
+ remote_protocol : str
616
+ The protocol of the filesystem on which the references will be evaluated
617
+ (unless fs is provided). If not given, will be derived from the first
618
+ URL that has a protocol in the templates or in the references, in that
619
+ order.
620
+ remote_options : dict
621
+ kwargs to go with remote_protocol
622
+ fs : AbstractFileSystem | dict(str, (AbstractFileSystem | dict))
623
+ Directly provide a file system(s):
624
+ - a single filesystem instance
625
+ - a dict of protocol:filesystem, where each value is either a filesystem
626
+ instance, or a dict of kwargs that can be used to create an
627
+ instance for the given protocol
628
+
629
+ If this is given, remote_options and remote_protocol are ignored.
630
+ template_overrides : dict
631
+ Swap out any templates in the references file with these - useful for
632
+ testing.
633
+ simple_templates: bool
634
+ Whether templates can be processed with simple replace (True) or if
635
+ jinja is needed (False, much slower). All reference sets produced by
636
+ ``kerchunk`` are simple in this sense, but the spec allows for complex.
637
+ max_gap, max_block: int
638
+ For merging multiple concurrent requests to the same remote file.
639
+ Neighboring byte ranges will only be merged when their
640
+ inter-range gap is <= ``max_gap``. Default is 64KB. Set to 0
641
+ to only merge when it requires no extra bytes. Pass a negative
642
+ number to disable merging, appropriate for local target files.
643
+ Neighboring byte ranges will only be merged when the size of
644
+ the aggregated range is <= ``max_block``. Default is 256MB.
645
+ cache_size : int
646
+ Maximum size of LRU cache, where cache_size*record_size denotes
647
+ the total number of references that can be loaded in memory at once.
648
+ Only used for lazily loaded references.
649
+ kwargs : passed to parent class
650
+ """
651
+ super().__init__(**kwargs)
652
+ self.target = target
653
+ self.template_overrides = template_overrides
654
+ self.simple_templates = simple_templates
655
+ self.templates = {}
656
+ self.fss = {}
657
+ self._dircache = {}
658
+ self.max_gap = max_gap
659
+ self.max_block = max_block
660
+ if isinstance(fo, str):
661
+ dic = dict(
662
+ **(ref_storage_args or target_options or {}), protocol=target_protocol
663
+ )
664
+ ref_fs, fo2 = fsspec.core.url_to_fs(fo, **dic)
665
+ if ref_fs.isfile(fo2):
666
+ # text JSON
667
+ with fsspec.open(fo, "rb", **dic) as f:
668
+ logger.info("Read reference from URL %s", fo)
669
+ text = json.load(f)
670
+ self._process_references(text, template_overrides)
671
+ else:
672
+ # Lazy parquet refs
673
+ logger.info("Open lazy reference dict from URL %s", fo)
674
+ self.references = LazyReferenceMapper(
675
+ fo2,
676
+ fs=ref_fs,
677
+ cache_size=cache_size,
678
+ )
679
+ else:
680
+ # dictionaries
681
+ self._process_references(fo, template_overrides)
682
+ if isinstance(fs, dict):
683
+ self.fss = {
684
+ k: (
685
+ fsspec.filesystem(k.split(":", 1)[0], **opts)
686
+ if isinstance(opts, dict)
687
+ else opts
688
+ )
689
+ for k, opts in fs.items()
690
+ }
691
+ if None not in self.fss:
692
+ self.fss[None] = filesystem("file")
693
+ return
694
+ if fs is not None:
695
+ # single remote FS
696
+ remote_protocol = (
697
+ fs.protocol[0] if isinstance(fs.protocol, tuple) else fs.protocol
698
+ )
699
+ self.fss[remote_protocol] = fs
700
+
701
+ if remote_protocol is None:
702
+ # get single protocol from any templates
703
+ for ref in self.templates.values():
704
+ if callable(ref):
705
+ ref = ref()
706
+ protocol, _ = fsspec.core.split_protocol(ref)
707
+ if protocol and protocol not in self.fss:
708
+ fs = filesystem(protocol, **(remote_options or {}))
709
+ self.fss[protocol] = fs
710
+ if remote_protocol is None:
711
+ # get single protocol from references
712
+ # TODO: warning here, since this can be very expensive?
713
+ for ref in self.references.values():
714
+ if callable(ref):
715
+ ref = ref()
716
+ if isinstance(ref, list) and ref[0]:
717
+ protocol, _ = fsspec.core.split_protocol(ref[0])
718
+ if protocol not in self.fss:
719
+ fs = filesystem(protocol, **(remote_options or {}))
720
+ self.fss[protocol] = fs
721
+ # only use first remote URL
722
+ break
723
+
724
+ if remote_protocol and remote_protocol not in self.fss:
725
+ fs = filesystem(remote_protocol, **(remote_options or {}))
726
+ self.fss[remote_protocol] = fs
727
+
728
+ self.fss[None] = fs or filesystem("file") # default one
729
+
730
+ def _cat_common(self, path, start=None, end=None):
731
+ path = self._strip_protocol(path)
732
+ logger.debug(f"cat: {path}")
733
+ try:
734
+ part = self.references[path]
735
+ except KeyError:
736
+ raise FileNotFoundError(path)
737
+ if isinstance(part, str):
738
+ part = part.encode()
739
+ if isinstance(part, bytes):
740
+ logger.debug(f"Reference: {path}, type bytes")
741
+ if part.startswith(b"base64:"):
742
+ part = base64.b64decode(part[7:])
743
+ return part, None, None
744
+
745
+ if len(part) == 1:
746
+ logger.debug(f"Reference: {path}, whole file => {part}")
747
+ url = part[0]
748
+ start1, end1 = start, end
749
+ else:
750
+ url, start0, size = part
751
+ logger.debug(f"Reference: {path} => {url}, offset {start0}, size {size}")
752
+ end0 = start0 + size
753
+
754
+ if start is not None:
755
+ if start >= 0:
756
+ start1 = start0 + start
757
+ else:
758
+ start1 = end0 + start
759
+ else:
760
+ start1 = start0
761
+ if end is not None:
762
+ if end >= 0:
763
+ end1 = start0 + end
764
+ else:
765
+ end1 = end0 + end
766
+ else:
767
+ end1 = end0
768
+ if url is None:
769
+ url = self.target
770
+ return url, start1, end1
771
+
772
+ async def _cat_file(self, path, start=None, end=None, **kwargs):
773
+ part_or_url, start0, end0 = self._cat_common(path, start=start, end=end)
774
+ if isinstance(part_or_url, bytes):
775
+ return part_or_url[start:end]
776
+ protocol, _ = split_protocol(part_or_url)
777
+ try:
778
+ return await self.fss[protocol]._cat_file(part_or_url, start=start0, end=end0)
779
+ except Exception as e:
780
+ raise ReferenceNotReachable(path, part_or_url) from e
781
+
782
+ def cat_file(self, path, start=None, end=None, **kwargs):
783
+ part_or_url, start0, end0 = self._cat_common(path, start=start, end=end)
784
+ if isinstance(part_or_url, bytes):
785
+ return part_or_url[start:end]
786
+ protocol, _ = split_protocol(part_or_url)
787
+ try:
788
+ return self.fss[protocol].cat_file(part_or_url, start=start0, end=end0)
789
+ except Exception as e:
790
+ raise ReferenceNotReachable(path, part_or_url) from e
791
+
792
+ def pipe_file(self, path, value, **_):
793
+ """Temporarily add binary data or reference as a file"""
794
+ self.references[path] = value
795
+
796
+ async def _get_file(self, rpath, lpath, **kwargs):
797
+ if self.isdir(rpath):
798
+ return os.makedirs(lpath, exist_ok=True)
799
+ data = await self._cat_file(rpath)
800
+ with open(lpath, "wb") as f:
801
+ f.write(data)
802
+
803
+ def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, **kwargs):
804
+ if self.isdir(rpath):
805
+ return os.makedirs(lpath, exist_ok=True)
806
+ data = self.cat_file(rpath, **kwargs)
807
+ callback.set_size(len(data))
808
+ if isfilelike(lpath):
809
+ lpath.write(data)
810
+ else:
811
+ with open(lpath, "wb") as f:
812
+ f.write(data)
813
+ callback.absolute_update(len(data))
814
+
815
+ def get(self, rpath, lpath, recursive=False, **kwargs):
816
+ if recursive:
817
+ # trigger directory build
818
+ self.ls("")
819
+ rpath = self.expand_path(rpath, recursive=recursive)
820
+ fs = fsspec.filesystem("file", auto_mkdir=True)
821
+ targets = other_paths(rpath, lpath)
822
+ if recursive:
823
+ data = self.cat([r for r in rpath if not self.isdir(r)])
824
+ else:
825
+ data = self.cat(rpath)
826
+ for remote, local in zip(rpath, targets):
827
+ if remote in data:
828
+ fs.pipe_file(local, data[remote])
829
+
830
+ def cat(self, path, recursive=False, on_error="raise", **kwargs):
831
+ if isinstance(path, str) and recursive:
832
+ raise NotImplementedError
833
+ if isinstance(path, list) and (recursive or any("*" in p for p in path)):
834
+ raise NotImplementedError
835
+ # TODO: if references is lazy, pre-fetch all paths in batch before access
836
+ proto_dict = _protocol_groups(path, self.references)
837
+ out = {}
838
+ for proto, paths in proto_dict.items():
839
+ fs = self.fss[proto]
840
+ urls, starts, ends, valid_paths = [], [], [], []
841
+ for p in paths:
842
+ # find references or label not-found. Early exit if any not
843
+ # found and on_error is "raise"
844
+ try:
845
+ u, s, e = self._cat_common(p)
846
+ except FileNotFoundError as err:
847
+ if on_error == "raise":
848
+ raise
849
+ if on_error != "omit":
850
+ out[p] = err
851
+ else:
852
+ urls.append(u)
853
+ starts.append(s)
854
+ ends.append(e)
855
+ valid_paths.append(p)
856
+
857
+ # process references into form for merging
858
+ urls2 = []
859
+ starts2 = []
860
+ ends2 = []
861
+ paths2 = []
862
+ whole_files = set()
863
+ for u, s, e, p in zip(urls, starts, ends, valid_paths):
864
+ if isinstance(u, bytes):
865
+ # data
866
+ out[p] = u
867
+ elif s is None:
868
+ # whole file - limits are None, None, but no further
869
+ # entries take for this file
870
+ whole_files.add(u)
871
+ urls2.append(u)
872
+ starts2.append(s)
873
+ ends2.append(e)
874
+ paths2.append(p)
875
+ for u, s, e, p in zip(urls, starts, ends, valid_paths):
876
+ # second run to account for files that are to be loaded whole
877
+ if s is not None and u not in whole_files:
878
+ urls2.append(u)
879
+ starts2.append(s)
880
+ ends2.append(e)
881
+ paths2.append(p)
882
+
883
+ # merge and fetch consolidated ranges
884
+ new_paths, new_starts, new_ends = merge_offset_ranges(
885
+ list(urls2),
886
+ list(starts2),
887
+ list(ends2),
888
+ sort=True,
889
+ max_gap=self.max_gap,
890
+ max_block=self.max_block,
891
+ )
892
+ bytes_out = fs.cat_ranges(new_paths, new_starts, new_ends)
893
+
894
+ # unbundle from merged bytes - simple approach
895
+ for u, s, e, p in zip(urls, starts, ends, valid_paths):
896
+ if p in out:
897
+ continue # was bytes, already handled
898
+ for np, ns, ne, b in zip(new_paths, new_starts, new_ends, bytes_out):
899
+ if np == u and (ns is None or ne is None):
900
+ if isinstance(b, Exception):
901
+ out[p] = b
902
+ else:
903
+ out[p] = b[s:e]
904
+ elif np == u and s >= ns and e <= ne:
905
+ if isinstance(b, Exception):
906
+ out[p] = b
907
+ else:
908
+ out[p] = b[s - ns : (e - ne) or None]
909
+
910
+ for k, v in out.copy().items():
911
+ # these were valid references, but fetch failed, so transform exc
912
+ if isinstance(v, Exception) and k in self.references:
913
+ ex = out[k]
914
+ new_ex = ReferenceNotReachable(k, self.references[k])
915
+ new_ex.__cause__ = ex
916
+ if on_error == "raise":
917
+ raise new_ex
918
+ elif on_error != "omit":
919
+ out[k] = new_ex
920
+
921
+ if len(out) == 1 and isinstance(path, str) and "*" not in path:
922
+ return _first(out)
923
+ return out
924
+
925
+ def _process_references(self, references, template_overrides=None):
926
+ vers = references.get("version", None)
927
+ if vers is None:
928
+ self._process_references0(references)
929
+ elif vers == 1:
930
+ self._process_references1(references, template_overrides=template_overrides)
931
+ else:
932
+ raise ValueError(f"Unknown reference spec version: {vers}")
933
+ # TODO: we make dircache by iterating over all entries, but for Spec >= 1,
934
+ # can replace with programmatic. Is it even needed for mapper interface?
935
+
936
+ def _process_references0(self, references):
937
+ """Make reference dict for Spec Version 0"""
938
+ self.references = references
939
+
940
+ def _process_references1(self, references, template_overrides=None):
941
+ if not self.simple_templates or self.templates:
942
+ import jinja2
943
+ self.references = {}
944
+ self._process_templates(references.get("templates", {}))
945
+
946
+ @lru_cache(1000)
947
+ def _render_jinja(u):
948
+ return jinja2.Template(u).render(**self.templates)
949
+
950
+ for k, v in references.get("refs", {}).items():
951
+ if isinstance(v, str):
952
+ if v.startswith("base64:"):
953
+ self.references[k] = base64.b64decode(v[7:])
954
+ self.references[k] = v
955
+ elif self.templates:
956
+ u = v[0]
957
+ if "{{" in u:
958
+ if self.simple_templates:
959
+ u = (
960
+ u.replace("{{", "{")
961
+ .replace("}}", "}")
962
+ .format(**self.templates)
963
+ )
964
+ else:
965
+ u = _render_jinja(u)
966
+ self.references[k] = [u] if len(v) == 1 else [u, v[1], v[2]]
967
+ else:
968
+ self.references[k] = v
969
+ self.references.update(self._process_gen(references.get("gen", [])))
970
+
971
+ def _process_templates(self, tmp):
972
+ self.templates = {}
973
+ if self.template_overrides is not None:
974
+ tmp.update(self.template_overrides)
975
+ for k, v in tmp.items():
976
+ if "{{" in v:
977
+ import jinja2
978
+
979
+ self.templates[k] = lambda temp=v, **kwargs: jinja2.Template(
980
+ temp
981
+ ).render(**kwargs)
982
+ else:
983
+ self.templates[k] = v
984
+
985
+ def _process_gen(self, gens):
986
+ out = {}
987
+ for gen in gens:
988
+ dimension = {
989
+ k: v
990
+ if isinstance(v, list)
991
+ else range(v.get("start", 0), v["stop"], v.get("step", 1))
992
+ for k, v in gen["dimensions"].items()
993
+ }
994
+ products = (
995
+ dict(zip(dimension.keys(), values))
996
+ for values in itertools.product(*dimension.values())
997
+ )
998
+ for pr in products:
999
+ import jinja2
1000
+
1001
+ key = jinja2.Template(gen["key"]).render(**pr, **self.templates)
1002
+ url = jinja2.Template(gen["url"]).render(**pr, **self.templates)
1003
+ if ("offset" in gen) and ("length" in gen):
1004
+ offset = int(
1005
+ jinja2.Template(gen["offset"]).render(**pr, **self.templates)
1006
+ )
1007
+ length = int(
1008
+ jinja2.Template(gen["length"]).render(**pr, **self.templates)
1009
+ )
1010
+ out[key] = [url, offset, length]
1011
+ elif ("offset" in gen) ^ ("length" in gen):
1012
+ raise ValueError(
1013
+ "Both 'offset' and 'length' are required for a "
1014
+ "reference generator entry if either is provided."
1015
+ )
1016
+ else:
1017
+ out[key] = [url]
1018
+ return out
1019
+
1020
+ def _dircache_from_items(self):
1021
+ self.dircache = {"": []}
1022
+ it = self.references.items()
1023
+ for path, part in it:
1024
+ if isinstance(part, (bytes, str)):
1025
+ size = len(part)
1026
+ elif len(part) == 1:
1027
+ size = None
1028
+ else:
1029
+ _, _, size = part
1030
+ par = path.rsplit("/", 1)[0] if "/" in path else ""
1031
+ par0 = par
1032
+ subdirs = [par0]
1033
+ while par0 and par0 not in self.dircache:
1034
+ # collect parent directories
1035
+ par0 = self._parent(par0)
1036
+ subdirs.append(par0)
1037
+
1038
+ subdirs = subdirs[::-1]
1039
+ for parent, child in zip(subdirs, subdirs[1:]):
1040
+ # register newly discovered directories
1041
+ assert child not in self.dircache
1042
+ assert parent in self.dircache
1043
+ self.dircache[parent].append(
1044
+ {"name": child, "type": "directory", "size": 0}
1045
+ )
1046
+ self.dircache[child] = []
1047
+
1048
+ self.dircache[par].append({"name": path, "type": "file", "size": size})
1049
+
1050
+ def _open(self, path, mode="rb", block_size=None, cache_options=None, **kwargs):
1051
+ data = self.cat_file(path) # load whole chunk into memory
1052
+ return io.BytesIO(data)
1053
+
1054
+ def ls(self, path, detail=True, **kwargs):
1055
+ path = self._strip_protocol(path)
1056
+ if isinstance(self.references, LazyReferenceMapper):
1057
+ try:
1058
+ return self.references.ls(path, detail)
1059
+ except KeyError:
1060
+ pass
1061
+ raise FileNotFoundError(f"'{path}' is not a known key")
1062
+ if not self.dircache:
1063
+ self._dircache_from_items()
1064
+ out = self._ls_from_cache(path)
1065
+ if out is None:
1066
+ raise FileNotFoundError(path)
1067
+ if detail:
1068
+ return out
1069
+ return [o["name"] for o in out]
1070
+
1071
+ def exists(self, path, **kwargs): # overwrite auto-sync version
1072
+ return self.isdir(path) or self.isfile(path)
1073
+
1074
+ def isdir(self, path): # overwrite auto-sync version
1075
+ if self.dircache:
1076
+ return path in self.dircache
1077
+ elif isinstance(self.references, LazyReferenceMapper):
1078
+ return path in self.references.listdir("")
1079
+ else:
1080
+ # this may be faster than building dircache for single calls, but
1081
+ # by looping will be slow for many calls; could cache it?
1082
+ return any(_.startswith(f"{path}/") for _ in self.references)
1083
+
1084
+ def isfile(self, path): # overwrite auto-sync version
1085
+ return path in self.references
1086
+
1087
+ async def _ls(self, path, detail=True, **kwargs): # calls fast sync code
1088
+ return self.ls(path, detail, **kwargs)
1089
+
1090
+ def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
1091
+ if withdirs:
1092
+ return super().find(
1093
+ path, maxdepth=maxdepth, withdirs=withdirs, detail=detail, **kwargs
1094
+ )
1095
+ if path:
1096
+ path = self._strip_protocol(path)
1097
+ r = sorted(k for k in self.references if k.startswith(path))
1098
+ else:
1099
+ r = sorted(self.references)
1100
+ if detail:
1101
+ if not self.dircache:
1102
+ self._dircache_from_items()
1103
+ return {k: self._ls_from_cache(k)[0] for k in r}
1104
+ else:
1105
+ return r
1106
+
1107
+ def info(self, path, **kwargs):
1108
+ out = self.references.get(path)
1109
+ if out is not None:
1110
+ if isinstance(out, (str, bytes)):
1111
+ # decode base64 here
1112
+ return {"name": path, "type": "file", "size": len(out)}
1113
+ elif len(out) > 1:
1114
+ return {"name": path, "type": "file", "size": out[2]}
1115
+ else:
1116
+ out0 = [{"name": path, "type": "file", "size": None}]
1117
+ else:
1118
+ out = self.ls(path, True)
1119
+ out0 = [o for o in out if o["name"] == path]
1120
+ if not out0:
1121
+ return {"name": path, "type": "directory", "size": 0}
1122
+ if out0[0]["size"] is None:
1123
+ # if this is a whole remote file, update size using remote FS
1124
+ prot, _ = split_protocol(self.references[path][0])
1125
+ out0[0]["size"] = self.fss[prot].size(self.references[path][0])
1126
+ return out0[0]
1127
+
1128
+ async def _info(self, path, **kwargs): # calls fast sync code
1129
+ return self.info(path)
1130
+
1131
+ async def _rm_file(self, path, **kwargs):
1132
+ self.references.pop(
1133
+ path, None
1134
+ ) # ignores FileNotFound, just as well for directories
1135
+ self.dircache.clear() # this is a bit heavy handed
1136
+
1137
+ async def _pipe_file(self, path, data):
1138
+ # can be str or bytes
1139
+ self.references[path] = data
1140
+ self.dircache.clear() # this is a bit heavy handed
1141
+
1142
+ async def _put_file(self, lpath, rpath, **kwargs):
1143
+ # puts binary
1144
+ with open(lpath, "rb") as f:
1145
+ self.references[rpath] = f.read()
1146
+ self.dircache.clear() # this is a bit heavy handed
1147
+
1148
+ def save_json(self, url, **storage_options):
1149
+ """Write modified references into new location"""
1150
+ out = {}
1151
+ for k, v in self.references.items():
1152
+ if isinstance(v, bytes):
1153
+ try:
1154
+ out[k] = v.decode("ascii")
1155
+ except UnicodeDecodeError:
1156
+ out[k] = (b"base64:" + base64.b64encode(v)).decode()
1157
+ else:
1158
+ out[k] = v
1159
+ with fsspec.open(url, "wb", **storage_options) as f:
1160
+ f.write(json.dumps({"version": 1, "refs": out}).encode())
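For context, a minimal sketch of the in-memory (spec version 0) reference dict accepted by the ReferenceFileSystem above. The target file and byte ranges are placeholders created on the spot so the example resolves locally:

import fsspec

# build a small local target so the byte-range references can actually resolve
with open("target.bin", "wb") as f:
    f.write(b"0123456789abcdefghijklmnopqrstuv")

refs = {
    "a/inline.txt": b"bytes stored directly in the reference set",
    "a/slice.bin": ["target.bin", 4, 8],   # [target_url, offset, size]
    "a/whole.bin": ["target.bin"],         # whole-file reference
}

fs = fsspec.filesystem("reference", fo=refs, remote_protocol="file")
print(fs.ls("a", detail=False))   # the three keys under "a"
print(fs.cat("a/inline.txt"))     # the inline bytes
print(fs.cat("a/slice.bin"))      # b"456789ab": 8 bytes starting at offset 4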
.venv/Lib/site-packages/fsspec/implementations/sftp.py ADDED
@@ -0,0 +1,180 @@
1
+ import datetime
2
+ import logging
3
+ import os
4
+ import types
5
+ import uuid
6
+ from stat import S_ISDIR, S_ISLNK
7
+
8
+ import paramiko
9
+
10
+ from .. import AbstractFileSystem
11
+ from ..utils import infer_storage_options
12
+
13
+ logger = logging.getLogger("fsspec.sftp")
14
+
15
+
16
+ class SFTPFileSystem(AbstractFileSystem):
17
+ """Files over SFTP/SSH
18
+
19
+ Peer-to-peer filesystem over SSH using paramiko.
20
+
21
+ Note: if using this with the ``open`` or ``open_files``, with full URLs,
22
+ there is no way to tell if a path is relative, so all paths are assumed
23
+ to be absolute.
24
+ """
25
+
26
+ protocol = "sftp", "ssh"
27
+
28
+ def __init__(self, host, **ssh_kwargs):
29
+ """
30
+
31
+ Parameters
32
+ ----------
33
+ host: str
34
+ Hostname or IP as a string
35
+ temppath: str
36
+ Location on the server to put files, when within a transaction
37
+ ssh_kwargs: dict
38
+ Parameters passed on to connection. See details in
39
+ https://docs.paramiko.org/en/3.3/api/client.html#paramiko.client.SSHClient.connect
40
+ May include port, username, password...
41
+ """
42
+ if self._cached:
43
+ return
44
+ super().__init__(**ssh_kwargs)
45
+ self.temppath = ssh_kwargs.pop("temppath", "/tmp") # remote temp directory
46
+ self.host = host
47
+ self.ssh_kwargs = ssh_kwargs
48
+ self._connect()
49
+
50
+ def _connect(self):
51
+ logger.debug("Connecting to SFTP server %s", self.host)
52
+ self.client = paramiko.SSHClient()
53
+ self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
54
+ self.client.connect(self.host, **self.ssh_kwargs)
55
+ self.ftp = self.client.open_sftp()
56
+
57
+ @classmethod
58
+ def _strip_protocol(cls, path):
59
+ return infer_storage_options(path)["path"]
60
+
61
+ @staticmethod
62
+ def _get_kwargs_from_urls(urlpath):
63
+ out = infer_storage_options(urlpath)
64
+ out.pop("path", None)
65
+ out.pop("protocol", None)
66
+ return out
67
+
68
+ def mkdir(self, path, create_parents=True, mode=511):
69
+ logger.debug("Creating folder %s", path)
70
+ if self.exists(path):
71
+ raise FileExistsError(f"File exists: {path}")
72
+
73
+ if create_parents:
74
+ self.makedirs(path)
75
+ else:
76
+ self.ftp.mkdir(path, mode)
77
+
78
+ def makedirs(self, path, exist_ok=False, mode=511):
79
+ if self.exists(path) and not exist_ok:
80
+ raise FileExistsError(f"File exists: {path}")
81
+
82
+ parts = path.split("/")
83
+ new_path = "/" if path[:1] == "/" else ""
84
+
85
+ for part in parts:
86
+ if part:
87
+ new_path = f"{new_path}/{part}" if new_path else part
88
+ if not self.exists(new_path):
89
+ self.ftp.mkdir(new_path, mode)
90
+
91
+ def rmdir(self, path):
92
+ logger.debug("Removing folder %s", path)
93
+ self.ftp.rmdir(path)
94
+
95
+ def info(self, path):
96
+ stat = self._decode_stat(self.ftp.stat(path))
97
+ stat["name"] = path
98
+ return stat
99
+
100
+ @staticmethod
101
+ def _decode_stat(stat, parent_path=None):
102
+ if S_ISDIR(stat.st_mode):
103
+ t = "directory"
104
+ elif S_ISLNK(stat.st_mode):
105
+ t = "link"
106
+ else:
107
+ t = "file"
108
+ out = {
109
+ "name": "",
110
+ "size": stat.st_size,
111
+ "type": t,
112
+ "uid": stat.st_uid,
113
+ "gid": stat.st_gid,
114
+ "time": datetime.datetime.fromtimestamp(
115
+ stat.st_atime, tz=datetime.timezone.utc
116
+ ),
117
+ "mtime": datetime.datetime.fromtimestamp(
118
+ stat.st_mtime, tz=datetime.timezone.utc
119
+ ),
120
+ }
121
+ if parent_path:
122
+ out["name"] = "/".join([parent_path.rstrip("/"), stat.filename])
123
+ return out
124
+
125
+ def ls(self, path, detail=False):
126
+ logger.debug("Listing folder %s", path)
127
+ stats = [self._decode_stat(stat, path) for stat in self.ftp.listdir_iter(path)]
128
+ if detail:
129
+ return stats
130
+ else:
131
+ paths = [stat["name"] for stat in stats]
132
+ return sorted(paths)
133
+
134
+ def put(self, lpath, rpath, callback=None, **kwargs):
135
+ logger.debug("Put file %s into %s", lpath, rpath)
136
+ self.ftp.put(lpath, rpath)
137
+
138
+ def get_file(self, rpath, lpath, **kwargs):
139
+ if self.isdir(rpath):
140
+ os.makedirs(lpath, exist_ok=True)
141
+ else:
142
+ self.ftp.get(self._strip_protocol(rpath), lpath)
143
+
144
+ def _open(self, path, mode="rb", block_size=None, **kwargs):
145
+ """
146
+ block_size: int or None
147
+ If 0, no buffering, if 1, line buffering, if >1, buffer that many
148
+ bytes, if None use default from paramiko.
149
+ """
150
+ logger.debug("Opening file %s", path)
151
+ if kwargs.get("autocommit", True) is False:
152
+ # writes to temporary file, move on commit
153
+ path2 = "/".join([self.temppath, str(uuid.uuid4())])
154
+ f = self.ftp.open(path2, mode, bufsize=block_size if block_size else -1)
155
+ f.temppath = path2
156
+ f.targetpath = path
157
+ f.fs = self
158
+ f.commit = types.MethodType(commit_a_file, f)
159
+ f.discard = types.MethodType(discard_a_file, f)
160
+ else:
161
+ f = self.ftp.open(path, mode, bufsize=block_size if block_size else -1)
162
+ return f
163
+
164
+ def _rm(self, path):
165
+ if self.isdir(path):
166
+ self.ftp.rmdir(path)
167
+ else:
168
+ self.ftp.remove(path)
169
+
170
+ def mv(self, old, new):
171
+ logger.debug("Renaming %s into %s", old, new)
172
+ self.ftp.posix_rename(old, new)
173
+
174
+
175
+ def commit_a_file(self):
176
+ self.fs.mv(self.temppath, self.targetpath)
177
+
178
+
179
+ def discard_a_file(self):
180
+ self.fs._rm(self.temppath)
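A brief usage sketch of the SFTPFileSystem above; the host, credentials and paths are placeholder assumptions and require a reachable SSH server:

import fsspec

fs = fsspec.filesystem(
    "sftp",
    host="sftp.example.com",
    username="user",
    password="secret",   # any paramiko SSHClient.connect() keyword is accepted here
)

fs.makedirs("/upload/incoming", exist_ok=True)
fs.put("local.csv", "/upload/incoming/data.csv")
print(fs.ls("/upload/incoming", detail=False))

# reads go through paramiko's SFTP file object
with fs.open("/upload/incoming/data.csv", "rb") as f:
    header = f.readline()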
.venv/Lib/site-packages/fsspec/implementations/smb.py ADDED
@@ -0,0 +1,324 @@
1
+ """
2
+ This module contains SMBFileSystem class responsible for handling access to
3
+ Windows Samba network shares by using package smbprotocol
4
+ """
5
+
6
+ import datetime
7
+ import uuid
8
+ from stat import S_ISDIR, S_ISLNK
9
+
10
+ import smbclient
11
+
12
+ from .. import AbstractFileSystem
13
+ from ..utils import infer_storage_options
14
+
15
+ # ! pylint: disable=bad-continuation
16
+
17
+
18
+ class SMBFileSystem(AbstractFileSystem):
19
+ """Allow reading and writing to Windows and Samba network shares.
20
+
21
+ When using `fsspec.open()` for getting a file-like object the URI
22
+ should be specified as this format:
23
+ ``smb://workgroup;user:password@server:port/share/folder/file.csv``.
24
+
25
+ Example::
26
+
27
+ >>> import fsspec
28
+ >>> with fsspec.open(
29
+ ... 'smb://myuser:mypassword@myhost.com/' 'share/folder/file.csv'
30
+ ... ) as smbfile:
31
+ ... df = pd.read_csv(smbfile, sep='|', header=None)
32
+
33
+ Note that you need to pass in a valid hostname or IP address for the host
34
+ component of the URL. Do not use the Windows/NetBIOS machine name for the
35
+ host component.
36
+
37
+ The first component of the path in the URL points to the name of the shared
38
+ folder. Subsequent path components will point to the directory/folder/file.
39
+
40
+ The URL components ``workgroup`` , ``user``, ``password`` and ``port`` may be
41
+ optional.
42
+
43
+ .. note::
44
+
45
+ This implementation requires `smbprotocol`_ to be installed, e.g.::
46
+
47
+ $ pip install smbprotocol
48
+ # or
49
+ # pip install smbprotocol[kerberos]
50
+
51
+ .. _smbprotocol: https://github.com/jborean93/smbprotocol#requirements
52
+
53
+ Note: if using this with the ``open`` or ``open_files``, with full URLs,
54
+ there is no way to tell if a path is relative, so all paths are assumed
55
+ to be absolute.
56
+ """
57
+
58
+ protocol = "smb"
59
+
60
+ # pylint: disable=too-many-arguments
61
+ def __init__(
62
+ self,
63
+ host,
64
+ port=None,
65
+ username=None,
66
+ password=None,
67
+ timeout=60,
68
+ encrypt=None,
69
+ share_access=None,
70
+ **kwargs,
71
+ ):
72
+ """
73
+ You can use _get_kwargs_from_urls to get some kwargs from
74
+ a reasonable SMB url.
75
+
76
+ Authentication will be anonymous or integrated if username/password are not
77
+ given.
78
+
79
+ Parameters
80
+ ----------
81
+ host: str
82
+ The remote server name/ip to connect to
83
+ port: int or None
84
+ Port to connect with. Usually 445, sometimes 139.
85
+ username: str or None
86
+ Username to connect with. Required if Kerberos auth is not being used.
87
+ password: str or None
88
+ User's password on the server, if using username
89
+ timeout: int
90
+ Connection timeout in seconds
91
+ encrypt: bool
92
+ Whether to force encryption or not, once this has been set to True
93
+ the session cannot be changed back to False.
94
+ share_access: str or None
95
+ Specifies the default access applied to file open operations
96
+ performed with this file system object.
97
+ This affects whether other processes can concurrently open a handle
98
+ to the same file.
99
+
100
+ - None (the default): exclusively locks the file until closed.
101
+ - 'r': Allow other handles to be opened with read access.
102
+ - 'w': Allow other handles to be opened with write access.
103
+ - 'd': Allow other handles to be opened with delete access.
104
+ """
105
+ super().__init__(**kwargs)
106
+ self.host = host
107
+ self.port = port
108
+ self.username = username
109
+ self.password = password
110
+ self.timeout = timeout
111
+ self.encrypt = encrypt
112
+ self.temppath = kwargs.pop("temppath", "")
113
+ self.share_access = share_access
114
+ self._connect()
115
+
116
+ @property
117
+ def _port(self):
118
+ return 445 if self.port is None else self.port
119
+
120
+ def _connect(self):
121
+ smbclient.register_session(
122
+ self.host,
123
+ username=self.username,
124
+ password=self.password,
125
+ port=self._port,
126
+ encrypt=self.encrypt,
127
+ connection_timeout=self.timeout,
128
+ )
129
+
130
+ @classmethod
131
+ def _strip_protocol(cls, path):
132
+ return infer_storage_options(path)["path"]
133
+
134
+ @staticmethod
135
+ def _get_kwargs_from_urls(path):
136
+ # smb://workgroup;user:password@host:port/share/folder/file.csv
137
+ out = infer_storage_options(path)
138
+ out.pop("path", None)
139
+ out.pop("protocol", None)
140
+ return out
141
+
142
+ def mkdir(self, path, create_parents=True, **kwargs):
143
+ wpath = _as_unc_path(self.host, path)
144
+ if create_parents:
145
+ smbclient.makedirs(wpath, exist_ok=False, port=self._port, **kwargs)
146
+ else:
147
+ smbclient.mkdir(wpath, port=self._port, **kwargs)
148
+
149
+ def makedirs(self, path, exist_ok=False):
150
+ if _share_has_path(path):
151
+ wpath = _as_unc_path(self.host, path)
152
+ smbclient.makedirs(wpath, exist_ok=exist_ok, port=self._port)
153
+
154
+ def rmdir(self, path):
155
+ if _share_has_path(path):
156
+ wpath = _as_unc_path(self.host, path)
157
+ smbclient.rmdir(wpath, port=self._port)
158
+
159
+ def info(self, path, **kwargs):
160
+ wpath = _as_unc_path(self.host, path)
161
+ stats = smbclient.stat(wpath, port=self._port, **kwargs)
162
+ if S_ISDIR(stats.st_mode):
163
+ stype = "directory"
164
+ elif S_ISLNK(stats.st_mode):
165
+ stype = "link"
166
+ else:
167
+ stype = "file"
168
+ res = {
169
+ "name": path + "/" if stype == "directory" else path,
170
+ "size": stats.st_size,
171
+ "type": stype,
172
+ "uid": stats.st_uid,
173
+ "gid": stats.st_gid,
174
+ "time": stats.st_atime,
175
+ "mtime": stats.st_mtime,
176
+ }
177
+ return res
178
+
179
+ def created(self, path):
180
+ """Return the created timestamp of a file as a datetime.datetime"""
181
+ wpath = _as_unc_path(self.host, path)
182
+ stats = smbclient.stat(wpath, port=self._port)
183
+ return datetime.datetime.fromtimestamp(stats.st_ctime, tz=datetime.timezone.utc)
184
+
185
+ def modified(self, path):
186
+ """Return the modified timestamp of a file as a datetime.datetime"""
187
+ wpath = _as_unc_path(self.host, path)
188
+ stats = smbclient.stat(wpath, port=self._port)
189
+ return datetime.datetime.fromtimestamp(stats.st_mtime, tz=datetime.timezone.utc)
190
+
191
+ def ls(self, path, detail=True, **kwargs):
192
+ unc = _as_unc_path(self.host, path)
193
+ listed = smbclient.listdir(unc, port=self._port, **kwargs)
194
+ dirs = ["/".join([path.rstrip("/"), p]) for p in listed]
195
+ if detail:
196
+ dirs = [self.info(d) for d in dirs]
197
+ return dirs
198
+
199
+ # pylint: disable=too-many-arguments
200
+ def _open(
201
+ self,
202
+ path,
203
+ mode="rb",
204
+ block_size=-1,
205
+ autocommit=True,
206
+ cache_options=None,
207
+ **kwargs,
208
+ ):
209
+ """
210
+ block_size: int or None
211
+ If 0, no buffering, 1, line buffering, >1, buffer that many bytes
212
+
213
+ Notes
214
+ -----
215
+ By specifying 'share_access' in 'kwargs' it is possible to override the
216
+ default shared access setting applied in the constructor of this object.
217
+ """
218
+ bls = block_size if block_size is not None and block_size >= 0 else -1
219
+ wpath = _as_unc_path(self.host, path)
220
+ share_access = kwargs.pop("share_access", self.share_access)
221
+ if "w" in mode and autocommit is False:
222
+ temp = _as_temp_path(self.host, path, self.temppath)
223
+ return SMBFileOpener(
224
+ wpath, temp, mode, port=self._port, block_size=bls, **kwargs
225
+ )
226
+ return smbclient.open_file(
227
+ wpath,
228
+ mode,
229
+ buffering=bls,
230
+ share_access=share_access,
231
+ port=self._port,
232
+ **kwargs,
233
+ )
234
+
235
+ def copy(self, path1, path2, **kwargs):
236
+ """Copy within two locations in the same filesystem"""
237
+ wpath1 = _as_unc_path(self.host, path1)
238
+ wpath2 = _as_unc_path(self.host, path2)
239
+ smbclient.copyfile(wpath1, wpath2, port=self._port, **kwargs)
240
+
241
+ def _rm(self, path):
242
+ if _share_has_path(path):
243
+ wpath = _as_unc_path(self.host, path)
244
+ stats = smbclient.stat(wpath, port=self._port)
245
+ if S_ISDIR(stats.st_mode):
246
+ smbclient.rmdir(wpath, port=self._port)
247
+ else:
248
+ smbclient.remove(wpath, port=self._port)
249
+
250
+ def mv(self, path1, path2, recursive=None, maxdepth=None, **kwargs):
251
+ wpath1 = _as_unc_path(self.host, path1)
252
+ wpath2 = _as_unc_path(self.host, path2)
253
+ smbclient.rename(wpath1, wpath2, port=self._port, **kwargs)
254
+
255
+
256
+ def _as_unc_path(host, path):
257
+ rpath = path.replace("/", "\\")
258
+ unc = f"\\\\{host}{rpath}"
259
+ return unc
260
+
261
+
262
+ def _as_temp_path(host, path, temppath):
263
+ share = path.split("/")[1]
264
+ temp_file = f"/{share}{temppath}/{uuid.uuid4()}"
265
+ unc = _as_unc_path(host, temp_file)
266
+ return unc
267
+
268
+
269
+ def _share_has_path(path):
270
+ parts = path.count("/")
271
+ if path.endswith("/"):
272
+ return parts > 2
273
+ return parts > 1
274
+
275
+
276
+ class SMBFileOpener:
277
+ """writes to remote temporary file, move on commit"""
278
+
279
+ def __init__(self, path, temp, mode, port=445, block_size=-1, **kwargs):
280
+ self.path = path
281
+ self.temp = temp
282
+ self.mode = mode
283
+ self.block_size = block_size
284
+ self.kwargs = kwargs
285
+ self.smbfile = None
286
+ self._incontext = False
287
+ self.port = port
288
+ self._open()
289
+
290
+ def _open(self):
291
+ if self.smbfile is None or self.smbfile.closed:
292
+ self.smbfile = smbclient.open_file(
293
+ self.temp,
294
+ self.mode,
295
+ port=self.port,
296
+ buffering=self.block_size,
297
+ **self.kwargs,
298
+ )
299
+
300
+ def commit(self):
301
+ """Move temp file to definitive on success."""
302
+ # TODO: use transaction support in SMB protocol
303
+ smbclient.replace(self.temp, self.path, port=self.port)
304
+
305
+ def discard(self):
306
+ """Remove the temp file on failure."""
307
+ smbclient.remove(self.temp, port=self.port)
308
+
309
+ def __fspath__(self):
310
+ return self.path
311
+
312
+ def __iter__(self):
313
+ return self.smbfile.__iter__()
314
+
315
+ def __getattr__(self, item):
316
+ return getattr(self.smbfile, item)
317
+
318
+ def __enter__(self):
319
+ self._incontext = True
320
+ return self.smbfile.__enter__()
321
+
322
+ def __exit__(self, exc_type, exc_value, traceback):
323
+ self._incontext = False
324
+ self.smbfile.__exit__(exc_type, exc_value, traceback)
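A short usage sketch for the SMB implementation above; the server, share, folder and credentials are placeholders:

import fsspec

# URL form: host, user, password and port are parsed by _get_kwargs_from_urls
with fsspec.open(
    "smb://myuser:mypassword@fileserver.example.com/share/folder/file.csv", "rb"
) as f:
    first_kb = f.read(1024)

# equivalent explicit construction
fs = fsspec.filesystem(
    "smb",
    host="fileserver.example.com",
    username="myuser",
    password="mypassword",
    share_access="r",    # let other clients keep read handles open
)
print(fs.ls("/share/folder", detail=False))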
.venv/Lib/site-packages/fsspec/implementations/tar.py ADDED
@@ -0,0 +1,124 @@
1
+ import logging
2
+ import tarfile
3
+
4
+ import fsspec
5
+ from fsspec.archive import AbstractArchiveFileSystem
6
+ from fsspec.compression import compr
7
+ from fsspec.utils import infer_compression
8
+
9
+ typemap = {b"0": "file", b"5": "directory"}
10
+
11
+ logger = logging.getLogger("tar")
12
+
13
+
14
+ class TarFileSystem(AbstractArchiveFileSystem):
15
+ """Compressed Tar archives as a file-system (read-only)
16
+
17
+ Supports the following formats:
18
+ tar.gz, tar.bz2, tar.xz
19
+ """
20
+
21
+ root_marker = ""
22
+ protocol = "tar"
23
+ cachable = False
24
+
25
+ def __init__(
26
+ self,
27
+ fo="",
28
+ index_store=None,
29
+ target_options=None,
30
+ target_protocol=None,
31
+ compression=None,
32
+ **kwargs,
33
+ ):
34
+ super().__init__(**kwargs)
35
+ target_options = target_options or {}
36
+
37
+ if isinstance(fo, str):
38
+ self.of = fsspec.open(fo, protocol=target_protocol, **target_options)
39
+ fo = self.of.open() # keep the reference
40
+
41
+ # Try to infer compression.
42
+ if compression is None:
43
+ name = None
44
+
45
+ # Try different ways to get hold of the filename. `fo` might either
46
+ # be a `fsspec.LocalFileOpener`, an `io.BufferedReader` or an
47
+ # `fsspec.AbstractFileSystem` instance.
48
+ try:
49
+ # Amended io.BufferedReader or similar.
50
+ # This uses a "protocol extension" where original filenames are
51
+ # propagated to archive-like filesystems in order to let them
52
+ # infer the right compression appropriately.
53
+ if hasattr(fo, "original"):
54
+ name = fo.original
55
+
56
+ # fsspec.LocalFileOpener
57
+ elif hasattr(fo, "path"):
58
+ name = fo.path
59
+
60
+ # io.BufferedReader
61
+ elif hasattr(fo, "name"):
62
+ name = fo.name
63
+
64
+ # fsspec.AbstractFileSystem
65
+ elif hasattr(fo, "info"):
66
+ name = fo.info()["name"]
67
+
68
+ except Exception as ex:
69
+ logger.warning(
70
+ f"Unable to determine file name, not inferring compression: {ex}"
71
+ )
72
+
73
+ if name is not None:
74
+ compression = infer_compression(name)
75
+ logger.info(f"Inferred compression {compression} from file name {name}")
76
+
77
+ if compression is not None:
78
+ # TODO: tarfile already implements compression with modes like "'r:gz'",
79
+ # but would seeking to an offset within the file still work then?
80
+ fo = compr[compression](fo)
81
+
82
+ self._fo_ref = fo
83
+ self.fo = fo # the whole instance is a context
84
+ self.tar = tarfile.TarFile(fileobj=self.fo)
85
+ self.dir_cache = None
86
+
87
+ self.index_store = index_store
88
+ self.index = None
89
+ self._index()
90
+
91
+ def _index(self):
92
+ # TODO: load and set saved index, if exists
93
+ out = {}
94
+ for ti in self.tar:
95
+ info = ti.get_info()
96
+ info["type"] = typemap.get(info["type"], "file")
97
+ name = ti.get_info()["name"].rstrip("/")
98
+ out[name] = (info, ti.offset_data)
99
+
100
+ self.index = out
101
+ # TODO: save index to self.index_store here, if set
102
+
103
+ def _get_dirs(self):
104
+ if self.dir_cache is not None:
105
+ return
106
+
107
+ # This enables ls to get directories as children as well as files
108
+ self.dir_cache = {
109
+ dirname: {"name": dirname, "size": 0, "type": "directory"}
110
+ for dirname in self._all_dirnames(self.tar.getnames())
111
+ }
112
+ for member in self.tar.getmembers():
113
+ info = member.get_info()
114
+ info["name"] = info["name"].rstrip("/")
115
+ info["type"] = typemap.get(info["type"], "file")
116
+ self.dir_cache[info["name"]] = info
117
+
118
+ def _open(self, path, mode="rb", **kwargs):
119
+ if mode != "rb":
120
+ raise ValueError("Read-only filesystem implementation")
121
+ details, offset = self.index[path]
122
+ if details["type"] != "file":
123
+ raise ValueError("Can only handle regular files")
124
+ return self.tar.extractfile(path)
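As a rough usage sketch of the class above (the archive name is a placeholder; compression is inferred from the filename suffix as implemented in __init__):

    import fsspec

    # Open a (possibly compressed) archive read-only; "data.tar.gz" is hypothetical.
    fs = fsspec.filesystem("tar", fo="data.tar.gz")
    print(fs.ls("/"))                      # directory entries come from _get_dirs()
    with fs.open("inner/file.txt", "rb") as f:
        print(f.read())

    # The same archive can also be addressed with fsspec URL chaining:
    with fsspec.open("tar://inner/file.txt::data.tar.gz", "rb") as f:
        print(f.read())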
.venv/Lib/site-packages/fsspec/implementations/webhdfs.py ADDED
@@ -0,0 +1,486 @@
1
+ # https://hadoop.apache.org/docs/r1.0.4/webhdfs.html
2
+
3
+ import logging
4
+ import os
5
+ import secrets
6
+ import shutil
7
+ import tempfile
8
+ import uuid
9
+ from contextlib import suppress
10
+ from urllib.parse import quote
11
+
12
+ import requests
13
+
14
+ from ..spec import AbstractBufferedFile, AbstractFileSystem
15
+ from ..utils import infer_storage_options, tokenize
16
+
17
+ logger = logging.getLogger("webhdfs")
18
+
19
+
20
+ class WebHDFS(AbstractFileSystem):
21
+ """
22
+ Interface to HDFS over HTTP using the WebHDFS API; also supports HttpFS gateways.
23
+
24
+ Four auth mechanisms are supported:
25
+
26
+ insecure: no auth is done, and the user is assumed to be whoever they
27
+ say they are (parameter ``user``), or a predefined value such as
28
+ "dr.who" if not given
29
+ spnego: when kerberos authentication is enabled, auth is negotiated by
30
+ requests_kerberos https://github.com/requests/requests-kerberos .
31
+ This establishes a session based on existing kinit login and/or
32
+ specified principal/password; parameters are passed with ``kerb_kwargs``
33
+ token: uses an existing Hadoop delegation token from another secured
34
+ service. Indeed, this client can also generate such tokens when
35
+ not insecure. Note that tokens expire, but can be renewed (by a
36
+ previously specified user) and may allow for proxying.
37
+ basic-auth: used when both parameter ``user`` and parameter ``password``
38
+ are provided.
39
+
40
+ """
41
+
42
+ tempdir = str(tempfile.gettempdir())
43
+ protocol = "webhdfs", "webHDFS"
44
+
45
+ def __init__(
46
+ self,
47
+ host,
48
+ port=50070,
49
+ kerberos=False,
50
+ token=None,
51
+ user=None,
52
+ password=None,
53
+ proxy_to=None,
54
+ kerb_kwargs=None,
55
+ data_proxy=None,
56
+ use_https=False,
57
+ session_cert=None,
58
+ session_verify=True,
59
+ **kwargs,
60
+ ):
61
+ """
62
+ Parameters
63
+ ----------
64
+ host: str
65
+ Name-node address
66
+ port: int
67
+ Port for webHDFS
68
+ kerberos: bool
69
+ Whether to authenticate with kerberos for this connection
70
+ token: str or None
71
+ If given, use this token on every call to authenticate. A user
72
+ and user-proxy may be encoded in the token and should not be also
73
+ given
74
+ user: str or None
75
+ If given, assert the user name to connect with
76
+ password: str or None
77
+ If given, assert the password to use for basic auth. If password
78
+ is provided, user must be provided also
79
+ proxy_to: str or None
80
+ If given, the user has the authority to proxy, and this value is
81
+ the user in whose name actions are taken
82
+ kerb_kwargs: dict
83
+ Any extra arguments for HTTPKerberosAuth, see
84
+ `<https://github.com/requests/requests-kerberos/blob/master/requests_kerberos/kerberos_.py>`_
85
+ data_proxy: dict, callable or None
86
+ If given, map data-node addresses. This can be necessary if the
87
+ HDFS cluster is behind a proxy, running on Docker or otherwise has
88
+ a mismatch between the host-names given by the name-node and the
89
+ address by which to refer to them from the client. If a dict,
90
+ maps host names ``host->data_proxy[host]``; if a callable, full
91
+ URLs are passed, and function must conform to
92
+ ``url->data_proxy(url)``.
93
+ use_https: bool
94
+ Whether to connect to the Name-node using HTTPS instead of HTTP
95
+ session_cert: str or Tuple[str, str] or None
96
+ Path to a certificate file, or tuple of (cert, key) files to use
97
+ for the requests.Session
98
+ session_verify: str, bool or None
99
+ Path to a certificate file to use for verifying the requests.Session.
100
+ kwargs
101
+ """
102
+ if self._cached:
103
+ return
104
+ super().__init__(**kwargs)
105
+ self.url = (
106
+ f"{'https' if use_https else 'http'}://{host}:{port}/webhdfs/v1" # noqa
107
+ )
108
+ self.kerb = kerberos
109
+ self.kerb_kwargs = kerb_kwargs or {}
110
+ self.pars = {}
111
+ self.proxy = data_proxy or {}
112
+ if token is not None:
113
+ if user is not None or proxy_to is not None:
114
+ raise ValueError(
115
+ "If passing a delegation token, must not set "
116
+ "user or proxy_to, as these are encoded in the"
117
+ " token"
118
+ )
119
+ self.pars["delegation"] = token
120
+ self.user = user
121
+ self.password = password
122
+
123
+ if password is not None:
124
+ if user is None:
125
+ raise ValueError(
126
+ "If passing a password, the user must also be"
127
+ "set in order to set up the basic-auth"
128
+ )
129
+ else:
130
+ if user is not None:
131
+ self.pars["user.name"] = user
132
+
133
+ if proxy_to is not None:
134
+ self.pars["doas"] = proxy_to
135
+ if kerberos and user is not None:
136
+ raise ValueError(
137
+ "If using Kerberos auth, do not specify the "
138
+ "user, this is handled by kinit."
139
+ )
140
+
141
+ self.session_cert = session_cert
142
+ self.session_verify = session_verify
143
+
144
+ self._connect()
145
+
146
+ self._fsid = f"webhdfs_{tokenize(host, port)}"
147
+
148
+ @property
149
+ def fsid(self):
150
+ return self._fsid
151
+
152
+ def _connect(self):
153
+ self.session = requests.Session()
154
+
155
+ if self.session_cert:
156
+ self.session.cert = self.session_cert
157
+
158
+ self.session.verify = self.session_verify
159
+
160
+ if self.kerb:
161
+ from requests_kerberos import HTTPKerberosAuth
162
+
163
+ self.session.auth = HTTPKerberosAuth(**self.kerb_kwargs)
164
+
165
+ if self.user is not None and self.password is not None:
166
+ from requests.auth import HTTPBasicAuth
167
+
168
+ self.session.auth = HTTPBasicAuth(self.user, self.password)
169
+
170
+ def _call(self, op, method="get", path=None, data=None, redirect=True, **kwargs):
171
+ url = self._apply_proxy(self.url + quote(path or "", safe="/="))
172
+ args = kwargs.copy()
173
+ args.update(self.pars)
174
+ args["op"] = op.upper()
175
+ logger.debug("sending %s with %s", url, method)
176
+ out = self.session.request(
177
+ method=method.upper(),
178
+ url=url,
179
+ params=args,
180
+ data=data,
181
+ allow_redirects=redirect,
182
+ )
183
+ if out.status_code in [400, 401, 403, 404, 500]:
184
+ try:
185
+ err = out.json()
186
+ msg = err["RemoteException"]["message"]
187
+ exp = err["RemoteException"]["exception"]
188
+ except (ValueError, KeyError):
189
+ pass
190
+ else:
191
+ if exp in ["IllegalArgumentException", "UnsupportedOperationException"]:
192
+ raise ValueError(msg)
193
+ elif exp in ["SecurityException", "AccessControlException"]:
194
+ raise PermissionError(msg)
195
+ elif exp in ["FileNotFoundException"]:
196
+ raise FileNotFoundError(msg)
197
+ else:
198
+ raise RuntimeError(msg)
199
+ out.raise_for_status()
200
+ return out
201
+
202
+ def _open(
203
+ self,
204
+ path,
205
+ mode="rb",
206
+ block_size=None,
207
+ autocommit=True,
208
+ replication=None,
209
+ permissions=None,
210
+ **kwargs,
211
+ ):
212
+ """
213
+
214
+ Parameters
215
+ ----------
216
+ path: str
217
+ File location
218
+ mode: str
219
+ 'rb', 'wb', etc.
220
+ block_size: int
221
+ Client buffer size for read-ahead or write buffer
222
+ autocommit: bool
223
+ If False, writes to temporary file that only gets put in final
224
+ location upon commit
225
+ replication: int
226
+ Number of copies of file on the cluster, write mode only
227
+ permissions: str or int
228
+ posix permissions, write mode only
229
+ kwargs
230
+
231
+ Returns
232
+ -------
233
+ WebHDFile instance
234
+ """
235
+ block_size = block_size or self.blocksize
236
+ return WebHDFile(
237
+ self,
238
+ path,
239
+ mode=mode,
240
+ block_size=block_size,
241
+ tempdir=self.tempdir,
242
+ autocommit=autocommit,
243
+ replication=replication,
244
+ permissions=permissions,
245
+ )
246
+
247
+ @staticmethod
248
+ def _process_info(info):
249
+ info["type"] = info["type"].lower()
250
+ info["size"] = info["length"]
251
+ return info
252
+
253
+ @classmethod
254
+ def _strip_protocol(cls, path):
255
+ return infer_storage_options(path)["path"]
256
+
257
+ @staticmethod
258
+ def _get_kwargs_from_urls(urlpath):
259
+ out = infer_storage_options(urlpath)
260
+ out.pop("path", None)
261
+ out.pop("protocol", None)
262
+ if "username" in out:
263
+ out["user"] = out.pop("username")
264
+ return out
265
+
266
+ def info(self, path):
267
+ out = self._call("GETFILESTATUS", path=path)
268
+ info = out.json()["FileStatus"]
269
+ info["name"] = path
270
+ return self._process_info(info)
271
+
272
+ def ls(self, path, detail=False):
273
+ out = self._call("LISTSTATUS", path=path)
274
+ infos = out.json()["FileStatuses"]["FileStatus"]
275
+ for info in infos:
276
+ self._process_info(info)
277
+ info["name"] = path.rstrip("/") + "/" + info["pathSuffix"]
278
+ if detail:
279
+ return sorted(infos, key=lambda i: i["name"])
280
+ else:
281
+ return sorted(info["name"] for info in infos)
282
+
283
+ def content_summary(self, path):
284
+ """Total numbers of files, directories and bytes under path"""
285
+ out = self._call("GETCONTENTSUMMARY", path=path)
286
+ return out.json()["ContentSummary"]
287
+
288
+ def ukey(self, path):
289
+ """Checksum info of file, giving method and result"""
290
+ out = self._call("GETFILECHECKSUM", path=path, redirect=False)
291
+ if "Location" in out.headers:
292
+ location = self._apply_proxy(out.headers["Location"])
293
+ out2 = self.session.get(location)
294
+ out2.raise_for_status()
295
+ return out2.json()["FileChecksum"]
296
+ else:
297
+ out.raise_for_status()
298
+ return out.json()["FileChecksum"]
299
+
300
+ def home_directory(self):
301
+ """Get user's home directory"""
302
+ out = self._call("GETHOMEDIRECTORY")
303
+ return out.json()["Path"]
304
+
305
+ def get_delegation_token(self, renewer=None):
306
+ """Retrieve token which can give the same authority to other uses
307
+
308
+ Parameters
309
+ ----------
310
+ renewer: str or None
311
+ User who may use this token; if None, will be current user
312
+ """
313
+ if renewer:
314
+ out = self._call("GETDELEGATIONTOKEN", renewer=renewer)
315
+ else:
316
+ out = self._call("GETDELEGATIONTOKEN")
317
+ t = out.json()["Token"]
318
+ if t is None:
319
+ raise ValueError("No token available for this user/security context")
320
+ return t["urlString"]
321
+
322
+ def renew_delegation_token(self, token):
323
+ """Make token live longer. Returns new expiry time"""
324
+ out = self._call("RENEWDELEGATIONTOKEN", method="put", token=token)
325
+ return out.json()["long"]
326
+
327
+ def cancel_delegation_token(self, token):
328
+ """Stop the token from being useful"""
329
+ self._call("CANCELDELEGATIONTOKEN", method="put", token=token)
330
+
331
+ def chmod(self, path, mod):
332
+ """Set the permission at path
333
+
334
+ Parameters
335
+ ----------
336
+ path: str
337
+ location to set (file or directory)
338
+ mod: str or int
339
+ posix representation of permission, given as an octal string, e.g. '777'
340
+ or 0o777
341
+ """
342
+ self._call("SETPERMISSION", method="put", path=path, permission=mod)
343
+
344
+ def chown(self, path, owner=None, group=None):
345
+ """Change owning user and/or group"""
346
+ kwargs = {}
347
+ if owner is not None:
348
+ kwargs["owner"] = owner
349
+ if group is not None:
350
+ kwargs["group"] = group
351
+ self._call("SETOWNER", method="put", path=path, **kwargs)
352
+
353
+ def set_replication(self, path, replication):
354
+ """
355
+ Set file replication factor
356
+
357
+ Parameters
358
+ ----------
359
+ path: str
360
+ File location (not for directories)
361
+ replication: int
362
+ Number of copies of file on the cluster. Should be smaller than
363
+ number of data nodes; normally 3 on most systems.
364
+ """
365
+ self._call("SETREPLICATION", path=path, method="put", replication=replication)
366
+
367
+ def mkdir(self, path, **kwargs):
368
+ self._call("MKDIRS", method="put", path=path)
369
+
370
+ def makedirs(self, path, exist_ok=False):
371
+ if exist_ok is False and self.exists(path):
372
+ raise FileExistsError(path)
373
+ self.mkdir(path)
374
+
375
+ def mv(self, path1, path2, **kwargs):
376
+ self._call("RENAME", method="put", path=path1, destination=path2)
377
+
378
+ def rm(self, path, recursive=False, **kwargs):
379
+ self._call(
380
+ "DELETE",
381
+ method="delete",
382
+ path=path,
383
+ recursive="true" if recursive else "false",
384
+ )
385
+
386
+ def rm_file(self, path, **kwargs):
387
+ self.rm(path)
388
+
389
+ def cp_file(self, lpath, rpath, **kwargs):
390
+ with self.open(lpath) as lstream:
391
+ tmp_fname = "/".join([self._parent(rpath), f".tmp.{secrets.token_hex(16)}"])
392
+ # Perform an atomic copy (stream to a temporary file and
393
+ # move it to the actual destination).
394
+ try:
395
+ with self.open(tmp_fname, "wb") as rstream:
396
+ shutil.copyfileobj(lstream, rstream)
397
+ self.mv(tmp_fname, rpath)
398
+ except BaseException: # noqa
399
+ with suppress(FileNotFoundError):
400
+ self.rm(tmp_fname)
401
+ raise
402
+
403
+ def _apply_proxy(self, location):
404
+ if self.proxy and callable(self.proxy):
405
+ location = self.proxy(location)
406
+ elif self.proxy:
407
+ # as a dict
408
+ for k, v in self.proxy.items():
409
+ location = location.replace(k, v, 1)
410
+ return location
411
+
412
+
413
+ class WebHDFile(AbstractBufferedFile):
414
+ """A file living in HDFS over webHDFS"""
415
+
416
+ def __init__(self, fs, path, **kwargs):
417
+ super().__init__(fs, path, **kwargs)
418
+ kwargs = kwargs.copy()
419
+ if kwargs.get("permissions", None) is None:
420
+ kwargs.pop("permissions", None)
421
+ if kwargs.get("replication", None) is None:
422
+ kwargs.pop("replication", None)
423
+ self.permissions = kwargs.pop("permissions", 511)
424
+ tempdir = kwargs.pop("tempdir")
425
+ if kwargs.pop("autocommit", False) is False:
426
+ self.target = self.path
427
+ self.path = os.path.join(tempdir, str(uuid.uuid4()))
428
+
429
+ def _upload_chunk(self, final=False):
430
+ """Write one part of a multi-block file upload
431
+
432
+ Parameters
433
+ ==========
434
+ final: bool
435
+ This is the last block, so should complete file, if
436
+ self.autocommit is True.
437
+ """
438
+ out = self.fs.session.post(
439
+ self.location,
440
+ data=self.buffer.getvalue(),
441
+ headers={"content-type": "application/octet-stream"},
442
+ )
443
+ out.raise_for_status()
444
+ return True
445
+
446
+ def _initiate_upload(self):
447
+ """Create remote file/upload"""
448
+ kwargs = self.kwargs.copy()
449
+ if "a" in self.mode:
450
+ op, method = "APPEND", "POST"
451
+ else:
452
+ op, method = "CREATE", "PUT"
453
+ kwargs["overwrite"] = "true"
454
+ out = self.fs._call(op, method, self.path, redirect=False, **kwargs)
455
+ location = self.fs._apply_proxy(out.headers["Location"])
456
+ if "w" in self.mode:
457
+ # create empty file to append to
458
+ out2 = self.fs.session.put(
459
+ location, headers={"content-type": "application/octet-stream"}
460
+ )
461
+ out2.raise_for_status()
462
+ # after creating empty file, change location to append to
463
+ out2 = self.fs._call("APPEND", "POST", self.path, redirect=False, **kwargs)
464
+ self.location = self.fs._apply_proxy(out2.headers["Location"])
465
+
466
+ def _fetch_range(self, start, end):
467
+ start = max(start, 0)
468
+ end = min(self.size, end)
469
+ if start >= end or start >= self.size:
470
+ return b""
471
+ out = self.fs._call(
472
+ "OPEN", path=self.path, offset=start, length=end - start, redirect=False
473
+ )
474
+ out.raise_for_status()
475
+ if "Location" in out.headers:
476
+ location = out.headers["Location"]
477
+ out2 = self.fs.session.get(self.fs._apply_proxy(location))
478
+ return out2.content
479
+ else:
480
+ return out.content
481
+
482
+ def commit(self):
483
+ self.fs.mv(self.path, self.target)
484
+
485
+ def discard(self):
486
+ self.fs.rm(self.path)
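A rough usage sketch of the filesystem above (host, user and paths are placeholders; a reachable WebHDFS or HttpFS endpoint is assumed):

    import fsspec

    # Port 50070 is the constructor default shown above; recent Hadoop name-nodes
    # typically use 9870, and HttpFS gateways 14000.
    fs = fsspec.filesystem("webhdfs", host="namenode.example.com", port=50070, user="hadoop")

    fs.mkdir("/tmp/demo")
    with fs.open("/tmp/demo/hello.txt", "wb") as f:   # writes go through WebHDFile
        f.write(b"hello webhdfs")
    print(fs.ls("/tmp/demo", detail=False))
    print(fs.info("/tmp/demo/hello.txt")["size"])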
.venv/Lib/site-packages/fsspec/implementations/zip.py ADDED
@@ -0,0 +1,133 @@
1
+ import zipfile
2
+
3
+ import fsspec
4
+ from fsspec.archive import AbstractArchiveFileSystem
5
+
6
+
7
+ class ZipFileSystem(AbstractArchiveFileSystem):
8
+ """Read/Write contents of ZIP archive as a file-system
9
+
10
+ Keeps file object open while instance lives.
11
+
12
+ This class is pickleable, but not necessarily thread-safe
13
+ """
14
+
15
+ root_marker = ""
16
+ protocol = "zip"
17
+ cachable = False
18
+
19
+ def __init__(
20
+ self,
21
+ fo="",
22
+ mode="r",
23
+ target_protocol=None,
24
+ target_options=None,
25
+ compression=zipfile.ZIP_STORED,
26
+ allowZip64=True,
27
+ compresslevel=None,
28
+ **kwargs,
29
+ ):
30
+ """
31
+ Parameters
32
+ ----------
33
+ fo: str or file-like
34
+ Contains ZIP, and must exist. If a str, will fetch file using
35
+ :meth:`~fsspec.open_files`, which must return one file exactly.
36
+ mode: str
37
+ Accept: "r", "w", "a"
38
+ target_protocol: str (optional)
39
+ If ``fo`` is a string, this value can be used to override the
40
+ FS protocol inferred from a URL
41
+ target_options: dict (optional)
42
+ Kwargs passed when instantiating the target FS, if ``fo`` is
43
+ a string.
44
+ compression, allowZip64, compresslevel: passed to ZipFile
45
+ Only relevant when creating a ZIP
46
+ """
47
+ super().__init__(self, **kwargs)
48
+ if mode not in set("rwa"):
49
+ raise ValueError(f"mode '{mode}' no understood")
50
+ self.mode = mode
51
+ if isinstance(fo, str):
52
+ if mode == "a":
53
+ m = "r+b"
54
+ else:
55
+ m = mode + "b"
56
+ fo = fsspec.open(
57
+ fo, mode=m, protocol=target_protocol, **(target_options or {})
58
+ )
59
+ self.of = fo
60
+ self.fo = fo.__enter__() # the whole instance is a context
61
+ self.zip = zipfile.ZipFile(
62
+ self.fo,
63
+ mode=mode,
64
+ compression=compression,
65
+ allowZip64=allowZip64,
66
+ compresslevel=compresslevel,
67
+ )
68
+ self.dir_cache = None
69
+
70
+ @classmethod
71
+ def _strip_protocol(cls, path):
72
+ # zip file paths are always relative to the archive root
73
+ return super()._strip_protocol(path).lstrip("/")
74
+
75
+ def __del__(self):
76
+ if hasattr(self, "zip"):
77
+ self.close()
78
+ del self.zip
79
+
80
+ def close(self):
81
+ """Commits any write changes to the file. Done on ``del`` too."""
82
+ self.zip.close()
83
+
84
+ def _get_dirs(self):
85
+ if self.dir_cache is None or self.mode in set("wa"):
86
+ # when writing, dir_cache is always in the ZipFile's attributes,
87
+ # not read from the file.
88
+ files = self.zip.infolist()
89
+ self.dir_cache = {
90
+ dirname.rstrip("/"): {
91
+ "name": dirname.rstrip("/"),
92
+ "size": 0,
93
+ "type": "directory",
94
+ }
95
+ for dirname in self._all_dirnames(self.zip.namelist())
96
+ }
97
+ for z in files:
98
+ f = {s: getattr(z, s, None) for s in zipfile.ZipInfo.__slots__}
99
+ f.update(
100
+ {
101
+ "name": z.filename.rstrip("/"),
102
+ "size": z.file_size,
103
+ "type": ("directory" if z.is_dir() else "file"),
104
+ }
105
+ )
106
+ self.dir_cache[f["name"]] = f
107
+
108
+ def pipe_file(self, path, value, **kwargs):
109
+ # override upstream, because we know the exact file size in this case
110
+ self.zip.writestr(path, value, **kwargs)
111
+
112
+ def _open(
113
+ self,
114
+ path,
115
+ mode="rb",
116
+ block_size=None,
117
+ autocommit=True,
118
+ cache_options=None,
119
+ **kwargs,
120
+ ):
121
+ path = self._strip_protocol(path)
122
+ if "r" in mode and self.mode in set("wa"):
123
+ if self.exists(path):
124
+ raise OSError("ZipFS can only be open for reading or writing, not both")
125
+ raise FileNotFoundError(path)
126
+ if "r" in self.mode and "w" in mode:
127
+ raise OSError("ZipFS can only be open for reading or writing, not both")
128
+ out = self.zip.open(path, mode.strip("b"))
129
+ if "r" in mode:
130
+ info = self.info(path)
131
+ out.size = info["size"]
132
+ out.name = info["name"]
133
+ return out
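A short sketch of both directions (the archive path is a placeholder; note that one instance cannot mix reading and writing, as enforced in _open above):

    import fsspec

    # Write a fresh archive; "example.zip" is hypothetical.
    fs = fsspec.filesystem("zip", fo="example.zip", mode="w")
    fs.pipe_file("folder/hello.txt", b"hello zip")   # size is known up front
    fs.close()                                       # flushes the central directory

    # Re-open read-only and inspect it.
    fs = fsspec.filesystem("zip", fo="example.zip")
    print(fs.ls("folder"))
    print(fs.cat("folder/hello.txt"))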
.venv/Lib/site-packages/fsspec/tests/abstract/__init__.py ADDED
@@ -0,0 +1,287 @@
1
+ import os
2
+ from hashlib import md5
3
+
4
+ import pytest
5
+
6
+ from fsspec.implementations.local import LocalFileSystem
7
+ from fsspec.tests.abstract.copy import AbstractCopyTests # noqa
8
+ from fsspec.tests.abstract.get import AbstractGetTests # noqa
9
+ from fsspec.tests.abstract.put import AbstractPutTests # noqa
10
+
11
+
12
+ class BaseAbstractFixtures:
13
+ """
14
+ Abstract base class containing fixtures that are used by but never need to
15
+ be overridden in derived filesystem-specific classes to run the abstract
16
+ tests on such filesystems.
17
+ """
18
+
19
+ @pytest.fixture
20
+ def fs_bulk_operations_scenario_0(self, fs, fs_join, fs_path):
21
+ """
22
+ Scenario on remote filesystem that is used for many cp/get/put tests.
23
+
24
+ Cleans up at the end of each test in which it is used.
25
+ """
26
+ source = self._bulk_operations_scenario_0(fs, fs_join, fs_path)
27
+ yield source
28
+ fs.rm(source, recursive=True)
29
+
30
+ @pytest.fixture
31
+ def fs_glob_edge_cases_files(self, fs, fs_join, fs_path):
32
+ """
33
+ Scenario on remote filesystem that is used for glob edge cases cp/get/put tests.
34
+
35
+ Cleans up at the end of each test in which it is used.
36
+ """
37
+ source = self._glob_edge_cases_files(fs, fs_join, fs_path)
38
+ yield source
39
+ fs.rm(source, recursive=True)
40
+
41
+ @pytest.fixture
42
+ def fs_dir_and_file_with_same_name_prefix(self, fs, fs_join, fs_path):
43
+ """
44
+ Scenario on remote filesystem that is used to check cp/get/put on directory
45
+ and file with the same name prefixes.
46
+
47
+ Cleans up at the end of each test in which it is used.
48
+ """
49
+ source = self._dir_and_file_with_same_name_prefix(fs, fs_join, fs_path)
50
+ yield source
51
+ fs.rm(source, recursive=True)
52
+
53
+ @pytest.fixture
54
+ def fs_10_files_with_hashed_names(self, fs, fs_join, fs_path):
55
+ """
56
+ Scenario on remote filesystem that is used to check cp/get/put files order
57
+ when source and destination are lists.
58
+
59
+ Cleans up at the end of each test in which it is used.
60
+ """
61
+ source = self._10_files_with_hashed_names(fs, fs_join, fs_path)
62
+ yield source
63
+ fs.rm(source, recursive=True)
64
+
65
+ @pytest.fixture
66
+ def fs_target(self, fs, fs_join, fs_path):
67
+ """
68
+ Return name of remote directory that does not yet exist to copy into.
69
+
70
+ Cleans up at the end of each test in which it is used.
71
+ """
72
+ target = fs_join(fs_path, "target")
73
+ yield target
74
+ if fs.exists(target):
75
+ fs.rm(target, recursive=True)
76
+
77
+ @pytest.fixture
78
+ def local_bulk_operations_scenario_0(self, local_fs, local_join, local_path):
79
+ """
80
+ Scenario on local filesystem that is used for many cp/get/put tests.
81
+
82
+ Cleans up at the end of each test in which it is used.
83
+ """
84
+ source = self._bulk_operations_scenario_0(local_fs, local_join, local_path)
85
+ yield source
86
+ local_fs.rm(source, recursive=True)
87
+
88
+ @pytest.fixture
89
+ def local_glob_edge_cases_files(self, local_fs, local_join, local_path):
90
+ """
91
+ Scenario on local filesystem that is used for glob edge cases cp/get/put tests.
92
+
93
+ Cleans up at the end of each test in which it is used.
94
+ """
95
+ source = self._glob_edge_cases_files(local_fs, local_join, local_path)
96
+ yield source
97
+ local_fs.rm(source, recursive=True)
98
+
99
+ @pytest.fixture
100
+ def local_dir_and_file_with_same_name_prefix(
101
+ self, local_fs, local_join, local_path
102
+ ):
103
+ """
104
+ Scenario on local filesystem that is used to check cp/get/put on directory
105
+ and file with the same name prefixes.
106
+
107
+ Cleans up at the end of each test in which it is used.
108
+ """
109
+ source = self._dir_and_file_with_same_name_prefix(
110
+ local_fs, local_join, local_path
111
+ )
112
+ yield source
113
+ local_fs.rm(source, recursive=True)
114
+
115
+ @pytest.fixture
116
+ def local_10_files_with_hashed_names(self, local_fs, local_join, local_path):
117
+ """
118
+ Scenario on local filesystem that is used to check cp/get/put files order
119
+ when source and destination are lists.
120
+
121
+ Cleans up at the end of each test in which it is used.
122
+ """
123
+ source = self._10_files_with_hashed_names(local_fs, local_join, local_path)
124
+ yield source
125
+ local_fs.rm(source, recursive=True)
126
+
127
+ @pytest.fixture
128
+ def local_target(self, local_fs, local_join, local_path):
129
+ """
130
+ Return name of local directory that does not yet exist to copy into.
131
+
132
+ Cleans up at the end of each test in which it is used.
133
+ """
134
+ target = local_join(local_path, "target")
135
+ yield target
136
+ if local_fs.exists(target):
137
+ local_fs.rm(target, recursive=True)
138
+
139
+ def _glob_edge_cases_files(self, some_fs, some_join, some_path):
140
+ """
141
+ Scenario that is used for glob edge cases cp/get/put tests.
142
+ Creates the following directory and file structure:
143
+
144
+ 📁 source
145
+ ├── 📄 file1
146
+ ├── 📄 file2
147
+ ├── 📁 subdir0
148
+ │ ├── 📄 subfile1
149
+ │ ├── 📄 subfile2
150
+ │ └── 📁 nesteddir
151
+ │ └── 📄 nestedfile
152
+ └── 📁 subdir1
153
+ ├── 📄 subfile1
154
+ ├── 📄 subfile2
155
+ └── 📁 nesteddir
156
+ └── 📄 nestedfile
157
+ """
158
+ source = some_join(some_path, "source")
159
+ some_fs.touch(some_join(source, "file1"))
160
+ some_fs.touch(some_join(source, "file2"))
161
+
162
+ for subdir_idx in range(2):
163
+ subdir = some_join(source, f"subdir{subdir_idx}")
164
+ nesteddir = some_join(subdir, "nesteddir")
165
+ some_fs.makedirs(nesteddir)
166
+ some_fs.touch(some_join(subdir, "subfile1"))
167
+ some_fs.touch(some_join(subdir, "subfile2"))
168
+ some_fs.touch(some_join(nesteddir, "nestedfile"))
169
+
170
+ return source
171
+
172
+ def _bulk_operations_scenario_0(self, some_fs, some_join, some_path):
173
+ """
174
+ Scenario that is used for many cp/get/put tests. Creates the following
175
+ directory and file structure:
176
+
177
+ 📁 source
178
+ ├── 📄 file1
179
+ ├── 📄 file2
180
+ └── 📁 subdir
181
+ ├── 📄 subfile1
182
+ ├── 📄 subfile2
183
+ └── 📁 nesteddir
184
+ └── 📄 nestedfile
185
+ """
186
+ source = some_join(some_path, "source")
187
+ subdir = some_join(source, "subdir")
188
+ nesteddir = some_join(subdir, "nesteddir")
189
+ some_fs.makedirs(nesteddir)
190
+ some_fs.touch(some_join(source, "file1"))
191
+ some_fs.touch(some_join(source, "file2"))
192
+ some_fs.touch(some_join(subdir, "subfile1"))
193
+ some_fs.touch(some_join(subdir, "subfile2"))
194
+ some_fs.touch(some_join(nesteddir, "nestedfile"))
195
+ return source
196
+
197
+ def _dir_and_file_with_same_name_prefix(self, some_fs, some_join, some_path):
198
+ """
199
+ Scenario that is used to check cp/get/put on directory and file with
200
+ the same name prefixes. Creates the following directory and file structure:
201
+
202
+ 📁 source
203
+ ├── 📄 subdir.txt
204
+ └── 📁 subdir
205
+ └── 📄 subfile.txt
206
+ """
207
+ source = some_join(some_path, "source")
208
+ subdir = some_join(source, "subdir")
209
+ file = some_join(source, "subdir.txt")
210
+ subfile = some_join(subdir, "subfile.txt")
211
+ some_fs.makedirs(subdir)
212
+ some_fs.touch(file)
213
+ some_fs.touch(subfile)
214
+ return source
215
+
216
+ def _10_files_with_hashed_names(self, some_fs, some_join, some_path):
217
+ """
218
+ Scenario that is used to check cp/get/put files order when source and
219
+ destination are lists. Creates the following directory and file structure:
220
+
221
+ 📁 source
222
+ └── 📄 {hashed([0-9])}.txt
223
+ """
224
+ source = some_join(some_path, "source")
225
+ for i in range(10):
226
+ hashed_i = md5(str(i).encode("utf-8")).hexdigest()
227
+ path = some_join(source, f"{hashed_i}.txt")
228
+ some_fs.pipe(path=path, value=f"{i}".encode("utf-8"))
229
+ return source
230
+
231
+
232
+ class AbstractFixtures(BaseAbstractFixtures):
233
+ """
234
+ Abstract base class containing fixtures that may be overridden in derived
235
+ filesystem-specific classes to run the abstract tests on such filesystems.
236
+
237
+ For any particular filesystem some of these fixtures must be overridden,
238
+ such as ``fs`` and ``fs_path``, and others may be overridden if the
239
+ default functions here are not appropriate, such as ``fs_join``.
240
+ """
241
+
242
+ @pytest.fixture
243
+ def fs(self):
244
+ raise NotImplementedError("This function must be overridden in derived classes")
245
+
246
+ @pytest.fixture
247
+ def fs_join(self):
248
+ """
249
+ Return a function that joins its arguments together into a path.
250
+
251
+ Most fsspec implementations join paths in a platform-dependent way,
252
+ but some will override this to always use a forward slash.
253
+ """
254
+ return os.path.join
255
+
256
+ @pytest.fixture
257
+ def fs_path(self):
258
+ raise NotImplementedError("This function must be overridden in derived classes")
259
+
260
+ @pytest.fixture(scope="class")
261
+ def local_fs(self):
262
+ # Maybe need an option for auto_mkdir=False? This is only relevant
263
+ # for certain implementations.
264
+ return LocalFileSystem(auto_mkdir=True)
265
+
266
+ @pytest.fixture
267
+ def local_join(self):
268
+ """
269
+ Return a function that joins its arguments together into a path, on
270
+ the local filesystem.
271
+ """
272
+ return os.path.join
273
+
274
+ @pytest.fixture
275
+ def local_path(self, tmpdir):
276
+ return tmpdir
277
+
278
+ @pytest.fixture
279
+ def supports_empty_directories(self):
280
+ """
281
+ Return whether this implementation supports empty directories.
282
+ """
283
+ return True
284
+
285
+ @pytest.fixture
286
+ def fs_sanitize_path(self):
287
+ return lambda x: x
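To reuse these fixtures for a concrete backend, a test module typically subclasses AbstractFixtures together with the test classes imported at the top of this file. A minimal, illustrative sketch against the in-memory filesystem (fixture values are examples only; a real implementation may also need to override fs_join or fs_sanitize_path):

    import pytest

    from fsspec.implementations.memory import MemoryFileSystem
    from fsspec.tests.abstract import AbstractFixtures, AbstractCopyTests


    class MemoryFixtures(AbstractFixtures):
        @pytest.fixture
        def fs(self):
            m = MemoryFileSystem()
            m.store.clear()          # start each test from an empty store
            return m

        @pytest.fixture
        def fs_path(self):
            return ""                # tests build their own paths below this root

        @pytest.fixture
        def fs_join(self):
            return lambda *args: "/".join(args)   # memory paths are posix-style


    class TestMemoryCopy(MemoryFixtures, AbstractCopyTests):
        pass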
.venv/Lib/site-packages/fsspec/tests/abstract/common.py ADDED
@@ -0,0 +1,175 @@
1
+ GLOB_EDGE_CASES_TESTS = {
2
+ "argnames": ("path", "recursive", "maxdepth", "expected"),
3
+ "argvalues": [
4
+ ("fil?1", False, None, ["file1"]),
5
+ ("fil?1", True, None, ["file1"]),
6
+ ("file[1-2]", False, None, ["file1", "file2"]),
7
+ ("file[1-2]", True, None, ["file1", "file2"]),
8
+ ("*", False, None, ["file1", "file2"]),
9
+ (
10
+ "*",
11
+ True,
12
+ None,
13
+ [
14
+ "file1",
15
+ "file2",
16
+ "subdir0/subfile1",
17
+ "subdir0/subfile2",
18
+ "subdir0/nesteddir/nestedfile",
19
+ "subdir1/subfile1",
20
+ "subdir1/subfile2",
21
+ "subdir1/nesteddir/nestedfile",
22
+ ],
23
+ ),
24
+ ("*", True, 1, ["file1", "file2"]),
25
+ (
26
+ "*",
27
+ True,
28
+ 2,
29
+ [
30
+ "file1",
31
+ "file2",
32
+ "subdir0/subfile1",
33
+ "subdir0/subfile2",
34
+ "subdir1/subfile1",
35
+ "subdir1/subfile2",
36
+ ],
37
+ ),
38
+ ("*1", False, None, ["file1"]),
39
+ (
40
+ "*1",
41
+ True,
42
+ None,
43
+ [
44
+ "file1",
45
+ "subdir1/subfile1",
46
+ "subdir1/subfile2",
47
+ "subdir1/nesteddir/nestedfile",
48
+ ],
49
+ ),
50
+ ("*1", True, 2, ["file1", "subdir1/subfile1", "subdir1/subfile2"]),
51
+ (
52
+ "**",
53
+ False,
54
+ None,
55
+ [
56
+ "file1",
57
+ "file2",
58
+ "subdir0/subfile1",
59
+ "subdir0/subfile2",
60
+ "subdir0/nesteddir/nestedfile",
61
+ "subdir1/subfile1",
62
+ "subdir1/subfile2",
63
+ "subdir1/nesteddir/nestedfile",
64
+ ],
65
+ ),
66
+ (
67
+ "**",
68
+ True,
69
+ None,
70
+ [
71
+ "file1",
72
+ "file2",
73
+ "subdir0/subfile1",
74
+ "subdir0/subfile2",
75
+ "subdir0/nesteddir/nestedfile",
76
+ "subdir1/subfile1",
77
+ "subdir1/subfile2",
78
+ "subdir1/nesteddir/nestedfile",
79
+ ],
80
+ ),
81
+ ("**", True, 1, ["file1", "file2"]),
82
+ (
83
+ "**",
84
+ True,
85
+ 2,
86
+ [
87
+ "file1",
88
+ "file2",
89
+ "subdir0/subfile1",
90
+ "subdir0/subfile2",
91
+ "subdir0/nesteddir/nestedfile",
92
+ "subdir1/subfile1",
93
+ "subdir1/subfile2",
94
+ "subdir1/nesteddir/nestedfile",
95
+ ],
96
+ ),
97
+ (
98
+ "**",
99
+ False,
100
+ 2,
101
+ [
102
+ "file1",
103
+ "file2",
104
+ "subdir0/subfile1",
105
+ "subdir0/subfile2",
106
+ "subdir1/subfile1",
107
+ "subdir1/subfile2",
108
+ ],
109
+ ),
110
+ ("**/*1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
111
+ (
112
+ "**/*1",
113
+ True,
114
+ None,
115
+ [
116
+ "file1",
117
+ "subdir0/subfile1",
118
+ "subdir1/subfile1",
119
+ "subdir1/subfile2",
120
+ "subdir1/nesteddir/nestedfile",
121
+ ],
122
+ ),
123
+ ("**/*1", True, 1, ["file1"]),
124
+ (
125
+ "**/*1",
126
+ True,
127
+ 2,
128
+ ["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"],
129
+ ),
130
+ ("**/*1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
131
+ ("**/subdir0", False, None, []),
132
+ ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
133
+ ("**/subdir0/nested*", False, 2, []),
134
+ ("**/subdir0/nested*", True, 2, ["nestedfile"]),
135
+ ("subdir[1-2]", False, None, []),
136
+ ("subdir[1-2]", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
137
+ ("subdir[1-2]", True, 2, ["subfile1", "subfile2"]),
138
+ ("subdir[0-1]", False, None, []),
139
+ (
140
+ "subdir[0-1]",
141
+ True,
142
+ None,
143
+ [
144
+ "subdir0/subfile1",
145
+ "subdir0/subfile2",
146
+ "subdir0/nesteddir/nestedfile",
147
+ "subdir1/subfile1",
148
+ "subdir1/subfile2",
149
+ "subdir1/nesteddir/nestedfile",
150
+ ],
151
+ ),
152
+ (
153
+ "subdir[0-1]/*fil[e]*",
154
+ False,
155
+ None,
156
+ [
157
+ "subdir0/subfile1",
158
+ "subdir0/subfile2",
159
+ "subdir1/subfile1",
160
+ "subdir1/subfile2",
161
+ ],
162
+ ),
163
+ (
164
+ "subdir[0-1]/*fil[e]*",
165
+ True,
166
+ None,
167
+ [
168
+ "subdir0/subfile1",
169
+ "subdir0/subfile2",
170
+ "subdir1/subfile1",
171
+ "subdir1/subfile2",
172
+ ],
173
+ ),
174
+ ],
175
+ }
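Each tuple feeds pytest.mark.parametrize in the copy/get/put test classes that follow and is evaluated against the tree built by _glob_edge_cases_files. As a sketch, the row ("*1", True, 2, [...]) corresponds roughly to (names as used in test_copy_glob_edge_cases below):

    fs.copy(fs_join(source, "*1"), target, recursive=True, maxdepth=2)
    # fs.find(target) is then expected to contain exactly
    # ["file1", "subdir1/subfile1", "subdir1/subfile2"] under the target prefix.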
.venv/Lib/site-packages/fsspec/tests/abstract/copy.py ADDED
@@ -0,0 +1,557 @@
1
+ from hashlib import md5
2
+ from itertools import product
3
+
4
+ import pytest
5
+
6
+ from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
7
+
8
+
9
+ class AbstractCopyTests:
10
+ def test_copy_file_to_existing_directory(
11
+ self,
12
+ fs,
13
+ fs_join,
14
+ fs_bulk_operations_scenario_0,
15
+ fs_target,
16
+ supports_empty_directories,
17
+ ):
18
+ # Copy scenario 1a
19
+ source = fs_bulk_operations_scenario_0
20
+
21
+ target = fs_target
22
+ fs.mkdir(target)
23
+ if not supports_empty_directories:
24
+ # Force target directory to exist by adding a dummy file
25
+ fs.touch(fs_join(target, "dummy"))
26
+ assert fs.isdir(target)
27
+
28
+ target_file2 = fs_join(target, "file2")
29
+ target_subfile1 = fs_join(target, "subfile1")
30
+
31
+ # Copy from source directory
32
+ fs.cp(fs_join(source, "file2"), target)
33
+ assert fs.isfile(target_file2)
34
+
35
+ # Copy from sub directory
36
+ fs.cp(fs_join(source, "subdir", "subfile1"), target)
37
+ assert fs.isfile(target_subfile1)
38
+
39
+ # Remove copied files
40
+ fs.rm([target_file2, target_subfile1])
41
+ assert not fs.exists(target_file2)
42
+ assert not fs.exists(target_subfile1)
43
+
44
+ # Repeat with trailing slash on target
45
+ fs.cp(fs_join(source, "file2"), target + "/")
46
+ assert fs.isdir(target)
47
+ assert fs.isfile(target_file2)
48
+
49
+ fs.cp(fs_join(source, "subdir", "subfile1"), target + "/")
50
+ assert fs.isfile(target_subfile1)
51
+
52
+ def test_copy_file_to_new_directory(
53
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
54
+ ):
55
+ # Copy scenario 1b
56
+ source = fs_bulk_operations_scenario_0
57
+
58
+ target = fs_target
59
+ fs.mkdir(target)
60
+
61
+ fs.cp(
62
+ fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
63
+ ) # Note trailing slash
64
+ assert fs.isdir(target)
65
+ assert fs.isdir(fs_join(target, "newdir"))
66
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
67
+
68
+ def test_copy_file_to_file_in_existing_directory(
69
+ self,
70
+ fs,
71
+ fs_join,
72
+ fs_bulk_operations_scenario_0,
73
+ fs_target,
74
+ supports_empty_directories,
75
+ ):
76
+ # Copy scenario 1c
77
+ source = fs_bulk_operations_scenario_0
78
+
79
+ target = fs_target
80
+ fs.mkdir(target)
81
+ if not supports_empty_directories:
82
+ # Force target directory to exist by adding a dummy file
83
+ fs.touch(fs_join(target, "dummy"))
84
+ assert fs.isdir(target)
85
+
86
+ fs.cp(fs_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
87
+ assert fs.isfile(fs_join(target, "newfile"))
88
+
89
+ def test_copy_file_to_file_in_new_directory(
90
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
91
+ ):
92
+ # Copy scenario 1d
93
+ source = fs_bulk_operations_scenario_0
94
+
95
+ target = fs_target
96
+ fs.mkdir(target)
97
+
98
+ fs.cp(
99
+ fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir", "newfile")
100
+ )
101
+ assert fs.isdir(fs_join(target, "newdir"))
102
+ assert fs.isfile(fs_join(target, "newdir", "newfile"))
103
+
104
+ def test_copy_directory_to_existing_directory(
105
+ self,
106
+ fs,
107
+ fs_join,
108
+ fs_bulk_operations_scenario_0,
109
+ fs_target,
110
+ supports_empty_directories,
111
+ ):
112
+ # Copy scenario 1e
113
+ source = fs_bulk_operations_scenario_0
114
+
115
+ target = fs_target
116
+ fs.mkdir(target)
117
+ if not supports_empty_directories:
118
+ # Force target directory to exist by adding a dummy file
119
+ dummy = fs_join(target, "dummy")
120
+ fs.touch(dummy)
121
+ assert fs.isdir(target)
122
+
123
+ for source_slash, target_slash in zip([False, True], [False, True]):
124
+ s = fs_join(source, "subdir")
125
+ if source_slash:
126
+ s += "/"
127
+ t = target + "/" if target_slash else target
128
+
129
+ # Without recursive does nothing
130
+ fs.cp(s, t)
131
+ assert fs.ls(target, detail=False) == (
132
+ [] if supports_empty_directories else [dummy]
133
+ )
134
+
135
+ # With recursive
136
+ fs.cp(s, t, recursive=True)
137
+ if source_slash:
138
+ assert fs.isfile(fs_join(target, "subfile1"))
139
+ assert fs.isfile(fs_join(target, "subfile2"))
140
+ assert fs.isdir(fs_join(target, "nesteddir"))
141
+ assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
142
+ assert not fs.exists(fs_join(target, "subdir"))
143
+
144
+ fs.rm(
145
+ [
146
+ fs_join(target, "subfile1"),
147
+ fs_join(target, "subfile2"),
148
+ fs_join(target, "nesteddir"),
149
+ ],
150
+ recursive=True,
151
+ )
152
+ else:
153
+ assert fs.isdir(fs_join(target, "subdir"))
154
+ assert fs.isfile(fs_join(target, "subdir", "subfile1"))
155
+ assert fs.isfile(fs_join(target, "subdir", "subfile2"))
156
+ assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
157
+ assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
158
+
159
+ fs.rm(fs_join(target, "subdir"), recursive=True)
160
+ assert fs.ls(target, detail=False) == (
161
+ [] if supports_empty_directories else [dummy]
162
+ )
163
+
164
+ # Limit recursive by maxdepth
165
+ fs.cp(s, t, recursive=True, maxdepth=1)
166
+ if source_slash:
167
+ assert fs.isfile(fs_join(target, "subfile1"))
168
+ assert fs.isfile(fs_join(target, "subfile2"))
169
+ assert not fs.exists(fs_join(target, "nesteddir"))
170
+ assert not fs.exists(fs_join(target, "subdir"))
171
+
172
+ fs.rm(
173
+ [
174
+ fs_join(target, "subfile1"),
175
+ fs_join(target, "subfile2"),
176
+ ],
177
+ recursive=True,
178
+ )
179
+ else:
180
+ assert fs.isdir(fs_join(target, "subdir"))
181
+ assert fs.isfile(fs_join(target, "subdir", "subfile1"))
182
+ assert fs.isfile(fs_join(target, "subdir", "subfile2"))
183
+ assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
184
+
185
+ fs.rm(fs_join(target, "subdir"), recursive=True)
186
+ assert fs.ls(target, detail=False) == (
187
+ [] if supports_empty_directories else [dummy]
188
+ )
189
+
190
+ def test_copy_directory_to_new_directory(
191
+ self,
192
+ fs,
193
+ fs_join,
194
+ fs_bulk_operations_scenario_0,
195
+ fs_target,
196
+ supports_empty_directories,
197
+ ):
198
+ # Copy scenario 1f
199
+ source = fs_bulk_operations_scenario_0
200
+
201
+ target = fs_target
202
+ fs.mkdir(target)
203
+
204
+ for source_slash, target_slash in zip([False, True], [False, True]):
205
+ s = fs_join(source, "subdir")
206
+ if source_slash:
207
+ s += "/"
208
+ t = fs_join(target, "newdir")
209
+ if target_slash:
210
+ t += "/"
211
+
212
+ # Without recursive does nothing
213
+ fs.cp(s, t)
214
+ if supports_empty_directories:
215
+ assert fs.ls(target) == []
216
+ else:
217
+ with pytest.raises(FileNotFoundError):
218
+ fs.ls(target)
219
+
220
+ # With recursive
221
+ fs.cp(s, t, recursive=True)
222
+ assert fs.isdir(fs_join(target, "newdir"))
223
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
224
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
225
+ assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
226
+ assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
227
+ assert not fs.exists(fs_join(target, "subdir"))
228
+
229
+ fs.rm(fs_join(target, "newdir"), recursive=True)
230
+ assert not fs.exists(fs_join(target, "newdir"))
231
+
232
+ # Limit recursive by maxdepth
233
+ fs.cp(s, t, recursive=True, maxdepth=1)
234
+ assert fs.isdir(fs_join(target, "newdir"))
235
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
236
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
237
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
238
+ assert not fs.exists(fs_join(target, "subdir"))
239
+
240
+ fs.rm(fs_join(target, "newdir"), recursive=True)
241
+ assert not fs.exists(fs_join(target, "newdir"))
242
+
243
+ def test_copy_glob_to_existing_directory(
244
+ self,
245
+ fs,
246
+ fs_join,
247
+ fs_bulk_operations_scenario_0,
248
+ fs_target,
249
+ supports_empty_directories,
250
+ ):
251
+ # Copy scenario 1g
252
+ source = fs_bulk_operations_scenario_0
253
+
254
+ target = fs_target
255
+ fs.mkdir(target)
256
+ if not supports_empty_directories:
257
+ # Force target directory to exist by adding a dummy file
258
+ dummy = fs_join(target, "dummy")
259
+ fs.touch(dummy)
260
+ assert fs.isdir(target)
261
+
262
+ for target_slash in [False, True]:
263
+ t = target + "/" if target_slash else target
264
+
265
+ # Without recursive
266
+ fs.cp(fs_join(source, "subdir", "*"), t)
267
+ assert fs.isfile(fs_join(target, "subfile1"))
268
+ assert fs.isfile(fs_join(target, "subfile2"))
269
+ assert not fs.isdir(fs_join(target, "nesteddir"))
270
+ assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
271
+ assert not fs.exists(fs_join(target, "subdir"))
272
+
273
+ fs.rm(
274
+ [
275
+ fs_join(target, "subfile1"),
276
+ fs_join(target, "subfile2"),
277
+ ],
278
+ recursive=True,
279
+ )
280
+ assert fs.ls(target, detail=False) == (
281
+ [] if supports_empty_directories else [dummy]
282
+ )
283
+
284
+ # With recursive
285
+ for glob, recursive in zip(["*", "**"], [True, False]):
286
+ fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
287
+ assert fs.isfile(fs_join(target, "subfile1"))
288
+ assert fs.isfile(fs_join(target, "subfile2"))
289
+ assert fs.isdir(fs_join(target, "nesteddir"))
290
+ assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
291
+ assert not fs.exists(fs_join(target, "subdir"))
292
+
293
+ fs.rm(
294
+ [
295
+ fs_join(target, "subfile1"),
296
+ fs_join(target, "subfile2"),
297
+ fs_join(target, "nesteddir"),
298
+ ],
299
+ recursive=True,
300
+ )
301
+ assert fs.ls(target, detail=False) == (
302
+ [] if supports_empty_directories else [dummy]
303
+ )
304
+
305
+ # Limit recursive by maxdepth
306
+ fs.cp(
307
+ fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
308
+ )
309
+ assert fs.isfile(fs_join(target, "subfile1"))
310
+ assert fs.isfile(fs_join(target, "subfile2"))
311
+ assert not fs.exists(fs_join(target, "nesteddir"))
312
+ assert not fs.exists(fs_join(target, "subdir"))
313
+
314
+ fs.rm(
315
+ [
316
+ fs_join(target, "subfile1"),
317
+ fs_join(target, "subfile2"),
318
+ ],
319
+ recursive=True,
320
+ )
321
+ assert fs.ls(target, detail=False) == (
322
+ [] if supports_empty_directories else [dummy]
323
+ )
324
+
325
+ def test_copy_glob_to_new_directory(
326
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
327
+ ):
328
+ # Copy scenario 1h
329
+ source = fs_bulk_operations_scenario_0
330
+
331
+ target = fs_target
332
+ fs.mkdir(target)
333
+
334
+ for target_slash in [False, True]:
335
+ t = fs_join(target, "newdir")
336
+ if target_slash:
337
+ t += "/"
338
+
339
+ # Without recursive
340
+ fs.cp(fs_join(source, "subdir", "*"), t)
341
+ assert fs.isdir(fs_join(target, "newdir"))
342
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
343
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
344
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
345
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
346
+ assert not fs.exists(fs_join(target, "subdir"))
347
+ assert not fs.exists(fs_join(target, "newdir", "subdir"))
348
+
349
+ fs.rm(fs_join(target, "newdir"), recursive=True)
350
+ assert not fs.exists(fs_join(target, "newdir"))
351
+
352
+ # With recursive
353
+ for glob, recursive in zip(["*", "**"], [True, False]):
354
+ fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
355
+ assert fs.isdir(fs_join(target, "newdir"))
356
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
357
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
358
+ assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
359
+ assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
360
+ assert not fs.exists(fs_join(target, "subdir"))
361
+ assert not fs.exists(fs_join(target, "newdir", "subdir"))
362
+
363
+ fs.rm(fs_join(target, "newdir"), recursive=True)
364
+ assert not fs.exists(fs_join(target, "newdir"))
365
+
366
+ # Limit recursive by maxdepth
367
+ fs.cp(
368
+ fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
369
+ )
370
+ assert fs.isdir(fs_join(target, "newdir"))
371
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
372
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
373
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
374
+ assert not fs.exists(fs_join(target, "subdir"))
375
+ assert not fs.exists(fs_join(target, "newdir", "subdir"))
376
+
377
+ fs.rm(fs_join(target, "newdir"), recursive=True)
378
+ assert not fs.exists(fs_join(target, "newdir"))
379
+
380
+ @pytest.mark.parametrize(
381
+ GLOB_EDGE_CASES_TESTS["argnames"],
382
+ GLOB_EDGE_CASES_TESTS["argvalues"],
383
+ )
384
+ def test_copy_glob_edge_cases(
385
+ self,
386
+ path,
387
+ recursive,
388
+ maxdepth,
389
+ expected,
390
+ fs,
391
+ fs_join,
392
+ fs_glob_edge_cases_files,
393
+ fs_target,
394
+ fs_sanitize_path,
395
+ ):
396
+ # Copy scenario 1g
397
+ source = fs_glob_edge_cases_files
398
+
399
+ target = fs_target
400
+
401
+ for new_dir, target_slash in product([True, False], [True, False]):
402
+ fs.mkdir(target)
403
+
404
+ t = fs_join(target, "newdir") if new_dir else target
405
+ t = t + "/" if target_slash else t
406
+
407
+ fs.copy(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
408
+
409
+ output = fs.find(target)
410
+ if new_dir:
411
+ prefixed_expected = [
412
+ fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
413
+ ]
414
+ else:
415
+ prefixed_expected = [
416
+ fs_sanitize_path(fs_join(target, p)) for p in expected
417
+ ]
418
+ assert sorted(output) == sorted(prefixed_expected)
419
+
420
+ try:
421
+ fs.rm(target, recursive=True)
422
+ except FileNotFoundError:
423
+ pass
424
+
425
+ def test_copy_list_of_files_to_existing_directory(
426
+ self,
427
+ fs,
428
+ fs_join,
429
+ fs_bulk_operations_scenario_0,
430
+ fs_target,
431
+ supports_empty_directories,
432
+ ):
433
+ # Copy scenario 2a
434
+ source = fs_bulk_operations_scenario_0
435
+
436
+ target = fs_target
437
+ fs.mkdir(target)
438
+ if not supports_empty_directories:
439
+ # Force target directory to exist by adding a dummy file
440
+ dummy = fs_join(target, "dummy")
441
+ fs.touch(dummy)
442
+ assert fs.isdir(target)
443
+
444
+ source_files = [
445
+ fs_join(source, "file1"),
446
+ fs_join(source, "file2"),
447
+ fs_join(source, "subdir", "subfile1"),
448
+ ]
449
+
450
+ for target_slash in [False, True]:
451
+ t = target + "/" if target_slash else target
452
+
453
+ fs.cp(source_files, t)
454
+ assert fs.isfile(fs_join(target, "file1"))
455
+ assert fs.isfile(fs_join(target, "file2"))
456
+ assert fs.isfile(fs_join(target, "subfile1"))
457
+
458
+ fs.rm(
459
+ [
460
+ fs_join(target, "file1"),
461
+ fs_join(target, "file2"),
462
+ fs_join(target, "subfile1"),
463
+ ],
464
+ recursive=True,
465
+ )
466
+ assert fs.ls(target, detail=False) == (
467
+ [] if supports_empty_directories else [dummy]
468
+ )
469
+
470
+ def test_copy_list_of_files_to_new_directory(
471
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
472
+ ):
473
+ # Copy scenario 2b
474
+ source = fs_bulk_operations_scenario_0
475
+
476
+ target = fs_target
477
+ fs.mkdir(target)
478
+
479
+ source_files = [
480
+ fs_join(source, "file1"),
481
+ fs_join(source, "file2"),
482
+ fs_join(source, "subdir", "subfile1"),
483
+ ]
484
+
485
+ fs.cp(source_files, fs_join(target, "newdir") + "/") # Note trailing slash
486
+ assert fs.isdir(fs_join(target, "newdir"))
487
+ assert fs.isfile(fs_join(target, "newdir", "file1"))
488
+ assert fs.isfile(fs_join(target, "newdir", "file2"))
489
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
490
+
491
+ def test_copy_two_files_new_directory(
492
+ self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
493
+ ):
494
+ # This is a duplicate of test_copy_list_of_files_to_new_directory and
495
+ # can eventually be removed.
496
+ source = fs_bulk_operations_scenario_0
497
+
498
+ target = fs_target
499
+ assert not fs.exists(target)
500
+ fs.cp([fs_join(source, "file1"), fs_join(source, "file2")], target)
501
+
502
+ assert fs.isdir(target)
503
+ assert fs.isfile(fs_join(target, "file1"))
504
+ assert fs.isfile(fs_join(target, "file2"))
505
+
506
+ def test_copy_directory_without_files_with_same_name_prefix(
507
+ self,
508
+ fs,
509
+ fs_join,
510
+ fs_target,
511
+ fs_dir_and_file_with_same_name_prefix,
512
+ supports_empty_directories,
513
+ ):
514
+ # Create the test dirs
515
+ source = fs_dir_and_file_with_same_name_prefix
516
+ target = fs_target
517
+
518
+ # Test without glob
519
+ fs.cp(fs_join(source, "subdir"), target, recursive=True)
520
+
521
+ assert fs.isfile(fs_join(target, "subfile.txt"))
522
+ assert not fs.isfile(fs_join(target, "subdir.txt"))
523
+
524
+ fs.rm([fs_join(target, "subfile.txt")])
525
+ if supports_empty_directories:
526
+ assert fs.ls(target) == []
527
+ else:
528
+ assert not fs.exists(target)
529
+
530
+ # Test with glob
531
+ fs.cp(fs_join(source, "subdir*"), target, recursive=True)
532
+
533
+ assert fs.isdir(fs_join(target, "subdir"))
534
+ assert fs.isfile(fs_join(target, "subdir", "subfile.txt"))
535
+ assert fs.isfile(fs_join(target, "subdir.txt"))
536
+
537
+ def test_copy_with_source_and_destination_as_list(
538
+ self, fs, fs_target, fs_join, fs_10_files_with_hashed_names
539
+ ):
540
+ # Create the test dir
541
+ source = fs_10_files_with_hashed_names
542
+ target = fs_target
543
+
544
+ # Create list of files for source and destination
545
+ source_files = []
546
+ destination_files = []
547
+ for i in range(10):
548
+ hashed_i = md5(str(i).encode("utf-8")).hexdigest()
549
+ source_files.append(fs_join(source, f"{hashed_i}.txt"))
550
+ destination_files.append(fs_join(target, f"{hashed_i}.txt"))
551
+
552
+ # Copy and assert order was kept
553
+ fs.copy(path1=source_files, path2=destination_files)
554
+
555
+ for i in range(10):
556
+ file_content = fs.cat(destination_files[i]).decode("utf-8")
557
+ assert file_content == str(i)
.venv/Lib/site-packages/fsspec/tests/abstract/get.py ADDED
@@ -0,0 +1,587 @@
1
+ from hashlib import md5
2
+ from itertools import product
3
+
4
+ import pytest
5
+
6
+ from fsspec.implementations.local import make_path_posix
7
+ from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
8
+
9
+
10
+ class AbstractGetTests:
11
+ def test_get_file_to_existing_directory(
12
+ self,
13
+ fs,
14
+ fs_join,
15
+ fs_bulk_operations_scenario_0,
16
+ local_fs,
17
+ local_join,
18
+ local_target,
19
+ ):
20
+ # Copy scenario 1a
21
+ source = fs_bulk_operations_scenario_0
22
+
23
+ target = local_target
24
+ local_fs.mkdir(target)
25
+ assert local_fs.isdir(target)
26
+
27
+ target_file2 = local_join(target, "file2")
28
+ target_subfile1 = local_join(target, "subfile1")
29
+
30
+ # Copy from source directory
31
+ fs.get(fs_join(source, "file2"), target)
32
+ assert local_fs.isfile(target_file2)
33
+
34
+ # Copy from sub directory
35
+ fs.get(fs_join(source, "subdir", "subfile1"), target)
36
+ assert local_fs.isfile(target_subfile1)
37
+
38
+ # Remove copied files
39
+ local_fs.rm([target_file2, target_subfile1])
40
+ assert not local_fs.exists(target_file2)
41
+ assert not local_fs.exists(target_subfile1)
42
+
43
+ # Repeat with trailing slash on target
44
+ fs.get(fs_join(source, "file2"), target + "/")
45
+ assert local_fs.isdir(target)
46
+ assert local_fs.isfile(target_file2)
47
+
48
+ fs.get(fs_join(source, "subdir", "subfile1"), target + "/")
49
+ assert local_fs.isfile(target_subfile1)
50
+
51
+ def test_get_file_to_new_directory(
52
+ self,
53
+ fs,
54
+ fs_join,
55
+ fs_bulk_operations_scenario_0,
56
+ local_fs,
57
+ local_join,
58
+ local_target,
59
+ ):
60
+ # Copy scenario 1b
61
+ source = fs_bulk_operations_scenario_0
62
+
63
+ target = local_target
64
+ local_fs.mkdir(target)
65
+
66
+ fs.get(
67
+ fs_join(source, "subdir", "subfile1"), local_join(target, "newdir/")
68
+ ) # Note trailing slash
69
+
70
+ assert local_fs.isdir(target)
71
+ assert local_fs.isdir(local_join(target, "newdir"))
72
+ assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
73
+
74
+ def test_get_file_to_file_in_existing_directory(
75
+ self,
76
+ fs,
77
+ fs_join,
78
+ fs_bulk_operations_scenario_0,
79
+ local_fs,
80
+ local_join,
81
+ local_target,
82
+ ):
83
+ # Copy scenario 1c
84
+ source = fs_bulk_operations_scenario_0
85
+
86
+ target = local_target
87
+ local_fs.mkdir(target)
88
+
89
+ fs.get(fs_join(source, "subdir", "subfile1"), local_join(target, "newfile"))
90
+ assert local_fs.isfile(local_join(target, "newfile"))
91
+
92
+ def test_get_file_to_file_in_new_directory(
93
+ self,
94
+ fs,
95
+ fs_join,
96
+ fs_bulk_operations_scenario_0,
97
+ local_fs,
98
+ local_join,
99
+ local_target,
100
+ ):
101
+ # Copy scenario 1d
102
+ source = fs_bulk_operations_scenario_0
103
+
104
+ target = local_target
105
+ local_fs.mkdir(target)
106
+
107
+ fs.get(
108
+ fs_join(source, "subdir", "subfile1"),
109
+ local_join(target, "newdir", "newfile"),
110
+ )
111
+ assert local_fs.isdir(local_join(target, "newdir"))
112
+ assert local_fs.isfile(local_join(target, "newdir", "newfile"))
113
+
114
+ def test_get_directory_to_existing_directory(
115
+ self,
116
+ fs,
117
+ fs_join,
118
+ fs_bulk_operations_scenario_0,
119
+ local_fs,
120
+ local_join,
121
+ local_target,
122
+ ):
123
+ # Copy scenario 1e
124
+ source = fs_bulk_operations_scenario_0
125
+
126
+ target = local_target
127
+ local_fs.mkdir(target)
128
+ assert local_fs.isdir(target)
129
+
130
+ for source_slash, target_slash in zip([False, True], [False, True]):
131
+ s = fs_join(source, "subdir")
132
+ if source_slash:
133
+ s += "/"
134
+ t = target + "/" if target_slash else target
135
+
136
+ # Without recursive does nothing
137
+ fs.get(s, t)
138
+ assert local_fs.ls(target) == []
139
+
140
+ # With recursive
141
+ fs.get(s, t, recursive=True)
142
+ if source_slash:
143
+ assert local_fs.isfile(local_join(target, "subfile1"))
144
+ assert local_fs.isfile(local_join(target, "subfile2"))
145
+ assert local_fs.isdir(local_join(target, "nesteddir"))
146
+ assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
147
+ assert not local_fs.exists(local_join(target, "subdir"))
148
+
149
+ local_fs.rm(
150
+ [
151
+ local_join(target, "subfile1"),
152
+ local_join(target, "subfile2"),
153
+ local_join(target, "nesteddir"),
154
+ ],
155
+ recursive=True,
156
+ )
157
+ else:
158
+ assert local_fs.isdir(local_join(target, "subdir"))
159
+ assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
160
+ assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
161
+ assert local_fs.isdir(local_join(target, "subdir", "nesteddir"))
162
+ assert local_fs.isfile(
163
+ local_join(target, "subdir", "nesteddir", "nestedfile")
164
+ )
165
+
166
+ local_fs.rm(local_join(target, "subdir"), recursive=True)
167
+ assert local_fs.ls(target) == []
168
+
169
+ # Limit recursive by maxdepth
170
+ fs.get(s, t, recursive=True, maxdepth=1)
171
+ if source_slash:
172
+ assert local_fs.isfile(local_join(target, "subfile1"))
173
+ assert local_fs.isfile(local_join(target, "subfile2"))
174
+ assert not local_fs.exists(local_join(target, "nesteddir"))
175
+ assert not local_fs.exists(local_join(target, "subdir"))
176
+
177
+ local_fs.rm(
178
+ [
179
+ local_join(target, "subfile1"),
180
+ local_join(target, "subfile2"),
181
+ ],
182
+ recursive=True,
183
+ )
184
+ else:
185
+ assert local_fs.isdir(local_join(target, "subdir"))
186
+ assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
187
+ assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
188
+ assert not local_fs.exists(local_join(target, "subdir", "nesteddir"))
189
+
190
+ local_fs.rm(local_join(target, "subdir"), recursive=True)
191
+ assert local_fs.ls(target) == []
192
+
193
+ def test_get_directory_to_new_directory(
194
+ self,
195
+ fs,
196
+ fs_join,
197
+ fs_bulk_operations_scenario_0,
198
+ local_fs,
199
+ local_join,
200
+ local_target,
201
+ ):
202
+ # Copy scenario 1f
203
+ source = fs_bulk_operations_scenario_0
204
+
205
+ target = local_target
206
+ local_fs.mkdir(target)
207
+
208
+ for source_slash, target_slash in zip([False, True], [False, True]):
209
+ s = fs_join(source, "subdir")
210
+ if source_slash:
211
+ s += "/"
212
+ t = local_join(target, "newdir")
213
+ if target_slash:
214
+ t += "/"
215
+
216
+ # Without recursive does nothing
217
+ fs.get(s, t)
218
+ assert local_fs.ls(target) == []
219
+
220
+ # With recursive
221
+ fs.get(s, t, recursive=True)
222
+ assert local_fs.isdir(local_join(target, "newdir"))
223
+ assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
224
+ assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
225
+ assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
226
+ assert local_fs.isfile(
227
+ local_join(target, "newdir", "nesteddir", "nestedfile")
228
+ )
229
+ assert not local_fs.exists(local_join(target, "subdir"))
230
+
231
+ local_fs.rm(local_join(target, "newdir"), recursive=True)
232
+ assert local_fs.ls(target) == []
233
+
234
+ # Limit recursive by maxdepth
235
+ fs.get(s, t, recursive=True, maxdepth=1)
236
+ assert local_fs.isdir(local_join(target, "newdir"))
237
+ assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
238
+ assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
239
+ assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
240
+ assert not local_fs.exists(local_join(target, "subdir"))
241
+
242
+ local_fs.rm(local_join(target, "newdir"), recursive=True)
243
+ assert not local_fs.exists(local_join(target, "newdir"))
244
+
245
+ def test_get_glob_to_existing_directory(
246
+ self,
247
+ fs,
248
+ fs_join,
249
+ fs_bulk_operations_scenario_0,
250
+ local_fs,
251
+ local_join,
252
+ local_target,
253
+ ):
254
+ # Copy scenario 1g
255
+ source = fs_bulk_operations_scenario_0
256
+
257
+ target = local_target
258
+ local_fs.mkdir(target)
259
+
260
+ for target_slash in [False, True]:
261
+ t = target + "/" if target_slash else target
262
+
263
+ # Without recursive
264
+ fs.get(fs_join(source, "subdir", "*"), t)
265
+ assert local_fs.isfile(local_join(target, "subfile1"))
266
+ assert local_fs.isfile(local_join(target, "subfile2"))
267
+ assert not local_fs.isdir(local_join(target, "nesteddir"))
268
+ assert not local_fs.exists(local_join(target, "nesteddir", "nestedfile"))
269
+ assert not local_fs.exists(local_join(target, "subdir"))
270
+
271
+ local_fs.rm(
272
+ [
273
+ local_join(target, "subfile1"),
274
+ local_join(target, "subfile2"),
275
+ ],
276
+ recursive=True,
277
+ )
278
+ assert local_fs.ls(target) == []
279
+
280
+ # With recursive
281
+ for glob, recursive in zip(["*", "**"], [True, False]):
282
+ fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
283
+ assert local_fs.isfile(local_join(target, "subfile1"))
284
+ assert local_fs.isfile(local_join(target, "subfile2"))
285
+ assert local_fs.isdir(local_join(target, "nesteddir"))
286
+ assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
287
+ assert not local_fs.exists(local_join(target, "subdir"))
288
+
289
+ local_fs.rm(
290
+ [
291
+ local_join(target, "subfile1"),
292
+ local_join(target, "subfile2"),
293
+ local_join(target, "nesteddir"),
294
+ ],
295
+ recursive=True,
296
+ )
297
+ assert local_fs.ls(target) == []
298
+
299
+ # Limit recursive by maxdepth
300
+ fs.get(
301
+ fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
302
+ )
303
+ assert local_fs.isfile(local_join(target, "subfile1"))
304
+ assert local_fs.isfile(local_join(target, "subfile2"))
305
+ assert not local_fs.exists(local_join(target, "nesteddir"))
306
+ assert not local_fs.exists(local_join(target, "subdir"))
307
+
308
+ local_fs.rm(
309
+ [
310
+ local_join(target, "subfile1"),
311
+ local_join(target, "subfile2"),
312
+ ],
313
+ recursive=True,
314
+ )
315
+ assert local_fs.ls(target) == []
316
+
317
+ def test_get_glob_to_new_directory(
318
+ self,
319
+ fs,
320
+ fs_join,
321
+ fs_bulk_operations_scenario_0,
322
+ local_fs,
323
+ local_join,
324
+ local_target,
325
+ ):
326
+ # Copy scenario 1h
327
+ source = fs_bulk_operations_scenario_0
328
+
329
+ target = local_target
330
+ local_fs.mkdir(target)
331
+
332
+ for target_slash in [False, True]:
333
+ t = fs_join(target, "newdir")
334
+ if target_slash:
335
+ t += "/"
336
+
337
+ # Without recursive
338
+ fs.get(fs_join(source, "subdir", "*"), t)
339
+ assert local_fs.isdir(local_join(target, "newdir"))
340
+ assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
341
+ assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
342
+ assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
343
+ assert not local_fs.exists(
344
+ local_join(target, "newdir", "nesteddir", "nestedfile")
345
+ )
346
+ assert not local_fs.exists(local_join(target, "subdir"))
347
+ assert not local_fs.exists(local_join(target, "newdir", "subdir"))
348
+
349
+ local_fs.rm(local_join(target, "newdir"), recursive=True)
350
+ assert local_fs.ls(target) == []
351
+
352
+ # With recursive
353
+ for glob, recursive in zip(["*", "**"], [True, False]):
354
+ fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
355
+ assert local_fs.isdir(local_join(target, "newdir"))
356
+ assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
357
+ assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
358
+ assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
359
+ assert local_fs.isfile(
360
+ local_join(target, "newdir", "nesteddir", "nestedfile")
361
+ )
362
+ assert not local_fs.exists(local_join(target, "subdir"))
363
+ assert not local_fs.exists(local_join(target, "newdir", "subdir"))
364
+
365
+ local_fs.rm(local_join(target, "newdir"), recursive=True)
366
+ assert not local_fs.exists(local_join(target, "newdir"))
367
+
368
+ # Limit recursive by maxdepth
369
+ fs.get(
370
+ fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
371
+ )
372
+ assert local_fs.isdir(local_join(target, "newdir"))
373
+ assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
374
+ assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
375
+ assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
376
+ assert not local_fs.exists(local_join(target, "subdir"))
377
+ assert not local_fs.exists(local_join(target, "newdir", "subdir"))
378
+
379
+ local_fs.rm(local_fs.ls(target, detail=False), recursive=True)
380
+ assert not local_fs.exists(local_join(target, "newdir"))
381
+
382
+ @pytest.mark.parametrize(
383
+ GLOB_EDGE_CASES_TESTS["argnames"],
384
+ GLOB_EDGE_CASES_TESTS["argvalues"],
385
+ )
386
+ def test_get_glob_edge_cases(
387
+ self,
388
+ path,
389
+ recursive,
390
+ maxdepth,
391
+ expected,
392
+ fs,
393
+ fs_join,
394
+ fs_glob_edge_cases_files,
395
+ local_fs,
396
+ local_join,
397
+ local_target,
398
+ ):
399
+ # Copy scenario 1g
400
+ source = fs_glob_edge_cases_files
401
+
402
+ target = local_target
403
+
404
+ for new_dir, target_slash in product([True, False], [True, False]):
405
+ local_fs.mkdir(target)
406
+
407
+ t = local_join(target, "newdir") if new_dir else target
408
+ t = t + "/" if target_slash else t
409
+
410
+ fs.get(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
411
+
412
+ output = local_fs.find(target)
413
+ if new_dir:
414
+ prefixed_expected = [
415
+ make_path_posix(local_join(target, "newdir", p)) for p in expected
416
+ ]
417
+ else:
418
+ prefixed_expected = [
419
+ make_path_posix(local_join(target, p)) for p in expected
420
+ ]
421
+ assert sorted(output) == sorted(prefixed_expected)
422
+
423
+ try:
424
+ local_fs.rm(target, recursive=True)
425
+ except FileNotFoundError:
426
+ pass
427
+
428
+ def test_get_list_of_files_to_existing_directory(
429
+ self,
430
+ fs,
431
+ fs_join,
432
+ fs_bulk_operations_scenario_0,
433
+ local_fs,
434
+ local_join,
435
+ local_target,
436
+ ):
437
+ # Copy scenario 2a
438
+ source = fs_bulk_operations_scenario_0
439
+
440
+ target = local_target
441
+ local_fs.mkdir(target)
442
+
443
+ source_files = [
444
+ fs_join(source, "file1"),
445
+ fs_join(source, "file2"),
446
+ fs_join(source, "subdir", "subfile1"),
447
+ ]
448
+
449
+ for target_slash in [False, True]:
450
+ t = target + "/" if target_slash else target
451
+
452
+ fs.get(source_files, t)
453
+ assert local_fs.isfile(local_join(target, "file1"))
454
+ assert local_fs.isfile(local_join(target, "file2"))
455
+ assert local_fs.isfile(local_join(target, "subfile1"))
456
+
457
+ local_fs.rm(
458
+ [
459
+ local_join(target, "file1"),
460
+ local_join(target, "file2"),
461
+ local_join(target, "subfile1"),
462
+ ],
463
+ recursive=True,
464
+ )
465
+ assert local_fs.ls(target) == []
466
+
467
+ def test_get_list_of_files_to_new_directory(
468
+ self,
469
+ fs,
470
+ fs_join,
471
+ fs_bulk_operations_scenario_0,
472
+ local_fs,
473
+ local_join,
474
+ local_target,
475
+ ):
476
+ # Copy scenario 2b
477
+ source = fs_bulk_operations_scenario_0
478
+
479
+ target = local_target
480
+ local_fs.mkdir(target)
481
+
482
+ source_files = [
483
+ fs_join(source, "file1"),
484
+ fs_join(source, "file2"),
485
+ fs_join(source, "subdir", "subfile1"),
486
+ ]
487
+
488
+ fs.get(source_files, local_join(target, "newdir") + "/") # Note trailing slash
489
+ assert local_fs.isdir(local_join(target, "newdir"))
490
+ assert local_fs.isfile(local_join(target, "newdir", "file1"))
491
+ assert local_fs.isfile(local_join(target, "newdir", "file2"))
492
+ assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
493
+
494
+ def test_get_directory_recursive(
495
+ self, fs, fs_join, fs_path, local_fs, local_join, local_target
496
+ ):
497
+ # https://github.com/fsspec/filesystem_spec/issues/1062
498
+ # Recursive cp/get/put of source directory into non-existent target directory.
499
+ src = fs_join(fs_path, "src")
500
+ src_file = fs_join(src, "file")
501
+ fs.mkdir(src)
502
+ fs.touch(src_file)
503
+
504
+ target = local_target
505
+
506
+ # get without slash
507
+ assert not local_fs.exists(target)
508
+ for loop in range(2):
509
+ fs.get(src, target, recursive=True)
510
+ assert local_fs.isdir(target)
511
+
512
+ if loop == 0:
513
+ assert local_fs.isfile(local_join(target, "file"))
514
+ assert not local_fs.exists(local_join(target, "src"))
515
+ else:
516
+ assert local_fs.isfile(local_join(target, "file"))
517
+ assert local_fs.isdir(local_join(target, "src"))
518
+ assert local_fs.isfile(local_join(target, "src", "file"))
519
+
520
+ local_fs.rm(target, recursive=True)
521
+
522
+ # get with slash
523
+ assert not local_fs.exists(target)
524
+ for loop in range(2):
525
+ fs.get(src + "/", target, recursive=True)
526
+ assert local_fs.isdir(target)
527
+ assert local_fs.isfile(local_join(target, "file"))
528
+ assert not local_fs.exists(local_join(target, "src"))
529
+
530
+ def test_get_directory_without_files_with_same_name_prefix(
531
+ self,
532
+ fs,
533
+ fs_join,
534
+ local_fs,
535
+ local_join,
536
+ local_target,
537
+ fs_dir_and_file_with_same_name_prefix,
538
+ ):
539
+ # Create the test dirs
540
+ source = fs_dir_and_file_with_same_name_prefix
541
+ target = local_target
542
+
543
+ # Test without glob
544
+ fs.get(fs_join(source, "subdir"), target, recursive=True)
545
+
546
+ assert local_fs.isfile(local_join(target, "subfile.txt"))
547
+ assert not local_fs.isfile(local_join(target, "subdir.txt"))
548
+
549
+ local_fs.rm([local_join(target, "subfile.txt")])
550
+ assert local_fs.ls(target) == []
551
+
552
+ # Test with glob
553
+ fs.get(fs_join(source, "subdir*"), target, recursive=True)
554
+
555
+ assert local_fs.isdir(local_join(target, "subdir"))
556
+ assert local_fs.isfile(local_join(target, "subdir", "subfile.txt"))
557
+ assert local_fs.isfile(local_join(target, "subdir.txt"))
558
+
559
+ def test_get_with_source_and_destination_as_list(
560
+ self,
561
+ fs,
562
+ fs_join,
563
+ local_fs,
564
+ local_join,
565
+ local_target,
566
+ fs_10_files_with_hashed_names,
567
+ ):
568
+ # Create the test dir
569
+ source = fs_10_files_with_hashed_names
570
+ target = local_target
571
+
572
+ # Create list of files for source and destination
573
+ source_files = []
574
+ destination_files = []
575
+ for i in range(10):
576
+ hashed_i = md5(str(i).encode("utf-8")).hexdigest()
577
+ source_files.append(fs_join(source, f"{hashed_i}.txt"))
578
+ destination_files.append(
579
+ make_path_posix(local_join(target, f"{hashed_i}.txt"))
580
+ )
581
+
582
+ # Copy and assert order was kept
583
+ fs.get(rpath=source_files, lpath=destination_files)
584
+
585
+ for i in range(10):
586
+ file_content = local_fs.cat(destination_files[i]).decode("utf-8")
587
+ assert file_content == str(i)
.venv/Lib/site-packages/fsspec/tests/abstract/put.py ADDED
@@ -0,0 +1,591 @@
1
+ from hashlib import md5
2
+ from itertools import product
3
+
4
+ import pytest
5
+
6
+ from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
7
+
8
+
9
+ class AbstractPutTests:
10
+ def test_put_file_to_existing_directory(
11
+ self,
12
+ fs,
13
+ fs_join,
14
+ fs_target,
15
+ local_join,
16
+ local_bulk_operations_scenario_0,
17
+ supports_empty_directories,
18
+ ):
19
+ # Copy scenario 1a
20
+ source = local_bulk_operations_scenario_0
21
+
22
+ target = fs_target
23
+ fs.mkdir(target)
24
+ if not supports_empty_directories:
25
+ # Force target directory to exist by adding a dummy file
26
+ fs.touch(fs_join(target, "dummy"))
27
+ assert fs.isdir(target)
28
+
29
+ target_file2 = fs_join(target, "file2")
30
+ target_subfile1 = fs_join(target, "subfile1")
31
+
32
+ # Copy from source directory
33
+ fs.put(local_join(source, "file2"), target)
34
+ assert fs.isfile(target_file2)
35
+
36
+ # Copy from sub directory
37
+ fs.put(local_join(source, "subdir", "subfile1"), target)
38
+ assert fs.isfile(target_subfile1)
39
+
40
+ # Remove copied files
41
+ fs.rm([target_file2, target_subfile1])
42
+ assert not fs.exists(target_file2)
43
+ assert not fs.exists(target_subfile1)
44
+
45
+ # Repeat with trailing slash on target
46
+ fs.put(local_join(source, "file2"), target + "/")
47
+ assert fs.isdir(target)
48
+ assert fs.isfile(target_file2)
49
+
50
+ fs.put(local_join(source, "subdir", "subfile1"), target + "/")
51
+ assert fs.isfile(target_subfile1)
52
+
53
+ def test_put_file_to_new_directory(
54
+ self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
55
+ ):
56
+ # Copy scenario 1b
57
+ source = local_bulk_operations_scenario_0
58
+
59
+ target = fs_target
60
+ fs.mkdir(target)
61
+
62
+ fs.put(
63
+ local_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
64
+ ) # Note trailing slash
65
+ assert fs.isdir(target)
66
+ assert fs.isdir(fs_join(target, "newdir"))
67
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
68
+
69
+ def test_put_file_to_file_in_existing_directory(
70
+ self,
71
+ fs,
72
+ fs_join,
73
+ fs_target,
74
+ local_join,
75
+ supports_empty_directories,
76
+ local_bulk_operations_scenario_0,
77
+ ):
78
+ # Copy scenario 1c
79
+ source = local_bulk_operations_scenario_0
80
+
81
+ target = fs_target
82
+ fs.mkdir(target)
83
+ if not supports_empty_directories:
84
+ # Force target directory to exist by adding a dummy file
85
+ fs.touch(fs_join(target, "dummy"))
86
+ assert fs.isdir(target)
87
+
88
+ fs.put(local_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
89
+ assert fs.isfile(fs_join(target, "newfile"))
90
+
91
+ def test_put_file_to_file_in_new_directory(
92
+ self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
93
+ ):
94
+ # Copy scenario 1d
95
+ source = local_bulk_operations_scenario_0
96
+
97
+ target = fs_target
98
+ fs.mkdir(target)
99
+
100
+ fs.put(
101
+ local_join(source, "subdir", "subfile1"),
102
+ fs_join(target, "newdir", "newfile"),
103
+ )
104
+ assert fs.isdir(fs_join(target, "newdir"))
105
+ assert fs.isfile(fs_join(target, "newdir", "newfile"))
106
+
107
+ def test_put_directory_to_existing_directory(
108
+ self,
109
+ fs,
110
+ fs_join,
111
+ fs_target,
112
+ local_bulk_operations_scenario_0,
113
+ supports_empty_directories,
114
+ ):
115
+ # Copy scenario 1e
116
+ source = local_bulk_operations_scenario_0
117
+
118
+ target = fs_target
119
+ fs.mkdir(target)
120
+ if not supports_empty_directories:
121
+ # Force target directory to exist by adding a dummy file
122
+ dummy = fs_join(target, "dummy")
123
+ fs.touch(dummy)
124
+ assert fs.isdir(target)
125
+
126
+ for source_slash, target_slash in zip([False, True], [False, True]):
127
+ s = fs_join(source, "subdir")
128
+ if source_slash:
129
+ s += "/"
130
+ t = target + "/" if target_slash else target
131
+
132
+ # Without recursive does nothing
133
+ fs.put(s, t)
134
+ assert fs.ls(target, detail=False) == (
135
+ [] if supports_empty_directories else [dummy]
136
+ )
137
+
138
+ # With recursive
139
+ fs.put(s, t, recursive=True)
140
+ if source_slash:
141
+ assert fs.isfile(fs_join(target, "subfile1"))
142
+ assert fs.isfile(fs_join(target, "subfile2"))
143
+ assert fs.isdir(fs_join(target, "nesteddir"))
144
+ assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
145
+ assert not fs.exists(fs_join(target, "subdir"))
146
+
147
+ fs.rm(
148
+ [
149
+ fs_join(target, "subfile1"),
150
+ fs_join(target, "subfile2"),
151
+ fs_join(target, "nesteddir"),
152
+ ],
153
+ recursive=True,
154
+ )
155
+ else:
156
+ assert fs.isdir(fs_join(target, "subdir"))
157
+ assert fs.isfile(fs_join(target, "subdir", "subfile1"))
158
+ assert fs.isfile(fs_join(target, "subdir", "subfile2"))
159
+ assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
160
+ assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
161
+
162
+ fs.rm(fs_join(target, "subdir"), recursive=True)
163
+ assert fs.ls(target, detail=False) == (
164
+ [] if supports_empty_directories else [dummy]
165
+ )
166
+
167
+ # Limit recursive by maxdepth
168
+ fs.put(s, t, recursive=True, maxdepth=1)
169
+ if source_slash:
170
+ assert fs.isfile(fs_join(target, "subfile1"))
171
+ assert fs.isfile(fs_join(target, "subfile2"))
172
+ assert not fs.exists(fs_join(target, "nesteddir"))
173
+ assert not fs.exists(fs_join(target, "subdir"))
174
+
175
+ fs.rm(
176
+ [
177
+ fs_join(target, "subfile1"),
178
+ fs_join(target, "subfile2"),
179
+ ],
180
+ recursive=True,
181
+ )
182
+ else:
183
+ assert fs.isdir(fs_join(target, "subdir"))
184
+ assert fs.isfile(fs_join(target, "subdir", "subfile1"))
185
+ assert fs.isfile(fs_join(target, "subdir", "subfile2"))
186
+ assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
187
+
188
+ fs.rm(fs_join(target, "subdir"), recursive=True)
189
+ assert fs.ls(target, detail=False) == (
190
+ [] if supports_empty_directories else [dummy]
191
+ )
192
+
193
+ def test_put_directory_to_new_directory(
194
+ self,
195
+ fs,
196
+ fs_join,
197
+ fs_target,
198
+ local_bulk_operations_scenario_0,
199
+ supports_empty_directories,
200
+ ):
201
+ # Copy scenario 1f
202
+ source = local_bulk_operations_scenario_0
203
+
204
+ target = fs_target
205
+ fs.mkdir(target)
206
+
207
+ for source_slash, target_slash in zip([False, True], [False, True]):
208
+ s = fs_join(source, "subdir")
209
+ if source_slash:
210
+ s += "/"
211
+ t = fs_join(target, "newdir")
212
+ if target_slash:
213
+ t += "/"
214
+
215
+ # Without recursive does nothing
216
+ fs.put(s, t)
217
+ if supports_empty_directories:
218
+ assert fs.ls(target) == []
219
+ else:
220
+ with pytest.raises(FileNotFoundError):
221
+ fs.ls(target)
222
+
223
+ # With recursive
224
+ fs.put(s, t, recursive=True)
225
+ assert fs.isdir(fs_join(target, "newdir"))
226
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
227
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
228
+ assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
229
+ assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
230
+ assert not fs.exists(fs_join(target, "subdir"))
231
+
232
+ fs.rm(fs_join(target, "newdir"), recursive=True)
233
+ assert not fs.exists(fs_join(target, "newdir"))
234
+
235
+ # Limit recursive by maxdepth
236
+ fs.put(s, t, recursive=True, maxdepth=1)
237
+ assert fs.isdir(fs_join(target, "newdir"))
238
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
239
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
240
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
241
+ assert not fs.exists(fs_join(target, "subdir"))
242
+
243
+ fs.rm(fs_join(target, "newdir"), recursive=True)
244
+ assert not fs.exists(fs_join(target, "newdir"))
245
+
246
+ def test_put_glob_to_existing_directory(
247
+ self,
248
+ fs,
249
+ fs_join,
250
+ fs_target,
251
+ local_join,
252
+ supports_empty_directories,
253
+ local_bulk_operations_scenario_0,
254
+ ):
255
+ # Copy scenario 1g
256
+ source = local_bulk_operations_scenario_0
257
+
258
+ target = fs_target
259
+ fs.mkdir(target)
260
+ if not supports_empty_directories:
261
+ # Force target directory to exist by adding a dummy file
262
+ dummy = fs_join(target, "dummy")
263
+ fs.touch(dummy)
264
+ assert fs.isdir(target)
265
+
266
+ for target_slash in [False, True]:
267
+ t = target + "/" if target_slash else target
268
+
269
+ # Without recursive
270
+ fs.put(local_join(source, "subdir", "*"), t)
271
+ assert fs.isfile(fs_join(target, "subfile1"))
272
+ assert fs.isfile(fs_join(target, "subfile2"))
273
+ assert not fs.isdir(fs_join(target, "nesteddir"))
274
+ assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
275
+ assert not fs.exists(fs_join(target, "subdir"))
276
+
277
+ fs.rm(
278
+ [
279
+ fs_join(target, "subfile1"),
280
+ fs_join(target, "subfile2"),
281
+ ],
282
+ recursive=True,
283
+ )
284
+ assert fs.ls(target, detail=False) == (
285
+ [] if supports_empty_directories else [dummy]
286
+ )
287
+
288
+ # With recursive
289
+ for glob, recursive in zip(["*", "**"], [True, False]):
290
+ fs.put(local_join(source, "subdir", glob), t, recursive=recursive)
291
+ assert fs.isfile(fs_join(target, "subfile1"))
292
+ assert fs.isfile(fs_join(target, "subfile2"))
293
+ assert fs.isdir(fs_join(target, "nesteddir"))
294
+ assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
295
+ assert not fs.exists(fs_join(target, "subdir"))
296
+
297
+ fs.rm(
298
+ [
299
+ fs_join(target, "subfile1"),
300
+ fs_join(target, "subfile2"),
301
+ fs_join(target, "nesteddir"),
302
+ ],
303
+ recursive=True,
304
+ )
305
+ assert fs.ls(target, detail=False) == (
306
+ [] if supports_empty_directories else [dummy]
307
+ )
308
+
309
+ # Limit recursive by maxdepth
310
+ fs.put(
311
+ local_join(source, "subdir", glob),
312
+ t,
313
+ recursive=recursive,
314
+ maxdepth=1,
315
+ )
316
+ assert fs.isfile(fs_join(target, "subfile1"))
317
+ assert fs.isfile(fs_join(target, "subfile2"))
318
+ assert not fs.exists(fs_join(target, "nesteddir"))
319
+ assert not fs.exists(fs_join(target, "subdir"))
320
+
321
+ fs.rm(
322
+ [
323
+ fs_join(target, "subfile1"),
324
+ fs_join(target, "subfile2"),
325
+ ],
326
+ recursive=True,
327
+ )
328
+ assert fs.ls(target, detail=False) == (
329
+ [] if supports_empty_directories else [dummy]
330
+ )
331
+
332
+ def test_put_glob_to_new_directory(
333
+ self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
334
+ ):
335
+ # Copy scenario 1h
336
+ source = local_bulk_operations_scenario_0
337
+
338
+ target = fs_target
339
+ fs.mkdir(target)
340
+
341
+ for target_slash in [False, True]:
342
+ t = fs_join(target, "newdir")
343
+ if target_slash:
344
+ t += "/"
345
+
346
+ # Without recursive
347
+ fs.put(local_join(source, "subdir", "*"), t)
348
+ assert fs.isdir(fs_join(target, "newdir"))
349
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
350
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
351
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
352
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
353
+ assert not fs.exists(fs_join(target, "subdir"))
354
+ assert not fs.exists(fs_join(target, "newdir", "subdir"))
355
+
356
+ fs.rm(fs_join(target, "newdir"), recursive=True)
357
+ assert not fs.exists(fs_join(target, "newdir"))
358
+
359
+ # With recursive
360
+ for glob, recursive in zip(["*", "**"], [True, False]):
361
+ fs.put(local_join(source, "subdir", glob), t, recursive=recursive)
362
+ assert fs.isdir(fs_join(target, "newdir"))
363
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
364
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
365
+ assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
366
+ assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
367
+ assert not fs.exists(fs_join(target, "subdir"))
368
+ assert not fs.exists(fs_join(target, "newdir", "subdir"))
369
+
370
+ fs.rm(fs_join(target, "newdir"), recursive=True)
371
+ assert not fs.exists(fs_join(target, "newdir"))
372
+
373
+ # Limit recursive by maxdepth
374
+ fs.put(
375
+ local_join(source, "subdir", glob),
376
+ t,
377
+ recursive=recursive,
378
+ maxdepth=1,
379
+ )
380
+ assert fs.isdir(fs_join(target, "newdir"))
381
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
382
+ assert fs.isfile(fs_join(target, "newdir", "subfile2"))
383
+ assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
384
+ assert not fs.exists(fs_join(target, "subdir"))
385
+ assert not fs.exists(fs_join(target, "newdir", "subdir"))
386
+
387
+ fs.rm(fs_join(target, "newdir"), recursive=True)
388
+ assert not fs.exists(fs_join(target, "newdir"))
389
+
390
+ @pytest.mark.parametrize(
391
+ GLOB_EDGE_CASES_TESTS["argnames"],
392
+ GLOB_EDGE_CASES_TESTS["argvalues"],
393
+ )
394
+ def test_put_glob_edge_cases(
395
+ self,
396
+ path,
397
+ recursive,
398
+ maxdepth,
399
+ expected,
400
+ fs,
401
+ fs_join,
402
+ fs_target,
403
+ local_glob_edge_cases_files,
404
+ local_join,
405
+ fs_sanitize_path,
406
+ ):
407
+ # Copy scenario 1g
408
+ source = local_glob_edge_cases_files
409
+
410
+ target = fs_target
411
+
412
+ for new_dir, target_slash in product([True, False], [True, False]):
413
+ fs.mkdir(target)
414
+
415
+ t = fs_join(target, "newdir") if new_dir else target
416
+ t = t + "/" if target_slash else t
417
+
418
+ fs.put(local_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
419
+
420
+ output = fs.find(target)
421
+ if new_dir:
422
+ prefixed_expected = [
423
+ fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
424
+ ]
425
+ else:
426
+ prefixed_expected = [
427
+ fs_sanitize_path(fs_join(target, p)) for p in expected
428
+ ]
429
+ assert sorted(output) == sorted(prefixed_expected)
430
+
431
+ try:
432
+ fs.rm(target, recursive=True)
433
+ except FileNotFoundError:
434
+ pass
435
+
436
+ def test_put_list_of_files_to_existing_directory(
437
+ self,
438
+ fs,
439
+ fs_join,
440
+ fs_target,
441
+ local_join,
442
+ local_bulk_operations_scenario_0,
443
+ supports_empty_directories,
444
+ ):
445
+ # Copy scenario 2a
446
+ source = local_bulk_operations_scenario_0
447
+
448
+ target = fs_target
449
+ fs.mkdir(target)
450
+ if not supports_empty_directories:
451
+ # Force target directory to exist by adding a dummy file
452
+ dummy = fs_join(target, "dummy")
453
+ fs.touch(dummy)
454
+ assert fs.isdir(target)
455
+
456
+ source_files = [
457
+ local_join(source, "file1"),
458
+ local_join(source, "file2"),
459
+ local_join(source, "subdir", "subfile1"),
460
+ ]
461
+
462
+ for target_slash in [False, True]:
463
+ t = target + "/" if target_slash else target
464
+
465
+ fs.put(source_files, t)
466
+ assert fs.isfile(fs_join(target, "file1"))
467
+ assert fs.isfile(fs_join(target, "file2"))
468
+ assert fs.isfile(fs_join(target, "subfile1"))
469
+
470
+ fs.rm(
471
+ [
472
+ fs_join(target, "file1"),
473
+ fs_join(target, "file2"),
474
+ fs_join(target, "subfile1"),
475
+ ],
476
+ recursive=True,
477
+ )
478
+ assert fs.ls(target, detail=False) == (
479
+ [] if supports_empty_directories else [dummy]
480
+ )
481
+
482
+ def test_put_list_of_files_to_new_directory(
483
+ self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
484
+ ):
485
+ # Copy scenario 2b
486
+ source = local_bulk_operations_scenario_0
487
+
488
+ target = fs_target
489
+ fs.mkdir(target)
490
+
491
+ source_files = [
492
+ local_join(source, "file1"),
493
+ local_join(source, "file2"),
494
+ local_join(source, "subdir", "subfile1"),
495
+ ]
496
+
497
+ fs.put(source_files, fs_join(target, "newdir") + "/") # Note trailing slash
498
+ assert fs.isdir(fs_join(target, "newdir"))
499
+ assert fs.isfile(fs_join(target, "newdir", "file1"))
500
+ assert fs.isfile(fs_join(target, "newdir", "file2"))
501
+ assert fs.isfile(fs_join(target, "newdir", "subfile1"))
502
+
503
+ def test_put_directory_recursive(
504
+ self, fs, fs_join, fs_target, local_fs, local_join, local_path
505
+ ):
506
+ # https://github.com/fsspec/filesystem_spec/issues/1062
507
+ # Recursive cp/get/put of source directory into non-existent target directory.
508
+ src = local_join(local_path, "src")
509
+ src_file = local_join(src, "file")
510
+ local_fs.mkdir(src)
511
+ local_fs.touch(src_file)
512
+
513
+ target = fs_target
514
+
515
+ # put without slash
516
+ assert not fs.exists(target)
517
+ for loop in range(2):
518
+ fs.put(src, target, recursive=True)
519
+ assert fs.isdir(target)
520
+
521
+ if loop == 0:
522
+ assert fs.isfile(fs_join(target, "file"))
523
+ assert not fs.exists(fs_join(target, "src"))
524
+ else:
525
+ assert fs.isfile(fs_join(target, "file"))
526
+ assert fs.isdir(fs_join(target, "src"))
527
+ assert fs.isfile(fs_join(target, "src", "file"))
528
+
529
+ fs.rm(target, recursive=True)
530
+
531
+ # put with slash
532
+ assert not fs.exists(target)
533
+ for loop in range(2):
534
+ fs.put(src + "/", target, recursive=True)
535
+ assert fs.isdir(target)
536
+ assert fs.isfile(fs_join(target, "file"))
537
+ assert not fs.exists(fs_join(target, "src"))
538
+
539
+ def test_put_directory_without_files_with_same_name_prefix(
540
+ self,
541
+ fs,
542
+ fs_join,
543
+ fs_target,
544
+ local_join,
545
+ local_dir_and_file_with_same_name_prefix,
546
+ supports_empty_directories,
547
+ ):
548
+ # Create the test dirs
549
+ source = local_dir_and_file_with_same_name_prefix
550
+ target = fs_target
551
+
552
+ # Test without glob
553
+ fs.put(local_join(source, "subdir"), fs_target, recursive=True)
554
+
555
+ assert fs.isfile(fs_join(fs_target, "subfile.txt"))
556
+ assert not fs.isfile(fs_join(fs_target, "subdir.txt"))
557
+
558
+ fs.rm([fs_join(target, "subfile.txt")])
559
+ if supports_empty_directories:
560
+ assert fs.ls(target) == []
561
+ else:
562
+ assert not fs.exists(target)
563
+
564
+ # Test with glob
565
+ fs.put(local_join(source, "subdir*"), fs_target, recursive=True)
566
+
567
+ assert fs.isdir(fs_join(fs_target, "subdir"))
568
+ assert fs.isfile(fs_join(fs_target, "subdir", "subfile.txt"))
569
+ assert fs.isfile(fs_join(fs_target, "subdir.txt"))
570
+
571
+ def test_copy_with_source_and_destination_as_list(
572
+ self, fs, fs_target, fs_join, local_join, local_10_files_with_hashed_names
573
+ ):
574
+ # Create the test dir
575
+ source = local_10_files_with_hashed_names
576
+ target = fs_target
577
+
578
+ # Create list of files for source and destination
579
+ source_files = []
580
+ destination_files = []
581
+ for i in range(10):
582
+ hashed_i = md5(str(i).encode("utf-8")).hexdigest()
583
+ source_files.append(local_join(source, f"{hashed_i}.txt"))
584
+ destination_files.append(fs_join(target, f"{hashed_i}.txt"))
585
+
586
+ # Copy and assert order was kept
587
+ fs.put(lpath=source_files, rpath=destination_files)
588
+
589
+ for i in range(10):
590
+ file_content = fs.cat(destination_files[i]).decode("utf-8")
591
+ assert file_content == str(i)
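These abstract copy/get/put suites are not collected on their own; a concrete filesystem is expected to supply the `fs` and `fs_path` fixtures and mix the classes in. Below is a minimal sketch of that wiring, assuming the `AbstractFixtures` base exported from `fsspec.tests.abstract` derives `fs_join`, `fs_target` and the scenario fixtures from those two; the class and fixture bodies here are illustrative, not taken from this upload.

```python
# Illustrative sketch only: wiring a concrete filesystem into the abstract suites.
import pytest

import fsspec.tests.abstract as abstract  # assumed to export AbstractFixtures and the test classes
from fsspec.implementations.memory import MemoryFileSystem


class MemoryFixtures(abstract.AbstractFixtures):
    @pytest.fixture
    def fs(self):
        m = MemoryFileSystem()
        m.store.clear()  # start each test from an empty store
        return m

    @pytest.fixture
    def fs_path(self):
        return ""  # root path of the memory filesystem


class TestMemoryGet(abstract.AbstractGetTests, MemoryFixtures):
    pass


class TestMemoryPut(abstract.AbstractPutTests, MemoryFixtures):
    pass
```

pytest then collects the inherited `test_get_*`/`test_put_*` methods with the fixtures supplied by `MemoryFixtures`, so the same scenarios exercise each registered implementation.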
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
1
+ uv
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019 Paul O'Leary McCann
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/LICENSE.mecab ADDED
@@ -0,0 +1,29 @@
1
+ Copyright (c) 2001-2008, Taku Kudo
2
+ Copyright (c) 2004-2008, Nippon Telegraph and Telephone Corporation
3
+ All rights reserved.
4
+
5
+ Redistribution and use in source and binary forms, with or without modification, are
6
+ permitted provided that the following conditions are met:
7
+
8
+ * Redistributions of source code must retain the above
9
+ copyright notice, this list of conditions and the
10
+ following disclaimer.
11
+
12
+ * Redistributions in binary form must reproduce the above
13
+ copyright notice, this list of conditions and the
14
+ following disclaimer in the documentation and/or other
15
+ materials provided with the distribution.
16
+
17
+ * Neither the name of the Nippon Telegraph and Telegraph Corporation
18
+ nor the names of its contributors may be used to endorse or
19
+ promote products derived from this software without specific
20
+ prior written permission.
21
+
22
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
23
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24
+ PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
25
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
28
+ TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29
+ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/METADATA ADDED
@@ -0,0 +1,157 @@
1
+ Metadata-Version: 2.1
2
+ Name: fugashi
3
+ Version: 1.4.0
4
+ Summary: A Cython MeCab wrapper for fast, pythonic Japanese tokenization.
5
+ Home-page: https://github.com/polm/fugashi
6
+ Author: Paul O'Leary McCann
7
+ Author-email: [email protected]
8
+ License: MIT
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Natural Language :: Japanese
11
+ Requires-Python: >=3.8
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ License-File: LICENSE.mecab
15
+ Provides-Extra: unidic
16
+ Requires-Dist: unidic; extra == "unidic"
17
+ Provides-Extra: unidic-lite
18
+ Requires-Dist: unidic-lite; extra == "unidic-lite"
19
+
20
+ [![Open in Streamlit](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://fugashi.streamlit.app)
21
+ [![Current PyPI packages](https://badge.fury.io/py/fugashi.svg)](https://pypi.org/project/fugashi/)
22
+ ![Test Status](https://github.com/polm/fugashi/workflows/test-manylinux/badge.svg)
23
+ [![PyPI - Downloads](https://img.shields.io/pypi/dm/fugashi)](https://pypi.org/project/fugashi/)
24
+ ![Supported Platforms](https://img.shields.io/badge/platforms-linux%20macosx%20windows-blue)
25
+
26
+ # fugashi
27
+
28
+ <img src="https://github.com/polm/fugashi/raw/master/fugashi.png" width=125 height=125 alt="fugashi by Irasutoya" />
29
+
30
+ fugashi is a Cython wrapper for [MeCab](https://taku910.github.io/mecab/), a
31
+ Japanese tokenizer and morphological analysis tool. Wheels are provided for
32
+ Linux, OSX (Intel), and Win64, and UniDic is [easy to install](#installing-a-dictionary).
33
+
34
+ **Issues do not need to be written in English.**
35
+
36
+ Check out the [interactive demo][], see the [blog post](https://www.dampfkraft.com/nlp/fugashi.html) for background
37
+ on why fugashi exists and some of the design decisions, or see [this
38
+ guide][guide] for a basic introduction to Japanese tokenization.
39
+
40
+ [guide]: https://www.dampfkraft.com/nlp/how-to-tokenize-japanese.html
41
+ [interactive demo]: https://fugashi.streamlit.app
42
+
43
+ If you are on a platform for which wheels are not provided, you'll need to
44
+ install MeCab first. It's recommended you install [from
45
+ source](https://github.com/taku910/mecab). If you need to build from source on
46
+ Windows, [@chezou's fork](https://github.com/chezou/mecab) is recommended; see
47
+ [issue #44](https://github.com/polm/fugashi/issues/44#issuecomment-954426115)
48
+ for an explanation of the problems with the official repo.
49
+
50
+ Known platforms without wheels:
51
+
52
+ - musl-based distros like alpine [#77](https://github.com/polm/fugashi/issues/77)
53
+ - PowerPC
54
+ - Windows 32bit
55
+
56
+ ## Usage
57
+
58
+ ```python
59
+ from fugashi import Tagger
60
+
61
+ tagger = Tagger('-Owakati')
62
+ text = "麩菓子は、麩を主材料とした日本の菓子。"
63
+ tagger.parse(text)
64
+ # => '麩 菓子 は 、 麩 を 主材 料 と し た 日本 の 菓子 。'
65
+ for word in tagger(text):
66
+ print(word, word.feature.lemma, word.pos, sep='\t')
67
+ # "feature" is the Unidic feature data as a named tuple
68
+ ```
69
+
70
+ ## Installing a Dictionary
71
+
72
+ fugashi requires a dictionary. [UniDic](https://unidic.ninjal.ac.jp/) is
73
+ recommended, and two easy-to-install versions are provided.
74
+
75
+ - [unidic-lite](https://github.com/polm/unidic-lite), a slightly modified version 2.1.2 of Unidic (from 2013) that's relatively small
76
+ - [unidic](https://github.com/polm/unidic-py), the latest UniDic 3.1.0, which is 770MB on disk and requires a separate download step
77
+
78
+ If you just want to make sure things work you can start with `unidic-lite`, but
79
+ for more serious processing `unidic` is recommended. For production use you'll
80
+ generally want to generate your own dictionary too; for details see the [MeCab
81
+ documentation](https://taku910.github.io/mecab/learn.html).
82
+
83
+ To get either of these dictionaries, you can install them directly using `pip`
84
+ or do the below:
85
+
86
+ ```sh
87
+ pip install 'fugashi[unidic-lite]'
88
+
89
+ # The full version of UniDic requires a separate download step
90
+ pip install 'fugashi[unidic]'
91
+ python -m unidic download
92
+ ```
93
+
94
+ For more information on the different MeCab dictionaries available, see [this article](https://www.dampfkraft.com/nlp/japanese-tokenizer-dictionaries.html).
95
+
96
+ ## Dictionary Use
97
+
98
+ fugashi is written with the assumption you'll use Unidic to process Japanese,
99
+ but it supports arbitrary dictionaries.
100
+
101
+ If you're using a dictionary besides Unidic you can use the GenericTagger like this:
102
+
103
+ ```python
104
+ from fugashi import GenericTagger
105
+ tagger = GenericTagger()
106
+
107
+ # parse can be used as normal
108
+ tagger.parse('something')
109
+ # features from the dictionary can be accessed by field numbers
110
+ for word in tagger(text):
111
+ print(word.surface, word.feature[0])
112
+ ```
113
+
114
+ You can also create a dictionary wrapper to get feature information as a named tuple.
115
+
116
+ ```python
117
+ from fugashi import GenericTagger, create_feature_wrapper
118
+ CustomFeatures = create_feature_wrapper('CustomFeatures', 'alpha beta gamma')
119
+ tagger = GenericTagger(wrapper=CustomFeatures)
120
+ for word in tagger.parseToNodeList(text):
121
+ print(word.surface, word.feature.alpha)
122
+ ```
123
+
124
+ ## Citation
125
+
126
+ If you use fugashi in research, it would be appreciated if you cite this paper. You can read it at [the ACL Anthology](https://www.aclweb.org/anthology/2020.nlposs-1.7/) or [on Arxiv](https://arxiv.org/abs/2010.06858).
127
+
128
+ @inproceedings{mccann-2020-fugashi,
129
+ title = "fugashi, a Tool for Tokenizing {J}apanese in Python",
130
+ author = "McCann, Paul",
131
+ booktitle = "Proceedings of Second Workshop for NLP Open Source Software (NLP-OSS)",
132
+ month = nov,
133
+ year = "2020",
134
+ address = "Online",
135
+ publisher = "Association for Computational Linguistics",
136
+ url = "https://www.aclweb.org/anthology/2020.nlposs-1.7",
137
+ pages = "44--51",
138
+ abstract = "Recent years have seen an increase in the number of large-scale multilingual NLP projects. However, even in such projects, languages with special processing requirements are often excluded. One such language is Japanese. Japanese is written without spaces, tokenization is non-trivial, and while high quality open source tokenizers exist they can be hard to use and lack English documentation. This paper introduces fugashi, a MeCab wrapper for Python, and gives an introduction to tokenizing Japanese.",
139
+ }
140
+
141
+ ## Alternatives
142
+
143
+ If you have a problem with fugashi feel free to open an issue. However, there
144
+ are some cases where it might be better to use a different library.
145
+
146
+ - If you don't want to deal with installing MeCab at all, try [SudachiPy](https://github.com/WorksApplications/sudachi.rs).
147
+ - If you need to work with Korean, try [pymecab-ko](https://github.com/NoUnique/pymecab-ko) or [KoNLPy](https://konlpy.org/en/latest/).
148
+
149
+ ## License and Copyright Notice
150
+
151
+ fugashi is released under the terms of the [MIT license](./LICENSE). Please
152
+ copy it far and wide.
153
+
154
+ fugashi is a wrapper for MeCab, and fugashi wheels include MeCab binaries.
155
+ MeCab is copyrighted free software by Taku Kudo `<[email protected]>` and Nippon
156
+ Telegraph and Telephone Corporation, and is redistributed under the [BSD
157
+ License](./LICENSE.mecab).
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/RECORD ADDED
@@ -0,0 +1,16 @@
1
+ ..\..\Scripts\fugashi-build-dict.exe,sha256=LZb2-amxDd5IoFQbVAzDDziSB4U-xnLpaTEW1UJqCRU,41432
2
+ ..\..\Scripts\fugashi-info.exe,sha256=gy05XrnUBSd9AORdNnNmHG6DkudXGw03U4uul0n6NTo,41420
3
+ ..\..\Scripts\fugashi.exe,sha256=dgp3IR-hWxA25GVyVKrbnMck3KTU7emSHsucry_rwZY,41420
4
+ ..\..\lib\site-packages\fugashi\libmecab.dll,sha256=2N3AeRQ3zoxKGHrnxpovaDzkI2g7el7P2hxM70NsHKs,1910784
5
+ fugashi-1.4.0.dist-info/LICENSE,sha256=2vfu3p70KKWeqFRofnatHm5flYb_aZjXy2GJqHiQRvk,1097
6
+ fugashi-1.4.0.dist-info/LICENSE.mecab,sha256=Pb-TvC2ag2gCYgej6C7fwu67r-83z1cBIU9C_dP4pxk,1631
7
+ fugashi-1.4.0.dist-info/METADATA,sha256=lPJ1OXNya8_ikeo7cUopng_cDpk8Np9LOdULri2-X1g,7059
8
+ fugashi-1.4.0.dist-info/RECORD,,
9
+ fugashi-1.4.0.dist-info/WHEEL,sha256=zq3MnTB53_Huh0eFGROKhLNn5cmUbG6gUFCG6-LWXTY,99
10
+ fugashi-1.4.0.dist-info/entry_points.txt,sha256=jV282mMQTVkhqOVFTdm_ZQ03pJndByW2JtrSa_a2Wms,121
11
+ fugashi-1.4.0.dist-info/top_level.txt,sha256=1CQTgPUFi4hjTQg2nHdIR-oH6EfyXtpLhiUglCmuOoM,8
12
+ fugashi-1.4.0.dist-info\INSTALLER,sha256=5hhM4Q4mYTT9z6QB6PGpUAW81PGNFrYrdXMj4oM_6ak,2
13
+ fugashi-1.4.0.dist-info\REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ fugashi/__init__.py,sha256=KW98SIOE-cAtzst_n-YNtEhYznwWLTX6tm_5XJJlOPA,26
15
+ fugashi/cli.py,sha256=wwLj3Nkl1Dtx1SjDeAAaYB3KWsRp5PALqmhdvKN4ZAk,1553
16
+ fugashi/fugashi.cp39-win_amd64.pyd,sha256=XRyL_8gC8WWR6OLV-mdqnFtQHJ387AqiLnT6aiQzVag,112640
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/REQUESTED ADDED
File without changes
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (75.3.0)
3
+ Root-Is-Purelib: false
4
+ Tag: cp39-cp39-win_amd64
5
+
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/entry_points.txt ADDED
@@ -0,0 +1,4 @@
1
+ [console_scripts]
2
+ fugashi = fugashi.cli:main
3
+ fugashi-build-dict = fugashi.cli:build_dict
4
+ fugashi-info = fugashi.cli:info
.venv/Lib/site-packages/fugashi-1.4.0.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
1
+ fugashi
.venv/Lib/site-packages/fugashi/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from .fugashi import *
2
+
.venv/Lib/site-packages/fugashi/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (203 Bytes).
.venv/Lib/site-packages/fugashi/cli.py ADDED
@@ -0,0 +1,47 @@
1
+ from fugashi import GenericTagger, Tagger, build_dictionary
2
+ import sys
3
+ import fileinput
4
+
5
+ def main():
6
+ """
7
+ This is a simple wrapper for fugashi so you can test it from the command line.
8
+ Like the mecab binary, it treats each line of stdin as one sentence. You can
9
+ pass tagger arguments here too.
10
+ """
11
+ args = ' '.join(sys.argv[1:])
12
+
13
+ # This should work if you specify a different dictionary,
14
+ # but it should also work with the pip unidic.
15
+ # Try the GenericTagger and then try the Unidic tagger.
16
+ try:
17
+ tagger = GenericTagger(args, quiet=True)
18
+ except RuntimeError:
19
+ tagger = Tagger(args)
20
+
21
+ for line in fileinput.input([]):
22
+ print(tagger.parse(line.strip()))
23
+
24
+ def info():
25
+ """Print configuration info."""
26
+ args = ' '.join(sys.argv[1:])
27
+ try:
28
+ tagger = GenericTagger(args, quiet=True)
29
+ except RuntimeError:
30
+ tagger = Tagger(args)
31
+ #TODO get the fugashi version here too
32
+ print("Fugashi dictionary info:")
33
+ print("-----")
34
+ for di in tagger.dictionary_info:
35
+ for field in 'version size charset filename'.split():
36
+ print( (field + ':').ljust(10), di[field])
37
+ print('-----')
38
+
39
+ def build_dict():
40
+ """EXPERIMENTAL A wrapper for MeCab's user dictionary building command.
41
+
42
+ This also defaults to utf8.
43
+ """
44
+ # TODO simplify using pip-installed dictionaries as base
45
+ args = sys.argv[0] + " -f utf8 -t utf8 " + ' '.join(sys.argv[1:])
46
+ print(args)
47
+ build_dictionary(args)
.venv/Lib/site-packages/fugashi/fugashi.cp39-win_amd64.pyd ADDED
Binary file (113 kB).
.venv/Lib/site-packages/functorch/_C.cp39-win_amd64.pyd ADDED
Binary file (322 kB).
.venv/Lib/site-packages/functorch/__init__.py ADDED
@@ -0,0 +1,39 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+ import torch
7
+ from torch._functorch.deprecated import (
8
+ combine_state_for_ensemble,
9
+ functionalize,
10
+ grad,
11
+ grad_and_value,
12
+ hessian,
13
+ jacfwd,
14
+ jacrev,
15
+ jvp,
16
+ make_functional,
17
+ make_functional_with_buffers,
18
+ vjp,
19
+ vmap,
20
+ )
21
+
22
+ # utilities. Maybe these should go in their own namespace in the future?
23
+ from torch._functorch.make_functional import (
24
+ FunctionalModule,
25
+ FunctionalModuleWithBuffers,
26
+ )
27
+
28
+ # Was never documented
29
+ from torch._functorch.python_key import make_fx
30
+
31
+
32
+ # Top-level APIs. Please think carefully before adding something to the
33
+ # top-level namespace:
34
+ # - private helper functions should go into torch._functorch
35
+ # - very experimental things should go into functorch.experimental
36
+ # - compilation related things should go into functorch.compile
37
+
38
+
39
+ __version__ = torch.__version__
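Since this module only re-exports the deprecated top-level names, here is a short sketch of how they are typically exercised; newer code is generally expected to call `torch.func` directly.

```python
# Illustrative sketch of the legacy functorch API re-exported above.
import torch
from functorch import grad, vmap


def loss(x):
    # scalar-valued, so grad() is well-defined
    return (x ** 2).sum()


x = torch.randn(3)
print(grad(loss)(x))       # gradient of sum(x**2), i.e. 2 * x
print(vmap(torch.sin)(x))  # elementwise sin applied via vmap over dim 0
```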
.venv/Lib/site-packages/functorch/_src/make_functional/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ # This file has moved to under torch/_functorch. It is not public API.
2
+ # If you are not a PyTorch developer and you are relying on the following
3
+ # imports, please file an issue.
4
+ from torch._functorch.make_functional import _swap_state
.venv/Lib/site-packages/functorch/_src/vmap/__init__.py ADDED
@@ -0,0 +1,16 @@
+ # This file has moved to under torch/_functorch. It is not public API.
+ # If you are not a PyTorch developer and you are relying on the following
+ # imports, please file an issue.
+ from torch._functorch.vmap import (
+ _add_batch_dim,
+ _broadcast_to_and_flatten,
+ _create_batched_inputs,
+ _get_name,
+ _process_batched_inputs,
+ _remove_batch_dim,
+ _unwrap_batched,
+ _validate_and_get_batch_size,
+ Tensor,
+ tree_flatten,
+ tree_unflatten,
+ )
.venv/Lib/site-packages/functorch/compile/__init__.py ADDED
@@ -0,0 +1,30 @@
+ from torch._functorch import config
+ from torch._functorch.aot_autograd import (
+ aot_function,
+ aot_module,
+ aot_module_simplified,
+ compiled_function,
+ compiled_module,
+ get_aot_compilation_context,
+ get_aot_graph_name,
+ get_graph_being_compiled,
+ make_boxed_compiler,
+ make_boxed_func,
+ )
+ from torch._functorch.compilers import (
+ debug_compile,
+ default_decompositions,
+ draw_graph_compile,
+ memory_efficient_fusion,
+ nnc_jit,
+ nop,
+ print_compile,
+ ts_compile,
+ )
+ from torch._functorch.fx_minifier import minifier
+ from torch._functorch.partitioners import (
+ default_partition,
+ draw_graph,
+ min_cut_rematerialization_partition,
+ )
+ from torch._functorch.python_key import pythonkey_decompose
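functorch.compile re-exports the AOTAutograd entry points. A minimal sketch of tracing a function with aot_function and the nop compiler listed above, assuming these entry points are still present in your PyTorch version:

    import torch
    from functorch.compile import aot_function, nop

    def f(x, y):
        return (x * y).sin().sum()

    # nop returns each traced FX graph unchanged, so execution stays eager
    # while AOTAutograd still splits the function into forward/backward graphs.
    compiled_f = aot_function(f, fw_compiler=nop, bw_compiler=nop)

    x = torch.randn(4, requires_grad=True)
    y = torch.randn(4, requires_grad=True)
    compiled_f(x, y).backward()
    print(x.grad)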
.venv/Lib/site-packages/functorch/dim/batch_tensor.py ADDED
@@ -0,0 +1,26 @@
+ # Copyright (c) Facebook, Inc. and its affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+ from contextlib import contextmanager
+
+ from torch._C._functorch import _vmap_add_layers, _vmap_remove_layers
+
+
+ _enabled = False
+
+
+ @contextmanager
+ def _enable_layers(dims):
+ global _enabled
+ assert not _enabled
+ input = sorted((d._level, d.size) for d in dims if not isinstance(d, int))
+ n = len(input)
+ try:
+ _vmap_add_layers(input)
+ _enabled = True
+ yield
+ finally:
+ _enabled = False
+ _vmap_remove_layers(n)
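_enable_layers is a push/pop guard: assert the flag is clear, push one vmap layer per first-class dim, and always pop the same number again even if the body raises. A dependency-free sketch of that guard shape (the names below are illustrative, not functorch API):

    from contextlib import contextmanager

    _enabled = False

    @contextmanager
    def _guarded(items, push, pop):
        global _enabled
        assert not _enabled            # like the original, not re-entrant
        n = len(items)
        try:
            push(items)                # stand-in for _vmap_add_layers
            _enabled = True
            yield
        finally:
            _enabled = False
            pop(n)                     # stand-in for _vmap_remove_layers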
.venv/Lib/site-packages/functorch/dim/delayed_mul_tensor.py ADDED
@@ -0,0 +1,77 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+ import torch
7
+
8
+ from . import _Tensor, Tensor
9
+ from .reference import _dims, _enable_layers, llist, ltuple
10
+
11
+
12
+ class DelayedMulTensor(_Tensor):
13
+ def __init__(self, lhs, rhs):
14
+ self._lhs, self._rhs = lhs, rhs
15
+ self._data = None
16
+ self._levels_data = None
17
+ self._has_device = lhs._has_device or rhs._has_device
18
+ self._batchtensor_data = None
19
+ self._tensor_data = None
20
+
21
+ @property
22
+ def _levels(self):
23
+ if self._levels_data is None:
24
+ levels = llist(self._lhs._levels)
25
+ for l in self._rhs._levels:
26
+ if l not in levels:
27
+ levels.append(l)
28
+ self._levels_data = ltuple(levels)
29
+ return self._levels_data
30
+
31
+ @property
32
+ def _batchtensor(self):
33
+ if self._batchtensor_data is None:
34
+ with _enable_layers(self._levels):
35
+ print("bt multiply fallback")
36
+ self._batchtensor_data = self._lhs._batchtensor * self._rhs._batchtensor
37
+ return self._batchtensor_data
38
+
39
+ @property
40
+ def _tensor(self):
41
+ if self._tensor_data is None:
42
+ self._tensor_data = Tensor.from_batched(
43
+ self._batchtensor, self._has_device
44
+ )._tensor
45
+ return self._tensor_data
46
+
47
+ @property
48
+ def ndim(self):
49
+ return self._batchtensor.ndim
50
+
51
+ @property
52
+ def dims(self):
53
+ return ltuple(super().dims)
54
+
55
+ def sum(self, dim):
56
+ dims = _dims(dim, 0, False, False)
57
+ n = ord("a")
58
+ all_levels = self._levels
59
+
60
+ def to_char(d):
61
+ return chr(n + all_levels.index(d))
62
+
63
+ plhs, levelslhs = self._lhs._tensor, self._lhs._levels
64
+ prhs, levelsrhs = self._rhs._tensor, self._rhs._levels
65
+ new_dims = tuple(d for d in self.dims if d not in dims)
66
+ new_levels = [l for l in self._levels if l not in dims]
67
+ fmt = "".join(
68
+ [
69
+ *(to_char(d) for d in levelslhs),
70
+ ",",
71
+ *(to_char(d) for d in levelsrhs),
72
+ "->",
73
+ *(to_char(d) for d in new_levels),
74
+ ]
75
+ )
76
+ result_data = torch.einsum(fmt, (plhs, prhs))
77
+ return Tensor.from_positional(result_data, new_levels, True)
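DelayedMulTensor.sum fuses the postponed elementwise multiply with the reduction by emitting a single einsum over the combined levels. A self-contained illustration of that fusion in plain torch (shapes chosen arbitrarily):

    import torch

    lhs = torch.randn(4, 5)   # levels (i, j)
    rhs = torch.randn(5, 6)   # levels (j, k)

    # naive: materialize the broadcasted product, then reduce over j
    naive = (lhs.unsqueeze(-1) * rhs.unsqueeze(0)).sum(dim=1)

    # fused: one einsum, which is what the sum() above builds from its levels
    fused = torch.einsum("ij,jk->ik", lhs, rhs)

    assert torch.allclose(naive, fused, atol=1e-5)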
.venv/Lib/site-packages/functorch/dim/dim.py ADDED
@@ -0,0 +1,121 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+ import dis
7
+ import inspect
8
+ from dataclasses import dataclass
9
+ from typing import Union
10
+
11
+ from . import DimList
12
+
13
+
14
+ _vmap_levels = []
15
+
16
+
17
+ @dataclass
18
+ class LevelInfo:
19
+ level: int
20
+ alive: bool = True
21
+
22
+
23
+ class Dim:
24
+ def __init__(self, name: str, size: Union[None, int] = None):
25
+ self.name = name
26
+ self._size = None
27
+ self._vmap_level = None
28
+ if size is not None:
29
+ self.size = size
30
+
31
+ def __del__(self):
32
+ if self._vmap_level is not None:
33
+ _vmap_active_levels[self._vmap_stack].alive = False # noqa: F821
34
+ while (
35
+ not _vmap_levels[-1].alive
36
+ and current_level() == _vmap_levels[-1].level # noqa: F821
37
+ ):
38
+ _vmap_decrement_nesting() # noqa: F821
39
+ _vmap_levels.pop()
40
+
41
+ @property
42
+ def size(self):
43
+ assert self.is_bound
44
+ return self._size
45
+
46
+ @size.setter
47
+ def size(self, size: int):
48
+ from . import DimensionBindError
49
+
50
+ if self._size is None:
51
+ self._size = size
52
+ self._vmap_level = _vmap_increment_nesting(size, "same") # noqa: F821
53
+ self._vmap_stack = len(_vmap_levels)
54
+ _vmap_levels.append(LevelInfo(self._vmap_level))
55
+
56
+ elif self._size != size:
57
+ raise DimensionBindError(
58
+ f"Dim '{self}' previously bound to a dimension of size {self._size} cannot bind to a dimension of size {size}"
59
+ )
60
+
61
+ @property
62
+ def is_bound(self):
63
+ return self._size is not None
64
+
65
+ def __repr__(self):
66
+ return self.name
67
+
68
+
69
+ def extract_name(inst):
70
+ assert inst.opname == "STORE_FAST" or inst.opname == "STORE_NAME"
71
+ return inst.argval
72
+
73
+
74
+ _cache = {}
75
+
76
+
77
+ def dims(lists=0):
78
+ frame = inspect.currentframe()
79
+ assert frame is not None
80
+ calling_frame = frame.f_back
81
+ assert calling_frame is not None
82
+ code, lasti = calling_frame.f_code, calling_frame.f_lasti
83
+ key = (code, lasti)
84
+ if key not in _cache:
85
+ first = lasti // 2 + 1
86
+ instructions = list(dis.get_instructions(calling_frame.f_code))
87
+ unpack = instructions[first]
88
+
89
+ if unpack.opname == "STORE_FAST" or unpack.opname == "STORE_NAME":
90
+ # just a single dim, not a list
91
+ name = unpack.argval
92
+ ctor = Dim if lists == 0 else DimList
93
+ _cache[key] = lambda: ctor(name=name)
94
+ else:
95
+ assert unpack.opname == "UNPACK_SEQUENCE"
96
+ ndims = unpack.argval
97
+ names = tuple(
98
+ extract_name(instructions[first + 1 + i]) for i in range(ndims)
99
+ )
100
+ first_list = len(names) - lists
101
+ _cache[key] = lambda: tuple(
102
+ Dim(n) if i < first_list else DimList(name=n)
103
+ for i, n in enumerate(names)
104
+ )
105
+ return _cache[key]()
106
+
107
+
108
+ def _dim_set(positional, arg):
109
+ def convert(a):
110
+ if isinstance(a, Dim):
111
+ return a
112
+ else:
113
+ assert isinstance(a, int)
114
+ return positional[a]
115
+
116
+ if arg is None:
117
+ return positional
118
+ elif not isinstance(arg, (Dim, int)):
119
+ return tuple(convert(a) for a in arg)
120
+ else:
121
+ return (convert(arg),)
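dims() inspects the caller's bytecode (STORE_FAST / UNPACK_SEQUENCE) to learn how many names are being bound and returns that many Dim objects. A hedged usage sketch, assuming the first-class-dims frontend exposed as functorch.dim imports cleanly in your PyTorch build:

    import torch
    from functorch.dim import dims

    i, j = dims()             # two Dims; the count is inferred from the unpacking
    x = torch.randn(3, 4)

    xi = x[i, j]              # bind i=3 and j=4 as first-class dimensions
    print(i.size, j.size)     # 3 4
    print(xi.sum(j))          # reduce over a dim by identity rather than by index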
.venv/Lib/site-packages/functorch/dim/magic_trace.py ADDED
@@ -0,0 +1,42 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+ import os
7
+ import signal
8
+ import subprocess
9
+ from contextlib import contextmanager
10
+
11
+
12
+ @contextmanager
13
+ def magic_trace(output="trace.fxt", magic_trace_cache="/tmp/magic-trace"):
14
+ pid = os.getpid()
15
+ if not os.path.exists(magic_trace_cache):
16
+ print(f"Downloading magic_trace to: {magic_trace_cache}")
17
+ subprocess.run(
18
+ [
19
+ "wget",
20
+ "-O",
21
+ magic_trace_cache,
22
+ "-q",
23
+ "https://github.com/janestreet/magic-trace/releases/download/v1.0.2/magic-trace",
24
+ ]
25
+ )
26
+ subprocess.run(["chmod", "+x", magic_trace_cache])
27
+ args = [magic_trace_cache, "attach", "-pid", str(pid), "-o", output]
28
+ p = subprocess.Popen(args, stderr=subprocess.PIPE, encoding="utf-8")
29
+ while True:
30
+ x = p.stderr.readline()
31
+ print(x)
32
+ if "Attached" in x:
33
+ break
34
+ try:
35
+ yield
36
+ finally:
37
+ p.send_signal(signal.SIGINT)
38
+ r = p.wait()
39
+ print(p.stderr.read())
40
+ p.stderr.close()
41
+ if r != 0:
42
+ raise ValueError(f"magic_trace exited abnormally: {r}")
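magic_trace downloads the magic-trace binary on first use, attaches it to the current process for the duration of the with-block, and writes a trace file; it is Linux-only and needs perf support, so treat this as a hedged sketch rather than a portable recipe:

    import torch
    from functorch.dim.magic_trace import magic_trace

    def busy():
        a = torch.randn(256, 256)
        for _ in range(100):
            a = a @ a.softmax(dim=-1)
        return a

    with magic_trace(output="matmul.fxt"):   # open the .fxt file in Perfetto afterwards
        busy()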
.venv/Lib/site-packages/functorch/dim/op_properties.py ADDED
@@ -0,0 +1,312 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+ import torch
7
+
8
+
9
+ # pointwise operators can go through a faster pathway
10
+
11
+ tensor_magic_methods = ["add", ""]
12
+ pointwise_magic_methods_with_reverse = (
13
+ "add",
14
+ "sub",
15
+ "mul",
16
+ "floordiv",
17
+ "div",
18
+ "truediv",
19
+ "mod",
20
+ "pow",
21
+ "lshift",
22
+ "rshift",
23
+ "and",
24
+ "or",
25
+ "xor",
26
+ )
27
+ pointwise_magic_methods = (
28
+ *(x for m in pointwise_magic_methods_with_reverse for x in (m, "r" + m)),
29
+ "eq",
30
+ "gt",
31
+ "le",
32
+ "lt",
33
+ "ge",
34
+ "gt",
35
+ "ne",
36
+ "neg",
37
+ "pos",
38
+ "abs",
39
+ "invert",
40
+ "iadd",
41
+ "isub",
42
+ "imul",
43
+ "ifloordiv",
44
+ "idiv",
45
+ "itruediv",
46
+ "imod",
47
+ "ipow",
48
+ "ilshift",
49
+ "irshift",
50
+ "iand",
51
+ "ior",
52
+ "ixor",
53
+ "int",
54
+ "long",
55
+ "float",
56
+ "complex",
57
+ )
58
+
59
+ pointwise_methods = (*(f"__{m}__" for m in pointwise_magic_methods),)
60
+
61
+ pointwise = (
62
+ *(getattr(torch.Tensor, m) for m in pointwise_methods),
63
+ torch.nn.functional.dropout,
64
+ torch.where,
65
+ torch.Tensor.abs,
66
+ torch.abs,
67
+ torch.Tensor.acos,
68
+ torch.acos,
69
+ torch.Tensor.acosh,
70
+ torch.acosh,
71
+ torch.Tensor.add,
72
+ torch.add,
73
+ torch.Tensor.addcdiv,
74
+ torch.addcdiv,
75
+ torch.Tensor.addcmul,
76
+ torch.addcmul,
77
+ torch.Tensor.addr,
78
+ torch.addr,
79
+ torch.Tensor.angle,
80
+ torch.angle,
81
+ torch.Tensor.asin,
82
+ torch.asin,
83
+ torch.Tensor.asinh,
84
+ torch.asinh,
85
+ torch.Tensor.atan,
86
+ torch.atan,
87
+ torch.Tensor.atan2,
88
+ torch.atan2,
89
+ torch.Tensor.atanh,
90
+ torch.atanh,
91
+ torch.Tensor.bitwise_and,
92
+ torch.bitwise_and,
93
+ torch.Tensor.bitwise_left_shift,
94
+ torch.bitwise_left_shift,
95
+ torch.Tensor.bitwise_not,
96
+ torch.bitwise_not,
97
+ torch.Tensor.bitwise_or,
98
+ torch.bitwise_or,
99
+ torch.Tensor.bitwise_right_shift,
100
+ torch.bitwise_right_shift,
101
+ torch.Tensor.bitwise_xor,
102
+ torch.bitwise_xor,
103
+ torch.Tensor.ceil,
104
+ torch.ceil,
105
+ torch.celu,
106
+ torch.nn.functional.celu,
107
+ torch.Tensor.clamp,
108
+ torch.clamp,
109
+ torch.Tensor.clamp_max,
110
+ torch.clamp_max,
111
+ torch.Tensor.clamp_min,
112
+ torch.clamp_min,
113
+ torch.Tensor.copysign,
114
+ torch.copysign,
115
+ torch.Tensor.cos,
116
+ torch.cos,
117
+ torch.Tensor.cosh,
118
+ torch.cosh,
119
+ torch.Tensor.deg2rad,
120
+ torch.deg2rad,
121
+ torch.Tensor.digamma,
122
+ torch.digamma,
123
+ torch.Tensor.div,
124
+ torch.div,
125
+ torch.dropout,
126
+ torch.nn.functional.dropout,
127
+ torch.nn.functional.elu,
128
+ torch.Tensor.eq,
129
+ torch.eq,
130
+ torch.Tensor.erf,
131
+ torch.erf,
132
+ torch.Tensor.erfc,
133
+ torch.erfc,
134
+ torch.Tensor.erfinv,
135
+ torch.erfinv,
136
+ torch.Tensor.exp,
137
+ torch.exp,
138
+ torch.Tensor.exp2,
139
+ torch.exp2,
140
+ torch.Tensor.expm1,
141
+ torch.expm1,
142
+ torch.feature_dropout,
143
+ torch.Tensor.float_power,
144
+ torch.float_power,
145
+ torch.Tensor.floor,
146
+ torch.floor,
147
+ torch.Tensor.floor_divide,
148
+ torch.floor_divide,
149
+ torch.Tensor.fmod,
150
+ torch.fmod,
151
+ torch.Tensor.frac,
152
+ torch.frac,
153
+ torch.Tensor.frexp,
154
+ torch.frexp,
155
+ torch.Tensor.gcd,
156
+ torch.gcd,
157
+ torch.Tensor.ge,
158
+ torch.ge,
159
+ torch.nn.functional.gelu,
160
+ torch.nn.functional.glu,
161
+ torch.Tensor.gt,
162
+ torch.gt,
163
+ torch.Tensor.hardshrink,
164
+ torch.hardshrink,
165
+ torch.nn.functional.hardshrink,
166
+ torch.nn.functional.hardsigmoid,
167
+ torch.nn.functional.hardswish,
168
+ torch.nn.functional.hardtanh,
169
+ torch.Tensor.heaviside,
170
+ torch.heaviside,
171
+ torch.Tensor.hypot,
172
+ torch.hypot,
173
+ torch.Tensor.i0,
174
+ torch.i0,
175
+ torch.Tensor.igamma,
176
+ torch.igamma,
177
+ torch.Tensor.igammac,
178
+ torch.igammac,
179
+ torch.Tensor.isclose,
180
+ torch.isclose,
181
+ torch.Tensor.isfinite,
182
+ torch.isfinite,
183
+ torch.Tensor.isinf,
184
+ torch.isinf,
185
+ torch.Tensor.isnan,
186
+ torch.isnan,
187
+ torch.Tensor.isneginf,
188
+ torch.isneginf,
189
+ torch.Tensor.isposinf,
190
+ torch.isposinf,
191
+ torch.Tensor.isreal,
192
+ torch.isreal,
193
+ torch.Tensor.kron,
194
+ torch.kron,
195
+ torch.Tensor.lcm,
196
+ torch.lcm,
197
+ torch.Tensor.ldexp,
198
+ torch.ldexp,
199
+ torch.Tensor.le,
200
+ torch.le,
201
+ torch.nn.functional.leaky_relu,
202
+ torch.Tensor.lerp,
203
+ torch.lerp,
204
+ torch.Tensor.lgamma,
205
+ torch.lgamma,
206
+ torch.Tensor.log,
207
+ torch.log,
208
+ torch.Tensor.log10,
209
+ torch.log10,
210
+ torch.Tensor.log1p,
211
+ torch.log1p,
212
+ torch.Tensor.log2,
213
+ torch.log2,
214
+ torch.nn.functional.logsigmoid,
215
+ torch.Tensor.logical_and,
216
+ torch.logical_and,
217
+ torch.Tensor.logical_not,
218
+ torch.logical_not,
219
+ torch.Tensor.logical_or,
220
+ torch.logical_or,
221
+ torch.Tensor.logical_xor,
222
+ torch.logical_xor,
223
+ torch.Tensor.logit,
224
+ torch.logit,
225
+ torch.Tensor.lt,
226
+ torch.lt,
227
+ torch.Tensor.maximum,
228
+ torch.maximum,
229
+ torch.Tensor.minimum,
230
+ torch.minimum,
231
+ torch.nn.functional.mish,
232
+ torch.Tensor.mvlgamma,
233
+ torch.mvlgamma,
234
+ torch.Tensor.nan_to_num,
235
+ torch.nan_to_num,
236
+ torch.Tensor.ne,
237
+ torch.ne,
238
+ torch.Tensor.neg,
239
+ torch.neg,
240
+ torch.Tensor.nextafter,
241
+ torch.nextafter,
242
+ torch.Tensor.outer,
243
+ torch.outer,
244
+ torch.polar,
245
+ torch.Tensor.polygamma,
246
+ torch.polygamma,
247
+ torch.Tensor.positive,
248
+ torch.positive,
249
+ torch.Tensor.pow,
250
+ torch.pow,
251
+ torch.Tensor.prelu,
252
+ torch.prelu,
253
+ torch.nn.functional.prelu,
254
+ torch.Tensor.rad2deg,
255
+ torch.rad2deg,
256
+ torch.Tensor.reciprocal,
257
+ torch.reciprocal,
258
+ torch.Tensor.relu,
259
+ torch.relu,
260
+ torch.nn.functional.relu,
261
+ torch.nn.functional.relu6,
262
+ torch.Tensor.remainder,
263
+ torch.remainder,
264
+ torch.Tensor.round,
265
+ torch.round,
266
+ torch.rrelu,
267
+ torch.nn.functional.rrelu,
268
+ torch.Tensor.rsqrt,
269
+ torch.rsqrt,
270
+ torch.rsub,
271
+ torch.selu,
272
+ torch.nn.functional.selu,
273
+ torch.Tensor.sgn,
274
+ torch.sgn,
275
+ torch.Tensor.sigmoid,
276
+ torch.sigmoid,
277
+ torch.nn.functional.sigmoid,
278
+ torch.Tensor.sign,
279
+ torch.sign,
280
+ torch.Tensor.signbit,
281
+ torch.signbit,
282
+ torch.nn.functional.silu,
283
+ torch.Tensor.sin,
284
+ torch.sin,
285
+ torch.Tensor.sinc,
286
+ torch.sinc,
287
+ torch.Tensor.sinh,
288
+ torch.sinh,
289
+ torch.nn.functional.softplus,
290
+ torch.nn.functional.softshrink,
291
+ torch.Tensor.sqrt,
292
+ torch.sqrt,
293
+ torch.Tensor.square,
294
+ torch.square,
295
+ torch.Tensor.sub,
296
+ torch.sub,
297
+ torch.Tensor.tan,
298
+ torch.tan,
299
+ torch.Tensor.tanh,
300
+ torch.tanh,
301
+ torch.nn.functional.tanh,
302
+ torch.threshold,
303
+ torch.nn.functional.threshold,
304
+ torch.trapz,
305
+ torch.Tensor.true_divide,
306
+ torch.true_divide,
307
+ torch.Tensor.trunc,
308
+ torch.trunc,
309
+ torch.Tensor.xlogy,
310
+ torch.xlogy,
311
+ torch.rand_like,
312
+ )
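This table exists so the dims frontend can answer "is this callable pointwise?" with a plain set-membership test and take the cheaper broadcasting path. A small sketch of that check, assuming functorch.dim imports in your build:

    import torch
    from functorch.dim import op_properties

    pointwise = set(op_properties.pointwise)   # same construction reference.py uses

    print(torch.add in pointwise)              # True  -> fast elementwise path
    print(torch.Tensor.__mul__ in pointwise)   # True
    print(torch.matmul in pointwise)           # False -> falls back to the batched path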
.venv/Lib/site-packages/functorch/dim/reference.py ADDED
@@ -0,0 +1,645 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # reference python implementations for C ops
8
+ import torch
9
+ from functorch._C import dim as _C
10
+
11
+ from . import op_properties
12
+ from .batch_tensor import _enable_layers
13
+ from .tree_map import tree_flatten, tree_map
14
+
15
+
16
+ DimList = _C.DimList
17
+ import operator
18
+ from functools import reduce
19
+
20
+
21
+ # use dict to avoid writing C++ bindings for set
22
+ pointwise = set(op_properties.pointwise)
23
+
24
+
25
+ def prod(x):
26
+ return reduce(operator.mul, x, 1)
27
+
28
+
29
+ def _wrap_dim(d, N, keepdim):
30
+ from . import Dim
31
+
32
+ if isinstance(d, Dim):
33
+ assert not keepdim, "cannot preserve first-class dimensions with keepdim=True"
34
+ return d
35
+ elif d >= 0:
36
+ return d - N
37
+ else:
38
+ return d
39
+
40
+
41
+ def _dims(d, N, keepdim, single_dim):
42
+ from . import Dim
43
+
44
+ if isinstance(d, (Dim, int)):
45
+ return ltuple((_wrap_dim(d, N, keepdim),))
46
+ assert not single_dim, f"expected a single dimension or int but found: {d}"
47
+ return ltuple(_wrap_dim(x, N, keepdim) for x in d)
48
+
49
+
50
+ def _bind_dims_to_size(lhs_size, rhs, lhs_debug):
51
+ from . import DimensionMismatchError
52
+
53
+ not_bound = tuple((i, r) for i, r in enumerate(rhs) if not r.is_bound)
54
+ if len(not_bound) == 1:
55
+ idx, d = not_bound[0]
56
+ rhs_so_far = prod(r.size for r in rhs if r.is_bound)
57
+ if lhs_size % rhs_so_far != 0:
58
+ rhs_s = tuple("?" if not r.is_bound else str(r.size) for r in rhs)
59
+ raise DimensionMismatchError(
60
+ f"inferred dimension does not evenly fit into larger dimension: {lhs_size} vs {rhs_s}"
61
+ )
62
+ new_size = lhs_size // rhs_so_far
63
+ d.size = new_size
64
+ elif len(not_bound) > 1:
65
+ rhs_s = tuple("?" if not r.is_bound else str(r.size) for r in rhs)
66
+ raise DimensionMismatchError(
67
+ f"cannot infer the size of two dimensions at once: {rhs} with sizes {rhs_s}"
68
+ )
69
+ else:
70
+ rhs_size = prod(r.size for r in rhs)
71
+ if lhs_size != rhs_size:
72
+ raise DimensionMismatchError(
73
+ f"Dimension sizes do not match ({lhs_size} != {rhs_size}) when matching {lhs_debug} to {rhs}"
74
+ )
75
+
76
+
77
+ def _tensor_levels(inp):
78
+ from . import _Tensor
79
+
80
+ if isinstance(inp, _Tensor):
81
+ return inp._tensor, llist(inp._levels), inp._has_device
82
+ else:
83
+ return inp, llist(range(-inp.ndim, 0)), True
84
+
85
+
86
+ def _match_levels(v, from_levels, to_levels):
87
+ view = []
88
+ permute = []
89
+ requires_view = False
90
+ size = v.size()
91
+ for t in to_levels:
92
+ try:
93
+ idx = from_levels.index(t)
94
+ permute.append(idx)
95
+ view.append(size[idx])
96
+ except ValueError:
97
+ view.append(1)
98
+ requires_view = True
99
+ if permute != list(range(len(permute))):
100
+ v = v.permute(*permute)
101
+ if requires_view:
102
+ v = v.view(*view)
103
+ return v
104
+
105
+
106
+ # make a single dimension positional but do not permute it,
107
+ # used to do multi-tensor operators where the dim being acted on
108
+ # should not physically move if possible
109
+ def _positional_no_permute(self, dim, expand_dim=False):
110
+ from . import Tensor
111
+
112
+ ptensor, levels = self._tensor, llist(self._levels)
113
+ try:
114
+ idx = levels.index(dim)
115
+ except ValueError:
116
+ if not expand_dim:
117
+ raise
118
+ idx = 0
119
+ ptensor = ptensor.expand(dim.size, *ptensor.size())
120
+ levels.insert(0, 0)
121
+ idx_batched = 0
122
+ for i in range(idx):
123
+ if isinstance(levels[i], int):
124
+ levels[i] -= 1
125
+ idx_batched += 1
126
+ levels[idx] = -idx_batched - 1
127
+ return Tensor.from_positional(ptensor, levels, self._has_device), idx_batched
128
+
129
+
130
+ def seq(a, b):
131
+ from . import Dim
132
+
133
+ if isinstance(a, Dim) != isinstance(b, Dim):
134
+ return False
135
+ if isinstance(a, Dim):
136
+ return a is b
137
+ else:
138
+ return a == b
139
+
140
+
141
+ class isin:
142
+ def __contains__(self, item):
143
+ for x in self:
144
+ if seq(item, x):
145
+ return True
146
+ return False
147
+
148
+ def index(self, item):
149
+ for i, x in enumerate(self):
150
+ if seq(item, x):
151
+ return i
152
+ raise ValueError
153
+
154
+
155
+ class llist(isin, list):
156
+ pass
157
+
158
+
159
+ class ltuple(isin, tuple):
160
+ pass
161
+
162
+
163
+ empty_dict = {}
164
+
165
+
166
+ @classmethod
167
+ def __torch_function__(self, orig, cls, args, kwargs=empty_dict):
168
+ from . import _Tensor, Tensor, TensorLike
169
+ from .delayed_mul_tensor import DelayedMulTensor
170
+
171
+ if orig is torch.Tensor.__mul__:
172
+ lhs, rhs = args
173
+ if (
174
+ isinstance(lhs, _Tensor)
175
+ and isinstance(rhs, _Tensor)
176
+ and lhs.ndim == 0
177
+ and rhs.ndim == 0
178
+ ):
179
+ return DelayedMulTensor(lhs, rhs)
180
+ all_dims = llist()
181
+ flat_args, unflatten = tree_flatten((args, kwargs))
182
+ device_holding_tensor = None
183
+ for f in flat_args:
184
+ if isinstance(f, _Tensor):
185
+ if f._has_device:
186
+ device_holding_tensor = f._batchtensor
187
+ for d in f.dims:
188
+ if d not in all_dims:
189
+ all_dims.append(d)
190
+
191
+ def unwrap(t):
192
+ if isinstance(t, _Tensor):
193
+ r = t._batchtensor
194
+ if device_holding_tensor is not None and not t._has_device:
195
+ r = r.to(device=device_holding_tensor.device)
196
+ return r
197
+ return t
198
+
199
+ if orig in pointwise:
200
+ result_levels = llist()
201
+ arg_levels = llist()
202
+ to_expand = []
203
+ for i, f in enumerate(flat_args):
204
+ if isinstance(f, TensorLike):
205
+ ptensor, levels, _ = _tensor_levels(f)
206
+ if (
207
+ isinstance(f, _Tensor)
208
+ and not f._has_device
209
+ and device_holding_tensor is not None
210
+ ):
211
+ ptensor = ptensor.to(device=device_holding_tensor.device)
212
+ flat_args[i] = ptensor
213
+ for l in levels:
214
+ if l not in result_levels:
215
+ result_levels.append(l)
216
+ to_expand.append((i, levels))
217
+
218
+ for i, levels in to_expand:
219
+ flat_args[i] = _match_levels(flat_args[i], levels, result_levels)
220
+ args, kwargs = unflatten(flat_args)
221
+ result = orig(*args, **kwargs)
222
+
223
+ def wrap(t):
224
+ if isinstance(t, TensorLike):
225
+ return Tensor.from_positional(
226
+ t, result_levels, device_holding_tensor is not None
227
+ )
228
+ return t
229
+
230
+ return tree_map(wrap, result)
231
+ else:
232
+
233
+ def wrap(t):
234
+ if isinstance(t, TensorLike):
235
+ return Tensor.from_batched(t, device_holding_tensor is not None)
236
+ return t
237
+
238
+ with _enable_layers(all_dims):
239
+ print(f"batch_tensor for {orig}")
240
+ args, kwargs = unflatten(unwrap(f) for f in flat_args)
241
+ result = orig(*args, **kwargs)
242
+ # print("END", orig)
243
+ return tree_map(wrap, result)
244
+
245
+
246
+ def positional(self, *dims):
247
+ from . import Dim, DimensionBindError, Tensor
248
+
249
+ ptensor, levels = self._tensor, llist(self._levels)
250
+ flat_dims = llist()
251
+ view = []
252
+ needs_view = False
253
+ ndim = self.ndim
254
+ for d in dims:
255
+ if isinstance(d, DimList):
256
+ flat_dims.extend(d)
257
+ view.extend(e.size for e in d)
258
+ elif isinstance(d, Dim):
259
+ flat_dims.append(d)
260
+ view.append(d.size)
261
+ elif isinstance(d, int):
262
+ d = _wrap_dim(d, ndim, False)
263
+ flat_dims.append(d)
264
+ view.append(ptensor.size(d))
265
+ else:
266
+ flat_dims.extend(d)
267
+ view.append(prod(e.size for e in d))
268
+ needs_view = True
269
+
270
+ permute = list(range(len(levels)))
271
+ nflat = len(flat_dims)
272
+ for i, d in enumerate(flat_dims):
273
+ try:
274
+ idx = levels.index(d)
275
+ except ValueError as e:
276
+ raise DimensionBindError(
277
+ f"tensor of dimensions {self.dims} does not contain dim {d}"
278
+ ) from e
279
+ p = permute[idx]
280
+ del levels[idx]
281
+ del permute[idx]
282
+ levels.insert(i, 0)
283
+ permute.insert(i, p)
284
+ ptensor = ptensor.permute(*permute)
285
+ seen = 0
286
+ for i in range(len(levels) - 1, -1, -1):
287
+ if isinstance(levels[i], int):
288
+ seen += 1
289
+ levels[i] = -seen
290
+ result = Tensor.from_positional(ptensor, levels, self._has_device)
291
+ if needs_view:
292
+ result = result.reshape(*view, *result.size()[len(flat_dims) :])
293
+ return result
294
+
295
+
296
+ def _contains_dim(input):
297
+ from . import Dim
298
+
299
+ for i in input:
300
+ if isinstance(i, Dim):
301
+ return True
302
+
303
+
304
+ def expand(self, *sizes):
305
+ if not _contains_dim(sizes):
306
+ return self.__torch_function__(torch.Tensor.expand, None, (self, *sizes))
307
+ dims = sizes
308
+ sizes = [d.size for d in dims] + [-1] * self.ndim
309
+ self = self.expand(*sizes)
310
+ return self[dims]
311
+
312
+
313
+ _not_present = object()
314
+
315
+
316
+ def _getarg(name, offset, args, kwargs, default):
317
+ if len(args) > offset:
318
+ return args[offset]
319
+ return kwargs.get(name, default)
320
+
321
+
322
+ def _patcharg(name, offset, args, kwargs, value):
323
+ if len(args) > offset:
324
+ args[offset] = value
325
+ else:
326
+ kwargs[name] = value
327
+
328
+
329
+ def _wrap(
330
+ orig, dim_offset=0, keepdim_offset=1, dim_name="dim", single_dim=False, reduce=True
331
+ ):
332
+ from . import Dim, Tensor, TensorLike
333
+
334
+ def fn(self, *args, **kwargs):
335
+ dim = _getarg(dim_name, dim_offset, args, kwargs, _not_present)
336
+ if dim is _not_present or (single_dim and not isinstance(dim, Dim)):
337
+ with _enable_layers(self.dims):
338
+ print(f"dim fallback batch_tensor for {orig}")
339
+ return Tensor.from_batched(
340
+ orig(self._batchtensor, *args, **kwargs), self._has_device
341
+ )
342
+ keepdim = (
343
+ _getarg("keepdim", keepdim_offset, args, kwargs, False) if reduce else False
344
+ )
345
+ t, levels = self._tensor, llist(self._levels)
346
+ dims = _dims(dim, self._batchtensor.ndim, keepdim, single_dim)
347
+ dim_indices = tuple(levels.index(d) for d in dims)
348
+ if reduce and not keepdim:
349
+ new_levels = [l for i, l in enumerate(levels) if i not in dim_indices]
350
+ else:
351
+ new_levels = levels
352
+
353
+ if len(dim_indices) == 1:
354
+ dim_indices = dim_indices[
355
+ 0
356
+ ] # so that dims that really only take a single argument work...
357
+ args = list(args)
358
+ _patcharg(dim_name, dim_offset, args, kwargs, dim_indices)
359
+
360
+ def wrap(t):
361
+ if isinstance(t, TensorLike):
362
+ return Tensor.from_positional(t, new_levels, self._has_device)
363
+ return t
364
+
365
+ with _enable_layers(new_levels):
366
+ print(f"dim used batch_tensor for {orig}")
367
+ r = orig(t, *args, **kwargs)
368
+ return tree_map(wrap, r)
369
+
370
+ return fn
371
+
372
+
373
+ def _def(name, *args, **kwargs):
374
+ from . import _Tensor
375
+
376
+ orig = getattr(torch.Tensor, name)
377
+ setattr(_Tensor, name, _wrap(orig, *args, **kwargs))
378
+
379
+
380
+ no_slice = slice(None)
381
+
382
+ _orig_getitem = torch.Tensor.__getitem__
383
+
384
+
385
+ class dim_tracker:
386
+ def __init__(self) -> None:
387
+ self.dims = llist()
388
+ self.count = []
389
+
390
+ def record(self, d):
391
+ if d not in self.dims:
392
+ self.dims.append(d)
393
+ self.count.append(1)
394
+
395
+ def __getitem__(self, d):
396
+ return self.count[self.dims.index(d)]
397
+
398
+
399
+ def t__getitem__(self, input):
400
+ from . import _Tensor, Dim, DimensionBindError, DimList, Tensor, TensorLike
401
+
402
+ # * bail to original example if we have a single non-Dim tensor, or a non-tensor
403
+ # * locate ... or an unbound tensor list, and determine its size, bind dim list
404
+ # (remember that None does not count to the total dim count)
405
+ # * bind simple dims and dim-packs to their sizes, count the number of uses of each dim,
406
+ # produce the re-view if needed
407
+ # * for each single-use dim index, replace with no_slice and mark that it will be added
408
+ # (keep track of whether we have to call super)
409
+ # * call super if needed
410
+ # * if we have dims to bind, bind them (it will help if we eliminated ... and None before)
411
+ # this handles bool indexing handling, as well as some other simple cases.
412
+
413
+ is_simple = (
414
+ not isinstance(input, Dim)
415
+ and not isinstance(input, (tuple, list))
416
+ and
417
+ # WAR for functorch bug where zero time tensors in getitem are not handled correctly.
418
+ not (isinstance(input, TensorLike) and input.ndim == 0)
419
+ )
420
+
421
+ if is_simple:
422
+ if isinstance(self, _Tensor):
423
+ return _Tensor.__torch_function__(_orig_getitem, None, (self, input))
424
+ else:
425
+ return _orig_getitem(self, input)
426
+
427
+ # can further optimize this case
428
+ if not isinstance(input, tuple):
429
+ input = [input]
430
+ else:
431
+ input = list(input)
432
+
433
+ dims_indexed = 0
434
+ expanding_object = None
435
+ dimlists = []
436
+ for i, s in enumerate(input):
437
+ if s is ... or isinstance(s, DimList) and not s.is_bound:
438
+ if expanding_object is not None:
439
+ msg = (
440
+ "at most one ... or unbound dimension list can exist in indexing list but"
441
+ f" found 2 at offsets {i} and {expanding_object}"
442
+ )
443
+ raise DimensionBindError(msg)
444
+ expanding_object = i
445
+
446
+ if isinstance(s, DimList):
447
+ dims_indexed += len(s) if s.is_bound else 0
448
+ dimlists.append(i)
449
+ elif s is not None and s is not ...:
450
+ dims_indexed += 1
451
+
452
+ ndim = self.ndim
453
+ if dims_indexed > ndim:
454
+ raise IndexError(
455
+ f"at least {dims_indexed} indices were supplied but the tensor only has {ndim} dimensions."
456
+ )
457
+ if expanding_object is not None:
458
+ expanding_ndims = ndim - dims_indexed
459
+ obj = input[expanding_object]
460
+ if obj is ...:
461
+ input[expanding_object : expanding_object + 1] = [
462
+ no_slice
463
+ ] * expanding_ndims
464
+ else:
465
+ obj.bind_len(expanding_ndims)
466
+ # flatten the dimslists into the indexing
467
+ for i in reversed(dimlists):
468
+ input[i : i + 1] = input[i]
469
+ dims_indexed = 0
470
+ requires_view = False
471
+ size = self.size()
472
+ view_sizes = []
473
+ dims_seen = dim_tracker()
474
+
475
+ def add_dims(t):
476
+ if not isinstance(t, _Tensor):
477
+ return
478
+ for d in t.dims:
479
+ dims_seen.record(d)
480
+
481
+ add_dims(self)
482
+ dim_packs = []
483
+ for i, idx in enumerate(input):
484
+ if idx is None:
485
+ input[i] = no_slice
486
+ view_sizes.append(1)
487
+ requires_view = True
488
+ else:
489
+ sz = size[dims_indexed]
490
+ if isinstance(idx, Dim):
491
+ idx.size = sz
492
+ dims_seen.record(idx)
493
+ view_sizes.append(sz)
494
+ elif isinstance(idx, (tuple, list)) and idx and isinstance(idx[0], Dim):
495
+ for d in idx:
496
+ dims_seen.record(idx)
497
+ _bind_dims_to_size(sz, idx, f"offset {i}")
498
+ view_sizes.extend(d.size for d in idx)
499
+ requires_view = True
500
+ dim_packs.append(i)
501
+ else:
502
+ add_dims(idx)
503
+ view_sizes.append(sz)
504
+ dims_indexed += 1
505
+ if requires_view:
506
+ self = self.view(*view_sizes)
507
+ for i in reversed(dim_packs):
508
+ input[i : i + 1] = input[i]
509
+
510
+ # currently:
511
+ # input is flat, containing either Dim, or Tensor, or something valid for standard indexing
512
+ # self may have first-class dims as well.
513
+
514
+ # to index:
515
+ # drop the first class dims from self, they just become direct indices of their positions
516
+
517
+ # figure out the dimensions of the indexing tensors: union of all the dims in the tensors in the index.
518
+ # these dimensions will appear and need to be bound at the first place the tensor occurs
519
+
520
+ if isinstance(self, _Tensor):
521
+ ptensor_self, levels = self._tensor, list(self._levels)
522
+ # indices to ptensor rather than self which has first-class dimensions
523
+ input_it = iter(input)
524
+ flat_inputs = [next(input_it) if isinstance(l, int) else l for l in levels]
525
+ has_device = self._has_device
526
+ to_pad = 0
527
+ else:
528
+ ptensor_self, flat_inputs = self, input
529
+ to_pad = ptensor_self.ndim - len(flat_inputs)
530
+ has_device = True
531
+
532
+ result_levels = []
533
+ index_levels = []
534
+ tensor_insert_point = None
535
+ to_expand = {}
536
+ requires_getindex = False
537
+ for i, inp in enumerate(flat_inputs):
538
+ if isinstance(inp, Dim) and dims_seen[inp] == 1:
539
+ flat_inputs[i] = no_slice
540
+ result_levels.append(inp)
541
+ elif isinstance(inp, TensorLike):
542
+ requires_getindex = True
543
+ if tensor_insert_point is None:
544
+ tensor_insert_point = len(result_levels)
545
+ ptensor, levels, _ = _tensor_levels(inp)
546
+ to_expand[i] = levels
547
+ flat_inputs[i] = ptensor
548
+ for l in levels:
549
+ if l not in index_levels:
550
+ index_levels.append(l)
551
+ else:
552
+ requires_getindex = True
553
+ result_levels.append(0)
554
+
555
+ if tensor_insert_point is not None:
556
+ result_levels[tensor_insert_point:tensor_insert_point] = index_levels
557
+
558
+ for i, levels in to_expand.items():
559
+ flat_inputs[i] = _match_levels(flat_inputs[i], levels, index_levels)
560
+
561
+ if requires_getindex:
562
+ result = _orig_getitem(ptensor_self, flat_inputs)
563
+ else:
564
+ result = ptensor_self
565
+
566
+ next_positional = -1
567
+ if to_pad > 0:
568
+ result_levels.extend([0] * to_pad)
569
+ for i, r in enumerate(reversed(result_levels)):
570
+ if isinstance(r, int):
571
+ result_levels[-1 - i] = next_positional
572
+ next_positional -= 1
573
+
574
+ return Tensor.from_positional(result, result_levels, has_device)
575
+
576
+
577
+ # XXX - dim is optional and can be the outer-most dimension...
578
+ def stack(tensors, new_dim, dim=0, out=None):
579
+ if isinstance(dim, int):
580
+ return torch.stack(tensors, dim, out).index(dim, new_dim)
581
+ index = None
582
+ if out is not None:
583
+ out, index = _positional_no_permute(out, dim, expand_dim=True)
584
+ ptensors = []
585
+ for t in tensors:
586
+ pt, pi = _positional_no_permute(t, dim, expand_dim=True)
587
+ if index is not None and pi != index:
588
+ pt = pt.move_dim(pi, index)
589
+ else:
590
+ index = pi
591
+ ptensors.append(pt)
592
+ pr = torch.stack(ptensors, index, out=out)
593
+ return pr.index((index, index + 1), (new_dim, dim))
594
+
595
+
596
+ _orig_split = torch.Tensor.split
597
+
598
+
599
+ def split(self, split_size_or_sections, dim=0):
600
+ from . import _Tensor, Dim
601
+
602
+ if isinstance(split_size_or_sections, int) or any(
603
+ isinstance(t, int) for t in split_size_or_sections
604
+ ):
605
+ if isinstance(dim, Dim):
606
+ raise ValueError(
607
+ "when dim is specified as a Dim object, split sizes must also be dimensions."
608
+ )
609
+ return _orig_split(self, split_size_or_sections, dim=dim)
610
+
611
+ if isinstance(dim, Dim):
612
+ assert isinstance(self, _Tensor), f"Tensor does not have dimension {dim}"
613
+ self, dim = _positional_no_permute(self, dim)
614
+
615
+ size = self.size(dim)
616
+ total_bound_size = 0
617
+ unbound = []
618
+ sizes = []
619
+ for i, d in enumerate(split_size_or_sections):
620
+ if d.is_bound:
621
+ sizes.append(d.size)
622
+ total_bound_size += d.size
623
+ else:
624
+ sizes.append(0)
625
+ unbound.append(i)
626
+
627
+ if unbound:
628
+ assert (
629
+ total_bound_size <= size
630
+ ), f"result dimensions are larger than original: {total_bound_size} vs {size} ({split_size_or_sections})"
631
+ remaining_size = size - total_bound_size
632
+ chunk_size = -(-remaining_size // len(unbound))
633
+ for u in unbound:
634
+ sz = min(chunk_size, remaining_size)
635
+ split_size_or_sections[u].size = sz
636
+ sizes[u] = sz
637
+ remaining_size -= sz
638
+ else:
639
+ assert (
640
+ total_bound_size == size
641
+ ), f"result dimensions do not match original: {total_bound_size} vs {size} ({split_size_or_sections})"
642
+ return tuple(
643
+ t.index(dim, d)
644
+ for d, t in zip(split_size_or_sections, _orig_split(self, sizes, dim=dim))
645
+ )
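_match_levels above aligns a tensor to a target level ordering with one permute plus a broadcasting view, inserting size-1 slots for levels the tensor lacks. The same alignment written out by hand in plain torch, as a standalone illustration:

    import torch

    x = torch.randn(3, 4)                    # levels (a, b)
    # target order (b, c, a), where level c is missing from x:
    aligned = x.permute(1, 0).unsqueeze(1)   # shape (4, 1, 3)

    y = torch.randn(4, 2, 3)                 # a tensor that really has (b, c, a)
    print((aligned + y).shape)               # broadcasts over the missing level c -> (4, 2, 3)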
.venv/Lib/site-packages/functorch/dim/tree_map.py ADDED
@@ -0,0 +1,15 @@
+ # Copyright (c) Facebook, Inc. and its affiliates.
+ # All rights reserved.
+ #
+ # This source code is licensed under the BSD-style license found in the
+ # LICENSE file in the root directory of this source tree.
+
+ from functorch._C import dim
+
+
+ tree_flatten = dim.tree_flatten
+
+
+ def tree_map(fn, tree):
+ vs, unflatten = tree_flatten(tree)
+ return unflatten(fn(v) for v in vs)
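tree_map here is just flatten, apply, unflatten, delegated to the C extension. A pure-Python sketch of the same behaviour for nested lists, tuples and dicts (not the functorch._C implementation, only an illustration):

    def tree_map_py(fn, tree):
        # rebuild the container recursively, applying fn to every leaf
        if isinstance(tree, dict):
            return {k: tree_map_py(fn, v) for k, v in tree.items()}
        if isinstance(tree, (list, tuple)):
            return type(tree)(tree_map_py(fn, v) for v in tree)
        return fn(tree)

    print(tree_map_py(lambda x: x * 2, {"a": [1, 2], "b": (3, {"c": 4})}))
    # {'a': [2, 4], 'b': (6, {'c': 8})}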
.venv/Lib/site-packages/functorch/dim/wrap_type.py ADDED
@@ -0,0 +1,72 @@
1
+ # Copyright (c) Facebook, Inc. and its affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ from types import (
8
+ BuiltinMethodType,
9
+ FunctionType,
10
+ GetSetDescriptorType,
11
+ MethodDescriptorType,
12
+ WrapperDescriptorType,
13
+ )
14
+
15
+ from functorch._C import dim as _C
16
+
17
+
18
+ _wrap_method = _C._wrap_method
19
+
20
+ FUNC_TYPES = (
21
+ FunctionType,
22
+ MethodDescriptorType,
23
+ BuiltinMethodType,
24
+ WrapperDescriptorType,
25
+ )
26
+ PROPERTY_TYPES = (GetSetDescriptorType, property)
27
+
28
+
29
+ def _py_wrap_method(orig, __torch_function__):
30
+ def impl(*args, **kwargs):
31
+ return __torch_function__(orig, None, args, kwargs)
32
+
33
+ return impl
34
+
35
+
36
+ def wrap_type(use_c, to_patch, pattern, __torch_function__):
37
+ if use_c:
38
+ wrap_method = _wrap_method
39
+ else:
40
+ wrap_method = _py_wrap_method
41
+
42
+ all = {}
43
+ for t in reversed(pattern.mro()[:-1]): # skip object
44
+ all.update(t.__dict__)
45
+
46
+ def wrap_attr(orig):
47
+ return property(wrap_method(orig.__get__, __torch_function__))
48
+
49
+ for name, obj in all.items():
50
+ if name in (
51
+ "__dict__",
52
+ "__new__",
53
+ "__init__",
54
+ "__repr__",
55
+ "__weakref__",
56
+ "__doc__",
57
+ "__module__",
58
+ "__dir__",
59
+ ):
60
+ continue
61
+
62
+ # skip things that have been overloaded
63
+ # things that come from object like `__eq__` still need to be patched, however.
64
+ if hasattr(to_patch, name) and getattr(to_patch, name) is not getattr(
65
+ object, name, None
66
+ ):
67
+ continue
68
+
69
+ if isinstance(obj, FUNC_TYPES):
70
+ setattr(to_patch, name, wrap_method(obj, __torch_function__))
71
+ elif isinstance(obj, PROPERTY_TYPES):
72
+ setattr(to_patch, name, wrap_attr(obj))
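wrap_type walks the pattern class's MRO and replaces every method and property on the target with a shim that reroutes the call through __torch_function__. A dependency-free sketch of that rerouting for a single method (all names below are illustrative):

    class Logger:
        @staticmethod
        def __torch_function__(orig, types, args, kwargs=None):
            print(f"intercepted {orig.__name__}")
            return orig(*args, **(kwargs or {}))

    def py_wrap_method(orig, torch_function):
        # same shape as _py_wrap_method above
        def impl(*args, **kwargs):
            return torch_function(orig, None, args, kwargs)
        return impl

    class Point:
        def __init__(self, x):
            self.x = x
        def double(self):
            return Point(self.x * 2)

    Point.double = py_wrap_method(Point.double, Logger.__torch_function__)
    print(Point(3).double().x)   # prints "intercepted double", then 6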
.venv/Lib/site-packages/huggingface_hub/__init__.py ADDED
@@ -0,0 +1,1002 @@
1
+ # Copyright 2020 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # ***********
16
+ # `huggingface_hub` init has 2 modes:
17
+ # - Normal usage:
18
+ # If imported to use it, all modules and functions are lazy-loaded. This means
19
+ # they exist at top level in module but are imported only the first time they are
20
+ # used. This way, `from huggingface_hub import something` will import `something`
21
+ # quickly without the hassle of importing all the features from `huggingface_hub`.
22
+ # - Static check:
23
+ # If statically analyzed, all modules and functions are loaded normally. This way
24
+ # static typing check works properly as well as autocomplete in text editors and
25
+ # IDEs.
26
+ #
27
+ # The static model imports are done inside the `if TYPE_CHECKING:` statement at
28
+ # the bottom of this file. Since module/functions imports are duplicated, it is
29
+ # mandatory to make sure to add them twice when adding one. This is checked in the
30
+ # `make quality` command.
31
+ #
32
+ # To update the static imports, please run the following command and commit the changes.
33
+ # ```
34
+ # # Use script
35
+ # python utils/check_static_imports.py --update-file
36
+ #
37
+ # # Or run style on codebase
38
+ # make style
39
+ # ```
40
+ #
41
+ # ***********
42
+ # Lazy loader vendored from https://github.com/scientific-python/lazy_loader
43
+ import importlib
44
+ import os
45
+ import sys
46
+ from typing import TYPE_CHECKING
47
+
48
+
49
+ __version__ = "0.26.5"
50
+
51
+ # Alphabetical order of definitions is ensured in tests
52
+ # WARNING: any comment added in this dictionary definition will be lost when
53
+ # re-generating the file !
54
+ _SUBMOD_ATTRS = {
55
+ "_commit_scheduler": [
56
+ "CommitScheduler",
57
+ ],
58
+ "_inference_endpoints": [
59
+ "InferenceEndpoint",
60
+ "InferenceEndpointError",
61
+ "InferenceEndpointStatus",
62
+ "InferenceEndpointTimeoutError",
63
+ "InferenceEndpointType",
64
+ ],
65
+ "_login": [
66
+ "auth_list",
67
+ "auth_switch",
68
+ "interpreter_login",
69
+ "login",
70
+ "logout",
71
+ "notebook_login",
72
+ ],
73
+ "_multi_commits": [
74
+ "MultiCommitException",
75
+ "plan_multi_commits",
76
+ ],
77
+ "_snapshot_download": [
78
+ "snapshot_download",
79
+ ],
80
+ "_space_api": [
81
+ "SpaceHardware",
82
+ "SpaceRuntime",
83
+ "SpaceStage",
84
+ "SpaceStorage",
85
+ "SpaceVariable",
86
+ ],
87
+ "_tensorboard_logger": [
88
+ "HFSummaryWriter",
89
+ ],
90
+ "_webhooks_payload": [
91
+ "WebhookPayload",
92
+ "WebhookPayloadComment",
93
+ "WebhookPayloadDiscussion",
94
+ "WebhookPayloadDiscussionChanges",
95
+ "WebhookPayloadEvent",
96
+ "WebhookPayloadMovedTo",
97
+ "WebhookPayloadRepo",
98
+ "WebhookPayloadUrl",
99
+ "WebhookPayloadWebhook",
100
+ ],
101
+ "_webhooks_server": [
102
+ "WebhooksServer",
103
+ "webhook_endpoint",
104
+ ],
105
+ "community": [
106
+ "Discussion",
107
+ "DiscussionComment",
108
+ "DiscussionCommit",
109
+ "DiscussionEvent",
110
+ "DiscussionStatusChange",
111
+ "DiscussionTitleChange",
112
+ "DiscussionWithDetails",
113
+ ],
114
+ "constants": [
115
+ "CONFIG_NAME",
116
+ "FLAX_WEIGHTS_NAME",
117
+ "HUGGINGFACE_CO_URL_HOME",
118
+ "HUGGINGFACE_CO_URL_TEMPLATE",
119
+ "PYTORCH_WEIGHTS_NAME",
120
+ "REPO_TYPE_DATASET",
121
+ "REPO_TYPE_MODEL",
122
+ "REPO_TYPE_SPACE",
123
+ "TF2_WEIGHTS_NAME",
124
+ "TF_WEIGHTS_NAME",
125
+ ],
126
+ "fastai_utils": [
127
+ "_save_pretrained_fastai",
128
+ "from_pretrained_fastai",
129
+ "push_to_hub_fastai",
130
+ ],
131
+ "file_download": [
132
+ "HfFileMetadata",
133
+ "_CACHED_NO_EXIST",
134
+ "get_hf_file_metadata",
135
+ "hf_hub_download",
136
+ "hf_hub_url",
137
+ "try_to_load_from_cache",
138
+ ],
139
+ "hf_api": [
140
+ "Collection",
141
+ "CollectionItem",
142
+ "CommitInfo",
143
+ "CommitOperation",
144
+ "CommitOperationAdd",
145
+ "CommitOperationCopy",
146
+ "CommitOperationDelete",
147
+ "DatasetInfo",
148
+ "GitCommitInfo",
149
+ "GitRefInfo",
150
+ "GitRefs",
151
+ "HfApi",
152
+ "ModelInfo",
153
+ "RepoUrl",
154
+ "SpaceInfo",
155
+ "User",
156
+ "UserLikes",
157
+ "WebhookInfo",
158
+ "WebhookWatchedItem",
159
+ "accept_access_request",
160
+ "add_collection_item",
161
+ "add_space_secret",
162
+ "add_space_variable",
163
+ "auth_check",
164
+ "cancel_access_request",
165
+ "change_discussion_status",
166
+ "comment_discussion",
167
+ "create_branch",
168
+ "create_collection",
169
+ "create_commit",
170
+ "create_commits_on_pr",
171
+ "create_discussion",
172
+ "create_inference_endpoint",
173
+ "create_pull_request",
174
+ "create_repo",
175
+ "create_tag",
176
+ "create_webhook",
177
+ "dataset_info",
178
+ "delete_branch",
179
+ "delete_collection",
180
+ "delete_collection_item",
181
+ "delete_file",
182
+ "delete_folder",
183
+ "delete_inference_endpoint",
184
+ "delete_repo",
185
+ "delete_space_secret",
186
+ "delete_space_storage",
187
+ "delete_space_variable",
188
+ "delete_tag",
189
+ "delete_webhook",
190
+ "disable_webhook",
191
+ "duplicate_space",
192
+ "edit_discussion_comment",
193
+ "enable_webhook",
194
+ "file_exists",
195
+ "get_collection",
196
+ "get_dataset_tags",
197
+ "get_discussion_details",
198
+ "get_full_repo_name",
199
+ "get_inference_endpoint",
200
+ "get_model_tags",
201
+ "get_paths_info",
202
+ "get_repo_discussions",
203
+ "get_safetensors_metadata",
204
+ "get_space_runtime",
205
+ "get_space_variables",
206
+ "get_token_permission",
207
+ "get_user_overview",
208
+ "get_webhook",
209
+ "grant_access",
210
+ "like",
211
+ "list_accepted_access_requests",
212
+ "list_collections",
213
+ "list_datasets",
214
+ "list_inference_endpoints",
215
+ "list_liked_repos",
216
+ "list_metrics",
217
+ "list_models",
218
+ "list_organization_members",
219
+ "list_papers",
220
+ "list_pending_access_requests",
221
+ "list_rejected_access_requests",
222
+ "list_repo_commits",
223
+ "list_repo_files",
224
+ "list_repo_likers",
225
+ "list_repo_refs",
226
+ "list_repo_tree",
227
+ "list_spaces",
228
+ "list_user_followers",
229
+ "list_user_following",
230
+ "list_webhooks",
231
+ "merge_pull_request",
232
+ "model_info",
233
+ "move_repo",
234
+ "paper_info",
235
+ "parse_safetensors_file_metadata",
236
+ "pause_inference_endpoint",
237
+ "pause_space",
238
+ "preupload_lfs_files",
239
+ "reject_access_request",
240
+ "rename_discussion",
241
+ "repo_exists",
242
+ "repo_info",
243
+ "repo_type_and_id_from_hf_id",
244
+ "request_space_hardware",
245
+ "request_space_storage",
246
+ "restart_space",
247
+ "resume_inference_endpoint",
248
+ "revision_exists",
249
+ "run_as_future",
250
+ "scale_to_zero_inference_endpoint",
251
+ "set_space_sleep_time",
252
+ "space_info",
253
+ "super_squash_history",
254
+ "unlike",
255
+ "update_collection_item",
256
+ "update_collection_metadata",
257
+ "update_inference_endpoint",
258
+ "update_repo_settings",
259
+ "update_repo_visibility",
260
+ "update_webhook",
261
+ "upload_file",
262
+ "upload_folder",
263
+ "upload_large_folder",
264
+ "whoami",
265
+ ],
266
+ "hf_file_system": [
267
+ "HfFileSystem",
268
+ "HfFileSystemFile",
269
+ "HfFileSystemResolvedPath",
270
+ "HfFileSystemStreamFile",
271
+ ],
272
+ "hub_mixin": [
273
+ "ModelHubMixin",
274
+ "PyTorchModelHubMixin",
275
+ ],
276
+ "inference._client": [
277
+ "InferenceClient",
278
+ "InferenceTimeoutError",
279
+ ],
280
+ "inference._generated._async_client": [
281
+ "AsyncInferenceClient",
282
+ ],
283
+ "inference._generated.types": [
284
+ "AudioClassificationInput",
285
+ "AudioClassificationOutputElement",
286
+ "AudioClassificationOutputTransform",
287
+ "AudioClassificationParameters",
288
+ "AudioToAudioInput",
289
+ "AudioToAudioOutputElement",
290
+ "AutomaticSpeechRecognitionEarlyStoppingEnum",
291
+ "AutomaticSpeechRecognitionGenerationParameters",
292
+ "AutomaticSpeechRecognitionInput",
293
+ "AutomaticSpeechRecognitionOutput",
294
+ "AutomaticSpeechRecognitionOutputChunk",
295
+ "AutomaticSpeechRecognitionParameters",
296
+ "ChatCompletionInput",
297
+ "ChatCompletionInputFunctionDefinition",
298
+ "ChatCompletionInputFunctionName",
299
+ "ChatCompletionInputGrammarType",
300
+ "ChatCompletionInputMessage",
301
+ "ChatCompletionInputMessageChunk",
302
+ "ChatCompletionInputStreamOptions",
303
+ "ChatCompletionInputToolType",
304
+ "ChatCompletionInputURL",
305
+ "ChatCompletionOutput",
306
+ "ChatCompletionOutputComplete",
307
+ "ChatCompletionOutputFunctionDefinition",
308
+ "ChatCompletionOutputLogprob",
309
+ "ChatCompletionOutputLogprobs",
310
+ "ChatCompletionOutputMessage",
311
+ "ChatCompletionOutputToolCall",
312
+ "ChatCompletionOutputTopLogprob",
313
+ "ChatCompletionOutputUsage",
314
+ "ChatCompletionStreamOutput",
315
+ "ChatCompletionStreamOutputChoice",
316
+ "ChatCompletionStreamOutputDelta",
317
+ "ChatCompletionStreamOutputDeltaToolCall",
318
+ "ChatCompletionStreamOutputFunction",
319
+ "ChatCompletionStreamOutputLogprob",
320
+ "ChatCompletionStreamOutputLogprobs",
321
+ "ChatCompletionStreamOutputTopLogprob",
322
+ "ChatCompletionStreamOutputUsage",
323
+ "DepthEstimationInput",
324
+ "DepthEstimationOutput",
325
+ "DocumentQuestionAnsweringInput",
326
+ "DocumentQuestionAnsweringInputData",
327
+ "DocumentQuestionAnsweringOutputElement",
328
+ "DocumentQuestionAnsweringParameters",
329
+ "FeatureExtractionInput",
330
+ "FillMaskInput",
331
+ "FillMaskOutputElement",
332
+ "FillMaskParameters",
333
+ "ImageClassificationInput",
334
+ "ImageClassificationOutputElement",
335
+ "ImageClassificationOutputTransform",
336
+ "ImageClassificationParameters",
337
+ "ImageSegmentationInput",
338
+ "ImageSegmentationOutputElement",
339
+ "ImageSegmentationParameters",
340
+ "ImageToImageInput",
341
+ "ImageToImageOutput",
342
+ "ImageToImageParameters",
343
+ "ImageToImageTargetSize",
344
+ "ImageToTextEarlyStoppingEnum",
345
+ "ImageToTextGenerationParameters",
346
+ "ImageToTextInput",
347
+ "ImageToTextOutput",
348
+ "ImageToTextParameters",
349
+ "ObjectDetectionBoundingBox",
350
+ "ObjectDetectionInput",
351
+ "ObjectDetectionOutputElement",
352
+ "ObjectDetectionParameters",
353
+ "QuestionAnsweringInput",
354
+ "QuestionAnsweringInputData",
355
+ "QuestionAnsweringOutputElement",
356
+ "QuestionAnsweringParameters",
357
+ "SentenceSimilarityInput",
358
+ "SentenceSimilarityInputData",
359
+ "SummarizationInput",
360
+ "SummarizationOutput",
361
+ "SummarizationParameters",
362
+ "TableQuestionAnsweringInput",
363
+ "TableQuestionAnsweringInputData",
364
+ "TableQuestionAnsweringOutputElement",
365
+ "Text2TextGenerationInput",
366
+ "Text2TextGenerationOutput",
367
+ "Text2TextGenerationParameters",
368
+ "TextClassificationInput",
369
+ "TextClassificationOutputElement",
370
+ "TextClassificationOutputTransform",
371
+ "TextClassificationParameters",
372
+ "TextGenerationInput",
373
+ "TextGenerationInputGenerateParameters",
374
+ "TextGenerationInputGrammarType",
375
+ "TextGenerationOutput",
376
+ "TextGenerationOutputBestOfSequence",
377
+ "TextGenerationOutputDetails",
378
+ "TextGenerationOutputPrefillToken",
379
+ "TextGenerationOutputToken",
380
+ "TextGenerationStreamOutput",
381
+ "TextGenerationStreamOutputStreamDetails",
382
+ "TextGenerationStreamOutputToken",
383
+ "TextToAudioEarlyStoppingEnum",
384
+ "TextToAudioGenerationParameters",
385
+ "TextToAudioInput",
386
+ "TextToAudioOutput",
387
+ "TextToAudioParameters",
388
+ "TextToImageInput",
389
+ "TextToImageOutput",
390
+ "TextToImageParameters",
391
+ "TextToImageTargetSize",
392
+ "TextToSpeechEarlyStoppingEnum",
393
+ "TextToSpeechGenerationParameters",
394
+ "TextToSpeechInput",
395
+ "TextToSpeechOutput",
396
+ "TextToSpeechParameters",
397
+ "TokenClassificationInput",
398
+ "TokenClassificationOutputElement",
399
+ "TokenClassificationParameters",
400
+ "ToolElement",
401
+ "TranslationInput",
402
+ "TranslationOutput",
403
+ "TranslationParameters",
404
+ "VideoClassificationInput",
405
+ "VideoClassificationOutputElement",
406
+ "VideoClassificationOutputTransform",
407
+ "VideoClassificationParameters",
408
+ "VisualQuestionAnsweringInput",
409
+ "VisualQuestionAnsweringInputData",
410
+ "VisualQuestionAnsweringOutputElement",
411
+ "VisualQuestionAnsweringParameters",
412
+ "ZeroShotClassificationInput",
413
+ "ZeroShotClassificationInputData",
414
+ "ZeroShotClassificationOutputElement",
415
+ "ZeroShotClassificationParameters",
416
+ "ZeroShotImageClassificationInput",
417
+ "ZeroShotImageClassificationInputData",
418
+ "ZeroShotImageClassificationOutputElement",
419
+ "ZeroShotImageClassificationParameters",
420
+ "ZeroShotObjectDetectionBoundingBox",
421
+ "ZeroShotObjectDetectionInput",
422
+ "ZeroShotObjectDetectionInputData",
423
+ "ZeroShotObjectDetectionOutputElement",
424
+ ],
425
+ "inference_api": [
426
+ "InferenceApi",
427
+ ],
428
+ "keras_mixin": [
429
+ "KerasModelHubMixin",
430
+ "from_pretrained_keras",
431
+ "push_to_hub_keras",
432
+ "save_pretrained_keras",
433
+ ],
434
+ "repocard": [
435
+ "DatasetCard",
436
+ "ModelCard",
437
+ "RepoCard",
438
+ "SpaceCard",
439
+ "metadata_eval_result",
440
+ "metadata_load",
441
+ "metadata_save",
442
+ "metadata_update",
443
+ ],
444
+ "repocard_data": [
445
+ "CardData",
446
+ "DatasetCardData",
447
+ "EvalResult",
448
+ "ModelCardData",
449
+ "SpaceCardData",
450
+ ],
451
+ "repository": [
452
+ "Repository",
453
+ ],
454
+ "serialization": [
455
+ "StateDictSplit",
456
+ "get_tf_storage_size",
457
+ "get_torch_storage_id",
458
+ "get_torch_storage_size",
459
+ "save_torch_model",
460
+ "save_torch_state_dict",
461
+ "split_state_dict_into_shards_factory",
462
+ "split_tf_state_dict_into_shards",
463
+ "split_torch_state_dict_into_shards",
464
+ ],
465
+ "utils": [
466
+ "CacheNotFound",
467
+ "CachedFileInfo",
468
+ "CachedRepoInfo",
469
+ "CachedRevisionInfo",
470
+ "CorruptedCacheException",
471
+ "DeleteCacheStrategy",
472
+ "HFCacheInfo",
473
+ "HfFolder",
474
+ "cached_assets_path",
475
+ "configure_http_backend",
476
+ "dump_environment_info",
477
+ "get_session",
478
+ "get_token",
479
+ "logging",
480
+ "scan_cache_dir",
481
+ ],
482
+ }
483
+
484
+
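Each key in `_SUBMOD_ATTRS` above names a submodule of the package, and each value lists the attributes that should be reachable from the top-level namespace without importing that submodule up front. The `_attach` helper defined next inverts this mapping so that a single attribute lookup knows which submodule to import. A minimal, hypothetical sketch of that inversion (the two-entry mapping below is illustrative, not the real table):

```python
# Hypothetical, trimmed-down mapping used only to illustrate the inversion.
submod_attrs = {
    "hf_api": ["HfApi", "create_repo"],
    "inference_api": ["InferenceApi"],
}

# Same comprehension as in _attach below: attribute name -> owning submodule.
attr_to_modules = {attr: mod for mod, attrs in submod_attrs.items() for attr in attrs}
print(attr_to_modules)
# {'HfApi': 'hf_api', 'create_repo': 'hf_api', 'InferenceApi': 'inference_api'}
```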
485
+ def _attach(package_name, submodules=None, submod_attrs=None):
486
+ """Attach lazily loaded submodules, functions, or other attributes.
487
+
488
+ Typically, modules import submodules and attributes as follows:
489
+
490
+ ```py
491
+ import mysubmodule
492
+ import anothersubmodule
493
+
494
+ from .foo import someattr
495
+ ```
496
+
497
+ The idea is to replace a package's `__getattr__`, `__dir__`, and
498
+ `__all__`, such that all imports work exactly the way they would
499
+ with normal imports, except that the import occurs upon first use.
500
+
501
+ The typical way to call this function, replacing the above imports, is:
502
+
503
+ ```python
504
+ __getattr__, __dir__, __all__ = _attach(
505
+ __name__,
506
+ ['mysubmodule', 'anothersubmodule'],
507
+ {'foo': ['someattr']}
508
+ )
509
+ ```
510
+ This functionality requires Python 3.7 or higher.
511
+
512
+ Args:
513
+ package_name (`str`):
514
+ Typically use `__name__`.
515
+ submodules (`set`):
516
+ List of submodules to attach.
517
+ submod_attrs (`dict`):
518
+ Dictionary of submodule -> list of attributes / functions.
519
+ These attributes are imported as they are used.
520
+
521
+ Returns:
522
+ __getattr__, __dir__, __all__
523
+
524
+ """
525
+ if submod_attrs is None:
526
+ submod_attrs = {}
527
+
528
+ if submodules is None:
529
+ submodules = set()
530
+ else:
531
+ submodules = set(submodules)
532
+
533
+ attr_to_modules = {attr: mod for mod, attrs in submod_attrs.items() for attr in attrs}
534
+
535
+ __all__ = list(submodules | attr_to_modules.keys())
536
+
537
+ def __getattr__(name):
538
+ if name in submodules:
539
+ try:
540
+ return importlib.import_module(f"{package_name}.{name}")
541
+ except Exception as e:
542
+ print(f"Error importing {package_name}.{name}: {e}")
543
+ raise
544
+ elif name in attr_to_modules:
545
+ submod_path = f"{package_name}.{attr_to_modules[name]}"
546
+ try:
547
+ submod = importlib.import_module(submod_path)
548
+ except Exception as e:
549
+ print(f"Error importing {submod_path}: {e}")
550
+ raise
551
+ attr = getattr(submod, name)
552
+
553
+ # If the attribute lives in a file (module) with the same
554
+ # name as the attribute, ensure that the attribute and *not*
555
+ # the module is accessible on the package.
556
+ if name == attr_to_modules[name]:
557
+ pkg = sys.modules[package_name]
558
+ pkg.__dict__[name] = attr
559
+
560
+ return attr
561
+ else:
562
+ raise AttributeError(f"No {package_name} attribute {name}")
563
+
564
+ def __dir__():
565
+ return __all__
566
+
567
+ return __getattr__, __dir__, list(__all__)
568
+
569
+
570
+ __getattr__, __dir__, __all__ = _attach(__name__, submodules=[], submod_attrs=_SUBMOD_ATTRS)
571
+
572
+ if os.environ.get("EAGER_IMPORT", ""):
573
+ for attr in __all__:
574
+ __getattr__(attr)
575
+
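With the lazy machinery wired up, `import huggingface_hub` stays cheap: a submodule is only imported the first time one of its attributes is accessed, unless the `EAGER_IMPORT` environment variable is set, in which case the loop above resolves every name in `__all__` at import time. A minimal sketch of the observable behaviour, assuming `huggingface_hub` is installed and `EAGER_IMPORT` is unset:

```python
import sys

import huggingface_hub

# Right after import, the hf_api submodule should not be loaded yet.
print("huggingface_hub.hf_api" in sys.modules)  # expected: False

# First attribute access goes through the module-level __getattr__ above,
# which imports huggingface_hub.hf_api and returns HfApi from it.
api_cls = huggingface_hub.HfApi
print("huggingface_hub.hf_api" in sys.modules)  # expected: True
print(api_cls.__module__)                       # expected: huggingface_hub.hf_api
```

Setting `EAGER_IMPORT=1` before the import would flip the first check, since every entry in `__all__` is then resolved immediately.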
576
+ # WARNING: any content below this statement is generated automatically. Any manual edit
577
+ # will be lost when re-generating this file !
578
+ #
579
+ # To update the static imports, please run the following command and commit the changes.
580
+ # ```
581
+ # # Use script
582
+ # python utils/check_static_imports.py --update-file
583
+ #
584
+ # # Or run style on codebase
585
+ # make style
586
+ # ```
587
+ if TYPE_CHECKING: # pragma: no cover
588
+ from ._commit_scheduler import CommitScheduler # noqa: F401
589
+ from ._inference_endpoints import (
590
+ InferenceEndpoint, # noqa: F401
591
+ InferenceEndpointError, # noqa: F401
592
+ InferenceEndpointStatus, # noqa: F401
593
+ InferenceEndpointTimeoutError, # noqa: F401
594
+ InferenceEndpointType, # noqa: F401
595
+ )
596
+ from ._login import (
597
+ auth_list, # noqa: F401
598
+ auth_switch, # noqa: F401
599
+ interpreter_login, # noqa: F401
600
+ login, # noqa: F401
601
+ logout, # noqa: F401
602
+ notebook_login, # noqa: F401
603
+ )
604
+ from ._multi_commits import (
605
+ MultiCommitException, # noqa: F401
606
+ plan_multi_commits, # noqa: F401
607
+ )
608
+ from ._snapshot_download import snapshot_download # noqa: F401
609
+ from ._space_api import (
610
+ SpaceHardware, # noqa: F401
611
+ SpaceRuntime, # noqa: F401
612
+ SpaceStage, # noqa: F401
613
+ SpaceStorage, # noqa: F401
614
+ SpaceVariable, # noqa: F401
615
+ )
616
+ from ._tensorboard_logger import HFSummaryWriter # noqa: F401
617
+ from ._webhooks_payload import (
618
+ WebhookPayload, # noqa: F401
619
+ WebhookPayloadComment, # noqa: F401
620
+ WebhookPayloadDiscussion, # noqa: F401
621
+ WebhookPayloadDiscussionChanges, # noqa: F401
622
+ WebhookPayloadEvent, # noqa: F401
623
+ WebhookPayloadMovedTo, # noqa: F401
624
+ WebhookPayloadRepo, # noqa: F401
625
+ WebhookPayloadUrl, # noqa: F401
626
+ WebhookPayloadWebhook, # noqa: F401
627
+ )
628
+ from ._webhooks_server import (
629
+ WebhooksServer, # noqa: F401
630
+ webhook_endpoint, # noqa: F401
631
+ )
632
+ from .community import (
633
+ Discussion, # noqa: F401
634
+ DiscussionComment, # noqa: F401
635
+ DiscussionCommit, # noqa: F401
636
+ DiscussionEvent, # noqa: F401
637
+ DiscussionStatusChange, # noqa: F401
638
+ DiscussionTitleChange, # noqa: F401
639
+ DiscussionWithDetails, # noqa: F401
640
+ )
641
+ from .constants import (
642
+ CONFIG_NAME, # noqa: F401
643
+ FLAX_WEIGHTS_NAME, # noqa: F401
644
+ HUGGINGFACE_CO_URL_HOME, # noqa: F401
645
+ HUGGINGFACE_CO_URL_TEMPLATE, # noqa: F401
646
+ PYTORCH_WEIGHTS_NAME, # noqa: F401
647
+ REPO_TYPE_DATASET, # noqa: F401
648
+ REPO_TYPE_MODEL, # noqa: F401
649
+ REPO_TYPE_SPACE, # noqa: F401
650
+ TF2_WEIGHTS_NAME, # noqa: F401
651
+ TF_WEIGHTS_NAME, # noqa: F401
652
+ )
653
+ from .fastai_utils import (
654
+ _save_pretrained_fastai, # noqa: F401
655
+ from_pretrained_fastai, # noqa: F401
656
+ push_to_hub_fastai, # noqa: F401
657
+ )
658
+ from .file_download import (
659
+ _CACHED_NO_EXIST, # noqa: F401
660
+ HfFileMetadata, # noqa: F401
661
+ get_hf_file_metadata, # noqa: F401
662
+ hf_hub_download, # noqa: F401
663
+ hf_hub_url, # noqa: F401
664
+ try_to_load_from_cache, # noqa: F401
665
+ )
666
+ from .hf_api import (
667
+ Collection, # noqa: F401
668
+ CollectionItem, # noqa: F401
669
+ CommitInfo, # noqa: F401
670
+ CommitOperation, # noqa: F401
671
+ CommitOperationAdd, # noqa: F401
672
+ CommitOperationCopy, # noqa: F401
673
+ CommitOperationDelete, # noqa: F401
674
+ DatasetInfo, # noqa: F401
675
+ GitCommitInfo, # noqa: F401
676
+ GitRefInfo, # noqa: F401
677
+ GitRefs, # noqa: F401
678
+ HfApi, # noqa: F401
679
+ ModelInfo, # noqa: F401
680
+ RepoUrl, # noqa: F401
681
+ SpaceInfo, # noqa: F401
682
+ User, # noqa: F401
683
+ UserLikes, # noqa: F401
684
+ WebhookInfo, # noqa: F401
685
+ WebhookWatchedItem, # noqa: F401
686
+ accept_access_request, # noqa: F401
687
+ add_collection_item, # noqa: F401
688
+ add_space_secret, # noqa: F401
689
+ add_space_variable, # noqa: F401
690
+ auth_check, # noqa: F401
691
+ cancel_access_request, # noqa: F401
692
+ change_discussion_status, # noqa: F401
693
+ comment_discussion, # noqa: F401
694
+ create_branch, # noqa: F401
695
+ create_collection, # noqa: F401
696
+ create_commit, # noqa: F401
697
+ create_commits_on_pr, # noqa: F401
698
+ create_discussion, # noqa: F401
699
+ create_inference_endpoint, # noqa: F401
700
+ create_pull_request, # noqa: F401
701
+ create_repo, # noqa: F401
702
+ create_tag, # noqa: F401
703
+ create_webhook, # noqa: F401
704
+ dataset_info, # noqa: F401
705
+ delete_branch, # noqa: F401
706
+ delete_collection, # noqa: F401
707
+ delete_collection_item, # noqa: F401
708
+ delete_file, # noqa: F401
709
+ delete_folder, # noqa: F401
710
+ delete_inference_endpoint, # noqa: F401
711
+ delete_repo, # noqa: F401
712
+ delete_space_secret, # noqa: F401
713
+ delete_space_storage, # noqa: F401
714
+ delete_space_variable, # noqa: F401
715
+ delete_tag, # noqa: F401
716
+ delete_webhook, # noqa: F401
717
+ disable_webhook, # noqa: F401
718
+ duplicate_space, # noqa: F401
719
+ edit_discussion_comment, # noqa: F401
720
+ enable_webhook, # noqa: F401
721
+ file_exists, # noqa: F401
722
+ get_collection, # noqa: F401
723
+ get_dataset_tags, # noqa: F401
724
+ get_discussion_details, # noqa: F401
725
+ get_full_repo_name, # noqa: F401
726
+ get_inference_endpoint, # noqa: F401
727
+ get_model_tags, # noqa: F401
728
+ get_paths_info, # noqa: F401
729
+ get_repo_discussions, # noqa: F401
730
+ get_safetensors_metadata, # noqa: F401
731
+ get_space_runtime, # noqa: F401
732
+ get_space_variables, # noqa: F401
733
+ get_token_permission, # noqa: F401
734
+ get_user_overview, # noqa: F401
735
+ get_webhook, # noqa: F401
736
+ grant_access, # noqa: F401
737
+ like, # noqa: F401
738
+ list_accepted_access_requests, # noqa: F401
739
+ list_collections, # noqa: F401
740
+ list_datasets, # noqa: F401
741
+ list_inference_endpoints, # noqa: F401
742
+ list_liked_repos, # noqa: F401
743
+ list_metrics, # noqa: F401
744
+ list_models, # noqa: F401
745
+ list_organization_members, # noqa: F401
746
+ list_papers, # noqa: F401
747
+ list_pending_access_requests, # noqa: F401
748
+ list_rejected_access_requests, # noqa: F401
749
+ list_repo_commits, # noqa: F401
750
+ list_repo_files, # noqa: F401
751
+ list_repo_likers, # noqa: F401
752
+ list_repo_refs, # noqa: F401
753
+ list_repo_tree, # noqa: F401
754
+ list_spaces, # noqa: F401
755
+ list_user_followers, # noqa: F401
756
+ list_user_following, # noqa: F401
757
+ list_webhooks, # noqa: F401
758
+ merge_pull_request, # noqa: F401
759
+ model_info, # noqa: F401
760
+ move_repo, # noqa: F401
761
+ paper_info, # noqa: F401
762
+ parse_safetensors_file_metadata, # noqa: F401
763
+ pause_inference_endpoint, # noqa: F401
764
+ pause_space, # noqa: F401
765
+ preupload_lfs_files, # noqa: F401
766
+ reject_access_request, # noqa: F401
767
+ rename_discussion, # noqa: F401
768
+ repo_exists, # noqa: F401
769
+ repo_info, # noqa: F401
770
+ repo_type_and_id_from_hf_id, # noqa: F401
771
+ request_space_hardware, # noqa: F401
772
+ request_space_storage, # noqa: F401
773
+ restart_space, # noqa: F401
774
+ resume_inference_endpoint, # noqa: F401
775
+ revision_exists, # noqa: F401
776
+ run_as_future, # noqa: F401
777
+ scale_to_zero_inference_endpoint, # noqa: F401
778
+ set_space_sleep_time, # noqa: F401
779
+ space_info, # noqa: F401
780
+ super_squash_history, # noqa: F401
781
+ unlike, # noqa: F401
782
+ update_collection_item, # noqa: F401
783
+ update_collection_metadata, # noqa: F401
784
+ update_inference_endpoint, # noqa: F401
785
+ update_repo_settings, # noqa: F401
786
+ update_repo_visibility, # noqa: F401
787
+ update_webhook, # noqa: F401
788
+ upload_file, # noqa: F401
789
+ upload_folder, # noqa: F401
790
+ upload_large_folder, # noqa: F401
791
+ whoami, # noqa: F401
792
+ )
793
+ from .hf_file_system import (
794
+ HfFileSystem, # noqa: F401
795
+ HfFileSystemFile, # noqa: F401
796
+ HfFileSystemResolvedPath, # noqa: F401
797
+ HfFileSystemStreamFile, # noqa: F401
798
+ )
799
+ from .hub_mixin import (
800
+ ModelHubMixin, # noqa: F401
801
+ PyTorchModelHubMixin, # noqa: F401
802
+ )
803
+ from .inference._client import (
804
+ InferenceClient, # noqa: F401
805
+ InferenceTimeoutError, # noqa: F401
806
+ )
807
+ from .inference._generated._async_client import AsyncInferenceClient # noqa: F401
808
+ from .inference._generated.types import (
809
+ AudioClassificationInput, # noqa: F401
810
+ AudioClassificationOutputElement, # noqa: F401
811
+ AudioClassificationOutputTransform, # noqa: F401
812
+ AudioClassificationParameters, # noqa: F401
813
+ AudioToAudioInput, # noqa: F401
814
+ AudioToAudioOutputElement, # noqa: F401
815
+ AutomaticSpeechRecognitionEarlyStoppingEnum, # noqa: F401
816
+ AutomaticSpeechRecognitionGenerationParameters, # noqa: F401
817
+ AutomaticSpeechRecognitionInput, # noqa: F401
818
+ AutomaticSpeechRecognitionOutput, # noqa: F401
819
+ AutomaticSpeechRecognitionOutputChunk, # noqa: F401
820
+ AutomaticSpeechRecognitionParameters, # noqa: F401
821
+ ChatCompletionInput, # noqa: F401
822
+ ChatCompletionInputFunctionDefinition, # noqa: F401
823
+ ChatCompletionInputFunctionName, # noqa: F401
824
+ ChatCompletionInputGrammarType, # noqa: F401
825
+ ChatCompletionInputMessage, # noqa: F401
826
+ ChatCompletionInputMessageChunk, # noqa: F401
827
+ ChatCompletionInputStreamOptions, # noqa: F401
828
+ ChatCompletionInputToolType, # noqa: F401
829
+ ChatCompletionInputURL, # noqa: F401
830
+ ChatCompletionOutput, # noqa: F401
831
+ ChatCompletionOutputComplete, # noqa: F401
832
+ ChatCompletionOutputFunctionDefinition, # noqa: F401
833
+ ChatCompletionOutputLogprob, # noqa: F401
834
+ ChatCompletionOutputLogprobs, # noqa: F401
835
+ ChatCompletionOutputMessage, # noqa: F401
836
+ ChatCompletionOutputToolCall, # noqa: F401
837
+ ChatCompletionOutputTopLogprob, # noqa: F401
838
+ ChatCompletionOutputUsage, # noqa: F401
839
+ ChatCompletionStreamOutput, # noqa: F401
840
+ ChatCompletionStreamOutputChoice, # noqa: F401
841
+ ChatCompletionStreamOutputDelta, # noqa: F401
842
+ ChatCompletionStreamOutputDeltaToolCall, # noqa: F401
843
+ ChatCompletionStreamOutputFunction, # noqa: F401
844
+ ChatCompletionStreamOutputLogprob, # noqa: F401
845
+ ChatCompletionStreamOutputLogprobs, # noqa: F401
846
+ ChatCompletionStreamOutputTopLogprob, # noqa: F401
847
+ ChatCompletionStreamOutputUsage, # noqa: F401
848
+ DepthEstimationInput, # noqa: F401
849
+ DepthEstimationOutput, # noqa: F401
850
+ DocumentQuestionAnsweringInput, # noqa: F401
851
+ DocumentQuestionAnsweringInputData, # noqa: F401
852
+ DocumentQuestionAnsweringOutputElement, # noqa: F401
853
+ DocumentQuestionAnsweringParameters, # noqa: F401
854
+ FeatureExtractionInput, # noqa: F401
855
+ FillMaskInput, # noqa: F401
856
+ FillMaskOutputElement, # noqa: F401
857
+ FillMaskParameters, # noqa: F401
858
+ ImageClassificationInput, # noqa: F401
859
+ ImageClassificationOutputElement, # noqa: F401
860
+ ImageClassificationOutputTransform, # noqa: F401
861
+ ImageClassificationParameters, # noqa: F401
862
+ ImageSegmentationInput, # noqa: F401
863
+ ImageSegmentationOutputElement, # noqa: F401
864
+ ImageSegmentationParameters, # noqa: F401
865
+ ImageToImageInput, # noqa: F401
866
+ ImageToImageOutput, # noqa: F401
867
+ ImageToImageParameters, # noqa: F401
868
+ ImageToImageTargetSize, # noqa: F401
869
+ ImageToTextEarlyStoppingEnum, # noqa: F401
870
+ ImageToTextGenerationParameters, # noqa: F401
871
+ ImageToTextInput, # noqa: F401
872
+ ImageToTextOutput, # noqa: F401
873
+ ImageToTextParameters, # noqa: F401
874
+ ObjectDetectionBoundingBox, # noqa: F401
875
+ ObjectDetectionInput, # noqa: F401
876
+ ObjectDetectionOutputElement, # noqa: F401
877
+ ObjectDetectionParameters, # noqa: F401
878
+ QuestionAnsweringInput, # noqa: F401
879
+ QuestionAnsweringInputData, # noqa: F401
880
+ QuestionAnsweringOutputElement, # noqa: F401
881
+ QuestionAnsweringParameters, # noqa: F401
882
+ SentenceSimilarityInput, # noqa: F401
883
+ SentenceSimilarityInputData, # noqa: F401
884
+ SummarizationInput, # noqa: F401
885
+ SummarizationOutput, # noqa: F401
886
+ SummarizationParameters, # noqa: F401
887
+ TableQuestionAnsweringInput, # noqa: F401
888
+ TableQuestionAnsweringInputData, # noqa: F401
889
+ TableQuestionAnsweringOutputElement, # noqa: F401
890
+ Text2TextGenerationInput, # noqa: F401
891
+ Text2TextGenerationOutput, # noqa: F401
892
+ Text2TextGenerationParameters, # noqa: F401
893
+ TextClassificationInput, # noqa: F401
894
+ TextClassificationOutputElement, # noqa: F401
895
+ TextClassificationOutputTransform, # noqa: F401
896
+ TextClassificationParameters, # noqa: F401
897
+ TextGenerationInput, # noqa: F401
898
+ TextGenerationInputGenerateParameters, # noqa: F401
899
+ TextGenerationInputGrammarType, # noqa: F401
900
+ TextGenerationOutput, # noqa: F401
901
+ TextGenerationOutputBestOfSequence, # noqa: F401
902
+ TextGenerationOutputDetails, # noqa: F401
903
+ TextGenerationOutputPrefillToken, # noqa: F401
904
+ TextGenerationOutputToken, # noqa: F401
905
+ TextGenerationStreamOutput, # noqa: F401
906
+ TextGenerationStreamOutputStreamDetails, # noqa: F401
907
+ TextGenerationStreamOutputToken, # noqa: F401
908
+ TextToAudioEarlyStoppingEnum, # noqa: F401
909
+ TextToAudioGenerationParameters, # noqa: F401
910
+ TextToAudioInput, # noqa: F401
911
+ TextToAudioOutput, # noqa: F401
912
+ TextToAudioParameters, # noqa: F401
913
+ TextToImageInput, # noqa: F401
914
+ TextToImageOutput, # noqa: F401
915
+ TextToImageParameters, # noqa: F401
916
+ TextToImageTargetSize, # noqa: F401
917
+ TextToSpeechEarlyStoppingEnum, # noqa: F401
918
+ TextToSpeechGenerationParameters, # noqa: F401
919
+ TextToSpeechInput, # noqa: F401
920
+ TextToSpeechOutput, # noqa: F401
921
+ TextToSpeechParameters, # noqa: F401
922
+ TokenClassificationInput, # noqa: F401
923
+ TokenClassificationOutputElement, # noqa: F401
924
+ TokenClassificationParameters, # noqa: F401
925
+ ToolElement, # noqa: F401
926
+ TranslationInput, # noqa: F401
927
+ TranslationOutput, # noqa: F401
928
+ TranslationParameters, # noqa: F401
929
+ VideoClassificationInput, # noqa: F401
930
+ VideoClassificationOutputElement, # noqa: F401
931
+ VideoClassificationOutputTransform, # noqa: F401
932
+ VideoClassificationParameters, # noqa: F401
933
+ VisualQuestionAnsweringInput, # noqa: F401
934
+ VisualQuestionAnsweringInputData, # noqa: F401
935
+ VisualQuestionAnsweringOutputElement, # noqa: F401
936
+ VisualQuestionAnsweringParameters, # noqa: F401
937
+ ZeroShotClassificationInput, # noqa: F401
938
+ ZeroShotClassificationInputData, # noqa: F401
939
+ ZeroShotClassificationOutputElement, # noqa: F401
940
+ ZeroShotClassificationParameters, # noqa: F401
941
+ ZeroShotImageClassificationInput, # noqa: F401
942
+ ZeroShotImageClassificationInputData, # noqa: F401
943
+ ZeroShotImageClassificationOutputElement, # noqa: F401
944
+ ZeroShotImageClassificationParameters, # noqa: F401
945
+ ZeroShotObjectDetectionBoundingBox, # noqa: F401
946
+ ZeroShotObjectDetectionInput, # noqa: F401
947
+ ZeroShotObjectDetectionInputData, # noqa: F401
948
+ ZeroShotObjectDetectionOutputElement, # noqa: F401
949
+ )
950
+ from .inference_api import InferenceApi # noqa: F401
951
+ from .keras_mixin import (
952
+ KerasModelHubMixin, # noqa: F401
953
+ from_pretrained_keras, # noqa: F401
954
+ push_to_hub_keras, # noqa: F401
955
+ save_pretrained_keras, # noqa: F401
956
+ )
957
+ from .repocard import (
958
+ DatasetCard, # noqa: F401
959
+ ModelCard, # noqa: F401
960
+ RepoCard, # noqa: F401
961
+ SpaceCard, # noqa: F401
962
+ metadata_eval_result, # noqa: F401
963
+ metadata_load, # noqa: F401
964
+ metadata_save, # noqa: F401
965
+ metadata_update, # noqa: F401
966
+ )
967
+ from .repocard_data import (
968
+ CardData, # noqa: F401
969
+ DatasetCardData, # noqa: F401
970
+ EvalResult, # noqa: F401
971
+ ModelCardData, # noqa: F401
972
+ SpaceCardData, # noqa: F401
973
+ )
974
+ from .repository import Repository # noqa: F401
975
+ from .serialization import (
976
+ StateDictSplit, # noqa: F401
977
+ get_tf_storage_size, # noqa: F401
978
+ get_torch_storage_id, # noqa: F401
979
+ get_torch_storage_size, # noqa: F401
980
+ save_torch_model, # noqa: F401
981
+ save_torch_state_dict, # noqa: F401
982
+ split_state_dict_into_shards_factory, # noqa: F401
983
+ split_tf_state_dict_into_shards, # noqa: F401
984
+ split_torch_state_dict_into_shards, # noqa: F401
985
+ )
986
+ from .utils import (
987
+ CachedFileInfo, # noqa: F401
988
+ CachedRepoInfo, # noqa: F401
989
+ CachedRevisionInfo, # noqa: F401
990
+ CacheNotFound, # noqa: F401
991
+ CorruptedCacheException, # noqa: F401
992
+ DeleteCacheStrategy, # noqa: F401
993
+ HFCacheInfo, # noqa: F401
994
+ HfFolder, # noqa: F401
995
+ cached_assets_path, # noqa: F401
996
+ configure_http_backend, # noqa: F401
997
+ dump_environment_info, # noqa: F401
998
+ get_session, # noqa: F401
999
+ get_token, # noqa: F401
1000
+ logging, # noqa: F401
1001
+ scan_cache_dir, # noqa: F401
1002
+ )