# PEP 517 build configuration: the project is built with the Hatchling backend.
[build-system]
build-backend = "hatchling.build"
requires = ["hatchling"]
[project]
name = "gte-qwen2-7b-instruct-m2v"
version = "0.1.0"
description = "Model2Vec distillation pipeline for gte-Qwen2-7B-instruct"
readme = "README.md"
requires-python = ">=3.12"
# Runtime requirements: one PEP 508 string per line, sorted by distribution name.
dependencies = [
    "accelerate>=1.7.0",
    "beam-client>=0.2.155",
    "boto3>=1.38.23",
    "datasets>=3.6.0",
    # NOTE(review): editables, hatchling and setuptools are build/packaging tools
    # (hatchling is already declared in [build-system].requires). Confirm they are
    # really needed at runtime, e.g. for no-build-isolation installs, before
    # moving or removing them.
    "editables>=0.5",
    "einops>=0.8.1",
    "flash-attn>=2.7.4.post1",
    "hatchling>=1.27.0",
    "iso639>=0.1.4",
    "jinja2>=3.0.0",
    "joblib>=1.0.0",
    # Exact pin to a release candidate; revisit once a suitable final release exists.
    "kaleido==1.0.0rc13",
    "lightning>=2.5.1.post0",
    "matplotlib>=3.10.3",
    "more-itertools>=10.5.0",
    "mteb>=1.14.15",
    "numpy>=1.26.4",
    "plotly>=6.1.1",
    "psutil>=7.0.0",
    "pydantic>=2.11.5",
    # The `dotenv` distribution on PyPI is only a deprecated shim; `python-dotenv`
    # is the maintained project and provides the same `dotenv` import module.
    "python-dotenv>=1.0.0",
    "requests>=2.32.3",
    "rich>=10.0.0",
    "safetensors>=0.3.0",
    "scikit-learn>=1.6.1",
    "seaborn>=0.13.2",
    "sentence-transformers>=4.1.0",
    "setuptools>=80.8.0",
    "skops>=0.11.0",
    "smart-open[s3]>=7.1.0",
    "statsmodels>=0.14.4",
    "tokenizers>=0.20",
    "torch>=2.7.0",
    "tqdm>=4.65.0",
    # Upper bound only. NOTE(review): consider adding an explicit lower bound.
    "transformers<=4.52.1",
    "typer>=0.16.0",
]

[project.scripts]
# Installs a `distiller` console command bound to the entry point `distiller.__main__:app`.
distiller = "distiller.__main__:app"
# Development-only tooling: static type checking (mypy) and linting/formatting (ruff).
[dependency-groups]
dev = ["mypy>=1.15.0", "ruff>=0.11.6"]
# Wheel contents: package the code that lives under src/distiller.
[tool.hatch.build.targets.wheel]
packages = [
    "src/distiller",
]
[tool.mypy]
# mypy treats each `exclude` entry as a regular expression and searches for it
# anywhere in the candidate path. Bare names therefore over-match: "dist" also
# matches "src/distiller/...", and "build" matches "builder.py". Every pattern
# below is anchored to one whole directory component (at the top level or
# nested) so that only those directories are skipped during file discovery.
exclude = [
    '(^|/)\.git/',
    '(^|/)\.ruff_cache/',
    '(^|/)\.venv/',
    '(^|/)venv/',
    '(^|/)__pycache__/',
    '(^|/)build/',
    '(^|/)dist/',
    '(^|/)vendor/',
]
# Analyze imported packages even when they ship without type information.
follow_untyped_imports = true
[tool.ruff]
target-version = "py312"
line-length = 120
# Paths skipped by Ruff. Setting `exclude` replaces Ruff's built-in default
# list rather than extending it, so common directories are repeated here.
exclude = [
    # Version control and tool caches
    ".git",
    ".ruff_cache",
    "__pycache__",
    # Virtual environments
    ".venv",
    "venv",
    # Build artifacts
    "build",
    "dist",
    # Excluded source trees (presumably vendored upstream code; confirm)
    "vendor",
    "src/distiller/model2vec",
    "src/distiller/tokenlearn",
]
[tool.ruff.lint]
# Opt in to every rule, then switch off the ones listed in `ignore`.
select = ["ALL"]
ignore = [
    # Docstrings
    "D100",  # undocumented-public-module
    "D101",  # undocumented-public-class
    "D103",  # undocumented-public-function
    "D203",  # one-blank-line-before-class
    "D212",  # multi-line-summary-first-line
    # Tab indentation (the formatter is configured with indent-style = "tab")
    "D206",  # docstring indented with tabs instead of spaces
    "E101",  # indentation contains mixed spaces and tabs
    "W191",  # indentation contains tabs
    # Layout concerns left to the formatter
    "COM812",  # missing-trailing-comma
    "E501",  # line-too-long
    # Boolean parameters (required for typer)
    "FBT001",  # boolean positional argument in function definition
    "FBT002",  # boolean default value in function definition
    # Complexity and size limits
    "C901",  # function too complex
    "PLR0911",  # too many return statements
    "PLR0912",  # too many branches
    "PLR0913",  # too many arguments in function definition
    "PLR0915",  # too many statements
    # Exception handling
    "BLE001",  # blind `except Exception`
    "TRY300",  # consider moving the statement to an `else` block
    "TRY301",  # abstract `raise` to an inner function
    # Logging and output
    "G004",  # logging statement uses an f-string
    "T201",  # `print` found
    # Miscellaneous
    "ANN401",  # dynamically typed expressions (typing.Any) are disallowed
    "ERA001",  # commented-out code
    "PD901",  # generic variable name `df` for DataFrames
    "PLR2004",  # magic value used in comparison
    "RUF001",  # ambiguous unicode character in string
    "TC001",  # move application import into a type-checking block
    "TD003",  # missing issue link in to-do
]
# Thresholds for the complexity rules. C901, PLR0912, PLR0913 and PLR0915 are
# all listed in `ignore` above, so these limits have no effect until those
# rules are re-enabled.
mccabe.max-complexity = 10
pylint.max-args = 5
pylint.max-branches = 12
pylint.max-statements = 50
# Docstring convention that the pydocstyle (D) rules are checked against.
pydocstyle.convention = "google"
[tool.ruff.format]
# Indent with tabs; this is why the tab-related lint rules (E101, W191, D206)
# appear in the lint ignore list.
indent-style = "tab"
quote-style = "double"
line-ending = "auto"
# Keep honoring magic trailing commas when deciding whether to collapse a
# construct onto one line.
skip-magic-trailing-comma = false