File size: 3,748 Bytes
1bc7e54 ecfceb8 1bc7e54 ecfceb8 1bc7e54 ecfceb8 1bc7e54 ecfceb8 7837959 1bc7e54 ecfceb8 7837959 ecfceb8 eb5363b 1bc7e54 ecfceb8 1bc7e54 7837959 ecfceb8 1bc7e54 ecfceb8 1bc7e54 7837959 1bc7e54 7837959 ecfceb8 7837959 1bc7e54 ecfceb8 1bc7e54 ecfceb8 1bc7e54 ecfceb8 7837959 ecfceb8 1bc7e54 7837959 ecfceb8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
# PEP 517 build configuration: hatchling is both the only build requirement
# and the backend that produces the sdist/wheel.
[build-system]
build-backend = "hatchling.build"
requires = ["hatchling"]
# Core package metadata (PEP 621).
[project]
name = "gte-qwen2-7b-instruct-m2v"
version = "0.1.0"
description = "Model2Vec distillation pipeline for gte-Qwen2-7B-instruct"
readme = "README.md"
requires-python = ">=3.12"
# Runtime requirements as PEP 508 strings, one per line, sorted by name.
# Two entries are not plain lower bounds: `kaleido` is pinned to an exact
# release candidate and `transformers` carries only an upper cap.
dependencies = [
    "accelerate>=1.7.0",
    "beam-client>=0.2.155",
    "boto3>=1.38.23",
    "datasets>=3.6.0",
    "dotenv>=0.9.9",
    "editables>=0.5",
    "einops>=0.8.1",
    "flash-attn>=2.7.4.post1",
    "hatchling>=1.27.0",
    "iso639>=0.1.4",
    "jinja2>=3.0.0",
    "joblib>=1.0.0",
    "kaleido==1.0.0rc13",
    "lightning>=2.5.1.post0",
    "matplotlib>=3.10.3",
    "more-itertools>=10.5.0",
    "mteb>=1.14.15",
    "numpy>=1.26.4",
    "plotly>=6.1.1",
    "psutil>=7.0.0",
    "pydantic>=2.11.5",
    "requests>=2.32.3",
    "rich>=10.0.0",
    "safetensors>=0.3.0",
    "scikit-learn>=1.6.1",
    "seaborn>=0.13.2",
    "sentence-transformers>=4.1.0",
    "setuptools>=80.8.0",
    "skops>=0.11.0",
    "smart-open[s3]>=7.1.0",
    "statsmodels>=0.14.4",
    "tokenizers>=0.20",
    "torch>=2.7.0",
    "tqdm>=4.65.0",
    "transformers<=4.52.1",
    "typer>=0.16.0",
]
# Console entry point: installing the package creates a `distiller` command
# that invokes the `app` object in `distiller/__main__.py`.
[project.scripts]
distiller = "distiller.__main__:app"
# Development-only tooling (type checker and linter/formatter); declared as a
# dependency group, so it is not part of the published package requirements.
[dependency-groups]
dev = ["mypy>=1.15.0", "ruff>=0.11.6"]
# The package lives under a src/ layout; tell hatchling which directory to
# ship in the wheel.
[tool.hatch.build.targets.wheel]
packages = ["src/distiller"]
[tool.mypy]
# mypy treats every `exclude` entry as a regular expression that is searched
# for anywhere in the (cwd-relative, "/"-separated) path -- it is NOT a glob.
# Bare names therefore over-match: "dist" also matches "src/distiller/", which
# would silently drop the whole package from discovery, and ".git" matches
# e.g. "digit". Each pattern below is anchored to a complete directory
# component at any depth. Literal (single-quoted) strings keep the regex
# backslashes free of TOML escaping.
exclude = [
    '(^|/)\.git/',
    '(^|/)\.ruff_cache/',
    '(^|/)\.venv/',
    '(^|/)venv/',
    '(^|/)__pycache__/',
    '(^|/)build/',
    '(^|/)dist/',
    '(^|/)vendor/',
]
# Analyze installed packages that ship without type information instead of
# treating their imports as untyped.
follow_untyped_imports = true
# Settings shared by the ruff linter and formatter.
[tool.ruff]
target-version = "py312"
line-length = 120
# Files and directories ruff should skip: VCS/cache/virtualenv/build output,
# plus two sub-packages of src/distiller.
exclude = [
    ".git",
    ".ruff_cache",
    ".venv",
    "venv",
    "__pycache__",
    "build",
    "dist",
    "vendor",
    "src/distiller/model2vec",
    "src/distiller/tokenlearn",
]
[tool.ruff.lint]
# Enable all rules by default, then selectively disable
select = ["ALL"]
ignore = [
# Rules that conflict with other tools/preferences
"D203", # one-blank-line-before-class
"D212", # multi-line-summary-first-line
"FBT001", # Boolean positional arg in function definition (required for typer)
"FBT002", # Boolean default value in function definition (required for typer)
"C901", # function too complex
"PLR0911", # too many return statements
"PLR0912", # too many branches
"PLR0913", # too many arguments in function definition
"PLR0915", # too many statements
"TRY300", # Consider moving this statement to an `else` block
"COM812", # Missing trailing comma (conflicts with the ruff formatter)
"TC001", # Move application import into a type-checking block
"ERA001", # Found commented-out code
"G004", # Logging statement uses f-string
"TD003", # Missing link in to-do
"TRY301", # Abstract raise to an inner function
# Disable rules that conflict with tab indentation
"E101", # Indentation contains mixed spaces and tabs
"W191", # indentation contains tabs
"D206", # indent with spaces, not tabs
# Remaining project-preference ignores (not related to indentation)
"PD901", # Avoid using the generic variable name `df` for DataFrames
"ANN401", # Dynamically typed expressions (typing.Any) are disallowed
"D103", # Missing docstring in public function
"BLE001", # Do not catch blind exception: `Exception`
"T201", # Use `logger.info` instead of `print`
"E501", # Line too long
"PLR2004", # Magic value used in comparison
"RUF001", # String contains ambiguous unicode character
"D100", # Missing docstring in public module
"D101", # Missing docstring in public class
]
[tool.ruff.lint.mccabe]
# Complexity threshold used by rule C901.
# NOTE(review): C901 is in the [tool.ruff.lint] `ignore` list, so this limit
# is not reported while that ignore stays in place.
max-complexity = 10
[tool.ruff.lint.pylint]
# Thresholds used by PLR0913 (max-args), PLR0912 (max-branches) and
# PLR0915 (max-statements).
# NOTE(review): all three rules are in the [tool.ruff.lint] `ignore` list, so
# these limits only take effect if those ignores are removed.
max-args = 5
max-branches = 12
max-statements = 50
[tool.ruff.lint.pydocstyle]
# Check docstrings against the Google docstring convention.
convention = "google"
# Formatter output: tab-indented code with double-quoted strings. Magic
# trailing commas are respected and line endings are auto-detected.
[tool.ruff.format]
indent-style = "tab"
line-ending = "auto"
quote-style = "double"
skip-magic-trailing-comma = false
|