|
|
from __future__ import annotations |
|
|
import json, tempfile |
|
|
from pathlib import Path |
|
|
from dir2md.core import Config, generate_markdown_report |
|
|
|
|
|
|
|
|
def _make_repo(tmp: Path) -> Path:
    """Populate *tmp* with a tiny fixture repository and return *tmp*.

    Files created (all UTF-8):

    * ``src/a.py``      -- an import, a class and a 100-statement function,
    * ``src/b.py``      -- an import and a one-line function,
    * ``src/b_copy.py`` -- the same text as ``src/b.py``,
    * ``README.md``     -- a short markdown document.

    The generated ``.py`` files are syntactically valid Python: function
    bodies are indented with four spaces.

    Args:
        tmp: Directory to populate. ``tmp/src`` is created if missing.

    Returns:
        The same ``tmp`` path, for call-site convenience.
    """
    src_dir = tmp / "src"
    src_dir.mkdir(parents=True, exist_ok=True)

    # 100 body statements push a.py past 100 lines, well above the small
    # line limits used by the sampling test in this file.
    foo_body = "\n".join(f"    print('line {i}')" for i in range(100))
    a_source = (
        "\n"
        "import os\n"
        "\n"
        "class A: pass\n"
        "\n"
        "def foo():\n"
        f"{foo_body}\n"
        "    return 42\n"
    )
    (src_dir / "a.py").write_text(a_source, encoding="utf-8")

    b_source = (
        "\n"
        "import sys\n"
        "\n"
        "def bar():\n"
        "    return 43\n"
    )
    (src_dir / "b.py").write_text(b_source, encoding="utf-8")

    # Same text as b.py -- presumably here to exercise duplicate detection
    # (the tests pass dedup_bits); confirm against dir2md if that matters.
    (src_dir / "b_copy.py").write_text(b_source, encoding="utf-8")

    (tmp / "README.md").write_text("# Title\n\nSome text\n", encoding="utf-8")
    return tmp
|
|
|
|
|
|
|
|
def test_budget_and_modes(tmp_path: Path):
    """Summary mode with a 200-token budget yields stats and a manifest.

    Checks that the rendered markdown contains the prompt-token estimate
    line and that ``OUT.manifest.json`` is written next to the output and
    lists both ``a.py`` and ``b.py``.
    """
    repo = _make_repo(tmp_path)
    config = Config(
        root=repo,
        output=repo / "OUT.md",
        include_globs=[],
        exclude_globs=[],
        omit_globs=[],
        respect_gitignore=False,
        follow_symlinks=False,
        max_bytes=200_000,
        max_lines=2000,
        include_contents=True,
        only_ext=None,
        add_stats=True,
        add_toc=False,
        llm_mode="summary",
        budget_tokens=200,
        max_file_tokens=1200,
        dedup_bits=16,
        sample_head=120,
        sample_tail=40,
        strip_comments=False,
        emit_manifest=True,
        preset="pro",
        explain_capsule=True,
    )

    report = generate_markdown_report(config)
    assert "Estimated tokens (prompt):" in report

    manifest_path = repo / "OUT.manifest.json"
    assert manifest_path.exists()
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))

    # Manifest paths may carry a directory prefix, so match on the suffix.
    listed = {item["path"] for item in manifest["files"]}
    for suffix in ("a.py", "b.py"):
        assert any(name.endswith(suffix) for name in listed)
|
|
|
|
|
|
|
|
def test_ref_mode_manifest(tmp_path: Path):
    """Ref mode writes a manifest with stats and a sha256 per file entry.

    Runs the generator with ``llm_mode="ref"`` and ``emit_manifest=True``,
    then checks that ``OUT.manifest.json`` exists, has top-level ``stats``
    and ``files`` keys, and that every entry in ``files`` has a ``sha256``.
    """
    root = _make_repo(tmp_path)
    cfg = Config(
        root=root,
        output=root / "OUT.md",
        include_globs=[],
        exclude_globs=[],
        omit_globs=[],
        respect_gitignore=False,
        follow_symlinks=False,
        max_bytes=200_000,
        max_lines=2000,
        include_contents=True,
        only_ext=None,
        add_stats=True,
        add_toc=False,
        llm_mode="ref",
        budget_tokens=120,
        max_file_tokens=1200,
        dedup_bits=16,
        sample_head=120,
        sample_tail=40,
        strip_comments=False,
        emit_manifest=True,
        preset="pro",
        explain_capsule=False,
    )

    # The returned markdown is not inspected here; only the manifest file
    # written as a side effect matters for this test.
    generate_markdown_report(cfg)

    manifest_path = root / "OUT.manifest.json"
    # Fail with a clear assertion rather than FileNotFoundError if the
    # manifest was not emitted.
    assert manifest_path.exists()
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))

    assert "stats" in manifest
    assert "files" in manifest
    assert all("sha256" in entry for entry in manifest["files"])
|
|
|
|
|
|
|
|
def test_inline_sampling(tmp_path: Path):
    """Inline mode under tight limits marks the cut and explains itself.

    Uses ``max_lines=50``, ``max_file_tokens=30`` and a 5-line head /
    3-line tail sample, then expects the report to contain the
    "truncated middle" marker and a "why: inline" note (presumably emitted
    because ``explain_capsule=True`` -- confirm against dir2md).
    """
    repo = _make_repo(tmp_path)

    config = Config(
        root=repo,
        output=repo / "OUT.md",
        include_globs=[],
        exclude_globs=[],
        omit_globs=[],
        respect_gitignore=False,
        follow_symlinks=False,
        max_bytes=200_000,
        max_lines=50,
        include_contents=True,
        only_ext=None,
        add_stats=True,
        add_toc=False,
        llm_mode="inline",
        budget_tokens=50,
        max_file_tokens=30,
        dedup_bits=0,
        sample_head=5,
        sample_tail=3,
        strip_comments=False,
        emit_manifest=False,
        preset="pro",
        explain_capsule=True,
    )

    report = generate_markdown_report(config)
    for expected in ("truncated middle", "why: inline"):
        assert expected in report
|
|
|
|
|
def test_masking(tmp_path: Path):
    """``masking_mode`` controls whether a secret in ``.env`` is redacted.

    With ``masking_mode="basic"`` the report must not contain the secret
    line and must contain the ``[*** MASKED_SECRET ***]`` placeholder.
    After switching the same config to ``"off"`` the secret appears
    verbatim and the placeholder is absent.
    """
    repo = _make_repo(tmp_path)

    secret_line = "My AWS key is AKIAIOSFODNN7EXAMPLE"
    placeholder = "[*** MASKED_SECRET ***]"
    env_file = repo / ".env"
    env_file.write_text(secret_line, encoding="utf-8")

    config = Config(
        root=repo,
        output=repo / "OUT.md",
        include_globs=[],
        exclude_globs=[],
        omit_globs=[],
        respect_gitignore=False,
        follow_symlinks=False,
        max_bytes=200_000,
        max_lines=2000,
        include_contents=True,
        only_ext=None,
        add_stats=True,
        add_toc=False,
        llm_mode="inline",
        budget_tokens=1000,
        max_file_tokens=1000,
        dedup_bits=0,
        sample_head=120,
        sample_tail=40,
        strip_comments=False,
        emit_manifest=False,
        preset="pro",
        explain_capsule=False,
        no_timestamp=True,
        masking_mode="basic",
    )

    # Masking on: the secret is gone and the placeholder takes its place.
    masked_report = generate_markdown_report(config)
    assert secret_line not in masked_report
    assert placeholder in masked_report

    # Masking off: the same config, mutated in place, leaks the secret.
    config.masking_mode = "off"
    plain_report = generate_markdown_report(config)
    assert secret_line in plain_report
    assert placeholder not in plain_report
|
|
|