GitHub Action committed on
Commit
4deef40
·
1 Parent(s): 8cb9144

Sync from GitHub with Git LFS

Browse files
Files changed (1) hide show
  1. scripts/AI_friendly.py +36 -6
scripts/AI_friendly.py CHANGED
@@ -6,7 +6,6 @@ import yaml
6
  # Корень репозитория — отталкиваемся от местоположения скрипта
7
  REPO_ROOT = Path(__file__).resolve().parent.parent
8
 
9
-
10
  ROOT_DIR = Path(".")
11
  STRUCTURED_DIR = ROOT_DIR / "structured_md"
12
  INDEX_FILE = STRUCTURED_DIR / "index.md"
@@ -46,6 +45,31 @@ FRONT_MATTER_RE = re.compile(r"^---\n(.*?)\n---\n", re.DOTALL)
46
  def is_md_file(path):
47
  return path.suffix.lower() == MD_EXT and STRUCTURED_DIR not in path.parents
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  def parse_front_matter(content):
50
  match = FRONT_MATTER_RE.match(content)
51
  if match:
@@ -99,29 +123,35 @@ def generate_json_ld(content, front_matter, ftype, title, rel_path):
99
  ).replace("}}", f',\n "url": "{url}"\n}}', 1)
100
 
101
  def mirror_md_files():
 
102
  for path in REPO_ROOT.rglob("*.md"):
103
- # пропускаем structured_md и index.md
104
- if "structured_md" in path.parts or path.name.lower() == "index.md":
105
  continue
106
 
107
  rel_path = path.relative_to(REPO_ROOT)
108
- target_path = STRUCTURED_MD / rel_path
109
  target_path.parent.mkdir(parents=True, exist_ok=True)
110
 
111
  with path.open("r", encoding="utf-8") as f:
112
  content = f.read()
113
 
114
  front_matter, clean_content = extract_front_matter(content)
115
- ftype = detect_file_type(clean_content)
116
  title = front_matter.get("title", path.stem)
117
 
118
  json_ld = generate_json_ld(clean_content, front_matter, ftype, title, rel_path)
119
 
120
  with target_path.open("w", encoding="utf-8") as f:
121
- f.write(clean_content)
 
122
  f.write("\n\n")
123
  f.write(json_ld)
124
 
 
 
 
 
125
  def generate_index(files):
126
  index_lines = ["# ИИ-дружелюбные версии файлов\n"]
127
  tree = {}
 
6
  # Корень репозитория — отталкиваемся от местоположения скрипта
7
  REPO_ROOT = Path(__file__).resolve().parent.parent
8
 
 
9
  ROOT_DIR = Path(".")
10
  STRUCTURED_DIR = ROOT_DIR / "structured_md"
11
  INDEX_FILE = STRUCTURED_DIR / "index.md"
 
45
  def is_md_file(path):
46
  return path.suffix.lower() == MD_EXT and STRUCTURED_DIR not in path.parents
47
 
48
+ def extract_front_matter(content: str):
49
+ """Возвращает (front_matter_dict, clean_content) — без YAML-шапки."""
50
+ match = FRONT_MATTER_RE.match(content)
51
+ if match:
52
+ try:
53
+ data = yaml.safe_load(match.group(1)) or {}
54
+ except Exception:
55
+ data = {}
56
+ clean = content[match.end():]
57
+ return data, clean
58
+ return {}, content
59
+
60
+ def detect_file_type(content: str, front_matter: dict | None = None) -> str:
61
+ """Определяет тип: FAQ / HowTo / Article (по front-matter или заголовкам)."""
62
+ front_matter = front_matter or {}
63
+ if "type" in front_matter:
64
+ return front_matter["type"]
65
+
66
+ # Простые эвристики по заголовкам
67
+ if re.search(r"^#\s*FAQ\b", content, re.MULTILINE) or re.search(r"^##\s*Q&A\b", content, re.MULTILINE):
68
+ return "FAQ"
69
+ if re.search(r"^#\s*HowTo\b", content, re.MULTILINE) or re.search(r"^#\s*Как\s+сделать\b", content, re.IGNORECASE | re.MULTILINE):
70
+ return "HowTo"
71
+ return "Article"
72
+
73
  def parse_front_matter(content):
74
  match = FRONT_MATTER_RE.match(content)
75
  if match:
 
123
  ).replace("}}", f',\n "url": "{url}"\n}}', 1)
124
 
125
  def mirror_md_files():
126
+ processed = []
127
  for path in REPO_ROOT.rglob("*.md"):
128
+ # пропускаем всё внутри structured_md
129
+ if "structured_md" in path.parts:
130
  continue
131
 
132
  rel_path = path.relative_to(REPO_ROOT)
133
+ target_path = STRUCTURED_DIR / rel_path
134
  target_path.parent.mkdir(parents=True, exist_ok=True)
135
 
136
  with path.open("r", encoding="utf-8") as f:
137
  content = f.read()
138
 
139
  front_matter, clean_content = extract_front_matter(content)
140
+ ftype = detect_file_type(clean_content, front_matter)
141
  title = front_matter.get("title", path.stem)
142
 
143
  json_ld = generate_json_ld(clean_content, front_matter, ftype, title, rel_path)
144
 
145
  with target_path.open("w", encoding="utf-8") as f:
146
+ # сначала оригинальный контент (без YAML-шапки), затем JSON-LD
147
+ f.write(clean_content.rstrip())
148
  f.write("\n\n")
149
  f.write(json_ld)
150
 
151
+ processed.append(rel_path)
152
+
153
+ return processed
154
+
155
  def generate_index(files):
156
  index_lines = ["# ИИ-дружелюбные версии файлов\n"]
157
  tree = {}