# doc/docs/拆解脚本.py
#
# 101 lines
# 3.1 KiB
# Python

import os
import re
def parse_markdown(content: str, filename: str) -> list:
    """Parse a Markdown document holding several formulas into dicts.

    The document is split on the literal ``---`` separator.  Within each
    section the following parts are recognised:

    * ``# <name>``            -> ``'name'`` (an H1 at the start of a line)
    * ``## 出处``              -> ``'source'`` (first non-blank line below it)
    * ``## 药物组成``          -> ``'ingredients'``: list of
      ``{'name': str, 'amount': str}`` built from ``- <name> <number>克|g``
      bullet lines; the unit itself is not stored
    * ``## 功能与主治``        -> ``'function'`` (first non-blank line below it)

    Args:
        content: Full Markdown text.
        filename: Name of the file the text came from; stored on every
            formula under ``'source_file'``.

    Returns:
        A list with one dict per section that has an H1 title.  Sections
        without a title are dropped.  ``'source'`` and ``'function'`` are
        only present when the corresponding section has text.
    """
    formulas = []

    # A UTF-8 BOM is not whitespace, so strip() alone would leave it glued
    # in front of the first heading and hide it from the anchored title regex.
    text = content.lstrip('\ufeff').strip()

    for section in text.split('---'):
        formula = {
            'source_file': filename
        }

        # Name: anchored to the start of a line so that the "# " inside an
        # H2 such as "## 出处" is never mistaken for the formula title.
        name_match = re.search(r'^[ \t]*# (.+)', section, re.MULTILINE)
        if name_match:
            formula['name'] = name_match.group(1).strip()

        # Source: the lookahead stops an empty section from swallowing the
        # next heading line as its value.
        source_match = re.search(r'## 出处\s+(?!\s*#)(.+)', section)
        if source_match:
            formula['source'] = source_match.group(1).strip()

        # Ingredients: consecutive "- ..." bullet lines under the heading.
        ingredients = []
        ingredients_section = re.search(
            r'## 药物组成\s+((?:- .+\n?)*)', section
        )
        if ingredients_section:
            for line in ingredients_section.group(1).strip().splitlines():
                line = line.strip()
                match = re.match(
                    r'-?\s*([^\d\s]+)\s*([\d.]+)\s*(克|g)', line
                )
                if match:
                    ingredients.append({
                        'name': match.group(1).strip(),
                        'amount': match.group(2),
                    })
        formula['ingredients'] = ingredients

        # Function and indications: same empty-section guard as the source.
        func_match = re.search(r'## 功能与主治\s+(?!\s*#)(.+)', section)
        if func_match:
            formula['function'] = func_match.group(1).strip()

        # Only sections that actually carry a title count as formulas.
        if 'name' in formula:
            formulas.append(formula)

    return formulas
def formula_to_markdown(formula: dict) -> str:
    """Render one formula dict as a standalone Markdown document.

    Args:
        formula: Dict with a required ``'name'`` key and optional
            ``'source'``, ``'ingredients'`` (list of dicts with ``'name'``
            and ``'amount'``) and ``'function'`` keys.

    Returns:
        Markdown text with an H1 title followed by the ``出处``,
        ``药物组成`` and ``功能与主治`` sections, ending with a newline.

    Raises:
        KeyError: If ``'name'`` is missing, or an ingredient lacks
            ``'name'`` or ``'amount'``.
    """
    lines = [
        f"# {formula['name']}",
        "",
        "## 出处",
        f"{formula.get('source', '')}",
        "",
        "## 药物组成",
    ]
    # One bullet per ingredient; missing list renders as an empty section.
    for ing in formula.get('ingredients', []):
        lines.append(f"- {ing['name']} {ing['amount']}")
    lines += [
        "",
        "## 功能与主治",
        f"{formula.get('function', '')}",
        "",  # trailing empty item makes the joined text end with "\n"
    ]
    return '\n'.join(lines)
def sanitize_filename(name: str) -> str:
    """Return *name* with each of the characters ``\\ / : * ? " < > |`` replaced by ``_``.

    Args:
        name: Proposed file name (without directory part).

    Returns:
        The name with every listed character replaced one-for-one; all
        other characters are left untouched.
    """
    return re.sub(r'[\\/:*?"<>|]', '_', name)
def process_all_markdown_files(input_dir: str, output_dir: str):
    """Split every ``.md`` file in *input_dir* into one file per formula.

    Each input file is parsed with ``parse_markdown``; every formula found
    is rendered with ``formula_to_markdown`` and written to
    ``<output_dir>/<sanitized formula name>.md``.  The names of all files
    written are then saved, one per line, to ``<output_dir>/file_list.txt``.

    Formulas that share a name get a numeric suffix (``name_2.md``,
    ``name_3.md``, ...) so that a later formula does not overwrite an
    earlier one written during the same run.

    Args:
        input_dir: Directory scanned (non-recursively) for ``.md`` files.
        output_dir: Directory receiving the per-formula files; created if
            it does not exist.
    """
    os.makedirs(output_dir, exist_ok=True)
    file_list = []
    # Case-folded names already written in this run, so collisions are also
    # caught on case-insensitive file systems.
    used_names = set()

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # output list and the collision suffixes reproducible between runs.
    for filename in sorted(os.listdir(input_dir)):
        if not filename.endswith('.md'):
            continue
        filepath = os.path.join(input_dir, filename)
        # Skip directories (or other non-files) that happen to end in ".md".
        if not os.path.isfile(filepath):
            continue
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        for formula in parse_markdown(content, filename):
            base = sanitize_filename(formula['name'])
            out_name = f"{base}.md"
            suffix = 2
            while out_name.casefold() in used_names:
                out_name = f"{base}_{suffix}.md"
                suffix += 1
            used_names.add(out_name.casefold())

            out_path = os.path.join(output_dir, out_name)
            with open(out_path, 'w', encoding='utf-8') as out_file:
                out_file.write(formula_to_markdown(formula))
            print(f"✅ 导出: {out_path}")
            file_list.append(out_name)

    list_path = os.path.join(output_dir, 'file_list.txt')
    with open(list_path, 'w', encoding='utf-8') as f:
        f.writelines(name + '\n' for name in file_list)
    print(f"\n📄 已保存文件名列表到: {list_path}")
# Example invocation: split the combined files under .../总 into
# per-formula files in the parent directory.
if __name__ == "__main__":
    process_all_markdown_files('./莪术/功效/莪术止痛/总', './莪术/功效/莪术止痛')