101 lines
3.1 KiB
Python
101 lines
3.1 KiB
Python
import os
|
|
import re
|
|
|
|
def parse_markdown(content: str, filename: str):
    """Parse a markdown document into a list of formula records.

    ``content`` is split on every occurrence of ``---``; each piece is then
    scanned for a level-1 title (``# ...``) and for the ``## 出处``,
    ``## 药物组成`` and ``## 功能与主治`` sections. Pieces without a title are
    skipped.

    Args:
        content: Full text of the markdown file.
        filename: Value stored under ``source_file`` in every record.

    Returns:
        A list of dicts. Every dict has ``source_file``, ``name`` and
        ``ingredients`` (a possibly empty list of ``{'name', 'amount'}``
        dicts). ``source`` and ``function`` are present only when the
        corresponding section has text.
    """
    formulas = []

    # A byte-order mark is not whitespace for str.strip(); left in place it
    # would sit in front of a title on the very first line.
    text = content.lstrip('\ufeff').strip()

    for section in text.split('---'):
        formula = {
            'source_file': filename
        }

        # Title: anchored to the start of a line so that the "# " inside a
        # "## ..." sub-heading is never mistaken for the formula name.
        name_match = re.search(r'^[ \t]*#[ \t]+(.+)', section, re.MULTILINE)
        if name_match:
            formula['name'] = name_match.group(1).strip()

        # Source. The lookahead stops an empty section from capturing the
        # next heading (``\s+`` is allowed to cross blank lines).
        source_match = re.search(r'## 出处\s+(?![\s#])(.+)', section)
        if source_match:
            formula['source'] = source_match.group(1).strip()

        # Ingredients: consecutive "- <name> <amount>克|g" bullet lines.
        ingredients = []
        ingredients_section = re.search(r'## 药物组成\s+((?:- .+\n?)*)', section)
        if ingredients_section:
            for line in ingredients_section.group(1).strip().splitlines():
                line = line.strip()
                match = re.match(r'-?\s*([^\d\s]+)\s*([\d.]+)\s*(克|g)', line)
                if match:
                    ingredients.append({
                        'name': match.group(1).strip(),
                        # The unit is normalised to 克 whether the input used 克 or g.
                        'amount': match.group(2) + '克'
                    })
        formula['ingredients'] = ingredients

        # Functions and indications; same empty-section guard as for the source.
        func_match = re.search(r'## 功能与主治\s+(?![\s#])(.+)', section)
        if func_match:
            formula['function'] = func_match.group(1).strip()

        # Only pieces that carry a title count as a formula.
        if 'name' in formula:
            formulas.append(formula)

    return formulas
|
|
|
|
def formula_to_markdown(formula: dict) -> str:
    """Render one formula record as a markdown document.

    Args:
        formula: Record with a required ``name`` key and optional ``source``,
            ``ingredients`` (list of dicts with ``name`` and ``amount``) and
            ``function`` keys. Missing optional keys render as empty lines.

    Returns:
        The markdown text, ending with a newline.

    Raises:
        KeyError: If ``name`` is missing, or an ingredient lacks ``name`` or
            ``amount``.
    """
    ingredient_lines = [
        f"- {ing['name']} {ing['amount']}"
        for ing in formula.get('ingredients', [])
    ]
    lines = [
        f"# {formula['name']}",
        "",
        "## 出处",
        f"{formula.get('source', '')}",
        "",
        "## 药物组成",
        *ingredient_lines,
        "",
        "## 功能与主治",
        f"{formula.get('function', '')}",
        # The trailing empty entry makes the joined text end with a newline.
        "",
    ]
    return '\n'.join(lines)
|
|
|
|
def sanitize_filename(name: str) -> str:
    """Return ``name`` with characters unsafe in file names replaced by ``_``.

    Replaces ``\\ / : * ? " < > |`` and the ASCII control characters
    (0x00-0x1F), which Windows also rejects in file names. Every other
    character is kept as is.
    """
    return re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', name)
|
|
|
|
def process_all_markdown_files(input_dir: str, output_dir: str):
    """Split every ``.md`` file in ``input_dir`` into one file per formula.

    Each input file is parsed with ``parse_markdown``; every formula found is
    written to ``output_dir`` as ``<sanitized name>.md``. The names of the
    written files are saved, one per line, to ``file_list.txt`` in
    ``output_dir``. Progress is printed to stdout.

    Args:
        input_dir: Directory scanned (non-recursively) for ``.md`` files.
        output_dir: Directory that receives the output; created if missing.
    """
    os.makedirs(output_dir, exist_ok=True)
    file_list = []
    listed = set()

    # os.listdir() returns entries in arbitrary order; sorting makes both the
    # overwrite order and file_list.txt reproducible between runs.
    for filename in sorted(os.listdir(input_dir)):
        if not filename.endswith('.md'):
            continue

        filepath = os.path.join(input_dir, filename)
        # A directory whose name happens to end in ".md" cannot be opened.
        if not os.path.isfile(filepath):
            continue

        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        for formula in parse_markdown(content, filename):
            out_name = f"{sanitize_filename(formula['name'])}.md"
            out_path = os.path.join(output_dir, out_name)
            # A formula name seen earlier overwrites the earlier file.
            with open(out_path, 'w', encoding='utf-8') as out_file:
                out_file.write(formula_to_markdown(formula))
            print(f"✅ 导出: {out_path}")
            # List each output file once even if it was written several times.
            if out_name not in listed:
                listed.add(out_name)
                file_list.append(out_name)

    list_path = os.path.join(output_dir, 'file_list.txt')
    with open(list_path, 'w', encoding='utf-8') as f:
        f.writelines(name + '\n' for name in file_list)
    print(f"\n📄 已保存文件名列表到: {list_path}")
|
|
|
|
# Example invocation: split the combined files under "总" into per-formula
# files written to the parent directory.
if __name__ == "__main__":
    process_all_markdown_files('./莪术/功效/莪术止痛/总', './莪术/功效/莪术止痛')
|