import os
import re
from typing import Any, Dict, List, Set

# Level-1 heading at the start of a line. Anchoring matters: an unanchored
# "# (.+)" also matches the tail of "## 出处", which made title-less sections
# look like formulas named "出处".
_TITLE_RE = re.compile(r'^[ \t]*#[ \t]+(.+)', re.MULTILINE)
# First line of text under a level-2 heading. The negative lookahead stops an
# empty section from capturing the *next* heading as its value.
_SOURCE_RE = re.compile(r'## 出处\s+(?!\s*#)(.+)')
_FUNCTION_RE = re.compile(r'## 功能与主治\s+(?!\s*#)(.+)')
# The run of "- ..." bullet lines directly below the ingredients heading.
_INGREDIENT_BLOCK_RE = re.compile(r'## 药物组成\s+((?:- .+\n?)*)')
# One bullet: "<name> <number><克|g>".
_INGREDIENT_LINE_RE = re.compile(r'-?\s*([^\d\s]+)\s*([\d.]+)\s*(克|g)')


def parse_markdown(content: str, filename: str) -> List[Dict[str, Any]]:
    """Parse a markdown document into a list of formula dictionaries.

    The document is split on every literal ``---``; each piece is treated as
    one formula. A piece is kept only when it has a level-1 title
    (``# <name>`` at the start of a line).

    Args:
        content: Full markdown text. A leading UTF-8 BOM is ignored.
        filename: Name recorded under ``'source_file'`` in every result.

    Returns:
        A list of dicts with the keys ``'source_file'``, ``'name'`` and
        ``'ingredients'`` (a list of ``{'name', 'amount'}`` dicts, amount
        normalised to ``<number>克``), plus ``'source'`` and ``'function'``
        when the corresponding section has text. Ingredient bullets that do
        not match ``<name> <number><克|g>`` are skipped.
    """
    # str.strip() does not treat U+FEFF as whitespace, and the title pattern
    # is anchored to line start, so drop a BOM explicitly.
    sections = content.lstrip('\ufeff').strip().split('---')
    formulas = []

    for section in sections:
        formula: Dict[str, Any] = {'source_file': filename}

        # Name
        title = _TITLE_RE.search(section)
        if title:
            formula['name'] = title.group(1).strip()

        # Source (book / origin of the formula)
        source = _SOURCE_RE.search(section)
        if source:
            formula['source'] = source.group(1).strip()

        # Ingredients
        ingredients = []
        block = _INGREDIENT_BLOCK_RE.search(section)
        if block:
            for raw_line in block.group(1).strip().splitlines():
                item = _INGREDIENT_LINE_RE.match(raw_line.strip())
                if item:
                    ingredients.append({
                        'name': item.group(1).strip(),
                        # Both "克" and "g" are written back as "克".
                        'amount': item.group(2) + '克',
                    })
        formula['ingredients'] = ingredients

        # Functions and indications
        function = _FUNCTION_RE.search(section)
        if function:
            formula['function'] = function.group(1).strip()

        if 'name' in formula:
            formulas.append(formula)

    return formulas


def formula_to_markdown(formula: dict) -> str:
    """Render one formula dict as markdown text.

    Args:
        formula: Dict with a required ``'name'`` and optional ``'source'``,
            ``'ingredients'`` and ``'function'`` keys, as produced by
            ``parse_markdown``.

    Returns:
        The markdown text, ending with a newline.

    Raises:
        KeyError: If ``formula`` has no ``'name'`` key.
    """
    lines = [
        f"# {formula['name']}",
        "",
        "## 出处",
        f"{formula.get('source', '')}",
        "",
        "## 药物组成",
    ]
    lines.extend(
        f"- {ing['name']} {ing['amount']}"
        for ing in formula.get('ingredients', [])
    )
    lines += [
        "",
        "## 功能与主治",
        f"{formula.get('function', '')}",
        "",
    ]
    return '\n'.join(lines)


def sanitize_filename(name: str) -> str:
    """Replace each of the characters ``\\ / : * ? " < > |`` with ``_``."""
    return re.sub(r'[\\/:*?"<>|]', '_', name)


def _claim_output_name(stem: str, taken: Set[str]) -> str:
    """Return ``<stem>.md``, or ``<stem>_<n>.md`` if that name is taken.

    ``taken`` holds case-folded names already handed out and is updated in
    place, so names differing only by case are treated as a collision.
    """
    candidate = f"{stem}.md"
    suffix = 2
    while candidate.casefold() in taken:
        candidate = f"{stem}_{suffix}.md"
        suffix += 1
    taken.add(candidate.casefold())
    return candidate


def process_all_markdown_files(input_dir: str, output_dir: str):
    """Split every ``.md`` file in ``input_dir`` into one file per formula.

    Each formula is written to ``output_dir`` as ``<sanitized name>.md``.
    When several formulas in the same run share a name, later ones get a
    ``_2``, ``_3``, ... suffix instead of overwriting the earlier file. The
    names of all written files are saved to ``output_dir/file_list.txt``,
    one per line.

    Args:
        input_dir: Directory scanned (non-recursively) for ``.md`` files.
        output_dir: Destination directory; created if missing.

    Raises:
        FileNotFoundError: If ``input_dir`` does not exist.
    """
    os.makedirs(output_dir, exist_ok=True)
    file_list = []
    taken: Set[str] = set()

    # os.listdir order is arbitrary; sort so output names and the file list
    # are reproducible between runs.
    for filename in sorted(os.listdir(input_dir)):
        if not filename.endswith('.md'):
            continue

        filepath = os.path.join(input_dir, filename)
        # utf-8-sig reads plain UTF-8 as well as BOM-prefixed UTF-8.
        with open(filepath, 'r', encoding='utf-8-sig') as f:
            content = f.read()

        for formula in parse_markdown(content, filename):
            out_name = _claim_output_name(
                sanitize_filename(formula['name']), taken
            )
            out_path = os.path.join(output_dir, out_name)
            with open(out_path, 'w', encoding='utf-8') as out_file:
                out_file.write(formula_to_markdown(formula))
            print(f"✅ 导出: {out_path}")
            file_list.append(out_name)

    list_path = os.path.join(output_dir, 'file_list.txt')
    with open(list_path, 'w', encoding='utf-8') as f:
        f.writelines(name + '\n' for name in file_list)
    print(f"\n📄 已保存文件名列表到: {list_path}")


# Example invocation
if __name__ == "__main__":
    # Each target directory receives the split files; its input lives in the
    # "总" sub-directory underneath it.
    target_dirs = (
        './莪术/功效/莪术止痛',
        './莪术/功效/莪术活血化瘀',
        './莪术/功效/莪术消积',
        './莪术/功效/莪术行气',
        './莪术/饮片/莪术其他饮片',
        './莪术/饮片/莪术煨',
        './莪术/饮片/莪术生品',
        './莪术/饮片/莪术酒制',
        './莪术/饮片/莪术醋制',
        './郁金/饮片/郁金其他饮片类型',
        './郁金/饮片/郁金生品',
        './郁金/饮片/郁金醋制',
        './郁金/功效/郁金利胆退黄',
        './郁金/功效/郁金止痛',
        './郁金/功效/郁金活血',
        './郁金/功效/郁金清心凉血',
        './郁金/功效/郁金行气解郁',
    )
    for target_dir in target_dirs:
        process_all_markdown_files(f"{target_dir}/总", target_dir)