"""Extract QUY_TRINH docx text for manual workflow modeling.""" from pathlib import Path import docx SRC = Path("D:/Dropbox/CONG_VIEC/SOLUTION/QUY_TRINH/QT TRINH KY HOP DONG TP-NCC.docx") OUT = Path(__file__).parent.parent / "docs" / "workflow-raw.md" d = docx.Document(str(SRC)) parts = [] for p in d.paragraphs: if p.text.strip(): parts.append(p.text) for i, tbl in enumerate(d.tables): parts.append(f"\n--- TABLE {i+1} ({len(tbl.rows)}r x {len(tbl.columns)}c) ---") for row in tbl.rows: cells = [c.text.strip().replace("\n", " | ") for c in row.cells] parts.append(" || ".join(cells)) OUT.parent.mkdir(parents=True, exist_ok=True) OUT.write_text("\n".join(parts), encoding="utf-8") print(f"OK -> {OUT} ({OUT.stat().st_size} bytes)")