Coverage for src / pyTRLCConverter / docx_converter.py: 91%

158 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-02 12:20 +0000

1"""Converter to Word docx format. 

2 

3 Author: Norbert Schulz (norbert.schulz@newtec.de) 

4""" 

5 

6# pyTRLCConverter - A tool to convert TRLC files to specific formats. 

7# Copyright (c) 2024 - 2026 NewTec GmbH 

8# 

9# This file is part of pyTRLCConverter program. 

10# 

11# The pyTRLCConverter program is free software: you can redistribute it and/or modify it under 

12# the terms of the GNU General Public License as published by the Free Software Foundation, 

13# either version 3 of the License, or (at your option) any later version. 

14# 

15# The pyTRLCConverter program is distributed in the hope that it will be useful, but 

16# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 

17# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License along with pyTRLCConverter. 

20# If not, see <https://www.gnu.org/licenses/>. 

21 

22# Imports ********************************************************************** 

23import os 

24from typing import Optional, Any 

25import docx 

26from docx.blkcntnr import BlockItemContainer 

27from docx.text.paragraph import Paragraph 

28from docx.oxml import OxmlElement 

29from docx.oxml.ns import qn 

30from docx.enum.style import WD_STYLE_TYPE 

31from marko import Markdown 

32from trlc.ast import Implicit_Null, Record_Object, Record_Reference, String_Literal, Array_Aggregate, Expression 

33from pyTRLCConverter.base_converter import BaseConverter 

34from pyTRLCConverter.marko.md2docx_renderer import Md2DocxRenderer 

35from pyTRLCConverter.ret import Ret 

36from pyTRLCConverter.trlc_helper import TrlcAstWalker 

37from pyTRLCConverter.logger import log_verbose 

38 

39# Variables ******************************************************************** 

40 

41# Classes ********************************************************************** 

42 

class DocxConverter(BaseConverter):
    """Converter to docx format.

    The following Word docx objects are used:

    - **Document**: Represents the entire Word document.
    - **Paragraph**: A block of text in the document with its own formatting properties.
    - **Run**: A contiguous run of text with the same formatting within a paragraph.
    - **Table**: A two-dimensional structure for presenting data in rows and columns.
    """

    OUTPUT_FILE_NAME_DEFAULT = "output.docx"

    def __init__(self, args: Any) -> None:
        # lobster-trace: SwRequirements.sw_req_no_prj_spec
        # lobster-trace: SwRequirements.sw_req_docx
        # lobster-trace: SwRequirements.sw_req_docx_template
        """
        Initialize the docx converter.

        Creates the docx document, optionally based on the template file given
        by ``args.template``, and ensures the 'Table Grid' table style exists.

        Args:
            args (Any): The parsed program arguments.
        """
        super().__init__(args)

        if args.template is not None:
            log_verbose(f"Loading template file {args.template}.")

        # With args.template being None, a document without user template is created.
        self._docx = docx.Document(docx=args.template)

        # Ensure default table style is present in the document.
        if 'Table Grid' not in self._docx.styles:
            self._docx.styles.add_style('Table Grid', WD_STYLE_TYPE.TABLE, builtin=True)

        # The AST walker meta data for processing the record object fields.
        # This will hold the information about the current package, type and attribute being processed.
        self._ast_meta_data: Optional[dict] = None

        # Current list item indentation level.
        self._list_item_indent_level = 0

        # Docx block item container to add content to during conversion and markdown rendering.
        self._block_item_container: Optional[BlockItemContainer] = None

    @staticmethod
    def get_subcommand() -> str:
        # lobster-trace: SwRequirements.sw_req_docx
        """Return subcommand token for this converter.

        Returns:
            str: The subcommand token.
        """
        return "docx"

    @staticmethod
    def get_description() -> str:
        # lobster-trace: SwRequirements.sw_req_docx
        """Return converter description.

        Returns:
            str: The converter description.
        """
        return "Convert into docx format."

    @classmethod
    def register(cls, args_parser: Any) -> None:
        # lobster-trace: SwRequirements.sw_req_docx
        """Register converter specific argument parser.

        Adds the ``--template`` and ``--name`` options on top of the arguments
        registered by the base converter.

        Args:
            args_parser (Any): Argument parser
        """
        super().register(args_parser)

        assert BaseConverter._parser is not None

        BaseConverter._parser.add_argument(
            "-t",
            "--template",
            type=str,
            default=None,
            required=False,
            help="Load the given docx file as a template to append to."
        )
        BaseConverter._parser.add_argument(
            "-n",
            "--name",
            type=str,
            default=DocxConverter.OUTPUT_FILE_NAME_DEFAULT,
            required=False,
            help=(
                "Name of the generated output file inside the output folder "
                f"(default = {DocxConverter.OUTPUT_FILE_NAME_DEFAULT})."
            )
        )

    def convert_section(self, section: str, level: int) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_section
        """Process the given section item by adding it as heading to the document.

        Args:
            section (str): The section name
            level (int): The section indentation level

        Returns:
            Ret: Status
        """
        assert self._docx is not None

        self._docx.add_heading(section, level)

        return Ret.OK

    def convert_record_object_generic(self, record: Record_Object, level: int, translation: Optional[dict]) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Process the given record object in a generic way.

        The handler is called by the base converter if no specific handler is
        defined for the record type.

        Args:
            record (Record_Object): The record object.
            level (int): The record level.
            translation (Optional[dict]): Translation dictionary for the record object.
                                          If None, no translation is applied.

        Returns:
            Ret: Status
        """
        return self._convert_record_object(record, level, translation)

    def finish(self) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_file
        """Finish the conversion by writing the document to the output file.

        The document is released after saving, therefore a further call
        returns Ret.ERROR.

        Returns:
            Ret: Ret.OK if the document was written, Ret.ERROR if there is no document.
        """
        if self._docx is None:
            return Ret.ERROR

        output_file_name = self._args.name

        # Only prepend the output folder if one is configured.
        if self._args.out:
            output_file_name = os.path.join(self._args.out, self._args.name)

        log_verbose(f"Writing docx {output_file_name}.")
        self._docx.save(output_file_name)
        self._docx = None

        return Ret.OK

    def _on_implict_null(self, _: Implicit_Null) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Process the given implicit null value by writing the empty attribute value.
        """
        assert self._block_item_container is not None
        self._block_item_container.add_paragraph(self._empty_attribute_value)

    def _on_record_reference(self, record_reference: Record_Reference) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        # lobster-trace: SwRequirements.sw_req_docx_reference
        """
        Process the given record reference value by adding a hyperlink paragraph
        to the current block item container.

        The link points to the bookmark named like the referenced record and
        shows "<package name>.<record name>" as text.

        Args:
            record_reference (Record_Reference): The record reference value.
        """
        assert record_reference.target is not None
        assert self._block_item_container is not None

        paragraph = self._block_item_container.add_paragraph()

        DocxConverter.docx_add_link_to_bookmark(paragraph,
                                                record_reference.target.name,
                                                f"{record_reference.package.name}.{record_reference.target.name}")

    def _on_string_literal(self, string_literal: String_Literal) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_render_md
        """
        Process the given string literal value.

        If AST meta data is available, the value is rendered according to the
        configured attribute format, otherwise it is added as plain paragraph.

        Args:
            string_literal (String_Literal): The string literal value.
        """
        assert self._block_item_container is not None

        if self._ast_meta_data is None:
            # No attribute context known, fall back to plain text.
            self._block_item_container.add_paragraph(string_literal.to_string())
            return

        package_name = self._ast_meta_data.get("package_name", "")
        type_name = self._ast_meta_data.get("type_name", "")
        attribute_name = self._ast_meta_data.get("attribute_name", "")

        self._render(package_name, type_name, attribute_name, string_literal.to_string())

    # pylint: disable-next=unused-argument
    def _on_array_aggregate_begin(self, array_aggregate: Array_Aggregate) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Handle the beginning of a list by increasing the list indentation level.

        Args:
            array_aggregate (Array_Aggregate): The AST node.
        """
        self._list_item_indent_level += 1

    # pylint: disable-next=unused-argument
    def _on_list_item(self, expression: Expression, item_result: Any) -> Any:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Handle the list item by adding a bullet point.

        The bullet style is applied to the last paragraph of the current block
        item container. The style name is 'List Bullet' on the first level and
        'List Bullet <level>' on deeper levels.

        Args:
            expression (Expression): The AST node.
            item_result (Any): The result of processing the list item.

        Returns:
            Any: The list item result, passed through unchanged.
        """
        assert self._block_item_container is not None

        # Add list item style to last added paragraph.
        last_paragraph = self._block_item_container.paragraphs[-1]

        style = 'List Bullet'

        if 1 < self._list_item_indent_level:
            style += f' {self._list_item_indent_level}'

        last_paragraph.style = style

        return item_result

    # pylint: disable-next=unused-argument
    def _on_array_aggregate_finish(self, array_aggregate: Array_Aggregate) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Handle the end of a list by decreasing the list indentation level.

        Args:
            array_aggregate (Array_Aggregate): The AST node.
        """
        self._list_item_indent_level -= 1

    def _other_dispatcher(self, expression: Expression) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Dispatcher for all other expressions.

        The string representation of the expression is added as paragraph.

        Args:
            expression (Expression): The expression to process.
        """
        assert self._block_item_container is not None
        self._block_item_container.add_paragraph(expression.to_string())

    def _get_trlc_ast_walker(self) -> TrlcAstWalker:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        If a record object contains a record reference, the record reference will be converted to
        a hyperlink.
        If a record object contains an array of record references, the array will be converted to
        a list of links.
        Otherwise the record object fields attribute values will be written to the table.

        Returns:
            TrlcAstWalker: The TRLC AST walker.
        """
        trlc_ast_walker = TrlcAstWalker()

        # These node types are handled by a single handler (third argument).
        for node_type, handler in (
            (Implicit_Null, self._on_implict_null),
            (Record_Reference, self._on_record_reference),
            (String_Literal, self._on_string_literal),
        ):
            trlc_ast_walker.add_dispatcher(node_type, None, handler, None)

        # Arrays only track the nesting level at their begin and finish.
        trlc_ast_walker.add_dispatcher(
            Array_Aggregate,
            self._on_array_aggregate_begin,
            None,
            self._on_array_aggregate_finish
        )
        trlc_ast_walker.set_other_dispatcher(self._other_dispatcher)
        trlc_ast_walker.set_list_item_dispatcher(self._on_list_item)

        return trlc_ast_walker

    def _render(self, package_name: str, type_name: str, attribute_name: str, attribute_value: str) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_render_md
        """Render the attribute value depending on its format.

        Markdown attributes are converted into the current block item
        container, all other attributes are added as plain paragraph.

        Args:
            package_name (str): The package name.
            type_name (str): The type name.
            attribute_name (str): The attribute name.
            attribute_value (str): The attribute value.
        """
        assert self._block_item_container is not None

        # If the attribute is marked as CommonMark Markdown format, convert it.
        if self._render_cfg.is_format_md(package_name, type_name, attribute_name) is True:
            # The renderer gets its output target via a class attribute.
            Md2DocxRenderer.block_item_container = self._block_item_container
            markdown = Markdown(renderer=Md2DocxRenderer)
            markdown.convert(attribute_value)
        else:
            self._block_item_container.add_paragraph(attribute_value)

    def _convert_record_object(self, record: Record_Object, level: int, translation: Optional[dict]) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Process the given record object.

        Adds a bookmarked heading, a two column table with one row per record
        attribute and a paragraph with the source location of the record.

        Args:
            record (Record_Object): The record object.
            level (int): The record level.
            translation (Optional[dict]): Translation dictionary for the record object.
                                          If None, no translation is applied.

        Returns:
            Ret: Status
        """
        assert self._docx is not None

        heading = self._docx.add_heading(f"{record.name} ({record.n_typ.name})", level + 1)
        DocxConverter.docx_add_bookmark(heading, record.name)

        table = self._docx.add_table(rows=1, cols=2)
        table.style = 'Table Grid'
        table.autofit = True

        # Set table headers
        header_cells = table.rows[0].cells
        header_cells[0].text = "Element"
        header_cells[1].text = "Value"

        # Walk through the record object fields and write the table rows.
        trlc_ast_walker = self._get_trlc_ast_walker()

        for name, value in record.field.items():
            attribute_name = self._translate_attribute_name(translation, name)

            cells = table.add_row().cells
            cells[0].text = attribute_name

            # Context for the walker handlers: which attribute is processed and where to write to.
            self._ast_meta_data = {
                "package_name": record.n_package.name,
                "type_name": record.n_typ.name,
                "attribute_name": name
            }
            self._block_item_container = cells[1]
            trlc_ast_walker.walk(value)

            # Remove first empty paragraph added by default to the table cell.
            if 1 < len(cells[1].paragraphs):
                first_paragraph = cells[1].paragraphs[0]

                if first_paragraph.text == "":
                    p_element = first_paragraph._element # pylint: disable=protected-access
                    p_element.getparent().remove(p_element)
                    # Invalidate the paragraph proxy, its XML element is gone.
                    first_paragraph._p = first_paragraph._element = None # pylint: disable=protected-access

        # Add a paragraph with the record object location
        paragraph = self._docx.add_paragraph()
        paragraph.add_run(f"from {record.location.file_name}:{record.location.line_no}").italic = True

        return Ret.OK

    @staticmethod
    def _docx_next_bookmark_id(element: Any) -> int:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Determine the next unused bookmark id.

        Only the XML tree the given element belongs to is searched for
        existing bookmarks.

        Args:
            element (Any): XML element inside the tree to search.

        Returns:
            int: Highest numeric bookmark id found plus one, 0 if there is no bookmark yet.
        """
        highest_id = -1
        root = element.getroottree().getroot()

        for bookmark_start in root.iter(qn('w:bookmarkStart')):
            try:
                highest_id = max(highest_id, int(bookmark_start.get(qn('w:id'), '')))
            except ValueError:
                # Missing or non-numeric id, it can't collide with a generated one.
                continue

        return highest_id + 1

    @staticmethod
    def docx_add_bookmark(paragraph: Paragraph, bookmark_name: str) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Adds a bookmark to a paragraph.

        The bookmark spans the whole paragraph content and gets an id which is
        not yet used by another bookmark in the same XML tree.

        Args:
            paragraph (Paragraph): The paragraph to add the bookmark to.
            bookmark_name (str): The name of the bookmark.
        """
        element = paragraph._p # pylint: disable=protected-access

        # ID must be unique, start and end are paired by it.
        bookmark_id = str(DocxConverter._docx_next_bookmark_id(element))

        # Create a bookmark start element.
        bookmark_start = OxmlElement('w:bookmarkStart')
        bookmark_start.set(qn('w:id'), bookmark_id)
        bookmark_start.set(qn('w:name'), bookmark_name)

        # Create a bookmark end element.
        bookmark_end = OxmlElement('w:bookmarkEnd')
        bookmark_end.set(qn('w:id'), bookmark_id)

        # Add the bookmark to the paragraph. The paragraph properties must
        # stay the first child, therefore the bookmark starts behind them.
        paragraph_properties = element.find(qn('w:pPr'))

        if paragraph_properties is not None:
            paragraph_properties.addnext(bookmark_start)
        else:
            element.insert(0, bookmark_start)

        element.append(bookmark_end)

    @staticmethod
    def docx_add_link_to_bookmark(paragraph: Paragraph, bookmark_name: str, link_text: str) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_reference
        """
        Add a hyperlink to a bookmark in a paragraph.

        Args:
            paragraph (Paragraph): The paragraph to add the hyperlink to.
            bookmark_name (str): The name of the bookmark.
            link_text (str): The text to display for the hyperlink.
        """
        # Create hyperlink element pointing to the bookmark.
        hyperlink = OxmlElement('w:hyperlink')
        hyperlink.set(qn('w:anchor'), bookmark_name)

        # Create a run and run properties for the hyperlink.
        new_run = OxmlElement('w:r')
        run_properties = OxmlElement('w:rPr')

        # Use the built-in Hyperlink run style so Word will display it correctly (blue/underline).
        r_style = OxmlElement('w:rStyle')
        r_style.set(qn('w:val'), 'Hyperlink')
        run_properties.append(r_style)

        new_run.append(run_properties)

        # Add the text node inside the run (w:t).
        text_element = OxmlElement('w:t')
        text_element.text = link_text
        new_run.append(text_element)

        hyperlink.append(new_run)

        # Append the hyperlink element directly to the paragraph XML so Word renders it.
        paragraph._p.append(hyperlink) # pylint: disable=protected-access

486 

487# Functions ******************************************************************** 

488 

489# Main *************************************************************************