Coverage for src / pyTRLCConverter / docx_converter.py: 80%

158 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-11-21 12:06 +0000

1"""Converter to Word docx format. 

2 

3 Author: Norbert Schulz (norbert.schulz@newtec.de) 

4""" 

5 

6# pyTRLCConverter - A tool to convert TRLC files to specific formats. 

7# Copyright (c) 2024 - 2025 NewTec GmbH 

8# 

9# This file is part of pyTRLCConverter program. 

10# 

11# The pyTRLCConverter program is free software: you can redistribute it and/or modify it under 

12# the terms of the GNU General Public License as published by the Free Software Foundation, 

13# either version 3 of the License, or (at your option) any later version. 

14# 

15# The pyTRLCConverter program is distributed in the hope that it will be useful, but 

16# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 

17# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License along with pyTRLCConverter. 

20# If not, see <https://www.gnu.org/licenses/>. 

21 

22# Imports ********************************************************************** 

23import os 

24from typing import Optional, Any 

25import docx 

26from docx.blkcntnr import BlockItemContainer 

27from docx.text.paragraph import Paragraph 

28from docx.oxml import OxmlElement 

29from docx.oxml.ns import qn 

30from docx.enum.style import WD_STYLE_TYPE 

31from marko import Markdown 

32from trlc.ast import Implicit_Null, Record_Object, Record_Reference, String_Literal, Array_Aggregate, Expression 

33from pyTRLCConverter.base_converter import BaseConverter 

34from pyTRLCConverter.marko.docx_renderer import DocxRenderer 

35from pyTRLCConverter.ret import Ret 

36from pyTRLCConverter.trlc_helper import TrlcAstWalker 

37from pyTRLCConverter.logger import log_verbose 

38 

39# Variables ******************************************************************** 

40 

41# Classes ********************************************************************** 

42 

class DocxConverter(BaseConverter):
    """
    Converter to docx format.

    The following Word docx objects are used:
    - Document: Represents the entire Word document. You can create a new document or load an existing one.
    - Paragraph: A block of text in the document. It has its own formatting properties.
    - Run: A contiguous run of text with the same formatting. You can change the formatting of a run
      independently within a paragraph.
    - Table: A two-dimensional structure for presenting data in rows and columns.
    """

    OUTPUT_FILE_NAME_DEFAULT = "output.docx"

    def __init__(self, args: Any) -> None:
        # lobster-trace: SwRequirements.sw_req_no_prj_spec
        # lobster-trace: SwRequirements.sw_req_docx
        # lobster-trace: SwRequirements.sw_req_docx_template
        """
        Initialize the docx converter.

        Args:
            args (Any): The parsed program arguments.
        """
        super().__init__(args)

        if args.template is not None:
            log_verbose(f"Loading template file {args.template}.")

        # With args.template being None, python-docx loads its built-in default template.
        self._docx = docx.Document(docx=args.template)

        # Ensure default table style is present in the document.
        if 'Table Grid' not in self._docx.styles:
            self._docx.styles.add_style('Table Grid', WD_STYLE_TYPE.TABLE, builtin=True)

        # The AST walker meta data for processing the record object fields.
        # This will hold the information about the current package, type and attribute being processed.
        self._ast_meta_data: Optional[dict] = None

        # Current list item indentation level.
        self._list_item_indent_level = 0

        # Docx block item container to add content to during conversion and markdown rendering.
        self._block_item_container: Optional[BlockItemContainer] = None

    @staticmethod
    def get_subcommand() -> str:
        # lobster-trace: SwRequirements.sw_req_docx
        """Return subcommand token for this converter.

        Returns:
            str: The subcommand token.
        """
        return "docx"

    @staticmethod
    def get_description() -> str:
        # lobster-trace: SwRequirements.sw_req_docx
        """Return converter description.

        Returns:
            str: The converter description.
        """
        return "Convert into docx format."

    @classmethod
    def register(cls, args_parser: Any) -> None:
        # lobster-trace: SwRequirements.sw_req_docx
        """Register converter specific argument parser.

        Adds the "--template" and "--name" options to the converter parser.

        Args:
            args_parser (Any): Argument parser
        """
        super().register(args_parser)

        assert BaseConverter._parser is not None

        BaseConverter._parser.add_argument(
            "-t",
            "--template",
            type=str,
            default=None,
            required=False,
            help="Load the given docx file as a template to append to."
        )
        BaseConverter._parser.add_argument(
            "-n",
            "--name",
            type=str,
            default=DocxConverter.OUTPUT_FILE_NAME_DEFAULT,
            required=False,
            help="Name of the generated output file inside the output folder "
                f"(default = {DocxConverter.OUTPUT_FILE_NAME_DEFAULT})."
        )

    def convert_section(self, section: str, level: int) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_section
        """Process the given section item by adding a heading to the document.

        Args:
            section (str): The section name
            level (int): The section indentation level

        Returns:
            Ret: Status
        """
        assert self._docx is not None

        self._docx.add_heading(section, level)

        return Ret.OK

    def convert_record_object_generic(self, record: Record_Object, level: int, translation: Optional[dict]) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Process the given record object in a generic way.

        The handler is called by the base converter if no specific handler is
        defined for the record type.

        Args:
            record (Record_Object): The record object.
            level (int): The record level.
            translation (Optional[dict]): Translation dictionary for the record object.
                                          If None, no translation is applied.

        Returns:
            Ret: Status
        """
        return self._convert_record_object(record, level, translation)

    def finish(self) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_file
        """Finish the conversion by writing the document to the output file.

        The document is released afterwards, a second call returns Ret.ERROR.

        Returns:
            Ret: Status
        """
        if self._docx is None:
            return Ret.ERROR

        output_file_name = self._args.name

        # Truthiness check covers both an empty and a missing (None) output folder.
        if self._args.out:
            output_file_name = os.path.join(self._args.out, self._args.name)

        log_verbose(f"Writing docx {output_file_name}.")
        self._docx.save(output_file_name)
        self._docx = None

        return Ret.OK

    def _on_implict_null(self, _: Implicit_Null) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Process the given implicit null value by writing the empty attribute value placeholder.

        Args:
            _ (Implicit_Null): The implicit null value (unused).
        """
        assert self._block_item_container is not None
        self._block_item_container.add_paragraph(self._empty_attribute_value)

    def _on_record_reference(self, record_reference: Record_Reference) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Process the given record reference value by adding a paragraph with a hyperlink
        to the bookmark of the referenced record.

        Args:
            record_reference (Record_Reference): The record reference value.
        """
        assert record_reference.target is not None
        assert self._block_item_container is not None

        paragraph = self._block_item_container.add_paragraph()

        # The bookmark is named after the record only, the link text shows the qualified name.
        DocxConverter.docx_add_link_to_bookmark(paragraph,
                                                record_reference.target.name,
                                                f"{record_reference.package.name}.{record_reference.target.name}")

    def _on_string_literal(self, string_literal: String_Literal) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_string_format
        # lobster-trace: SwRequirements.sw_req_docx_render_md
        """
        Process the given string literal value.

        If AST meta data is available, the value is rendered according to the format
        configured for the attribute. Otherwise it is written as plain paragraph.

        Args:
            string_literal (String_Literal): The string literal value.
        """
        assert self._block_item_container is not None

        if self._ast_meta_data is None:
            self._block_item_container.add_paragraph(string_literal.to_string())
            return

        package_name = self._ast_meta_data.get("package_name", "")
        type_name = self._ast_meta_data.get("type_name", "")
        attribute_name = self._ast_meta_data.get("attribute_name", "")

        self._render(package_name, type_name, attribute_name, string_literal.to_string())

    # pylint: disable-next=unused-argument
    def _on_array_aggregate_begin(self, array_aggregate: Array_Aggregate) -> None:
        """
        Handle the beginning of a list by entering the next list indentation level.

        Args:
            array_aggregate (Array_Aggregate): The AST node.
        """
        self._list_item_indent_level += 1

    # pylint: disable-next=unused-argument
    def _on_list_item(self, expression: Expression, item_result: Any) -> Any:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Handle the list item by applying a bullet list style to the last added paragraph.

        Args:
            expression (Expression): The AST node.
            item_result (Any): The result of processing the list item.

        Returns:
            Any: The list item result, passed through unchanged.
        """
        assert self._block_item_container is not None

        # Add list item style to last added paragraph.
        last_paragraph = self._block_item_container.paragraphs[-1]

        # First level uses 'List Bullet', deeper levels 'List Bullet <level>'.
        style = 'List Bullet'

        if self._list_item_indent_level > 1:
            style += f' {self._list_item_indent_level}'

        last_paragraph.style = style

        return item_result

    # pylint: disable-next=unused-argument
    def _on_array_aggregate_finish(self, array_aggregate: Array_Aggregate) -> None:
        """
        Handle the end of a list by leaving the current list indentation level.

        Args:
            array_aggregate (Array_Aggregate): The AST node.
        """
        self._list_item_indent_level -= 1

    def _other_dispatcher(self, expression: Expression) -> None:
        """
        Dispatcher for all other expressions. Writes the expression string as paragraph.

        Args:
            expression (Expression): The expression to process.
        """
        assert self._block_item_container is not None
        self._block_item_container.add_paragraph(expression.to_string())

    def _get_trlc_ast_walker(self) -> TrlcAstWalker:
        # lobster-trace: SwRequirements.sw_req_docx_record
        # lobster-trace: SwRequirements.sw_req_docx_string_format
        """
        Create the TRLC AST walker with the dispatchers of this converter.

        If a record object contains a record reference, the record reference will be converted to
        a hyperlink.
        If a record object contains an array of record references, the array will be converted to
        a list of links.
        Otherwise the record object fields attribute values will be written to the table.

        Returns:
            TrlcAstWalker: The TRLC AST walker.
        """
        trlc_ast_walker = TrlcAstWalker()
        trlc_ast_walker.add_dispatcher(
            Implicit_Null,
            None,
            self._on_implict_null,
            None
        )
        trlc_ast_walker.add_dispatcher(
            Record_Reference,
            None,
            self._on_record_reference,
            None
        )
        trlc_ast_walker.add_dispatcher(
            String_Literal,
            None,
            self._on_string_literal,
            None
        )
        trlc_ast_walker.add_dispatcher(
            Array_Aggregate,
            self._on_array_aggregate_begin,
            None,
            self._on_array_aggregate_finish
        )
        trlc_ast_walker.set_other_dispatcher(self._other_dispatcher)
        trlc_ast_walker.set_list_item_dispatcher(self._on_list_item)

        return trlc_ast_walker

    def _render(self, package_name: str, type_name: str, attribute_name: str, attribute_value: str) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_string_format
        # lobster-trace: SwRequirements.sw_req_docx_render_md
        """Render the attribute value depending on its format.

        Args:
            package_name (str): The package name.
            type_name (str): The type name.
            attribute_name (str): The attribute name.
            attribute_value (str): The attribute value.
        """
        assert self._block_item_container is not None

        # If the attribute is marked as markdown format, convert it.
        if self._render_cfg.is_format_md(package_name, type_name, attribute_name) is True:
            # The renderer receives its output container via a class attribute.
            DocxRenderer.block_item_container = self._block_item_container
            markdown = Markdown(renderer=DocxRenderer)
            markdown.convert(attribute_value)
        else:
            self._block_item_container.add_paragraph(attribute_value)

    def _convert_record_object(self, record: Record_Object, level: int, translation: Optional[dict]) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Process the given record object.

        A heading with a bookmark is written, followed by a two column table with one row
        per record attribute and a paragraph with the record source location.

        Args:
            record (Record_Object): The record object.
            level (int): The record level.
            translation (Optional[dict]): Translation dictionary for the record object.
                                          If None, no translation is applied.

        Returns:
            Ret: Status
        """
        assert self._docx is not None

        heading = self._docx.add_heading(f"{record.name} ({record.n_typ.name})", level + 1)
        DocxConverter.docx_add_bookmark(heading, record.name)

        table = self._docx.add_table(rows=1, cols=2)
        table.style = 'Table Grid'
        table.autofit = True

        # Set table headers
        header_cells = table.rows[0].cells
        header_cells[0].text = "Element"
        header_cells[1].text = "Value"

        # Walk through the record object fields and write the table rows.
        trlc_ast_walker = self._get_trlc_ast_walker()

        for name, value in record.field.items():
            attribute_name = self._translate_attribute_name(translation, name)

            cells = table.add_row().cells
            cells[0].text = attribute_name

            # Context for the dispatchers, they write into the value cell of the current row.
            self._ast_meta_data = {
                "package_name": record.n_package.name,
                "type_name": record.n_typ.name,
                "attribute_name": name
            }
            self._block_item_container = cells[1]
            trlc_ast_walker.walk(value)

            # Remove first empty paragraph added by default to the table cell.
            if len(cells[1].paragraphs) > 1:
                first_paragraph = cells[1].paragraphs[0]

                if first_paragraph.text == "":
                    p_element = first_paragraph._element # pylint: disable=protected-access
                    p_element.getparent().remove(p_element)
                    # Invalidate the proxy, it no longer refers to a paragraph in the document.
                    first_paragraph._p = first_paragraph._element = None # pylint: disable=protected-access

        # Add a paragraph with the record object location
        paragraph = self._docx.add_paragraph()
        paragraph.add_run(f"from {record.location.file_name}:{record.location.line_no}").italic = True

        return Ret.OK

    @staticmethod
    def _get_free_bookmark_id(element: Any) -> str:
        """
        Determine a bookmark id which is not used yet in the XML tree of the given element.

        Every call scans the whole tree for existing bookmarks, so bookmarks which are
        already part of a loaded template are considered as well.

        Args:
            element (Any): Any XML element of the tree to search.

        Returns:
            str: The highest numeric bookmark id found plus one, "0" if there is none.
        """
        id_attribute = qn('w:id')
        highest_id = -1

        for bookmark_start in element.getroottree().getroot().iter(qn('w:bookmarkStart')):
            try:
                highest_id = max(highest_id, int(bookmark_start.get(id_attribute, "")))
            except ValueError:
                # A missing or non-numeric id can not collide with the generated ones.
                continue

        return str(highest_id + 1)

    @staticmethod
    def docx_add_bookmark(paragraph: Paragraph, bookmark_name: str) -> None:
        """
        Adds a bookmark to a paragraph.

        The bookmark spans the whole paragraph content and gets an id which is
        not used by any other bookmark in the same XML tree.

        Args:
            paragraph (Paragraph): The paragraph to add the bookmark to.
            bookmark_name (str): The name of the bookmark.
        """
        element = paragraph._p # pylint: disable=protected-access

        # Start and end element are matched by their id, therefore it must be unique.
        bookmark_id = DocxConverter._get_free_bookmark_id(element)

        # Create a bookmark start element.
        bookmark_start = OxmlElement('w:bookmarkStart')
        bookmark_start.set(qn('w:id'), bookmark_id)
        bookmark_start.set(qn('w:name'), bookmark_name)

        # Create a bookmark end element.
        bookmark_end = OxmlElement('w:bookmarkEnd')
        bookmark_end.set(qn('w:id'), bookmark_id)

        # Add the bookmark to the paragraph.
        element.insert(0, bookmark_start)
        element.append(bookmark_end)

    @staticmethod
    def docx_add_link_to_bookmark(paragraph: Paragraph, bookmark_name: str, link_text: str) -> None:
        """
        Add a hyperlink to a bookmark in a paragraph.

        Args:
            paragraph (Paragraph): The paragraph to add the hyperlink to.
            bookmark_name (str): The name of the bookmark.
            link_text (str): The text to display for the hyperlink.
        """
        # Create hyperlink element pointing to the bookmark.
        hyperlink = OxmlElement('w:hyperlink')
        hyperlink.set(qn('w:anchor'), bookmark_name)

        # Create a run and run properties for the hyperlink.
        new_run = OxmlElement('w:r')
        run_properties = OxmlElement('w:rPr')

        # Use the built-in Hyperlink run style so Word will display it correctly (blue/underline).
        r_style = OxmlElement('w:rStyle')
        r_style.set(qn('w:val'), 'Hyperlink')
        run_properties.append(r_style)

        new_run.append(run_properties)

        # Add the text node inside the run (w:t).
        text_element = OxmlElement('w:t')
        text_element.text = link_text
        new_run.append(text_element)

        hyperlink.append(new_run)

        # Append the hyperlink element directly to the paragraph XML so Word renders it.
        paragraph._p.append(hyperlink) # pylint: disable=protected-access

484 

485# Functions ******************************************************************** 

486 

487# Main *************************************************************************