Coverage for src / pyTRLCConverter / docx_converter.py: 80%
158 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-21 12:06 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-21 12:06 +0000
1"""Converter to Word docx format.
3 Author: Norbert Schulz (norbert.schulz@newtec.de)
4"""
6# pyTRLCConverter - A tool to convert TRLC files to specific formats.
7# Copyright (c) 2024 - 2025 NewTec GmbH
8#
9# This file is part of pyTRLCConverter program.
10#
11# The pyTRLCConverter program is free software: you can redistribute it and/or modify it under
12# the terms of the GNU General Public License as published by the Free Software Foundation,
13# either version 3 of the License, or (at your option) any later version.
14#
15# The pyTRLCConverter program is distributed in the hope that it will be useful, but
16# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License along with pyTRLCConverter.
20# If not, see <https://www.gnu.org/licenses/>.
22# Imports **********************************************************************
23import os
24from typing import Optional, Any
25import docx
26from docx.blkcntnr import BlockItemContainer
27from docx.text.paragraph import Paragraph
28from docx.oxml import OxmlElement
29from docx.oxml.ns import qn
30from docx.enum.style import WD_STYLE_TYPE
31from marko import Markdown
32from trlc.ast import Implicit_Null, Record_Object, Record_Reference, String_Literal, Array_Aggregate, Expression
33from pyTRLCConverter.base_converter import BaseConverter
34from pyTRLCConverter.marko.docx_renderer import DocxRenderer
35from pyTRLCConverter.ret import Ret
36from pyTRLCConverter.trlc_helper import TrlcAstWalker
37from pyTRLCConverter.logger import log_verbose
39# Variables ********************************************************************
41# Classes **********************************************************************
class DocxConverter(BaseConverter):
    """
    Converter to docx format.

    The following Word docx objects are used:
    - Document: Represents the entire Word document. You can create a new document or load an existing one.
    - Paragraph: A block of text in the document. It has its own formatting properties.
    - Run: A contiguous run of text with the same formatting. You can change the formatting of a run
      independently within a paragraph.
    - Table: A two-dimensional structure for presenting data in rows and columns.
    """

    OUTPUT_FILE_NAME_DEFAULT = "output.docx"

    def __init__(self, args: Any) -> None:
        # lobster-trace: SwRequirements.sw_req_no_prj_spec
        # lobster-trace: SwRequirements.sw_req_docx
        # lobster-trace: SwRequirements.sw_req_docx_template
        """
        Initialize the docx converter.

        The document is created from the template given in the program arguments. If no
        template is given (None), docx.Document() is called with None as well.

        Args:
            args (Any): The parsed program arguments.
        """
        super().__init__(args)

        if args.template is not None:
            log_verbose(f"Loading template file {args.template}.")

        # The document under construction. It is reset to None once it was saved by finish().
        self._docx = docx.Document(docx=args.template)

        # Ensure default table style is present in the document.
        if 'Table Grid' not in self._docx.styles:
            self._docx.styles.add_style('Table Grid', WD_STYLE_TYPE.TABLE, builtin=True)

        # The AST walker meta data for processing the record object fields.
        # This will hold the information about the current package, type and attribute being processed.
        self._ast_meta_data: Optional[dict] = None

        # Current list item indentation level.
        self._list_item_indent_level = 0

        # Docx block item container to add content to during conversion and markdown rendering.
        self._block_item_container: Optional[BlockItemContainer] = None

    @staticmethod
    def get_subcommand() -> str:
        # lobster-trace: SwRequirements.sw_req_docx
        """Return subcommand token for this converter.

        Returns:
            str: The subcommand token.
        """
        return "docx"

    @staticmethod
    def get_description() -> str:
        # lobster-trace: SwRequirements.sw_req_docx
        """Return converter description.

        Returns:
            str: The converter description.
        """
        return "Convert into docx format."

    @classmethod
    def register(cls, args_parser: Any) -> None:
        # lobster-trace: SwRequirements.sw_req_docx
        """Register converter specific argument parser.

        Adds the options "-t/--template" and "-n/--name" to the parser of the base converter.

        Args:
            args_parser (Any): Argument parser
        """
        super().register(args_parser)

        assert BaseConverter._parser is not None

        BaseConverter._parser.add_argument(
            "-t",
            "--template",
            type=str,
            default=None,
            required=False,
            help="Load the given docx file as a template to append to."
        )
        BaseConverter._parser.add_argument(
            "-n",
            "--name",
            type=str,
            default=DocxConverter.OUTPUT_FILE_NAME_DEFAULT,
            required=False,
            help="Name of the generated output file inside the output folder "
                 f"(default = {DocxConverter.OUTPUT_FILE_NAME_DEFAULT})."
        )

    def convert_section(self, section: str, level: int) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_section
        """Process the given section item by adding it as heading to the document.

        Args:
            section (str): The section name
            level (int): The section indentation level

        Returns:
            Ret: Status
        """
        assert self._docx is not None

        self._docx.add_heading(section, level)

        return Ret.OK

    def convert_record_object_generic(self, record: Record_Object, level: int, translation: Optional[dict]) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Process the given record object in a generic way.

        The handler is called by the base converter if no specific handler is
        defined for the record type.

        Args:
            record (Record_Object): The record object.
            level (int): The record level.
            translation (Optional[dict]): Translation dictionary for the record object.
                                          If None, no translation is applied.

        Returns:
            Ret: Status
        """
        return self._convert_record_object(record, level, translation)

    def finish(self) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_file
        """Finish the conversion by saving the document.

        The document is released after saving, therefore a second call returns Ret.ERROR.

        Returns:
            Ret: Ret.OK if the document was saved, Ret.ERROR if there is no document.
        """
        if self._docx is None:
            return Ret.ERROR

        output_file_name = self._args.name

        # Truthiness check instead of len(): an unset (None) output folder is handled like an empty one.
        if self._args.out:
            output_file_name = os.path.join(self._args.out, self._args.name)

        log_verbose(f"Writing docx {output_file_name}.")
        self._docx.save(output_file_name)
        self._docx = None

        return Ret.OK

    def _on_implict_null(self, _: Implicit_Null) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Process the given implicit null value by adding the empty attribute value as paragraph.
        """
        assert self._block_item_container is not None
        self._block_item_container.add_paragraph(self._empty_attribute_value)

    def _on_record_reference(self, record_reference: Record_Reference) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Process the given record reference value by adding a paragraph with a hyperlink
        to the bookmark of the referenced record.

        Args:
            record_reference (Record_Reference): The record reference value.
        """
        assert record_reference.target is not None
        assert self._block_item_container is not None

        paragraph = self._block_item_container.add_paragraph()

        # The bookmark name is the plain record name, see _convert_record_object().
        DocxConverter.docx_add_link_to_bookmark(paragraph,
                                                record_reference.target.name,
                                                f"{record_reference.package.name}.{record_reference.target.name}")

    def _on_string_literal(self, string_literal: String_Literal) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_string_format
        # lobster-trace: SwRequirements.sw_req_docx_render_md
        """
        Process the given string literal value.

        If AST meta data is available, the value is rendered depending on the configured
        format of the attribute. Otherwise it is added as plain paragraph.

        Args:
            string_literal (String_Literal): The string literal value.
        """
        assert self._block_item_container is not None

        if self._ast_meta_data is None:
            self._block_item_container.add_paragraph(string_literal.to_string())
            return

        package_name = self._ast_meta_data.get("package_name", "")
        type_name = self._ast_meta_data.get("type_name", "")
        attribute_name = self._ast_meta_data.get("attribute_name", "")

        self._render(package_name, type_name, attribute_name, string_literal.to_string())

    # pylint: disable-next=unused-argument
    def _on_array_aggregate_begin(self, array_aggregate: Array_Aggregate) -> None:
        """
        Handle the beginning of a list by increasing the list item indentation level.

        Args:
            array_aggregate (Array_Aggregate): The AST node.
        """
        self._list_item_indent_level += 1

    # pylint: disable-next=unused-argument
    def _on_list_item(self, expression: Expression, item_result: Any) -> Any:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Handle the list item by applying a bullet point style to the last added paragraph.

        Args:
            expression (Expression): The AST node.
            item_result (Any): The result of processing the list item.

        Returns:
            Any: The unchanged list item result.
        """
        assert self._block_item_container is not None

        # Add list item style to last added paragraph.
        last_paragraph = self._block_item_container.paragraphs[-1]

        # First level uses 'List Bullet', deeper levels 'List Bullet <level>'.
        style = 'List Bullet'

        if 1 < self._list_item_indent_level:
            style += f' {self._list_item_indent_level}'

        last_paragraph.style = style

        return item_result

    # pylint: disable-next=unused-argument
    def _on_array_aggregate_finish(self, array_aggregate: Array_Aggregate) -> None:
        """
        Handle the end of a list by decreasing the list item indentation level.

        Args:
            array_aggregate (Array_Aggregate): The AST node.
        """
        self._list_item_indent_level -= 1

    def _other_dispatcher(self, expression: Expression) -> None:
        """
        Dispatcher for all other expressions. Their string representation is added as paragraph.

        Args:
            expression (Expression): The expression to process.
        """
        assert self._block_item_container is not None
        self._block_item_container.add_paragraph(expression.to_string())

    def _get_trlc_ast_walker(self) -> TrlcAstWalker:
        # lobster-trace: SwRequirements.sw_req_docx_record
        # lobster-trace: SwRequirements.sw_req_docx_string_format
        """
        Create the TRLC AST walker with all dispatchers of this converter registered.

        If a record object contains a record reference, the record reference will be converted to
        a hyperlink.
        If a record object contains an array of record references, the array will be converted to
        a list of links.
        Otherwise the record object fields attribute values will be written to the table.

        Returns:
            TrlcAstWalker: The TRLC AST walker.
        """
        trlc_ast_walker = TrlcAstWalker()

        # Each entry: node type and the three handlers in the order add_dispatcher() expects them.
        dispatchers = (
            (Implicit_Null, None, self._on_implict_null, None),
            (Record_Reference, None, self._on_record_reference, None),
            (String_Literal, None, self._on_string_literal, None),
            (Array_Aggregate, self._on_array_aggregate_begin, None, self._on_array_aggregate_finish),
        )

        for node_type, on_begin, on_process, on_finish in dispatchers:
            trlc_ast_walker.add_dispatcher(node_type, on_begin, on_process, on_finish)

        trlc_ast_walker.set_other_dispatcher(self._other_dispatcher)
        trlc_ast_walker.set_list_item_dispatcher(self._on_list_item)

        return trlc_ast_walker

    def _render(self, package_name: str, type_name: str, attribute_name: str, attribute_value: str) -> None:
        # lobster-trace: SwRequirements.sw_req_rst_string_format
        # lobster-trace: SwRequirements.sw_req_docx_render_md
        """Render the attribute value depending on its format.

        Args:
            package_name (str): The package name.
            type_name (str): The type name.
            attribute_name (str): The attribute name.
            attribute_value (str): The attribute value.
        """
        assert self._block_item_container is not None

        # If the attribute is marked as markdown format, convert it.
        if self._render_cfg.is_format_md(package_name, type_name, attribute_name) is True:
            # The renderer is handed over as class, therefore the target container
            # is passed via a class attribute.
            DocxRenderer.block_item_container = self._block_item_container
            markdown = Markdown(renderer=DocxRenderer)
            markdown.convert(attribute_value)
        else:
            self._block_item_container.add_paragraph(attribute_value)

    def _convert_record_object(self, record: Record_Object, level: int, translation: Optional[dict]) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Process the given record object.

        The record object is written as bookmarked heading, followed by a two column table
        with one row per attribute and a paragraph with the record object location.

        Args:
            record (Record_Object): The record object.
            level (int): The record level.
            translation (Optional[dict]): Translation dictionary for the record object.
                                          If None, no translation is applied.

        Returns:
            Ret: Status
        """
        assert self._docx is not None

        heading = self._docx.add_heading(f"{record.name} ({record.n_typ.name})", level + 1)
        DocxConverter.docx_add_bookmark(heading, record.name)

        table = self._docx.add_table(rows=1, cols=2)
        table.style = 'Table Grid'
        table.autofit = True

        # Set table headers
        header_cells = table.rows[0].cells
        header_cells[0].text = "Element"
        header_cells[1].text = "Value"

        # Walk through the record object fields and write the table rows.
        trlc_ast_walker = self._get_trlc_ast_walker()

        for name, value in record.field.items():
            attribute_name = self._translate_attribute_name(translation, name)

            cells = table.add_row().cells
            cells[0].text = attribute_name

            # Context for the dispatchers which are called while walking the attribute value.
            self._ast_meta_data = {
                "package_name": record.n_package.name,
                "type_name": record.n_typ.name,
                "attribute_name": name
            }
            self._block_item_container = cells[1]
            trlc_ast_walker.walk(value)

            # Remove first empty paragraph added by default to the table cell.
            if 1 < len(cells[1].paragraphs):
                first_paragraph = cells[1].paragraphs[0]

                if first_paragraph.text == "":
                    p_element = first_paragraph._element # pylint: disable=protected-access
                    p_element.getparent().remove(p_element)
                    # Invalidate the paragraph proxy (not the XML element), because the
                    # element it wraps is no longer part of the document.
                    first_paragraph._p = first_paragraph._element = None # pylint: disable=protected-access

        # Add a paragraph with the record object location
        paragraph = self._docx.add_paragraph()
        paragraph.add_run(f"from {record.location.file_name}:{record.location.line_no}").italic = True

        return Ret.OK

    @staticmethod
    def _docx_next_bookmark_id(element: Any) -> str:
        """
        Determine a bookmark id which is not used yet in the XML tree of the given element.

        Args:
            element (Any): Any XML element of the tree to search for existing bookmarks.

        Returns:
            str: The highest numeric bookmark id found in the tree plus one, "0" if there is none.
        """
        id_attribute = qn('w:id')
        highest_id = -1

        for bookmark_start in element.getroottree().iter(qn('w:bookmarkStart')):
            try:
                highest_id = max(highest_id, int(bookmark_start.get(id_attribute, '')))
            except ValueError:
                # Missing or non-numeric id, it can't collide with a generated one.
                continue

        return str(highest_id + 1)

    @staticmethod
    def docx_add_bookmark(paragraph: Paragraph, bookmark_name: str) -> None:
        """
        Adds a bookmark to a paragraph.

        The bookmark encloses the whole paragraph content.

        Args:
            paragraph (Paragraph): The paragraph to add the bookmark to.
            bookmark_name (str): The name of the bookmark.
        """
        element = paragraph._p # pylint: disable=protected-access

        # ID must be unique, therefore derive it from the bookmarks already present
        # (e.g. from a template or from previously converted records).
        bookmark_id = DocxConverter._docx_next_bookmark_id(element)

        # Create a bookmark start element.
        bookmark_start = OxmlElement('w:bookmarkStart')
        bookmark_start.set(qn('w:id'), bookmark_id)
        bookmark_start.set(qn('w:name'), bookmark_name)

        # Create a bookmark end element. It is paired with the start element by the id.
        bookmark_end = OxmlElement('w:bookmarkEnd')
        bookmark_end.set(qn('w:id'), bookmark_id)

        # Add the bookmark to the paragraph.
        element.insert(0, bookmark_start)
        element.append(bookmark_end)

    @staticmethod
    def docx_add_link_to_bookmark(paragraph: Paragraph, bookmark_name: str, link_text: str) -> None:
        """
        Add a hyperlink to a bookmark in a paragraph.

        Args:
            paragraph (Paragraph): The paragraph to add the hyperlink to.
            bookmark_name (str): The name of the bookmark.
            link_text (str): The text to display for the hyperlink.
        """
        # Create hyperlink element pointing to the bookmark.
        hyperlink = OxmlElement('w:hyperlink')
        hyperlink.set(qn('w:anchor'), bookmark_name)

        # Create a run and run properties for the hyperlink.
        new_run = OxmlElement('w:r')
        run_properties = OxmlElement('w:rPr')

        # Use the built-in Hyperlink run style so Word will display it correctly (blue/underline).
        r_style = OxmlElement('w:rStyle')
        r_style.set(qn('w:val'), 'Hyperlink')
        run_properties.append(r_style)

        new_run.append(run_properties)

        # Add the text node inside the run (w:t).
        text_element = OxmlElement('w:t')
        text_element.text = link_text
        new_run.append(text_element)

        hyperlink.append(new_run)

        # Append the hyperlink element directly to the paragraph XML so Word renders it.
        paragraph._p.append(hyperlink) # pylint: disable=protected-access
485# Functions ********************************************************************
487# Main *************************************************************************