Coverage for src / pyTRLCConverter / docx_converter.py: 91%
158 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-02 12:20 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-02 12:20 +0000
1"""Converter to Word docx format.
3 Author: Norbert Schulz (norbert.schulz@newtec.de)
4"""
6# pyTRLCConverter - A tool to convert TRLC files to specific formats.
7# Copyright (c) 2024 - 2026 NewTec GmbH
8#
9# This file is part of pyTRLCConverter program.
10#
11# The pyTRLCConverter program is free software: you can redistribute it and/or modify it under
12# the terms of the GNU General Public License as published by the Free Software Foundation,
13# either version 3 of the License, or (at your option) any later version.
14#
15# The pyTRLCConverter program is distributed in the hope that it will be useful, but
16# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License along with pyTRLCConverter.
20# If not, see <https://www.gnu.org/licenses/>.
22# Imports **********************************************************************
23import os
24from typing import Optional, Any
25import docx
26from docx.blkcntnr import BlockItemContainer
27from docx.text.paragraph import Paragraph
28from docx.oxml import OxmlElement
29from docx.oxml.ns import qn
30from docx.enum.style import WD_STYLE_TYPE
31from marko import Markdown
32from trlc.ast import Implicit_Null, Record_Object, Record_Reference, String_Literal, Array_Aggregate, Expression
33from pyTRLCConverter.base_converter import BaseConverter
34from pyTRLCConverter.marko.md2docx_renderer import Md2DocxRenderer
35from pyTRLCConverter.ret import Ret
36from pyTRLCConverter.trlc_helper import TrlcAstWalker
37from pyTRLCConverter.logger import log_verbose
39# Variables ********************************************************************
41# Classes **********************************************************************
class DocxConverter(BaseConverter):
    """Converter to docx format.

    The following Word docx objects are used:

    - **Document**: Represents the entire Word document.
    - **Paragraph**: A block of text in the document with its own formatting properties.
    - **Run**: A contiguous run of text with the same formatting within a paragraph.
    - **Table**: A two-dimensional structure for presenting data in rows and columns.
    """

    # File name used when the user does not pass "--name".
    OUTPUT_FILE_NAME_DEFAULT = "output.docx"

    def __init__(self, args: Any) -> None:
        # lobster-trace: SwRequirements.sw_req_no_prj_spec
        # lobster-trace: SwRequirements.sw_req_docx
        # lobster-trace: SwRequirements.sw_req_docx_template
        """
        Initialize the docx converter.

        Creates the docx document (based on the template file if one is given) and
        makes sure the table style used for the record tables exists in it.

        Args:
            args (Any): The parsed program arguments. The attribute "template" is read here.
        """
        super().__init__(args)

        if args.template is not None:
            log_verbose(f"Loading template file {args.template}.")

        # The template argument is handed over unchanged, even if it is None.
        self._docx = docx.Document(docx=args.template)

        # Ensure default table style is present in the document.
        if 'Table Grid' not in self._docx.styles:
            self._docx.styles.add_style('Table Grid', WD_STYLE_TYPE.TABLE, builtin=True)

        # The AST walker meta data for processing the record object fields.
        # This will hold the information about the current package, type and attribute being processed.
        self._ast_meta_data: Optional[dict] = None

        # Current list item indentation level.
        self._list_item_indent_level = 0

        # Docx block item container to add content to during conversion and markdown rendering.
        self._block_item_container: Optional[BlockItemContainer] = None

    @staticmethod
    def get_subcommand() -> str:
        # lobster-trace: SwRequirements.sw_req_docx
        """Return subcommand token for this converter.

        Returns:
            str: The subcommand token ("docx").
        """
        return "docx"

    @staticmethod
    def get_description() -> str:
        # lobster-trace: SwRequirements.sw_req_docx
        """Return converter description.

        Returns:
            str: The human readable description of the converter.
        """
        return "Convert into docx format."

    @classmethod
    def register(cls, args_parser: Any) -> None:
        # lobster-trace: SwRequirements.sw_req_docx
        """Register converter specific argument parser.

        Adds the options "-t/--template" and "-n/--name" to the parser which
        is provided by the base converter after its own registration.

        Args:
            args_parser (Any): Argument parser
        """
        super().register(args_parser)

        assert BaseConverter._parser is not None

        BaseConverter._parser.add_argument(
            "-t",
            "--template",
            type=str,
            default=None,
            required=False,
            help="Load the given docx file as a template to append to."
        )
        BaseConverter._parser.add_argument(
            "-n",
            "--name",
            type=str,
            default=DocxConverter.OUTPUT_FILE_NAME_DEFAULT,
            required=False,
            help="Name of the generated output file inside the output folder " \
                f"(default = {DocxConverter.OUTPUT_FILE_NAME_DEFAULT})."
        )

    def convert_section(self, section: str, level: int) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_section
        """Process the given section item by adding it as heading to the document.

        Args:
            section (str): The section name
            level (int): The section indentation level, used as heading level.

        Returns:
            Ret: Status
        """
        assert self._docx is not None

        self._docx.add_heading(section, level)

        return Ret.OK

    def convert_record_object_generic(self, record: Record_Object, level: int, translation: Optional[dict]) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Process the given record object in a generic way.

        The handler is called by the base converter if no specific handler is
        defined for the record type.

        Args:
            record (Record_Object): The record object.
            level (int): The record level.
            translation (Optional[dict]): Translation dictionary for the record object.
                                            If None, no translation is applied.

        Returns:
            Ret: Status
        """
        return self._convert_record_object(record, level, translation)

    def finish(self) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_file
        """Finish the conversion by writing the document to the output file.

        The document is released afterwards, therefore a second call returns an error.

        Returns:
            Ret: Ret.OK if the document was saved, Ret.ERROR if there is no document.
        """
        if self._docx is None:
            return Ret.ERROR

        output_file_name = self._args.name

        # Prepend the output folder only if one is configured. The truthiness check
        # treats an empty string and None the same way.
        if self._args.out:
            output_file_name = os.path.join(self._args.out, self._args.name)

        log_verbose(f"Writing docx {output_file_name}.")
        self._docx.save(output_file_name)
        self._docx = None

        return Ret.OK

    def _on_implict_null(self, _: Implicit_Null) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Process the given implicit null value.

        A paragraph with the empty attribute value text is added to the current container.
        """
        assert self._block_item_container is not None
        self._block_item_container.add_paragraph(self._empty_attribute_value)

    def _on_record_reference(self, record_reference: Record_Reference) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        # lobster-trace: SwRequirements.sw_req_docx_reference
        """
        Process the given record reference value by adding a hyperlink paragraph
        to the current container.

        The link points to the bookmark named like the referenced record and shows
        "<package>.<record>" as text.

        Args:
            record_reference (Record_Reference): The record reference value.
        """
        assert record_reference.target is not None
        assert self._block_item_container is not None

        paragraph = self._block_item_container.add_paragraph()

        DocxConverter.docx_add_link_to_bookmark(paragraph,
                                                record_reference.target.name,
                                                f"{record_reference.package.name}.{record_reference.target.name}")

    def _on_string_literal(self, string_literal: String_Literal) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_render_md
        """
        Process the given string literal value.

        If AST meta data is available, the value is rendered depending on the
        configured format of the attribute. Otherwise it is added as plain paragraph.

        Args:
            string_literal (String_Literal): The string literal value.
        """
        assert self._block_item_container is not None

        text = string_literal.to_string()

        if self._ast_meta_data is None:
            self._block_item_container.add_paragraph(text)
            return

        self._render(self._ast_meta_data.get("package_name", ""),
                     self._ast_meta_data.get("type_name", ""),
                     self._ast_meta_data.get("attribute_name", ""),
                     text)

    # pylint: disable-next=unused-argument
    def _on_array_aggregate_begin(self, array_aggregate: Array_Aggregate) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Handle the beginning of a list by increasing the list indentation level.

        Args:
            array_aggregate (Array_Aggregate): The AST node.
        """
        self._list_item_indent_level += 1

    # pylint: disable-next=unused-argument
    def _on_list_item(self, expression: Expression, item_result: Any) -> Any:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Handle the list item by adding a bullet point.

        The bullet style is applied to the last paragraph of the current container.
        Level 1 uses 'List Bullet', deeper levels use 'List Bullet <level>'.

        Args:
            expression (Expression): The AST node.
            item_result (Any): The result of processing the list item.

        Returns:
            Any: The unchanged item result.
        """
        assert self._block_item_container is not None

        # Add list item style to last added paragraph.
        last_paragraph = self._block_item_container.paragraphs[-1]

        # NOTE(review): the style name must exist in the document/template - confirm
        # for deeply nested arrays.
        style = 'List Bullet'

        if 1 < self._list_item_indent_level:
            style += f' {self._list_item_indent_level}'

        last_paragraph.style = style

        return item_result

    # pylint: disable-next=unused-argument
    def _on_array_aggregate_finish(self, array_aggregate: Array_Aggregate) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Handle the end of a list by decreasing the list indentation level.

        Args:
            array_aggregate (Array_Aggregate): The AST node.
        """
        self._list_item_indent_level -= 1

    def _other_dispatcher(self, expression: Expression) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Dispatcher for all other expressions.

        The string representation of the expression is added as paragraph.

        Args:
            expression (Expression): The expression to process.
        """
        assert self._block_item_container is not None
        self._block_item_container.add_paragraph(expression.to_string())

    def _get_trlc_ast_walker(self) -> TrlcAstWalker:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Create the TRLC AST walker with all dispatchers of this converter.

        If a record object contains a record reference, the record reference will be converted to
        a hyperlink.
        If a record object contains an array of record references, the array will be converted to
        a list of links.
        Otherwise the record object fields attribute values will be written to the table.

        Returns:
            TrlcAstWalker: The TRLC AST walker.
        """
        trlc_ast_walker = TrlcAstWalker()
        trlc_ast_walker.add_dispatcher(
            Implicit_Null,
            None,
            self._on_implict_null,
            None
        )
        trlc_ast_walker.add_dispatcher(
            Record_Reference,
            None,
            self._on_record_reference,
            None
        )
        trlc_ast_walker.add_dispatcher(
            String_Literal,
            None,
            self._on_string_literal,
            None
        )
        trlc_ast_walker.add_dispatcher(
            Array_Aggregate,
            self._on_array_aggregate_begin,
            None,
            self._on_array_aggregate_finish
        )
        trlc_ast_walker.set_other_dispatcher(self._other_dispatcher)
        trlc_ast_walker.set_list_item_dispatcher(self._on_list_item)

        return trlc_ast_walker

    def _render(self, package_name: str, type_name: str, attribute_name: str, attribute_value: str) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_render_md
        """Render the attribute value depending on its format.

        Args:
            package_name (str): The package name.
            type_name (str): The type name.
            attribute_name (str): The attribute name.
            attribute_value (str): The attribute value.
        """
        assert self._block_item_container is not None

        # If the attribute is marked as CommonMark Markdown format, convert it.
        if self._render_cfg.is_format_md(package_name, type_name, attribute_name) is True:
            # The renderer receives its output container via a class attribute.
            Md2DocxRenderer.block_item_container = self._block_item_container
            markdown = Markdown(renderer=Md2DocxRenderer)
            markdown.convert(attribute_value)
        else:
            self._block_item_container.add_paragraph(attribute_value)

    def _convert_record_object(self, record: Record_Object, level: int, translation: Optional[dict]) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Process the given record object.

        The record is written as a bookmarked heading, followed by a two column
        table (attribute name / attribute value) and a paragraph with the source
        location of the record.

        Args:
            record (Record_Object): The record object.
            level (int): The record level.
            translation (Optional[dict]): Translation dictionary for the record object.
                                            If None, no translation is applied.

        Returns:
            Ret: Status
        """
        assert self._docx is not None

        heading = self._docx.add_heading(f"{record.name} ({record.n_typ.name})", level + 1)
        DocxConverter.docx_add_bookmark(heading, record.name)

        table = self._docx.add_table(rows=1, cols=2)
        table.style = 'Table Grid'
        table.autofit = True

        # Set table headers
        header_cells = table.rows[0].cells
        header_cells[0].text = "Element"
        header_cells[1].text = "Value"

        # Walk through the record object fields and write the table rows.
        trlc_ast_walker = self._get_trlc_ast_walker()

        for name, value in record.field.items():
            attribute_name = self._translate_attribute_name(translation, name)

            cells = table.add_row().cells
            cells[0].text = attribute_name

            # The dispatchers called by the walker read both members to know
            # where to write to and how to render the value.
            self._ast_meta_data = {
                "package_name": record.n_package.name,
                "type_name": record.n_typ.name,
                "attribute_name": name
            }
            self._block_item_container = cells[1]
            trlc_ast_walker.walk(value)

            # Remove first empty paragraph added by default to the table cell.
            if 1 < len(cells[1].paragraphs):
                first_paragraph = cells[1].paragraphs[0]

                if first_paragraph.text == "":
                    p_element = first_paragraph._element # pylint: disable=protected-access
                    p_element.getparent().remove(p_element)
                    # Invalidate the paragraph proxy, its XML element is detached now.
                    first_paragraph._p = first_paragraph._element = None # pylint: disable=protected-access

        # Add a paragraph with the record object location
        paragraph = self._docx.add_paragraph()
        paragraph.add_run(f"from {record.location.file_name}:{record.location.line_no}").italic = True

        return Ret.OK

    @staticmethod
    def _docx_next_bookmark_id(element: Any) -> int:
        """
        Determine the next unused bookmark id of the document an element belongs to.

        All bookmark start elements in the XML tree of the element are scanned, which
        also covers bookmarks that are already part of a loaded template.

        Args:
            element (Any): A XML element inside the document tree.

        Returns:
            int: The highest numeric bookmark id found plus one, 0 if there is none.
        """
        highest_id = -1
        root = element.getroottree().getroot()

        for bookmark_start in root.iter(qn('w:bookmarkStart')):
            try:
                highest_id = max(highest_id, int(bookmark_start.get(qn('w:id'))))
            except (TypeError, ValueError):
                # Missing or non-numeric id, it can't collide with a generated one.
                continue

        return highest_id + 1

    @staticmethod
    def docx_add_bookmark(paragraph: Paragraph, bookmark_name: str) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Adds a bookmark to a paragraph.

        The bookmark spans the whole paragraph content and gets an id which is
        not used by any other bookmark start element in the same XML tree.

        Args:
            paragraph (Paragraph): The paragraph to add the bookmark to.
            bookmark_name (str): The name of the bookmark.
        """
        element = paragraph._p # pylint: disable=protected-access

        # ID must be unique, start and end element are paired by it.
        bookmark_id = str(DocxConverter._docx_next_bookmark_id(element))

        # Create a bookmark start element.
        bookmark_start = OxmlElement('w:bookmarkStart')
        bookmark_start.set(qn('w:id'), bookmark_id)
        bookmark_start.set(qn('w:name'), bookmark_name)

        # Create a bookmark end element.
        bookmark_end = OxmlElement('w:bookmarkEnd')
        bookmark_end.set(qn('w:id'), bookmark_id)

        # Add the bookmark to the paragraph.
        element.insert(0, bookmark_start)
        element.append(bookmark_end)

    @staticmethod
    def docx_add_link_to_bookmark(paragraph: Paragraph, bookmark_name: str, link_text: str) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_reference
        """
        Add a hyperlink to a bookmark in a paragraph.

        Args:
            paragraph (Paragraph): The paragraph to add the hyperlink to.
            bookmark_name (str): The name of the bookmark.
            link_text (str): The text to display for the hyperlink.
        """
        # Create hyperlink element pointing to the bookmark.
        hyperlink = OxmlElement('w:hyperlink')
        hyperlink.set(qn('w:anchor'), bookmark_name)

        # Create a run and run properties for the hyperlink.
        new_run = OxmlElement('w:r')
        run_properties = OxmlElement('w:rPr')

        # Use the built-in Hyperlink run style so Word will display it correctly (blue/underline).
        r_style = OxmlElement('w:rStyle')
        r_style.set(qn('w:val'), 'Hyperlink')
        run_properties.append(r_style)

        new_run.append(run_properties)

        # Add the text node inside the run (w:t).
        text_element = OxmlElement('w:t')
        text_element.text = link_text
        new_run.append(text_element)

        hyperlink.append(new_run)

        # Append the hyperlink element directly to the paragraph XML so Word renders it.
        paragraph._p.append(hyperlink) # pylint: disable=protected-access
487# Functions ********************************************************************
489# Main *************************************************************************