Coverage for src/pyTRLCConverter/docx

1"""Converter to Word docx format.

3 Author: Norbert Schulz (norbert.schulz@newtec.de)

4"""

6# pyTRLCConverter - A tool to convert TRLC files to specific formats.

9# This file is part of pyTRLCConverter program.

10#

11# The pyTRLCConverter program is free software: you can redistribute it and/or modify it under

12# the terms of the GNU General Public License as published by the Free Software Foundation,

13# either version 3 of the License, or (at your option) any later version.

14#

15# The pyTRLCConverter program is distributed in the hope that it will be useful, but

16# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

17# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

18#

19# You should have received a copy of the GNU General Public License along with pyTRLCConverter.

20# If not, see <https://www.gnu.org/licenses/>.

22# Imports **********************************************************************

23import os

24from typing import Optional, Any

25import docx

26from docx.blkcntnr import BlockItemContainer

27from docx.text.paragraph import Paragraph

28from docx.oxml import OxmlElement

29from docx.oxml.ns import qn

30from docx.enum.style import WD_STYLE_TYPE

31from marko import Markdown

32from trlc.ast import Implicit_Null, Record_Object, Record_Reference, String_Literal, Array_Aggregate, Expression

33from pyTRLCConverter.base_converter import BaseConverter

34from pyTRLCConverter.marko.md2docx_renderer import Md2DocxRenderer

35from pyTRLCConverter.ret import Ret

36from pyTRLCConverter.trlc_helper import TrlcAstWalker

37from pyTRLCConverter.logger import log_verbose

39# Variables ********************************************************************

41# Classes **********************************************************************

class DocxConverter(BaseConverter):
    """Converter to docx format.

    The following Word docx objects are used:

    - **Document**: Represents the entire Word document.
    - **Paragraph**: A block of text in the document with its own formatting properties.
    - **Run**: A contiguous run of text with the same formatting within a paragraph.
    - **Table**: A two-dimensional structure for presenting data in rows and columns.
    """

    OUTPUT_FILE_NAME_DEFAULT = "output.docx"

    def __init__(self, args: Any) -> None:
        # lobster-trace: SwRequirements.sw_req_no_prj_spec
        # lobster-trace: SwRequirements.sw_req_docx
        # lobster-trace: SwRequirements.sw_req_docx_template
        """
        Initialize the docx converter.

        Creates the document, optionally based on the template given by
        ``args.template``, and makes sure the 'Table Grid' style used for the
        record tables is available.

        Args:
            args (Any): The parsed program arguments.
        """
        super().__init__(args)

        if args.template is not None:
            log_verbose(f"Loading template file {args.template}.")

        # With template None, python-docx falls back to its built-in default template.
        self._docx = docx.Document(docx=args.template)

        # Ensure default table style is present in the document.
        if 'Table Grid' not in self._docx.styles:
            self._docx.styles.add_style('Table Grid', WD_STYLE_TYPE.TABLE, builtin=True)

        # The AST walker meta data for processing the record object fields.
        # This will hold the information about the current package, type and attribute being processed.
        self._ast_meta_data: Optional[dict] = None

        # Current list item indentation level.
        self._list_item_indent_level = 0

        # Docx block item container to add content to during conversion and markdown rendering.
        self._block_item_container: Optional[BlockItemContainer] = None

        # Next free bookmark id. Bookmark ids must be unique within the document, therefore
        # start above every id which is already used by the (template) document body.
        self._next_bookmark_id = DocxConverter._docx_get_highest_bookmark_id(self._docx.element) + 1

    @staticmethod
    def get_subcommand() -> str:
        # lobster-trace: SwRequirements.sw_req_docx
        """Return subcommand token for this converter.

        Returns:
            str: The subcommand token.
        """
        return "docx"

    @staticmethod
    def get_description() -> str:
        # lobster-trace: SwRequirements.sw_req_docx
        """Return converter description.

        Returns:
            str: The converter description.
        """
        return "Convert into docx format."

    @classmethod
    def register(cls, args_parser: Any) -> None:
        # lobster-trace: SwRequirements.sw_req_docx
        """Register converter specific argument parser.

        Adds the ``--template`` and ``--name`` options to the parser which was
        set up by the base converter registration.

        Args:
            args_parser (Any): Argument parser
        """
        super().register(args_parser)

        # The base class registration is expected to have created the parser.
        assert BaseConverter._parser is not None

        BaseConverter._parser.add_argument(
            "-t",
            "--template",
            type=str,
            default=None,
            required=False,
            help="Load the given docx file as a template to append to."
        )
        BaseConverter._parser.add_argument(
            "-n",
            "--name",
            type=str,
            default=DocxConverter.OUTPUT_FILE_NAME_DEFAULT,
            required=False,
            help="Name of the generated output file inside the output folder "
                 f"(default = {DocxConverter.OUTPUT_FILE_NAME_DEFAULT})."
        )

    def convert_section(self, section: str, level: int) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_section
        """Process the given section item by adding it as heading to the document.

        Args:
            section (str): The section name
            level (int): The section indentation level

        Returns:
            Ret: Status
        """
        assert self._docx is not None

        self._docx.add_heading(section, level)

        return Ret.OK

    def convert_record_object_generic(self, record: Record_Object, level: int, translation: Optional[dict]) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Process the given record object in a generic way.

        The handler is called by the base converter if no specific handler is
        defined for the record type.

        Args:
            record (Record_Object): The record object.
            level (int): The record level.
            translation (Optional[dict]): Translation dictionary for the record object.
                                            If None, no translation is applied.

        Returns:
            Ret: Status
        """
        return self._convert_record_object(record, level, translation)

    def finish(self) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_file
        """Finish the conversion by writing the document to the output file.

        The document is released afterwards, therefore a second call without
        a new document reports an error.

        Returns:
            Ret: Status, Ret.ERROR if there is no document to write.
        """
        if self._docx is None:
            return Ret.ERROR

        output_file_name = self._args.name

        # Only prepend the output folder if one is given. The truthiness check
        # also covers an unset (None) output folder.
        if self._args.out:
            output_file_name = os.path.join(self._args.out, self._args.name)

        log_verbose(f"Writing docx {output_file_name}.")
        self._docx.save(output_file_name)
        self._docx = None

        return Ret.OK

    # NOTE(review): The method name contains a typo ("implict"). It is kept, because
    # converters derived from this class may override or reference it.
    def _on_implict_null(self, _: Implicit_Null) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Process the given implicit null value.

        Adds a paragraph with the configured empty attribute value to the
        current block item container.
        """
        assert self._block_item_container is not None
        self._block_item_container.add_paragraph(self._empty_attribute_value)

    def _on_record_reference(self, record_reference: Record_Reference) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        # lobster-trace: SwRequirements.sw_req_docx_reference
        """
        Process the given record reference value and add a hyperlink paragraph.

        The hyperlink points to the bookmark named like the referenced record
        and shows "<package name>.<record name>" as text.

        Args:
            record_reference (Record_Reference): The record reference value.
        """
        assert record_reference.target is not None
        assert self._block_item_container is not None

        paragraph = self._block_item_container.add_paragraph()

        DocxConverter.docx_add_link_to_bookmark(paragraph,
                                                record_reference.target.name,
                                                f"{record_reference.package.name}.{record_reference.target.name}")

    def _on_string_literal(self, string_literal: String_Literal) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_render_md
        """
        Process the given string literal value.

        If AST meta data is available, the value is rendered depending on the
        configured format of the attribute. Otherwise it is added as plain text.

        Args:
            string_literal (String_Literal): The string literal value.
        """
        assert self._block_item_container is not None

        # Without meta data the attribute format is unknown, fall back to plain text.
        if self._ast_meta_data is None:
            self._block_item_container.add_paragraph(string_literal.to_string())
            return

        package_name = self._ast_meta_data.get("package_name", "")
        type_name = self._ast_meta_data.get("type_name", "")
        attribute_name = self._ast_meta_data.get("attribute_name", "")

        self._render(package_name, type_name, attribute_name, string_literal.to_string())

    # pylint: disable-next=unused-argument
    def _on_array_aggregate_begin(self, array_aggregate: Array_Aggregate) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Handle the beginning of a list by entering the next list indentation level.

        Args:
            array_aggregate (Array_Aggregate): The AST node.
        """
        self._list_item_indent_level += 1

    # pylint: disable-next=unused-argument
    def _on_list_item(self, expression: Expression, item_result: Any) -> Any:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Handle the list item by applying a bullet list style to the last added paragraph.

        Args:
            expression (Expression): The AST node.
            item_result (Any): The result of processing the list item.

        Returns:
            Any: The unchanged result of processing the list item.
        """
        assert self._block_item_container is not None

        # Add list item style to last added paragraph.
        last_paragraph = self._block_item_container.paragraphs[-1]

        # The first level uses 'List Bullet', deeper levels 'List Bullet <level>'.
        # NOTE(review): Presumably the style must exist in the document or template - confirm
        # for deeply nested lists.
        style = 'List Bullet'

        if self._list_item_indent_level > 1:
            style += f' {self._list_item_indent_level}'

        last_paragraph.style = style

        return item_result

    # pylint: disable-next=unused-argument
    def _on_array_aggregate_finish(self, array_aggregate: Array_Aggregate) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Handle the end of a list by leaving the current list indentation level.

        Args:
            array_aggregate (Array_Aggregate): The AST node.
        """
        self._list_item_indent_level -= 1

    def _other_dispatcher(self, expression: Expression) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Dispatcher for all other expressions.

        Adds the string representation of the expression as paragraph to the
        current block item container.

        Args:
            expression (Expression): The expression to process.
        """
        assert self._block_item_container is not None
        self._block_item_container.add_paragraph(expression.to_string())

    def _get_trlc_ast_walker(self) -> TrlcAstWalker:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Create the TRLC AST walker with all dispatchers of this converter.

        If a record object contains a record reference, the record reference will be converted to
        a hyperlink.
        If a record object contains an array of record references, the array will be converted to
        a list of links.
        Otherwise the record object fields attribute values will be written to the table.

        Returns:
            TrlcAstWalker: The TRLC AST walker.
        """
        trlc_ast_walker = TrlcAstWalker()
        trlc_ast_walker.add_dispatcher(
            Implicit_Null,
            None,
            self._on_implict_null,
            None
        )
        trlc_ast_walker.add_dispatcher(
            Record_Reference,
            None,
            self._on_record_reference,
            None
        )
        trlc_ast_walker.add_dispatcher(
            String_Literal,
            None,
            self._on_string_literal,
            None
        )
        trlc_ast_walker.add_dispatcher(
            Array_Aggregate,
            self._on_array_aggregate_begin,
            None,
            self._on_array_aggregate_finish
        )
        trlc_ast_walker.set_other_dispatcher(self._other_dispatcher)
        trlc_ast_walker.set_list_item_dispatcher(self._on_list_item)

        return trlc_ast_walker

    def _render(self, package_name: str, type_name: str, attribute_name: str, attribute_value: str) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_render_md
        """Render the attribute value depending on its format.

        Args:
            package_name (str): The package name.
            type_name (str): The type name.
            attribute_name (str): The attribute name.
            attribute_value (str): The attribute value.
        """
        assert self._block_item_container is not None

        # If the attribute is marked as CommonMark Markdown format, convert it.
        if self._render_cfg.is_format_md(package_name, type_name, attribute_name) is True:
            # The renderer receives its output container via a class attribute, because
            # marko is given the renderer class and not an instance.
            Md2DocxRenderer.block_item_container = self._block_item_container
            markdown = Markdown(renderer=Md2DocxRenderer)
            markdown.convert(attribute_value)
        else:
            self._block_item_container.add_paragraph(attribute_value)

    def _convert_record_object(self, record: Record_Object, level: int, translation: Optional[dict]) -> Ret:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Process the given record object.

        The record object is written as a bookmarked heading, followed by a
        two column table with one row per attribute and a paragraph with the
        source location of the record object.

        Args:
            record (Record_Object): The record object.
            level (int): The record level.
            translation (Optional[dict]): Translation dictionary for the record object.
                                            If None, no translation is applied.

        Returns:
            Ret: Status
        """
        assert self._docx is not None

        heading = self._docx.add_heading(f"{record.name} ({record.n_typ.name})", level + 1)

        # Every bookmark gets its own id, see docx_add_bookmark().
        DocxConverter.docx_add_bookmark(heading, record.name, self._next_bookmark_id)
        self._next_bookmark_id += 1

        table = self._docx.add_table(rows=1, cols=2)
        table.style = 'Table Grid'
        table.autofit = True

        # Set table headers
        header_cells = table.rows[0].cells
        header_cells[0].text = "Element"
        header_cells[1].text = "Value"

        # Walk through the record object fields and write the table rows.
        trlc_ast_walker = self._get_trlc_ast_walker()

        for name, value in record.field.items():
            attribute_name = self._translate_attribute_name(translation, name)

            cells = table.add_row().cells
            cells[0].text = attribute_name

            # Provide the context of the attribute to the walker callbacks. The original
            # (untranslated) attribute name is used to look up the render configuration.
            self._ast_meta_data = {
                "package_name": record.n_package.name,
                "type_name": record.n_typ.name,
                "attribute_name": name
            }
            self._block_item_container = cells[1]
            trlc_ast_walker.walk(value)

            # Remove first empty paragraph added by default to the table cell.
            # It is only removed if the walker added content, so the cell never gets empty.
            if len(cells[1].paragraphs) > 1:
                first_paragraph = cells[1].paragraphs[0]

                if first_paragraph.text == "":
                    p_element = first_paragraph._element # pylint: disable=protected-access
                    p_element.getparent().remove(p_element)
                    p_element._p = p_element._element = None # pylint: disable=protected-access

        # Add a paragraph with the record object location
        paragraph = self._docx.add_paragraph()
        paragraph.add_run(f"from {record.location.file_name}:{record.location.line_no}").italic = True

        return Ret.OK

    @staticmethod
    def _docx_get_highest_bookmark_id(root: Any) -> int:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Determine the highest bookmark id used below the given XML element.

        Args:
            root (Any): The XML element to search, including all its descendants.

        Returns:
            int: The highest numeric bookmark id found or -1 if there is no bookmark.
        """
        id_attribute = qn('w:id')
        highest_id = -1

        for bookmark_start in root.iter(qn('w:bookmarkStart')):
            try:
                highest_id = max(highest_id, int(bookmark_start.get(id_attribute, "")))
            except ValueError:
                # Missing or non-numeric id, it can't collide with a generated one.
                continue

        return highest_id

    @staticmethod
    def docx_add_bookmark(paragraph: Paragraph, bookmark_name: str, bookmark_id: Optional[int] = None) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_record
        """
        Adds a bookmark to a paragraph.

        The bookmark spans the whole paragraph content.

        Args:
            paragraph (Paragraph): The paragraph to add the bookmark to.
            bookmark_name (str): The name of the bookmark.
            bookmark_id (Optional[int]): The bookmark id, which must be unique in the document.
                                            If None, the next free id is determined by searching
                                            the XML tree the paragraph belongs to. Callers which
                                            pass ids are responsible for their uniqueness.
        """
        element = paragraph._p # pylint: disable=protected-access

        if bookmark_id is None:
            # Search the whole tree of the paragraph, because the id must not
            # collide with any other bookmark, e.g. one from the template.
            root = element.getroottree().getroot()
            bookmark_id = DocxConverter._docx_get_highest_bookmark_id(root) + 1

        bookmark_id_value = str(bookmark_id)

        # Create a bookmark start element.
        bookmark_start = OxmlElement('w:bookmarkStart')
        bookmark_start.set(qn('w:id'), bookmark_id_value) # ID must be unique
        bookmark_start.set(qn('w:name'), bookmark_name)

        # Create a bookmark end element. It is matched to the start element by its id.
        bookmark_end = OxmlElement('w:bookmarkEnd')
        bookmark_end.set(qn('w:id'), bookmark_id_value)

        # Add the bookmark to the paragraph.
        # The paragraph properties must stay the first child of the paragraph,
        # therefore the bookmark starts right behind them if they exist.
        paragraph_properties = element.find(qn('w:pPr'))

        if paragraph_properties is None:
            element.insert(0, bookmark_start)
        else:
            paragraph_properties.addnext(bookmark_start)

        element.append(bookmark_end)

    @staticmethod
    def docx_add_link_to_bookmark(paragraph: Paragraph, bookmark_name: str, link_text: str) -> None:
        # lobster-trace: SwRequirements.sw_req_docx_reference
        """
        Add a hyperlink to a bookmark in a paragraph.

        Args:
            paragraph (Paragraph): The paragraph to add the hyperlink to.
            bookmark_name (str): The name of the bookmark.
            link_text (str): The text to display for the hyperlink.
        """
        # Create hyperlink element pointing to the bookmark.
        hyperlink = OxmlElement('w:hyperlink')
        hyperlink.set(qn('w:anchor'), bookmark_name)

        # Create a run and run properties for the hyperlink.
        new_run = OxmlElement('w:r')
        run_properties = OxmlElement('w:rPr')

        # Use the built-in Hyperlink run style so Word will display it correctly (blue/underline).
        r_style = OxmlElement('w:rStyle')
        r_style.set(qn('w:val'), 'Hyperlink')
        run_properties.append(r_style)

        new_run.append(run_properties)

        # Add the text node inside the run (w:t).
        text_element = OxmlElement('w:t')
        text_element.text = link_text
        new_run.append(text_element)

        hyperlink.append(new_run)

        # Append the hyperlink element directly to the paragraph XML so Word renders it.
        paragraph._p.append(hyperlink) # pylint: disable=protected-access

486

487# Functions ********************************************************************

488

489# Main *************************************************************************

Coverage for src / pyTRLCConverter / docx_converter.py: 91%

158 statements