Commit 10048d4b authored by ankraft's avatar ankraft
Browse files

Added converting bold and italic text in paragraphs, headers and tables.

parent 68c2f30b
Loading
Loading
Loading
Loading
+7 −3
Original line number Original line Diff line number Diff line
@@ -13,8 +13,8 @@ python3 -m pip install -r requirements.txt
## Usage
## Usage
- Create a directory with the Word document in it. The Word document **must** be in *docx* format. This can be achieved by opening the document with *Word* and saving it in *docx* format to another file.
- Create a directory with the Word document in it. The Word document **must** be in *docx* format. This can be achieved by opening the document with *Word* and saving it in *docx* format to another file.
- Create a configuration file with the same base name as the Word document + *.ini* extension. This file may contain different configurations than the standard *config.ini* file provided. 
- Create a configuration file with the same base name as the Word document + *.ini* extension. This file may contain different configurations than the standard *config.ini* file provided. 
  - Alternativaly, a file named *config.ini* will apply to all files in that directory.
	- Alternatively, a file named *config.ini* will apply to all files in that directory.
  - It is only necessary to add the settings that are different from the *config.ini* file in the projects root directoy. That file will always act as a fallback.
	- It is only necessary to add the settings that are different from the *config.ini* file in the project's root directory. That file will always act as a fallback.
- Run the converter as follows:
- Run the converter as follows:
```
```
python3 spec2md.py <path-to-word-document>
python3 spec2md.py <path-to-word-document>
@@ -25,3 +25,7 @@ python3 spec2md.py <path-to-word-document>
### The converter doesn't seem to generate image files.
### The converter doesn't seem to generate image files.


Is *LibreOffice* already running? If yes, then close it.
Is *LibreOffice* already running? If yes, then close it.

## Changes

- **2023-07-27** - Added converting bold and italic text in paragraphs, headers and tables.
 No newline at end of file
+11 −7
Original line number Original line Diff line number Diff line
#
#
# This file is autogenerated by pip-compile with python 3.10
# This file is autogenerated by pip-compile with Python 3.10
# To update, run:
# by the following command:
#
#
#    pip-compile
#    pip-compile
#
#
commonmark==0.9.1
lxml==4.9.3
    # via
    #   oneM2M-spec-2-MD-converter (setup.py)
    #   python-docx
markdown-it-py==3.0.0
    # via rich
    # via rich
lxml==4.9.1
mdurl==0.1.2
    # via python-docx
    # via markdown-it-py
pygments==2.13.0
pygments==2.15.1
    # via rich
    # via rich
python-docx==0.8.11
python-docx==0.8.11
    # via oneM2M-spec-2-MD-converter (setup.py)
    # via oneM2M-spec-2-MD-converter (setup.py)
rich==12.5.1
rich==13.4.2
    # via oneM2M-spec-2-MD-converter (setup.py)
    # via oneM2M-spec-2-MD-converter (setup.py)
+1 −0
Original line number Original line Diff line number Diff line
@@ -9,6 +9,7 @@ setup(
	description='Convert oneM2M specifications to Markdown',
	description='Convert oneM2M specifications to Markdown',
	packages=find_packages(),
	packages=find_packages(),
	install_requires=[
	install_requires=[
        'lxml',
		'rich',
		'rich',
		'python-docx',
		'python-docx',
	 ]
	 ]
+29 −8
Original line number Original line Diff line number Diff line
@@ -24,8 +24,7 @@ from rich.progress import Progress, TextColumn, BarColumn
from rich.console import Console
from rich.console import Console
from rich import inspect
from rich import inspect
import configparser, zipfile
import configparser, zipfile
from xml.etree import ElementTree as ET
from lxml import etree as ET



class Style(IntEnum):
class Style(IntEnum):
	example = auto()
	example = auto()
@@ -78,7 +77,9 @@ _captionMarker = '__CAPTION__'
console = Console()
console = Console()
_print:Callable = print
_print:Callable = print



# Some predefined tags and attributes
wns = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
_val = f'{{{wns}}}val'


class SectionNumbers(object):
class SectionNumbers(object):


@@ -286,7 +287,7 @@ def processDocuments(documents:list[str], outDirectory:str) -> None:
			return tag
			return tag




		def getTextFromXML(elem:Paragraph) -> str:
		def getTextFromXML(elem:Paragraph|_Cell) -> str:


			#	Not-used document tags.
			#	Not-used document tags.
			_ignoredTags = ( 'AlternateContent',
			_ignoredTags = ( 'AlternateContent',
@@ -322,7 +323,18 @@ def processDocuments(documents:list[str], outDirectory:str) -> None:
						for x in element:
						for x in element:
							_result += _parseXML(x)
							_result += _parseXML(x)
					case 't':
					case 't':
						_result += str(toMD(str(element.text)))
						bold = ''
						italics = ''
						for e in element.getparent():
							if strippedTag(e.tag) == 'rPr':	# paragraph style
								for ep in e:
									match strippedTag(ep.tag):
										case 'b' if ep.attrib.get(_val, 'true') == 'true':
											bold = '**'
										case 'i' if ep.attrib.get(_val, 'true') == 'true':
											italics = '_'
						_result += f'{bold}{italics}{str(toMD(str(element.text)))}{italics}{bold}'

					case 'br':
					case 'br':
						_result += _linebreak
						_result += _linebreak
					case 'bookmarkStart' | 'bookmarkEnd':		# TODO ?
					case 'bookmarkStart' | 'bookmarkEnd':		# TODO ?
@@ -381,7 +393,16 @@ def processDocuments(documents:list[str], outDirectory:str) -> None:
				return _result
				return _result


			#_print(ET.fromstring(elem._p.xml))
			#_print(ET.fromstring(elem._p.xml))
			match elem:
				case Paragraph():	# type: ignore[misc]
					return _parseXML(ET.fromstring(elem._p.xml))
					return _parseXML(ET.fromstring(elem._p.xml))
				case _Cell():		# type: ignore[misc]
					result = ''
					for p in elem.paragraphs:
						result += _parseXML(ET.fromstring(p._p.xml))
					return result
				case _:
					return ''






@@ -620,7 +641,7 @@ def processDocuments(documents:list[str], outDirectory:str) -> None:
						for row in elem.rows:
						for row in elem.rows:
							cells:list[str] = []
							cells:list[str] = []
							for cell in row.cells:
							for cell in row.cells:
								cells.append(f'{toMD(cell.text)} ')	# add at least a space
								cells.append(f'{getTextFromXML(cell)} ')	# add at least a space
							rows.append(cells)
							rows.append(cells)
							nrRows += 1
							nrRows += 1
						
						
@@ -706,7 +727,7 @@ def processDocuments(documents:list[str], outDirectory:str) -> None:


			for i in range(len(lines)):
			for i in range(len(lines)):
				line = lines[i]
				line = lines[i]
				lines[i] = re.sub(_referenceExpression, _repl, line)
				lines[i] = re.sub(_referenceExpression, _repl, line)	# type:ignore[arg-type]


			#
			#
			#	Write produced Markdown file
			#	Write produced Markdown file