# From md_to_docx_converter/src/to_html/postprocessing.py (hunk @@ -278,6 +278,36 @@).
# postprocess() calls this right after format_examples_and_notes(soup).
def format_tables(soup: BeautifulSoup) -> BeautifulSoup:
    """
    Adds the "bg-striped-row" class to alternating logical rows of every table body.

    Grid tables that use row spans are rendered as several <tr> elements per
    logical row; only the first <tr> of such a group carries the full number of
    cells. A <tr> whose cell count equals that of the first body row is therefore
    treated as the start of a new logical row, and every following <tr> with a
    different cell count inherits the stripe state of the row it continues.
    The first logical row is left unstriped, the second is striped, and so on.

    Only direct children are inspected (tbody of the table, rows of the tbody,
    cells of the row), so a table nested inside a cell neither distorts the
    cell count of the outer row nor gets striped with the outer table's state;
    it is processed on its own when the outer loop reaches it.

    Args:
        soup (BeautifulSoup): The BeautifulSoup object representing the HTML content.

    Returns:
        BeautifulSoup: The same BeautifulSoup object, modified in place.
    """
    stripe_class = "bg-striped-row"
    cell_tags = ["td", "th"]

    for table in soup.find_all("table"):
        tbody = table.find("tbody", recursive=False)
        if tbody is None:
            continue

        rows = tbody.find_all("tr", recursive=False)
        if not rows:
            continue

        # The first body row defines how many cells a "complete" row has.
        full_cell_count = len(rows[0].find_all(cell_tags, recursive=False))

        # Starts True so that the toggle on the first row leaves it unstriped.
        striped = True
        for row in rows:
            cell_count = len(row.find_all(cell_tags, recursive=False))
            if cell_count == full_cell_count:
                # A complete row starts a new logical row: flip the stripe.
                striped = not striped
            if not striped:
                continue

            existing_classes = row.get("class", [])
            # "class" is a list with the default parser settings, but a plain
            # string when multi-valued attributes are disabled.
            if isinstance(existing_classes, str):
                existing_classes = existing_classes.split()
            if stripe_class not in existing_classes:
                row["class"] = list(existing_classes) + [stripe_class]

    return soup
# From md_to_docx_converter/src/to_html/postprocessing.py (hunk @@ -278,6 +278,36 @@).
# postprocess() calls this right after format_examples_and_notes(soup).
def format_tables(soup: BeautifulSoup) -> BeautifulSoup:
    """
    Adds the "bg-striped-row" class to alternating logical rows of every table body.

    Grid tables that use row spans are rendered as several <tr> elements per
    logical row; only the first <tr> of such a group carries the full number of
    cells. A <tr> whose cell count equals that of the first body row is therefore
    treated as the start of a new logical row, and every following <tr> with a
    different cell count inherits the stripe state of the row it continues.
    The first logical row is left unstriped, the second is striped, and so on.

    Only direct children are inspected (tbody of the table, rows of the tbody,
    cells of the row), so a table nested inside a cell neither distorts the
    cell count of the outer row nor gets striped with the outer table's state;
    it is processed on its own when the outer loop reaches it.

    Args:
        soup (BeautifulSoup): The BeautifulSoup object representing the HTML content.

    Returns:
        BeautifulSoup: The same BeautifulSoup object, modified in place.
    """
    stripe_class = "bg-striped-row"
    cell_tags = ["td", "th"]

    for table in soup.find_all("table"):
        tbody = table.find("tbody", recursive=False)
        if tbody is None:
            continue

        rows = tbody.find_all("tr", recursive=False)
        if not rows:
            continue

        # The first body row defines how many cells a "complete" row has.
        full_cell_count = len(rows[0].find_all(cell_tags, recursive=False))

        # Starts True so that the toggle on the first row leaves it unstriped.
        striped = True
        for row in rows:
            cell_count = len(row.find_all(cell_tags, recursive=False))
            if cell_count == full_cell_count:
                # A complete row starts a new logical row: flip the stripe.
                striped = not striped
            if not striped:
                continue

            existing_classes = row.get("class", [])
            # "class" is a list with the default parser settings, but a plain
            # string when multi-valued attributes are disabled.
            if isinstance(existing_classes, str):
                existing_classes = existing_classes.split()
            if stripe_class not in existing_classes:
                row["class"] = list(existing_classes) + [stripe_class]

    return soup