Commit c3a058de authored by Marco Cavalli's avatar Marco Cavalli
Browse files

fix: prevent custom styles from being applied twice

parent d2b22cba
Loading
Loading
Loading
Loading
+51 −78
Original line number Diff line number Diff line
@@ -277,10 +277,12 @@ def format_examples_and_notes(doc: Doc):
                    paragraph.text = f"\t{paragraph.text}"
                elif paragraph.style.name in STYLES_FOR_EXAMPLES_NOTES:
                    if paragraph.style.name == "Source Code":
                        paragraph.style = "EX Source Code"
                        # paragraph.style = "EX Source Code"
                        paragraph.paragraph_format.left_indent = Pt(85.04)  # 3 cm in points
                    
                    if paragraph.style.name == "Compact":
                        paragraph.style = "EX Compact"
                        # paragraph.style = "EX Compact"
                        paragraph.paragraph_format.left_indent = Pt(103.68)  # 3.66 cm in points
                else:
                    # No longer in example or note
                    in_example_or_note = False
@@ -465,33 +467,8 @@ def add_tagged_styles_and_formatting(doc: Doc):
                and start_match.group(1) == end_match.group(1)
            )

            # if has_both_tags:
            #     style_name = start_match.group(1)

            #     # Separate text portions according to whether they should be styled or not
            #     text_before_start = text[: start_match.start()]
            #     text_to_style = text[start_match.end() : end_match.start()]
            #     text_after_end = text[end_match.end() :]

            #     run.text = text_before_start  # Nothing should happen to the text before the tag

            #     styled_run = paragraph.add_run(text_to_style)
            #     styled_run = apply_formatting_and_styling(styled_run, style_name)

            #     if text_after_end.strip():  # Add any remaining text as another run
            #         paragraph.add_run(text_after_end)
            #         runs = (
            #             paragraph.runs
            #         )  # Refresh runs now that a new run has been added

            #     i += 1
            #     continue

            if has_both_tags:
                style_name = start_match.group(1)
                start_tag = "{{{" + style_name + "}}}"
                end_tag = "{{{/" + style_name + "}}}"
                text = text.replace(start_tag + start_tag, start_tag).replace(end_tag + end_tag, end_tag)

                # Separate text portions according to whether they should be styled or not
                text_before_start = text[: start_match.start()]
@@ -502,75 +479,69 @@ def add_tagged_styles_and_formatting(doc: Doc):

                styled_run = paragraph.add_run(text_to_style)
                styled_run = apply_formatting_and_styling(styled_run, style_name)
                i += 1
                paragraph._p.insert(
                    i+1, styled_run._element
                )  # Insert the styled run after the current run

                if text_after_end.strip():  # Add any remaining text as another run
                    paragraph.add_run(text_after_end)
                    i += 1
                    paragraph._p.insert(
                        i+1, paragraph.runs[-1]._element
                    )
                    runs = (
                        paragraph.runs
                    )  # Refresh runs now that a new run has been added

                i += 1
                continue

            if not has_start_tag and has_end_tag:
                # Separate text portions according to whether they should be styled or not
                text_before_end = text[: end_match.start()]
                text_after_end = text[end_match.end() :]
            # if not has_start_tag and has_end_tag:
            #     # Separate text portions according to whether they should be styled or not
            #     text_before_end = text[: end_match.start()]
            #     text_after_end = text[end_match.end() :]

                # Keep track of the text to style and this run
                text_to_style += text_before_end
                style_runs.append(run)
            #     # Keep track of the text to style and this run
            #     text_to_style += text_before_end
            #     style_runs.append(run)

                # Reset the old runs and create a new styled run with the accumulated text to style
                for style_run in style_runs:
                    style_run.text = ""
            #     # Reset the old runs and create a new styled run with the accumulated text to style
            #     for style_run in style_runs:
            #         style_run.text = ""

                styled_run = paragraph.add_run(text_to_style)
                styled_run = apply_formatting_and_styling(styled_run, active_style)
            #     styled_run = paragraph.add_run(text_to_style)
            #     styled_run = apply_formatting_and_styling(styled_run, active_style)

                if text_after_end.strip():  # Add any remaining text as another run
                    paragraph.add_run(text_after_end)
                    runs = (
                        paragraph.runs
                    )  # Refresh runs now that a new run has been added
            #     if text_after_end.strip():  # Add any remaining text as another run
            #         paragraph.add_run(text_after_end)
            #         runs = (
            #             paragraph.runs
            #         )  # Refresh runs now that a new run has been added

                # Prepare for other tags
                active_style = None
                style_runs = []
                text_to_style = ""
            #     # Prepare for other tags
            #     active_style = None
            #     style_runs = []
            #     text_to_style = ""

                i += 1
                continue
            #     i += 1
            #     continue

            if not active_style and has_start_tag and not has_end_tag:
                active_style = start_match.group(1)
            # if not active_style and has_start_tag and not has_end_tag:
            #     active_style = start_match.group(1)

                # Separate text portions according to whether they should be styled or not
                text_before_start = text[: start_match.start()]
                text_after_start = text[start_match.end() :]
            #     # Separate text portions according to whether they should be styled or not
            #     text_before_start = text[: start_match.start()]
            #     text_after_start = text[start_match.end() :]

                # Keep just the text before the tag and start keeping track of the text to style
                run.text = text_before_start
                text_to_style += text_after_start
            #     # Keep just the text before the tag and start keeping track of the text to style
            #     run.text = text_before_start
            #     text_to_style += text_after_start

                style_runs.append(run)
            #     style_runs.append(run)

                i += 1
                continue
            #     i += 1
            #     continue

            if active_style:  # Inside a tag
                # Simply keep track of this run and its text
                text_to_style += text
                style_runs.append(run)
            # if active_style:  # Inside a tag
            #     # Simply keep track of this run and its text
            #     text_to_style += text
            #     style_runs.append(run)

                i += 1
                continue
            #     i += 1
            #     continue

            i += 1  # No tag here, so just go on to the next run

@@ -582,6 +553,8 @@ def set_keep_with_next_false(doc: Doc):

    for paragraph in iter_paragraphs(doc):
        paragraph.paragraph_format.keep_with_next = False
        paragraph.paragraph_format.space_after = Pt(12)


    return doc

+1 −0
Original line number Diff line number Diff line
@@ -432,6 +432,7 @@ def handle_examples_and_notes(soup: BeautifulSoup):

                    grandchild.clear()
                    grandchild.append(tagged_text)
                    grandchild.unwrap()

        return soup

+13 −0
Original line number Diff line number Diff line
@@ -638,6 +638,18 @@ def fix_custom_tags(soup: BeautifulSoup):
            a.string = a.string.replace("root", new_id_prefix)
    return soup

def fix_lists(soup: BeautifulSoup):
    """
    Flatten list items whose entire content is a single paragraph.

    Every ``<li>`` in ``soup`` is inspected; when it has exactly one child
    node and that child is a ``<p>`` tag, the ``<p>`` is unwrapped so its
    contents sit directly inside the ``<li>``. List items with any other
    shape are left untouched.

    Returns the same ``soup`` object, modified in place.
    """
    for list_item in soup.find_all("li"):
        # Snapshot the direct children before any modification of the tree.
        direct_children = list(list_item.children)
        if len(direct_children) != 1:
            continue

        (only_child,) = direct_children
        # Text nodes report a name of None, so only a real <p> tag matches.
        if only_child.name == "p":
            only_child.unwrap()

    return soup

def extract_images_from_html(soup: BeautifulSoup) -> dict:
    """
@@ -818,6 +830,7 @@ def postprocess(html_dir: str):
        soup = replace_dash_characters(soup)
        soup = move_figure_id_to_FL_elements(soup)
        soup = fix_custom_tags(soup)
        soup = fix_lists(soup)
        
        images, soup = extract_images_from_html(soup)
        for image_id, image_src in images.items():