Commit efaf5545 authored by Marco Cavalli
Browse files

fix: prevent pandoc from HTML-encoding < and > in JSON blocks

parent 4c89010f
Loading
Loading
Loading
Loading
+12 −4
Original line number Diff line number Diff line
@@ -73,9 +73,17 @@ def unwrap_gt_lt_code_tags(soup: BeautifulSoup):
    During preprocessing, sections of text marked by a beginning `<` and an ending `>` needed to be enclosed in code blocks for Pandoc to preserve the text.
    """
    # codes = soup.find_all("code", lambda tag: tag.parent and tag.parent.name != "pre")
    codes = soup.select("code:not(pre > code):not(em > code)")
    codes = soup.select("code:not(em > code)")

    for code in codes:
        if code.parent and code.parent.name == "pre":
            span_text_only_children = code.find_all(
                lambda tag: isinstance(tag, Tag) and tag.name == "span" and len(tag.contents) == 1 and isinstance(tag.contents[0], NavigableString)
            )
            for child in span_text_only_children:
                text = NavigableString(html.unescape(child.get_text()))
                child.contents[0].replace_with(text)
        else:
            text = NavigableString(html.unescape(code.get_text()))
            code.insert_before(text)
            code.decompose()