Commit 805e55dc authored by Miguel Angel Reina Ortega
Browse files

Rewrite the way changes are extracted into md files

parent dc8eb70b
Loading
Loading
Loading
Loading
+197 −4
Original line number Original line Diff line number Diff line
@@ -6,7 +6,7 @@
#	(c) 2023 by Miguel Angel Reina Ortega
#	(c) 2023 by Miguel Angel Reina Ortega
#	License: BSD 3-Clause License. See the LICENSE file for further details.
#	License: BSD 3-Clause License. See the LICENSE file for further details.
#
#

from typing import Tuple
import argparse, os, re, sys
import argparse, os, re, sys
from rich import print
from rich import print
from rich.progress import Progress, TextColumn, TimeElapsedColumn
from rich.progress import Progress, TextColumn, TimeElapsedColumn
@@ -341,16 +341,209 @@ https://forge.etsi.org/rep/cdm/pipeline-scripts/-/blob/main/common/Dockerfile.st
    #clauseMDlines.insert(j, "\n\n<br />")
    #clauseMDlines.insert(j, "\n\n<br />")
    writeMDFile(progress, clauseMDlines, changed_clause.clause_nr.replace(" ","") + '.md', outDirectory)
    writeMDFile(progress, clauseMDlines, changed_clause.clause_nr.replace(" ","") + '.md', outDirectory)



def integrate_changes(progress: Progress, mdLines: list[str], mr: MR) -> list[Tuple[str, bool]]:
    '''
    Integrate the changes of a merge request into the lines of the source document.

    The source lines are copied one by one; wherever a hunk of the patch applies,
    the hunk lines are emitted instead: added lines are rendered with addedLine(),
    removed lines with removedLine(), context lines are copied as they are.

    Args:
        progress: Progress display; a task is registered on it.
        mdLines: Lines of the source document, without line endings.
        mr: Merge request whose `patch_set` is iterated. Only patched files whose
            source file starts with "a/TS" or "a/TR" are considered.
            NOTE(review): the patch objects look like `unidiff` PatchedFile / Hunk /
            Line instances (source_start, target_start, is_added, is_removed,
            value) - confirm against the MR class.

    Returns:
        A list of `(text, changed)` tuples, one per output line. `changed` is True
        for lines added or removed by the merge request, False otherwise.
    '''

    def unchanged(text: str) -> Tuple[str, bool]:
        # Headings are followed by an extra empty line, other lines by a single line break.
        return (text + ("\n\n" if text.startswith("#") else "\n"), False)

    _taskID = progress.add_task('[blue]Integrate changes', start=False, total=0)

    spec_with_changes: list[Tuple[str, bool]] = []

    for patched_file in mr.patch_set:
        if not patched_file.source_file.startswith(("a/TS", "a/TR")):
            continue

        logging.debug(f"Looking at changes in {patched_file.source_file}")
        index_source = 1  # 1-based number of the next source line to consume
        lines_added = 0
        lines_removed = 0

        for change in patched_file:
            logging.debug(f'Change from patch details: source_start: {change.source_start} - target_start: {change.target_start}')

            # Copy the untouched source lines located before this hunk
            while index_source < change.source_start:
                spec_with_changes.append(unchanged(mdLines[index_source - 1]))
                index_source += 1

            # Sanity check: the position in the target must be the position in the
            # source shifted by the lines added/removed so far
            expected_target_start = index_source + lines_added - lines_removed
            if change.target_start != expected_target_start:
                logging.warning(
                    f'Something is wrong ... hunk target_start is {change.target_start}, expected {expected_target_start}. '
                    f'Remaining changes of {patched_file.source_file} are not applied')
                break
            logging.debug(
                f'Change applied correctly, indexes on track. Added {lines_added} lines and removed {lines_removed} lines')

            for line in change:
                if line.is_added:
                    # Added lines do not exist in the source: the source index stays
                    spec_with_changes.append((addedLine(line), True))
                    lines_added += 1
                elif line.is_removed:
                    spec_with_changes.append((removedLine(line), True))
                    index_source += 1
                    lines_removed += 1
                else:
                    # Context line, taken from the patch
                    spec_with_changes.append(unchanged(line.value))
                    index_source += 1

        # Copy the source lines located after the last hunk. Without this the
        # document (and the last changed clause) ends right after the last change.
        while index_source <= len(mdLines):
            spec_with_changes.append(unchanged(mdLines[index_source - 1]))
            index_source += 1

        logging.debug(f'Applied changes. Total added lines: {lines_added}.Total removed lines {lines_removed}')

    return spec_with_changes

def addedLine(line: str) -> str:
    '''
    Render a line added by a merge request as Markdown marked as "inserted".

    NOTE: despite the `str` annotation only `line.value` is read, so `line` must
    be an object with a `value` attribute (integrate_changes() passes the lines
    of a patch hunk).

    - Empty or whitespace-only lines are returned unchanged (not marked).
    - Table rows (starting with "|"): every non-empty cell is wrapped in
      `<span class="underline">`. The delimiter row of a table (`|---|:--:|`)
      is returned unmarked, because wrapping its cells would break the table.
    - Figures (starting with "![") are returned unmarked.
    - Any other line is wrapped in `<span class="underline">` as a whole.

    Returns:
        The Markdown text for the line, including the trailing line break(s).
    '''
    text = line.value.strip()

    if not text:
        return line.value  # Add an extra line not marked as added

    if text.startswith("|"):  # It is a table
        cells = [cell.strip() for cell in text.split("|")]
        filled = [cell for cell in cells if cell]
        if filled and all(re.fullmatch(r':?-+:?', cell) for cell in filled):
            # Delimiter row between table header and body: must stay as it is
            return text + "\n"
        marked = ["<span class=\"underline\">" + cell + "</span>" if cell else " " for cell in cells]
        return "|".join(marked) + "\n"

    if line.value.startswith("!["):  # It is a figure
        return text + "\n\n"  # TODO How a figure should be marked if it is modified

    # It works for simple lines, not for lines in a list
    return "<span class=\"underline\">" + text + "</span>" + "\n\n"

def removedLine(line: str) -> str:
    '''
    Render a line removed by a merge request as Markdown marked as "deleted".

    NOTE: despite the `str` annotation only `line.value` is read, so `line` must
    be an object with a `value` attribute (integrate_changes() passes the lines
    of a patch hunk).

    - Empty or whitespace-only lines are returned unchanged (not marked).
    - Table rows (starting with "|"): every non-empty cell is struck through
      (`~~cell~~ `). The delimiter row of a table (`|---|:--:|`) is returned
      unmarked, because striking its cells would break the table.
    - Any other line is struck through as a whole.

    Returns:
        The Markdown text for the line, including the trailing line break(s).
    '''
    text = line.value.strip()

    if not text:
        return line.value

    if text.startswith("|"):  # It is a table
        cells = [cell.strip() for cell in text.split("|")]
        filled = [cell for cell in cells if cell]
        if filled and all(re.fullmatch(r':?-+:?', cell) for cell in filled):
            # Delimiter row between table header and body: must stay as it is
            return text + "\n"
        marked = ["~~" + cell + "~~ " if cell else " " for cell in cells]
        return "|".join(marked) + "\n"

    return "~~" + text + "~~" + "\n\n"

def find_clauses_with_changes(progress: Progress, mdLines_changes: list[Tuple[str, bool]]) -> Tuple[list[Clause], list[Clause]]:
    '''
    Scan the document lines (with change markers) to find all clauses and the
    clauses that contain at least one changed line.

    Args:
        progress: Progress display; a task is registered on it.
        mdLines_changes: `(text, changed)` tuples, one per document line.

    Returns:
        A tuple `(clauses, changed_clauses)`. Each Clause carries its heading line,
        the 1-based index of its first line (`from_id`), the 1-based index of its
        last line (`to_id`) and its clause number.
    '''

    _taskID = progress.add_task('[blue]Find clauses with changes', start=False, total=0)

    # Raw string: the pattern contains regex escapes (\s, \d, \w, \.) that are
    # invalid escape sequences in a normal string literal
    clauseregex = re.compile(r'^#+\s(\d+(\.\d+)*|Annex \w(\.\d+)*|\w*(\.\d+)*).*')
    clauses: list[Clause] = []
    changed_clauses: list[Clause] = []
    changeInClause = False
    # Clause "0": everything from the start of the document to the first heading
    clause = Clause("", 1, 1, "0")

    for index, (line, change) in enumerate(mdLines_changes, start=1):
        if change:
            changeInClause = True
        if not line.startswith('#'):
            continue

        matches = clauseregex.findall(line)  # Match heading
        if not matches:  # Line starts with '#' but is not a recognised heading
            print("Unknown heading")
            continue

        # It may be the end of the clause or the start of a subclause
        if index - 2 == clause.from_id:
            # It is a subclause: the heading comes two lines after the start of the
            # current clause, so the current clause is continued from here
            clause.from_id = index
            clause.raw = line
            clause.clause_nr = matches[0][0]
        else:
            # It is the end of the clause
            clause.to_id = index - 1
            clauses.append(clause)
            if changeInClause:
                changed_clauses.append(clause)
            clause = Clause(line, index, index, matches[0][0])
            changeInClause = False

    # Append last clause (usually History)
    clause.to_id = len(mdLines_changes)
    clauses.append(clause)
    if changeInClause:
        changed_clauses.append(clause)

    logging.debug(f"Number of clauses: {len(clauses)}")
    for found_clause in clauses:
        logging.debug(found_clause.clause_nr)
        logging.debug(found_clause.from_id)
        logging.debug(found_clause.to_id)

    return (clauses, changed_clauses)

def saveChangedClauses(progress: Progress, outDirectory:str, changed_clauses: list[Clause], mdLines: list[str]):
    '''
    Write every changed clause to its own Markdown file.

    Args:
        progress: Progress display, handed over to writeMDFile().
        outDirectory: Directory handed over to writeMDFile().
        changed_clauses: Clauses to export. `from_id` and `to_id` are used as
            1-based, inclusive line indexes into `mdLines`.
        mdLines: Lines of the complete document.

    The file name is the clause number without spaces plus ".md".
    '''
    for clause in changed_clauses:
        # to_id is the last line of the clause (the line before the next heading),
        # so it is part of the clause
        clauseMDLines = mdLines[clause.from_id - 1:clause.to_id]
        writeMDFile(progress, clauseMDLines, clause.clause_nr.replace(" ", "") + '.md', outDirectory)


def process(document:str, outDirectory:str, mr:MR) -> None:
    '''
    Export the clauses of a document that are changed by a merge request.

    The document is retrieved from the target branch of the merge request, the
    changes of the merge request are integrated into its lines, and the clauses
    containing changes are determined on the result.

    Args:
        document: Document to retrieve through `mr.retrieve_text()`.
        outDirectory: Directory receiving `changedClauses.txt` (one clause number
            per line) and the files written by saveChangedClauses().
        mr: The merge request.
    '''
    with Progress(TextColumn('{task.description}'),  TimeElapsedColumn()) as progress:
        sourceText = mr.retrieve_text(mr.target_branch, document)
        sourceMdLines = sourceText.splitlines(keepends=False)
        targetMdLines_changes = integrate_changes(progress, sourceMdLines, mr)
        _all_clauses, changed_clauses = find_clauses_with_changes(progress, targetMdLines_changes)

        # Export list of changed clauses (the file is closed by the with statement)
        with open(f'{outDirectory}/changedClauses.txt', "w", encoding='utf-8', errors='replace') as f:
            f.write("\n".join([clause.clause_nr for clause in changed_clauses]))

        saveChangedClauses(progress, outDirectory, changed_clauses, [l[0] for l in targetMdLines_changes])



def main(args=None):
def main(args=None):
    # Parse command line arguments
    # Parse command line arguments