# check_xsd.py - syntax and build checks for XSD schema files.
import logging

import glob
import sys
import argparse
from pathlib import Path
from pprint import pprint
import os

from lxml import etree
from xml.etree.ElementTree import ParseError
from xmlschema import XMLSchema, XMLSchemaParseError
# Schema model errors are caught alongside parse errors in ValidateXSDFiles.
from xmlschema.validators.exceptions import XMLSchemaModelError
# Schema loading and validation helpers.


def BuildSchemaDictonary(fileList):
    """Build the (namespace, path) location list for a set of schema files.

    Each file is loaded with ``validation='skip'`` and contributes one
    ``(default_namespace, resolved_path)`` tuple. Files that raise
    ``ParseError`` are logged as warnings and left out of the result.

    Returns an empty list when ``fileList`` is empty.
    """
    if len(fileList) == 0:
        logging.info("No schema files provided")
        return []

    logging.info("Schema locations:")
    locations = []
    for candidate in fileList:
        try:
            loaded = XMLSchema(candidate, validation='skip')
            resolvedPath = str(Path(candidate).resolve())
            locations.append((loaded.default_namespace, resolvedPath))
            logging.info(f" [ {loaded.default_namespace}  ->  {candidate} ]")
        except ParseError as parseFailure:
            logging.warning(f" [ {candidate} failed to parse:  {parseFailure} ]")
    return locations


def BuildSchema(coreFile, fileList = None):
    """Load ``coreFile`` as an XMLSchema, resolving imports via ``fileList``.

    When ``fileList`` is missing or empty the schema is built with an empty
    locations list; otherwise the locations come from BuildSchemaDictonary.
    """
    if not fileList or len(fileList) == 0:
        locations = []
    else:
        locations = BuildSchemaDictonary(fileList)

    return XMLSchema(str(Path(coreFile)), locations=locations)

def ValidateSingleFile(schemaFile):
    """Try to load a single schema file with validation skipped.

    Returns the ``ParseError`` raised while loading (after logging a
    warning), or ``None`` when the file loads without one.
    """
    try:
        XMLSchema(schemaFile, validation='skip')
    except ParseError as parseFailure:
        logging.warning(f" [ {schemaFile} failed to parse:  {parseFailure} ]")
        return parseFailure
    else:
        return None


def ValidateXSDFiles(fileList):
    """Build every schema in ``fileList`` against the full set of schemas.

    The location list is computed once from all files and passed to each
    XMLSchema construction, so schemas can resolve one another.

    Returns a dict mapping each schema file to a list of error messages.
    The list is empty when the schema builds, and also when the error's
    ``schema_url`` is set and differs from its ``origin_url`` (such errors
    are logged as coming from another schema and suppressed). Returns an
    empty dict when ``fileList`` is empty.
    """
    if len(fileList) == 0:
        logging.info("No schema files provided")
        return {}

    schemaLocations = BuildSchemaDictonary(fileList)
    errors = {}

    logging.info("Schema validation:")
    for schemaFile in fileList:
        try:
            # Only construction matters here; the schema object is discarded.
            XMLSchema(schemaFile, locations=schemaLocations)
            logging.info(schemaFile + ": OK")
            errors[schemaFile] = []
        except (XMLSchemaParseError, XMLSchemaModelError) as ex:
            if (ex.schema_url) and (ex.schema_url != ex.origin_url):
                logging.info("  Error {1} comes from {0}, suppressing".format(ex.schema_url, ex.message))
                errors[schemaFile] = []
            else:
                logging.warning(schemaFile + ": Failed validation ({0})".format(ex))
                errors[schemaFile] = [ex.message]
    return errors


def ValidateInstanceDocuments(coreFile, supportingSchemas, instanceDocs):
    """Validate instance documents against the schema built from ``coreFile``.

    The schema is built once via BuildSchema(coreFile, supportingSchemas) and
    each document is validated against it. Every failure is logged and also
    recorded, so the caller can tell from the return value whether anything
    failed.

    Returns a list of ``(instanceDoc, exception)`` tuples, one per document
    that failed validation; the list is empty when every document passes or
    when no instance documents were supplied.
    """
    if (instanceDocs is None) or len(instanceDocs) == 0:
        logging.warning("No instance documents provided")
        return []

    schema = BuildSchema(coreFile, supportingSchemas)
    errors = []
    for instanceDoc in instanceDocs:
        try:
            schema.validate(instanceDoc)
        except Exception as ex:
            # Deliberately broad: one bad document must not stop the rest
            # from being checked. The failure is kept for the caller instead
            # of only being logged.
            logging.error("{0} failed validation: {1}".format(instanceDoc, ex))
            errors.append((instanceDoc, ex))
        else:
            logging.info("{0} passed validation".format(instanceDoc))
    return errors
    


if __name__ == '__main__':
    # Command-line entry point.
    #   1. Collect the .xsd files named on the command line (directories are
    #      expanded, optionally recursively).
    #   2. Syntax-check each file on its own; stop with a non-zero exit
    #      status if any file fails.
    #   3. Unless --skipbuild is given, build every schema against the whole
    #      set and exit with the number of build errors.
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--verbosity", help="verbosity level", action="count", default=0)
    parser.add_argument("-r", "--recurse",   help="recurse into subdirectories", action="store_true")
    parser.add_argument("-s", "--skipbuild", help="skip the (lengthy) build checks", action="store_true")
    # action="append" together with nargs="+" produces a nested list, which
    # is why the paths are read from args.input[0] below.
    parser.add_argument("input", help="include a directory or file", action="append", nargs="+")
    args = parser.parse_args()

    logging.getLogger().setLevel(logging.WARNING)
    if args.verbosity >= 1:
        logging.getLogger().setLevel(logging.INFO)
    if args.verbosity >= 2:
        logging.getLogger().setLevel(logging.DEBUG)
        logging.debug("Very verbose selected")

    logging.debug(f"Path: {args.input}")

    if args.recurse:
        logging.info("Recursion enabled")
    if args.skipbuild:
        logging.info("Skipping build checks")

    includeFileList = []
    for path in args.input[0]:
        p = Path(path)
        if not p.exists():
            logging.error(f"Include path {path} not found")
            sys.exit(1)
        if p.is_dir():
            logging.debug("Expanding directory")
            files = p.rglob("*.xsd") if args.recurse else p.glob("*.xsd")
            for f in files:
                logging.info(f"...Including {f}")
                includeFileList.append(str(f.absolute()))
        else:
            logging.info(f"Including {p.absolute()}")
            includeFileList.append(str(p.absolute()))

    print("====================================================")
    print("XSD syntax checks:")
    print("----------------------------------------------------")
    errors = {}
    for file in includeFileList:
        error = ValidateSingleFile(file)
        if error is not None:
            print(f"  {file} : Syntax error [{error}]")
            errors[file] = error
        else:
            print(f"  {file} : OK")

    # The count is derived from the collected failures, so both the summary
    # line and the exit status reflect the files that actually failed.
    syntaxErrors = len(errors)

    print("----------------------------------------------------")
    print(f"{syntaxErrors} syntax errors detected")
    if syntaxErrors > 0:
        for fileName, error in errors.items():
            print(f"  {fileName}: {error}")
        # Exit statuses wrap modulo 256 on POSIX; clamp so that a large
        # error count can never be reported as success.
        sys.exit(min(syntaxErrors, 255))

    if args.skipbuild:
        print("Skipping build checks")
        sys.exit(0)

    print("")
    print("====================================================")
    print("XSD build checks (this may take a while):")
    print("----------------------------------------------------")
    results = ValidateXSDFiles(includeFileList)
    errorCount = 0
    for fileName, errors in results.items():
        if len(errors) > 0:
            errorCount += len(errors)
            print(f"  {fileName}: {len(errors)} errors")
            for error in errors:
                if isinstance(error, XMLSchemaParseError):
                    print(error.msg)
                else:
                    print(f"      {str(error.strip())}")
        else:
            print(f"  {fileName}: OK")

    print("----------------------------------------------------")
    print(f"{errorCount} build errors detected")
    # Repeat only the failing files as a summary. errorCount is final at this
    # point and must not be incremented again.
    for fileName, errors in results.items():
        if len(errors) > 0:
            print(f"  {fileName}: {len(errors)} errors")
            for error in errors:
                if isinstance(error, XMLSchemaParseError):
                    print(str(error))
                else:
                    print(f"      {str(error.strip())}")

    # Clamp for the same reason as above: 256 errors must not exit with 0.
    sys.exit(min(errorCount, 255))