# asn2md.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse # parse arguments
import os.path  # getting extension from file
import sys      # output and stuff
import re       # for regular expressions
import copy     # for copy
if (sys.version_info > (3, 0)):
	import urllib.parse   #
else:
	import urllib   #

## Compiled patterns used by the parser functions below.

# One ASN.1 module: group 1 = module name, group 2 = optional '{...}' block
# following the name (printed as the OID by parseAsn), group 3 = text between
# BEGIN and END.
RE_MODULE   = re.compile( r'^\s*([A-Z][\w-]*)\s*({.*?})?\s*DEFINITIONS.*?::=\s*?BEGIN(.*)END', re.VERBOSE | re.MULTILINE | re.DOTALL)

# A run of whitespace; used to collapse multi-line text to single spaces.
RE_SPACES   = re.compile(r'\s+')

# ASN.1 comments ('--' style and '/* */' style); removed from the source
# snippet that parseModule prints inside the asn1 code fence.
RE_COMMENTS = re.compile(r'^\s*--.*?\n|--.*?(?:--|$)|/\*.*?\*/[\t ]*\n?', re.MULTILINE|re.DOTALL)

# Built-in type names; applied with .match() to a field type so that built-in
# types are rendered without a hyperlink.
RE_BASIC_TYPES = re.compile(r'^OCTET\s+STRING|BIT\s+STRING|BOOLEAN|INTEGER|FLOAT|SEQUENCE|SET|NULL')

# Text between the braces of a type definition (group 1).
#RE_TYPE_BODY_1 = re.compile(r'.*?{(.*)}\s*WITH', re.MULTILINE|re.DOTALL)
#RE_TYPE_BODY_2 = re.compile(r'.*?{(.*)}\s*(?:WITH.*|\(.*?\)|\s*$)', re.MULTILINE|re.DOTALL)
RE_TYPE_BODY = re.compile(r'.*?{(.*)}\s*(?:WITH.*|\(.*?\)|\s*$)', re.MULTILINE|re.DOTALL)

# One item of a type body. Matches either a '/* */' comment, a '--!' comment
# line (both with all groups unset), or a field/value:
# group 1 = name, group 2 = optional type, group 3 = remaining text up to the
# terminator, group 4 = '--' when the item is terminated by a trailing comment.
#RE_FIELDS = re.compile(r'^\s*(?:/\*\*.*?\*/)|^\s*([\w-]+?)\s+(OCTET\s+STRING|BIT\s+STRING|[A-Z][.\w-]+)?(.*?)(?:,((?:\s*--!?<.*?\n)*)|((?:--!?<.*?\n)*)$)', re.MULTILINE | re.DOTALL| re.VERBOSE)
RE_FIELDS = re.compile(r'^\s*/\*.*?\*/|^\s*--\!.*?\n|^[\s&]*([\w-]+)\s+(OCTET\s+STRING|BIT\s+STRING|[A-Z][\w-]+)?((?:{[^}]*}|\([^)]*\)|.)*?)(?:,|(--)|$)', re.MULTILINE | re.DOTALL)

# The 'EXPORTS ... ;' clause; only its end position is used.
RE_EXPORTS = re.compile(r'^\s*EXPORTS.*?;', re.DOTALL | re.MULTILINE)

# The 'IMPORTS ... ;' clause; group 1 = text between IMPORTS and ';'.
RE_IMPORTS = re.compile(r'^\s*IMPORTS\s*(.*?);', re.DOTALL | re.MULTILINE)

# One '<symbols> FROM <module> {...}' entry of the IMPORTS clause:
# group 1 = symbol list, group 2 = module name, group 3 = optional '{...}'
# block (optionally followed by WITH SUCCESSORS).
RE_IMPORT_ELEMENTS = re.compile(r'^([,\s\w-]*?)FROM\s*([\w-]+)\s*({[^}]*}(?:\s+WITH\s+SUCCESSORS)?)?', re.MULTILINE)

# A single symbol inside the symbol list of an import entry.
RE_IMPORT_ELEMENT_TYPE = re.compile(r'[^,\s]+')

# ASN.1 documentation comment lines starting with '---', '--!' or '--#';
# group 1 = the rest of the line, substituted into a C-style comment by
# parseDoxyComments.
# NOTE(review): '(:?' looks like a typo for '(?:' - as written it is a
# capturing group with an optional ':'; confirm before changing, since
# parseDoxyComments relies on group 1.
RE_DOXY_ASN_COMMENTS = re.compile(r'^\s*--[-!#](:?$|\s(.*))', re.MULTILINE)

# C-style documentation comment '/** ... */'; group 1 = comment text.
RE_DOXY_C_COMMENTS = re.compile(r'^\s*/\*\*\s(.*?)\*/', re.MULTILINE | re.DOTALL)

# Not referenced in the visible code.
RE_DOXY_C_COMMENTS_I = re.compile(r'\s*\*+')
# Leading '*' decoration at the start of each comment line.
RE_STRIPSTAR = re.compile(r'^\s*\*', re.MULTILINE)


# '@ref Name' cross reference; group 1 = referenced name.
RE_DOXY_REF = re.compile(r'@ref\s+([\w-]+)')
# RE_DOXY_CLASS and RE_DOXY_DETAILS are not referenced in the visible code.
RE_DOXY_CLASS = re.compile(r'@(?:class|struct|details):?\s+([\w-]+)')
RE_DOXY_DETAILS = re.compile(r'@details:?\s+[\w-]+')
# Tags removed from text by parseText: '@brief'/'@url' (tag only) and
# '@class|@struct|@details Name' (tag together with the name).
RE_DOXY_STRIP_SINGLE_TAG = re.compile(r'@(?:brief|url)\s+')
RE_DOXY_STRIP_TAG = re.compile(r'\s*@(?:class|struct|details):?\s+[\w-]+')
# Single-line tags; group 1 = the value after the tag.
# RE_DOXY_BRIEF and RE_DOXY_PARAM are not referenced in the visible code.
RE_DOXY_UNIT = re.compile(r'^\s*@unit:?\s+(.+)\n', re.MULTILINE)
RE_DOXY_REVISION = re.compile(r'^\s*@revision:?\s+(.+)\n', re.MULTILINE)
RE_DOXY_BRIEF = re.compile(r'^\s*@brief[\s:]+(.+)\n', re.MULTILINE)
RE_DOXY_CATEGORY = re.compile(r'^\s*@category[\s:]+(.+)\n', re.MULTILINE)
RE_DOXY_PARAM = re.compile(r'^\s*@(?:param|field):?\s+([\w-]+)\s*(.*?)\n\s*$', re.MULTILINE | re.DOTALL)

# A documentation section: group 1 = whole tag ('brief', 'note' or
# '<keyword> <name>'), group 2 = keyword of a named tag, group 3 = the name,
# group 4 = section text up to the next tag, a blank line or end of input.
RE_DOXY_SECTION = re.compile(r"^\s*@(brief|note|(class|struct|param|field|details)\s+([-\w]+)):?(.*?)(?=\n\s*@|\n\s*\n|\Z)", re.MULTILINE | re.DOTALL)

# One assignment, terminated by a blank line: group 1 = optional capitalised
# type name, group 2 = optional text before '::=', group 3 = definition text.
# RE_TYPE = re.compile(r'(([A-Z][\w-]*)\s*::=[\w \t]+(?:{+(.*?)}+)?.*?)\n\s*\n', re.MULTILINE | re.DOTALL)
RE_TYPE = re.compile(r'^\s*([A-Z][\w-]*)?\s*([{} \t:\w-]*?)?::=([\w \t]+.*?)\n\s*\n', re.MULTILINE | re.DOTALL)
# '@options a, b, ...' line; group 1 = comma separated option names.
RE_OPTIONS = re.compile(r'^\s*@options[\s:]+(.+)', re.MULTILINE)

# imported symbol name -> '<module>.md' link target; filled by parseModule
extTypes = {}
# offset in the module body after the IMPORTS/EXPORTS clauses; set by parseModule
cpos = 0
# command-line options; replaced by the argparse namespace in main()
o_args = []

def urlquote(s):
	"""Return *s* URL-encoded with quote_plus (spaces become '+').

	The quote_plus implementation is looked up at call time from the
	location matching the running interpreter version.
	"""
	running_py3 = sys.version_info > (3, 0)
	quote_plus = urllib.parse.quote_plus if running_py3 else urllib.quote_plus
	return quote_plus(s)

def indentLines(content:str, indent):
	"""Prefix every line of *content* with *indent* spaces.

	Each output line ends with a newline, whatever the input line ending
	was. A falsy *indent* (None or 0) adds no padding.
	"""
	width = indent or 0
	return ''.join(''.ljust(width) + line + '\n' for line in content.splitlines())

def parseText(content, indent=None):
	"""Convert documentation text to markdown and indent it.

	'@ref Name' becomes a bold link to '#Name', prefixed with the target file
	recorded in extTypes when the name is known there. The tags matched by
	RE_DOXY_STRIP_TAG and RE_DOXY_STRIP_SINGLE_TAG are removed. The result is
	passed through indentLines with *indent*.
	"""
	def link_for(match):
		name = match.group(1)
		return '[**{0}**]({1}#{0})'.format(name, extTypes.get(name, ''))

	linked = RE_DOXY_REF.sub(link_for, content)
	without_named_tags = RE_DOXY_STRIP_TAG.sub('', linked)
	cleaned = RE_DOXY_STRIP_SINGLE_TAG.sub('', without_named_tags)
	return indentLines(cleaned, indent)

def parseInlineComments(content:str, indent=None):
	"""Extract trailing-comment documentation from *content*.

	Only lines that, after stripping leading whitespace, start with '--< '
	or '--!< ' are kept; the marker is removed and the remaining text is
	rendered with parseText using *indent*.
	"""
	markers = ('--< ', '--!< ')
	kept = []
	for raw in content.splitlines():
		text = raw.lstrip()
		for marker in markers:
			if text.startswith(marker):
				kept.append(text[len(marker):] + '\n')
				break
	return parseText(''.join(kept), indent)

def parseDoxyComments(content:str):
	"""Return the concatenated text of all documentation comments in *content*.

	ASN.1 documentation comment lines (RE_DOXY_ASN_COMMENTS) are first
	rewritten as C-style '/** ... */' comments; then the body of every
	C-style documentation comment is collected with the leading '*' of each
	line removed.
	"""
	unified = RE_DOXY_ASN_COMMENTS.sub(r'/** *\g<1>*/', content)
	return ''.join(
		RE_STRIPSTAR.sub('', comment.group(1))
		for comment in RE_DOXY_C_COMMENTS.finditer(unified)
	)

def parseModule(mname, content):
	"""Render the body of one ASN.1 module as markdown.

	Emits an '## Imports:' section when an IMPORTS clause is present, then a
	'## Data Elements:' section with one entry per assignment (RE_TYPE) found
	after the IMPORTS/EXPORTS clauses. Each entry is built from the
	documentation comments preceding the assignment, a generated list of
	fields or values, and the assignment source with comments stripped.

	Side effects: records every imported symbol in the global extTypes and
	updates the global cpos. Reads command-line options from the global
	o_args, which must be the argparse namespace set up by main().

	mname   -- module name (currently unused)
	content -- text between BEGIN and END of the module
	Returns the markdown text.
	"""
	global cpos
	cpos = 0
	ret = ''
	m = RE_IMPORTS.search(content)
	if m is not None:
		pos = 0
		if m.group(1) is not None:
			ret += '## Imports:\n'
			s = m.group(1)
			for fm in RE_IMPORT_ELEMENTS.finditer(s):
				imName = fm.group(2)
				# remember where each imported symbol is documented
				for im in RE_IMPORT_ELEMENT_TYPE.finditer(fm.group(1)):
					extTypes[im.group(0)] = imName+'.md'
				ret += ' * **[{0}]({0}.md)** *{1}*<br/>\n'.format(imName, RE_SPACES.sub(' ', fm.group(3) or ''))
				# documentation comments located before this import entry
				ret += parseText(parseDoxyComments(s[pos:fm.start()])+'\n', 2)
				pos = fm.end()
			ret += parseText(parseDoxyComments(s[pos:]))
		cpos = m.end()

	m = RE_EXPORTS.search(content)
	if m is not None:
		if cpos < m.end():
			cpos = m.end()

	# parse types
	def repl_type (m, doc):
		# m is a RE_TYPE match, doc is the text preceding it (may be empty)
		title = t = m.group(1) # type name
		f_params = {}
		s_unit = ''
		s_category = ''
		s_note = ''
		s_revision = ''
		# per-type options start as a copy of the command-line options
		options = copy.copy(o_args)
		if doc : # doc is the prepending comment. Check if not None and not Empty
			doc = parseDoxyComments(doc)

			# parse options
			def repl_options(m):
				nonlocal options
				if m.group(1) is not None:
					for o in m.group(1).split(','):
						setattr(options, o.strip(), True)
				return ''
			doc=RE_OPTIONS.sub(repl_options, doc)

			def repl_section (m):
				nonlocal title
				nonlocal t
				nonlocal f_params
				nonlocal s_note
				ret = ''
				l = m.group(4).lstrip(":, \t").lstrip('\n')
				if m.group(2) is not None:
					# this can be class|struct|details|param|field
					if m.group(3) == t:
						ret = parseText(l)
					else:
						# description of a field/value, emitted with the field list
						if len(l):
							f_params[m.group(3)] = parseText(l, 2)
				elif m.group(1) == 'brief':
					if o_args.brief_as_title:
						title = parseText(l)
					else:
						ret = parseText(l)
				elif m.group(1) == 'note':
					s_note = '\n>>>\n' + 'NOTE: ' + parseText(l).rstrip() + '\n>>>\n'
				else:
					ret = m.string[m.start():m.end()]
				return ret
			doc = RE_DOXY_SECTION.sub(repl_section, doc)

			def repl_category(m):
				nonlocal s_category
				s_category = '\n&nbsp;&nbsp;&nbsp;&nbsp;**Categories**: '
				for l in m.group(1).split(','):
					# s_category += '[{0}](#{1}) '.format(l.strip(), urlquote(l.strip()))
					s_category += l.strip() + ' '
				s_category += '\n'
				return ''
			doc = RE_DOXY_CATEGORY.sub(repl_category, doc)

			def repl_unit(m):
				nonlocal s_unit
				s_unit = '\n&nbsp;&nbsp;&nbsp;&nbsp;**Unit**: _' + m.group(1).strip() + '_\n'
				return ''
			doc = RE_DOXY_UNIT.sub(repl_unit, doc)

			def repl_revision(m):
				nonlocal s_revision
				s_revision = '\n&nbsp;&nbsp;&nbsp;&nbsp;**Revision**: _' + m.group(1).strip() + '_\n'
				return ''
			doc = RE_DOXY_REVISION.sub(repl_revision, doc)
		else:
			doc = ''

		ret = ''
		if t is not None:
			fields = ''
			ret = '\n### <a name="{0}"></a>{1}\n'.format(t, title) + parseText(doc)

			# parse fields and get out fields descriptions
			if m.group(3) is not None:
				# check if contain fields
				bm = RE_TYPE_BODY.search(m.group(3))
				if bm is not None and bm.group(1) is not None:
					typeBody = bm.group(1).strip()
					fTitle = ''
					field = ''
					pos = 0
					for fm in RE_FIELDS.finditer(typeBody):
						if fm.group(1) is None:
							# comment-only match: nothing to emit. The previous
							# field was already appended and must not be
							# appended again.
							continue
						# add description to the previous type
						if len(field):
							fields += parseInlineComments(fm.string[pos:fm.start()], 3)
						field = ''
						f = fm.group(1).strip()
						ext = fm.group(3) or ''
						if f in f_params:
							field = f_params.pop(f) + '\n\n'
						if fm.group(2) is not None:
							# typed item: a field of a constructed type
							fTitle = 'Fields:\n'
							if len(field) or not o_args.no_auto_fields:
								ftype = fm.group(2).strip()
								if RE_BASIC_TYPES.match(ftype) is not None:
									field = '* {0} **{1}** {2}<br>\n'.format(f, ftype, ext) + field
								else:
									field = '* {0} [**{1}**]({2}#{1}) {3}<br>\n'.format(f, ftype, extTypes.get(ftype,''), ext) + field
						else:
							# untyped item: a named value or enumeration item
							fTitle = 'Values:\n'
							if len(field) or not o_args.no_auto_values:
								field = '* **{0}** {1}<br>\n'.format(f, ext) + field
						if len(field):
							field += parseText(fm.string[pos:fm.start()], 3)
						pos = fm.end()
						if fm.group(4) is not None:
							# keep '--' for the next round
							pos -= 2
						if len(field):
							fields += field
					if len(field):
						fields += parseInlineComments(typeBody[pos:], 3)
					# add all other fields defined as @params
					# '@options force-all-fields' sets the hyphenated attribute on
					# the copy; the command-line flag is stored by argparse as
					# 'force_all_fields' and is carried over by the copy.
					force_all = getattr(options, 'force-all-fields', False) or getattr(options, 'force_all_fields', False)
					if force_all:
						for f in f_params:
							fields += '* {}<br>\n{}\n\n'.format(f, f_params[f])
					if len(fields):
						ret = ret.strip() + '\n\n' + fTitle + fields
		else:
			if title:
				ret = '### {}\n\n'.format(title)
			# doc has already been reduced to plain comment text above; parsing
			# it for comments a second time would discard it
			l = parseText(doc)
			if len(l):
				ret += l + '\n\n'
			for p in f_params:
				ret += '* `{0}` {1}\n'.format(p, f_params[p])

		return ret + s_unit + s_category + s_revision + s_note + '```asn1\n' + RE_COMMENTS.sub('', m.group(0).strip()) +'\n```\n\n'

	pos = 0
	ret += '## Data Elements:\n'
	for m in RE_TYPE.finditer(content[cpos:]):
		# text between the previous assignment and this one holds its documentation
		ret += repl_type (m, m.string[pos:m.start()])
		pos = m.end()
	return ret


def parseAsn(outDir, content):
	"""Write one markdown file per ASN.1 module found in *content*.

	For every RE_MODULE match a file '<module name>.md' is written to
	*outDir* (UTF-8). It contains a header with the module name and OID,
	the documentation comments preceding the module, and the output of
	parseModule for the module body.

	outDir  -- directory the markdown files are written to
	content -- full text of an ASN.1 source file
	Returns the number of modules written. Errors from opening or writing
	the output file propagate to the caller.
	"""
	# iterate modules in the file
	pos = 0
	cnt = 0
	for m in RE_MODULE.finditer(content):
		name = m.group(1)
		# the '{...}' block is optional in RE_MODULE, so group(2) may be None
		oid = RE_SPACES.sub(' ', m.group(2) or '')
		ret = '# ASN.1 module {}\n OID: _{}_\n'.format(name, oid)
		ret += parseDoxyComments(content[pos:m.start()]) + '\n'
		if m.group(3) is not None:
			ret += parseModule(name, m.group(3))
		ret += '\n\n'
		# context manager guarantees the file is flushed and closed
		with open(os.path.join(outDir, name + '.md'), "w", encoding='utf-8') as out:
			out.write(ret)
		pos = m.end()
		cnt += 1
	return cnt

def main():
	"""Command-line entry point.

	Parses the arguments into the global o_args, converts every listed
	ASN.1 file with parseAsn and prints the number of modules processed.
	An I/O error on one file is reported on stderr and does not stop the
	processing of the remaining files.
	"""
	global o_args
	ap = argparse.ArgumentParser(description='ASN.1 to markdown converter')
	ap.add_argument('--out', '-o', type=str, default='.', help='output directory')
	ap.add_argument('--brief-as-title', '-B', default=False, action='store_true', help='Treat @brief line as type header')
	ap.add_argument('--force-all-fields', '-f', default=False,action='store_true', help='Add all fields in the list even if empty')
	ap.add_argument('--no-auto-fields', '-F', default=False,action='store_true', help='Add fields only if @param or @field is defined')
	ap.add_argument('--no-auto-values', '-V', default=False,action='store_true', help='Do not add named values or enums')
	ap.add_argument('modules', action='store', nargs='+', help='ASN.1 files')
	o_args = ap.parse_args()

	if not o_args.modules:
		ap.print_help()
		sys.exit(1)

	cnt = 0
	for a in o_args.modules:
		try:
			# close the input file as soon as it has been read
			with open(a, encoding='latin-1') as src:
				content = src.read()
			cnt += parseAsn(o_args.out, content)
		except IOError as e:
			# exceptions are not subscriptable in Python 3; str(e) carries
			# the error text and the file name
			sys.stderr.write(str(e) + "\n")
	print("{} modules processed\n".format(cnt))

# Run the converter only when executed as a script, not when imported.
if __name__ == '__main__':
	main()