Initial commit (9f4232a5) · Commits · CIM - Context Information Management / NGSI-LD API

API.docx

0 → 100644

+7.52 MiB

File added.

No diff preview for this file type.

View file

ETSI_GS_skeleton.docx

0 → 100644

+105 KiB

File added.

No diff preview for this file type.

View file

filter_1.lua

0 → 100644

+182 −0

Original line number	Diff line number	Diff line
		--THIS LUA FILTER MUST BE APPLIED TO PANDOC IN THE CORRECT ORDER: filter_1.lua FIRST, THEN filter_2.lua

		-- Global tables filled while the filter functions below run; Pandoc() serialises both to the mediabag as JSON.
		-- references: reference key (e.g. "1" or "i.1") -> "#"..key, written by Reference()
		references = {}
		-- annex_headers: text of the third inline of a level-8 header -> "#"..header identifier, written by Header()
		annex_headers = {}

		--- Helper that recognises word runs delimited by the "@#!" marker and wraps
		-- each run in a Span whose class is the run's style tag.
		-- A Str containing the marker opens a run; the first Str that follows is taken
		-- as the tag; the inlines after it are collected in the Span; the next Str
		-- containing the marker closes the run.
		-- NOTE(review): text that follows the opening marker inside the same Str is
		-- discarded, and a run that is never closed is dropped together with its
		-- content - presumably the preprocessing never produces either; confirm.
		-- @param el element whose `content` is the inline list to scan (only read)
		-- @return pandoc.List holding the rewritten inlines
		function Run(el)
		  local MARK = "@#!"
		  local insideRun = false
		  local tag, span
		  local newContent = pandoc.List({})

		  for _, elem in ipairs(el.content) do
		    if elem.t == "Str" and elem.text:find(MARK) then
		      -- the match cannot fail here: find() just confirmed MARK is present
		      local before, after = elem.text:match("(.-)" .. MARK .. "(.*)")
		      insideRun = not insideRun
		      if insideRun then
		        -- a new run starts: keep the text written before the marker
		        if before ~= "" then
		          newContent:insert(pandoc.Str(before))
		        end
		        tag = ""
		        span = pandoc.Span("")
		      else
		        -- the current run ends: text before the marker still belongs to it
		        if before ~= "" then
		          span.content:insert(pandoc.Str(before))
		        end
		        -- drop the space that followed the tag, unless the span is a single element
		        if #span.content > 1 then
		          span.content:remove(1)
		        end
		        span.attr = { class = tag }
		        -- an empty list is truthy in Lua, so test the length to skip empty spans
		        if #span.content > 0 then
		          newContent:insert(span)
		        end
		        local secondStart = after:find(MARK)
		        if secondStart then
		          -- another run opens right after this one
		          local between = after:sub(1, secondStart - 1)
		          if between ~= "" then
		            newContent:insert(pandoc.Str(between))
		          end
		          insideRun = true
		          tag = ""
		          span = pandoc.Span("")
		        elseif after ~= "" then
		          newContent:insert(pandoc.Str(after))
		        end
		      end
		    elseif insideRun then
		      if tag == "" then
		        -- the tag is the first Str after the marker; anything else before it is skipped
		        if elem.t == "Str" then
		          tag = elem.text
		        end
		      else
		        span.content:insert(elem)
		      end
		    else
		      -- outside any run: copy the element unchanged
		      newContent:insert(elem)
		    end
		  end
		  return newContent
		end

		--- Helper that applies paragraph styles.
		-- Looks for a trailing "[{[--tag--]}]" marker in the stringified element. When
		-- the marker ends the text, the last two inlines (the marker and the element
		-- before it, expected to be a space) are removed and the remaining content is
		-- returned inside a Div with class `tag`.
		-- For the "TF" and "TH" tags the Div also gets an id built from the first and
		-- third inline, e.g. "Figure_x.x.x" / "Table_x.x.x" (the last character of the
		-- third inline is dropped).
		-- @param el element with inline `content` (modified in place when a tag is found)
		-- @return the styled Div, or `el` unchanged when no trailing tag is present
		function Style(el)
		  local text = pandoc.utils.stringify(el)
		  local _, endIndex, tag = text:find("%[{%[%-%-(.-)%-%-%]}%]")
		  -- styles are only honoured when the marker is the very end of the text
		  if endIndex ~= #text then
		    return el
		  end
		  table.remove(el.content) -- the marker itself
		  table.remove(el.content) -- the element preceding the marker
		  local attr = { class = tag }
		  if tag == "TF" or tag == "TH" then
		    local label, number = el.content[1], el.content[3]
		    -- only build an id when both parts are plain strings; other inline types
		    -- carry no usable `text` and would make the concatenation fail
		    if label and number and label.t == "Str" and number.t == "Str" then
		      attr.id = label.text .. "_" .. number.text:sub(1, -2)
		    end
		  end
		  return pandoc.Div(el.content, attr)
		end

		--- Helper that turns a reference entry (text starting with "[n]" or "[i.n]")
		-- into a Div whose id is the reference key, and records the key in the global
		-- `references` table as key -> "#"..key.
		-- @param el block to inspect; may be a Div with classes or an element without attributes
		-- @return a Div with id = key (keeping the first class of `el`, if any), or `el` unchanged
		function Reference(el)
		  local text = pandoc.utils.stringify(el)
		  for prefix, number in text:gmatch("%[(i?%.?)(%d+)%]") do
		    local key = prefix .. number
		    -- the key must sit right after the first character of the text
		    if text:sub(2, #key + 1) == key then
		      references[key] = "#" .. key
		      -- elements without attributes have no class list; indexing it
		      -- unconditionally fails on nil
		      local classes = el.attr and el.attr.classes
		      return pandoc.Div(el.content, { id = key, class = classes and classes[1] })
		    end
		  end
		  return el
		end

		--Pandoc filters
		if FORMAT:match 'html' then
		-- Str filter: turns every "{{{{TAB}}}}" placeholder back into a tab character.
		function Str(el)
		  -- string.gsub also returns the substitution count; only the text is kept
		  local restored = string.gsub(el.text, "{{{{TAB}}}}", "\t")
		  return pandoc.Str(restored)
		end

		-- Para filter: rewrites "@#!" runs, then applies the paragraph style and
		-- finally the reference handling to the result.
		function Para(el)
		  el.content = Run(el)
		  return Reference(Style(el))
		end

		-- Plain filter: rewrites "@#!" runs, then applies the paragraph style.
		function Plain(el)
		  local rewritten = Run(el)
		  el.content = rewritten
		  local styled = Style(el)
		  return styled
		end

		-- Emph filter: rewrites "@#!" runs found inside emphasised text.
		function Emph(el)
		  local rewritten = Run(el)
		  el.content = rewritten
		  return el
		end

		-- Strong filter: rewrites "@#!" runs found inside strong text.
		function Strong(el)
		  local rewritten = Run(el)
		  el.content = rewritten
		  return el
		end

		-- Superscript filter: rewrites "@#!" runs found inside superscript text.
		function Superscript(el)
		  local rewritten = Run(el)
		  el.content = rewritten
		  return el
		end

		-- Header filter: rewrites "@#!" runs, records annex headers and applies the
		-- paragraph style.
		-- For level-8 headers the text of the third inline is stored in the global
		-- `annex_headers` table, mapped to "#" followed by the header identifier.
		function Header(el)
		  el.content = Run(el)
		  if el.level == 8 then
		    local letter = el.content[3]
		    -- headers with fewer than three inlines, or whose third inline is not a
		    -- Str, are skipped: indexing them would raise or produce a nil table key
		    if letter and letter.t == "Str" then
		      annex_headers[letter.text] = "#" .. el.attr.identifier
		    end
		  end
		  return Style(el)
		end


		-- Image filter.
		--  * ".emf" sources are re-pointed to a ".png" file with the same path.
		--  * ".png" images are wrapped in a Span together with a semi-transparent
		--    yellow overlay Span (class "image_overlay"); the image attributes are
		--    replaced by a fixed style.
		--  * any other source returns nothing, which leaves the image untouched.
		function Image(el)
		  -- split "path/name.ext" at the last dot; the previous pattern "(.)%.(.)$"
		  -- only matched one-character extensions, so neither branch ever ran
		  local filePath, extension = el.src:match("^(.+)%.([^%./\\]+)$")
		  if extension == "emf" then
		    el.src = filePath .. ".png"
		    return el
		  end
		  if extension == "png" then
		    local overlay = pandoc.Span({}, { style = "position: absolute; top: 0; right: 0; bottom: 0; left: 0; background-color: rgba(255, 255, 0, 0.5); pointer-events: none; z-index: 1;", class = "image_overlay"})
		    el.attr = { style = "width: 100%; height: auto; position: relative;"}
		    return pandoc.Span({el, overlay}, { style = "position: relative; display: inline-flex;" })
		  end
		end

		-- Pandoc filter: stores the data needed by the second filter in the mediabag
		-- as JSON: the collected references, the annex headers and the table of
		-- contents (depth 4) of the document. Returns nothing.
		function Pandoc(el)
		  local function stash(path, value)
		    pandoc.mediabag.insert(path, 'text/json', pandoc.json.encode(value))
		  end
		  stash('media/references.json', references)
		  stash('media/annex.json', annex_headers)
		  stash('media/toc.json', pandoc.structure.table_of_contents(el, { toc_depth = 4}))
		end
		end

filter_2.lua

0 → 100644

+262 −0

Original line number	Diff line number	Diff line
		--THIS LUA FILTER MUST BE APPLIED TO PANDOC IN THE CORRECT ORDER: AFTER filter_1.lua, WHICH STORES THE DATA READ BELOW
		-- switch for the diagnostic prints in this file; note that this local hides
		-- Lua's standard `debug` library for the rest of the file
		local debug = false

		-- Loads one JSON entry from the mediabag.
		-- asPandoc = false decodes to plain Lua tables, true to pandoc objects.
		local function fetchJson(path, asPandoc)
		  local _, contents = pandoc.mediabag.fetch(path)
		  if contents == nil then
		    error("'" .. path .. "' is missing from the mediabag; filter_1.lua must run before this filter")
		  end
		  return pandoc.json.decode(contents, asPandoc)
		end

		local references = fetchJson("media/references.json", false) --references generated in the first filter
		local annex_headers = fetchJson("media/annex.json", false) --annex_headers generated in the first filter
		local toc = fetchJson("media/toc.json", true) --pandoc generated toc

		--process the toc into a flat table: clause number -> link target
		local tocLinks = {}
		pandoc.walk_block(toc, {
		  Link = function(el)
		    local first = el.content[1]
		    if first and first.t == "Str" then
		      -- optional alphanumeric prefix, up to three dot-separated groups, optional "-n" suffix
		      -- (e.g. "5.2", "4.2.3", "B.1", "4.2.3-1"); without the repetition operators
		      -- the pattern needed at least five characters and missed all of these
		      local number = first.text:match("(%w*%.?%d*%.?%d*%.?%d+%-?%d*)")
		      if number then
		        tocLinks[number] = el.target --save links to the corresponding clause number
		      end
		    end
		  end
		})


		--- Generates the link for a clause.
		-- @param text label of the link; "-" is replaced by a non-breaking hyphen
		-- @param number clause number used as key into `tocLinks`
		-- @return a Link to the clause, or a plain Str with the original text when
		--         the clause number is not in `tocLinks` (so pandoc.Link is never
		--         handed a nil target)
		function ClauseLink(text, number)
		  local target = tocLinks[number]
		  if target == nil then
		    return pandoc.Str(text)
		  end
		  local label = text:gsub("%-", "‑") --always use non-breaking hyphens for links
		  return pandoc.Link(label, target)
		end

		-- Builds the link for a figure: the label uses non-breaking hyphens and the
		-- target is "#Figure_" followed by the figure number.
		function FigureLink(text, number)
		  local label = string.gsub(text, "%-", "‑")
		  local target = "#Figure_" .. number
		  return pandoc.Link(label, target)
		end

		-- Builds the link for a table: the label uses non-breaking hyphens and the
		-- target is "#Table_" followed by the table number.
		function TableLink(text, number)
		  local label = string.gsub(text, "%-", "‑")
		  local target = "#Table_" .. number
		  return pandoc.Link(label, target)
		end

		--- Generates the link for an annex.
		-- @param text label of the link
		-- @param number annex letter used as key into `annex_headers`
		-- @return a Link to the annex header, or a plain Str with the same text when
		--         the letter is not in `annex_headers` (so pandoc.Link is never handed
		--         a nil target)
		function AnnexLink(text, number)
		  local target = annex_headers[number]
		  if target == nil then
		    return pandoc.Str(text)
		  end
		  return pandoc.Link(text, target)
		end


		--- Helper that links mentions of clauses, figures, tables and annexes.
		-- Scans the inlines of `el` for a Str containing `word` (case-insensitive).
		--  * If that same Str also holds a number present in `tocLinks`, the text up
		--    to the end of the number becomes a Link to that entry.
		--  * Otherwise, when the Str is followed by a space and a Str holding a
		--    number, "word number" becomes one Link built by ClauseLink / FigureLink /
		--    TableLink / AnnexLink. A mention followed by "of" two inlines later is
		--    left alone (it refers to another document).
		-- Whenever no link can be built the original inlines are kept unchanged.
		-- @param el element with inline `content` (only read)
		-- @param word "clause", "figure", "table" or "annex"
		-- @return pandoc.List with the rewritten inlines
		function Substitute(el, word)
		  local NBH = "‑" -- non-breaking hyphen, several bytes long in UTF-8
		  local newContent = pandoc.List({})
		  -- optional alphanumeric prefix, up to three dot-separated groups, optional "-n" suffix
		  local pattern = "(%w*%.?%d*%.?%d*%.?%d+%-?%d*)"
		  if word == "annex" then
		    pattern = "(%u)"
		  end

		  -- Finds the number in `original`, matching on a copy where non-breaking
		  -- hyphens are plain "-". Returns the number (with plain hyphens) and the
		  -- byte index in `original` where the match ends, or nil.
		  local function locate(original)
		    local normalized = original:gsub(NBH, "-")
		    local _, finish, number = normalized:find(pattern)
		    if not number then
		      return nil
		    end
		    -- map the end offset back to `original`: each non-breaking hyphen takes
		    -- #NBH bytes there but a single byte in the normalised copy
		    local pos, seen = 1, 0
		    while seen < finish do
		      if original:sub(pos, pos + #NBH - 1) == NBH then
		        pos = pos + #NBH
		      else
		        pos = pos + 1
		      end
		      seen = seen + 1
		    end
		    return number, pos - 1
		  end

		  -- Builds the link for `word`, or returns nil when no target is known.
		  local function makeLink(label, number)
		    if word == "clause" then
		      if tocLinks[number] ~= nil then
		        return ClauseLink(label, number)
		      end
		    elseif word == "figure" then
		      return FigureLink(label, number)
		    elseif word == "table" then
		      return TableLink(label, number)
		    elseif word == "annex" then
		      if annex_headers[number] ~= nil then
		        return AnnexLink(label, number)
		      end
		    end
		    if debug then print("Unkown behavior for "..word.." or link does not exists") end
		    return nil
		  end

		  local i = 1
		  while el.content[i] do
		    local elem = el.content[i]
		    local handled = false
		    if elem.t == "Str" and elem.text:lower():find(word) then
		      local number, last = locate(elem.text)
		      if number and tocLinks[number] then
		        -- the number sits in the same Str as the word
		        newContent:insert(pandoc.Link(elem.text:sub(1, last), tocLinks[number]))
		        if last < #elem.text then -- if something remains (like a comma or a bracket), append it too
		          newContent:insert(pandoc.Str(elem.text:sub(last + 1)))
		        end
		        handled = true
		      else
		        local separator = el.content[i + 1]
		        local succ = el.content[i + 2] --next string if it exists should be number
		        local spaced = separator and (separator.t == "Space" or separator.t == "SoftBreak")
		        if spaced and succ and succ.t == "Str" then
		          number, last = locate(succ.text)
		          local succ_succ = el.content[i + 4] -- "of" here means the mention refers to another document
		          if not number then
		            if debug then
		              print("Error: type is not Str or number does not follow")
		              print(elem.text, succ.t, succ.text, number)
		            end
		          elseif succ_succ and succ_succ.t == "Str" and succ_succ.text == "of" then
		            if debug then print(word.." referring to another document", succ.text, succ_succ.text, el.content[i+6]) end
		          else
		            -- the label keeps everything up to the end of the number, so a leading
		            -- bracket in front of the number is not lost
		            local link = makeLink(elem.text .. " " .. succ.text:sub(1, last), number)
		            if link then
		              newContent:insert(link)
		              if debug then print(succ.text, number) end
		              if last < #succ.text then -- if something remains (like a comma or a bracket), append it too
		                newContent:insert(pandoc.Str(succ.text:sub(last + 1)))
		              end
		              i = i + 2 --the space and the number are now part of the link
		              handled = true
		            end
		          end
		        end
		      end
		    end
		    if not handled then
		      --append the other elements normally
		      newContent:insert(elem)
		    end
		    i = i + 1
		  end
		  return newContent
		end

		--- Helper that links every clause number in a list introduced by the word
		-- "clauses", e.g. "clauses 5.2, 5.3 and 5.4".
		-- The list continues over "," and "and"; the first other Str that is not a
		-- clause number known to `tocLinks` ends it. Non-Str inlines are copied
		-- through without ending the list.
		-- @param el element with inline `content` (only read)
		-- @return pandoc.List with the rewritten inlines
		function MultipleClauses(el)
		  local newContent = pandoc.List({})
		  -- optional alphanumeric prefix, up to three dot-separated groups, optional "-n" suffix
		  local pattern = "(%w*%.?%d*%.?%d*%.?%d+%-?%d*)"
		  local calusesFound = false --true from the word "clauses" until the list of numbers ends

		  for _, elem in ipairs(el.content) do
		    local replaced = false
		    if elem.t == "Str" then
		      if elem.text:lower():find("clauses") then
		        --this may be the start of a list of multiple clauses
		        calusesFound = true
		      elseif calusesFound then
		        -- compare whole words: a substring test would also accept "standard" or "handle"
		        local isSeparator = elem.text == "," or elem.text == "and"
		        if not isSeparator then
		          local startIndex, endIndex, number = elem.text:find(pattern)
		          if number and tocLinks[number] ~= nil then
		            if startIndex > 1 then
		              --keep what precedes the number (like an opening bracket)
		              newContent:insert(pandoc.Str(elem.text:sub(1, startIndex - 1)))
		            end
		            newContent:insert(ClauseLink(elem.text:sub(startIndex, endIndex), number))
		            if endIndex < #elem.text then
		              --add the rest of the string too
		              newContent:insert(pandoc.Str(elem.text:sub(endIndex + 1)))
		            end
		            replaced = true
		          else
		            --neither a separator nor a known clause number: the list is over
		            calusesFound = false
		          end
		        end
		      end
		    end
		    if not replaced then
		      newContent:insert(elem)
		    end
		  end
		  return newContent
		end

		-- Helpers (isAllPlain / Normalize) that undo the Plain wrapping found inside
		-- the custom divs created by filter_1.lua.
		-- isAllPlain: true when `el` has a content list and every child in it is a
		-- Plain block (an empty list counts as all-plain); false when `el.content`
		-- is missing.
		function isAllPlain(el)
		  local children = el.content
		  if not children then
		    return false
		  end
		  for index = 1, #children do
		    if children[index].t ~= "Plain" then
		      return false
		    end
		  end
		  return true
		end

		-- Returns the content of `el` with Plain wrappers removed: when every child
		-- is a Plain block, the children's own contents are concatenated into one new
		-- list; otherwise `el.content` is returned as it is.
		function Normalize(el)
		  if not isAllPlain(el) then
		    return el.content
		  end
		  local flattened = pandoc.List()
		  for _, wrapper in ipairs(el.content) do
		    for _, inner in ipairs(wrapper.content) do
		      flattened:insert(inner)
		    end
		  end
		  return flattened
		end

		-- Replaces clause lists and clause / figure / table / annex mentions in
		-- `el.content` with links, in that order, and returns `el`.
		function Linking(el)
		  el.content = MultipleClauses(el)
		  for _, word in ipairs({ "clause", "figure", "table", "annex" }) do
		    el.content = Substitute(el, word)
		  end
		  return el
		end

		--Pandoc filters
		if FORMAT:match 'html' then
		-- Div filter: normalises the content, then applies the linking helpers.
		-- Figure mentions are not linked inside "TF" divs and table mentions are not
		-- linked inside "TH" divs (the check uses the first class of the div).
		function Div(el)
		  el.content = Normalize(el)
		  el.content = MultipleClauses(el)
		  el.content = Substitute(el, "clause")
		  local style = el.classes[1]
		  if style ~= "TF" then
		    el.content = Substitute(el, "figure")
		  end
		  if style ~= "TH" then
		    el.content = Substitute(el, "table")
		  end
		  el.content = Substitute(el, "annex")
		  return el
		end

		-- Para filter: normalises the content, then applies all linking helpers.
		function Para(el)
		  el.content = Normalize(el)
		  return Linking(el)
		end


		-- Plain filter: applies all linking helpers to the element.
		function Plain(el)
		  return Linking(el)
		end

		-- Strong filter: applies all linking helpers to the element.
		function Strong(el)
		  return Linking(el)
		end

		-- Emph filter: applies all linking helpers to the element.
		function Emph(el)
		  return Linking(el)
		end

		-- Underline filter: applies all linking helpers to the element.
		function Underline(el)
		  return Linking(el)
		end

		-- Span filter: applies all linking helpers to the element.
		function Span(el)
		  return Linking(el)
		end

		-- Str filter: substitutes bibliographic references such as "[1]" or "[i.2]"
		-- with links.
		-- Every reference in the string whose key is present in `references` becomes a
		-- Link; the text around the links is kept as Str elements and the whole
		-- sequence is returned wrapped in a Span. Strings without a known reference
		-- return nothing, which leaves the element unchanged.
		function Str(el)
		  local text = el.text
		  local pieces = {}
		  local emitted = 1 -- first byte of `text` not yet copied into `pieces`
		  local scan = 1 -- position from which the next reference is searched
		  while true do
		    local startIndex, endIndex, reference, key = text:find("(%[(i?%.?%d+)%])", scan)
		    if not startIndex then
		      break
		    end
		    if references[key] then
		      if startIndex > emitted then
		        pieces[#pieces + 1] = pandoc.Str(text:sub(emitted, startIndex - 1))
		      end
		      pieces[#pieces + 1] = pandoc.Link(reference, references[key])
		      emitted = endIndex + 1
		    end
		    -- unknown references stay part of the surrounding text
		    scan = endIndex + 1
		  end
		  if #pieces == 0 then
		    return nil
		  end
		  if emitted <= #text then
		    pieces[#pieces + 1] = pandoc.Str(text:sub(emitted))
		  end
		  return pandoc.Span(pieces)
		end
		end
		No newline at end of file

html_to_docx.py

0 → 100644

+0 −0

File added.

Preview size limit exceeded, changes collapsed.