#!/usr/bin/env lua5.3
-- XEP to Markdown converter
--
-- Copyright (C) 2021 Kim Alvefur
--
-- This file is released under the MIT license.
--
-- Invoke with:
-- xmllint --nonet --noent --loaddtd "$@" | lua5.3 -lluarocks.loader xep2md.lua

-- Inlined util.events from Prosody, you may wanna skip ahead ~200 lines
-- or so to the main script.

package.preload["util.events"] = (function()
-- Prosody IM
-- Copyright (C) 2008-2010 Matthew Wild
-- Copyright (C) 2008-2010 Waqas Hussain
--
-- This project is MIT/X11 licensed. Please see the
-- COPYING file in the source package for more information.
--

local pairs = pairs;
local t_insert = table.insert;
local t_remove = table.remove;
local t_sort = table.sort;
local setmetatable = setmetatable;
local next = next;

-- Everything needed was captured in locals above; block accidental globals.
local _ENV = nil;

--- Create a new, independent event dispatcher.
-- Handlers are called in descending priority order; the first handler that
-- returns a non-nil value stops the dispatch and that value is returned.
-- @return table exposing add_handler, remove_handler, fire_event, etc.
local function new()
	-- Cache: event name -> array of handlers sorted by priority (built lazily).
	local handlers = {};
	-- Wrappers applied to every event (registered with event_name == false).
	local global_wrappers;
	-- Wrappers applied to one specific event name.
	local wrappers = {};
	-- Source of truth: event name -> { [handler] = priority }.
	local event_map = {};

	-- __index metamethod of `handlers`: builds and caches the sorted handler
	-- array for `event` on first access after it was invalidated.
	local function _rebuild_index(handlers, event)
		local _handlers = event_map[event];
		if not _handlers or next(_handlers) == nil then return; end
		local index = {};
		for handler in pairs(_handlers) do
			t_insert(index, handler);
		end
		t_sort(index, function(a, b) return _handlers[a] > _handlers[b]; end);
		handlers[event] = index;
		return index;
	end;
	setmetatable(handlers, { __index = _rebuild_index });

	--- Register `handler` for `event` with the given priority (default 0).
	local function add_handler(event, handler, priority)
		local map = event_map[event];
		if map then
			map[handler] = priority or 0;
		else
			map = {[handler] = priority or 0};
			event_map[event] = map;
		end
		-- Invalidate the sorted cache so it is rebuilt on next use.
		handlers[event] = nil;
	end;

	--- Unregister `handler` from `event`; a no-op if it was not registered.
	local function remove_handler(event, handler)
		local map = event_map[event];
		if map then
			map[handler] = nil;
			handlers[event] = nil;
			if next(map) == nil then
				event_map[event] = nil;
			end
		end
	end;

	--- Return the priority-sorted handler array for `event`, or nil if none.
	local function get_handlers(event)
		return handlers[event];
	end;

	--- Register every `event = handler` pair of the given table (priority 0).
	local function add_handlers(handlers)
		for event, handler in pairs(handlers) do
			add_handler(event, handler);
		end
	end;

	--- Unregister every `event = handler` pair of the given table.
	local function remove_handlers(handlers)
		for event, handler in pairs(handlers) do
			remove_handler(event, handler);
		end
	end;

	-- Dispatch without wrappers: call handlers in order until one returns
	-- a non-nil value.
	local function _fire_event(event_name, event_data)
		local h = handlers[event_name];
		if h then
			for i=1,#h do
				local ret = h[i](event_data);
				if ret ~= nil then return ret; end
			end
		end
	end;

	--- Fire `event_name`, passing `event_data` to each handler.
	-- Event-specific wrappers run first (most recently added first), then the
	-- global wrappers, and finally the handlers themselves.
	-- @return the first non-nil handler (or wrapper) result, if any
	local function fire_event(event_name, event_data)
		local w = wrappers[event_name] or global_wrappers;
		if w then
			local curr_wrapper = #w;
			-- Continuation handed to each wrapper: invokes the next wrapper
			-- in the chain, or the real dispatch once the chain is exhausted.
			local function c(event_name, event_data)
				curr_wrapper = curr_wrapper - 1;
				if curr_wrapper == 0 then
					if global_wrappers == nil or w == global_wrappers then
						return _fire_event(event_name, event_data);
					end
					w, curr_wrapper = global_wrappers, #global_wrappers;
					return w[curr_wrapper](c, event_name, event_data);
				else
					return w[curr_wrapper](c, event_name, event_data);
				end
			end
			return w[curr_wrapper](c, event_name, event_data);
		end
		return _fire_event(event_name, event_data);
	end

	--- Add a wrapper around an event; pass `false` as the event name to wrap
	-- every event.
	local function add_wrapper(event_name, wrapper)
		local w;
		if event_name == false then
			w = global_wrappers;
			if not w then
				w = {};
				global_wrappers = w;
			end
		else
			w = wrappers[event_name];
			if not w then
				w = {};
				wrappers[event_name] = w;
			end
		end
		w[#w+1] = wrapper;
	end

	--- Remove a wrapper previously added with add_wrapper.
	local function remove_wrapper(event_name, wrapper)
		local w;
		if event_name == false then
			w = global_wrappers;
		else
			w = wrappers[event_name];
		end
		if not w then return; end
		-- Walk backwards (explicit -1 step) so removals do not skip entries.
		for i = #w, 1, -1 do
			if w[i] == wrapper then
				t_remove(w, i);
			end
		end
		if #w == 0 then
			-- Global wrappers are keyed by `false`, matching add_wrapper.
			if event_name == false then
				global_wrappers = nil;
			else
				wrappers[event_name] = nil;
			end
		end
	end

	return {
		add_handler = add_handler;
		remove_handler = remove_handler;
		add_handlers = add_handlers;
		remove_handlers = remove_handlers;
		get_handlers = get_handlers;
		wrappers = {
			add_handler = add_wrapper;
			remove_handler = remove_wrapper;
		};
		add_wrapper = add_wrapper;
		remove_wrapper = remove_wrapper;
		fire_event = fire_event;
		_handlers = handlers;
		_event_map = event_map;
	};
end

return {
	new = new;
};
end);

local lxp = require "lxp";
local lom = require "lxp.lom";
local events = require"util.events".new();

-- lyaml is optional; without it only a minimal "% " header block is printed.
local have_yaml, yaml = pcall(require, "lyaml");

-- Callback table handed to the expat parser.
local handler = {};
-- Names of the currently open elements, outermost first.
local stack = {};
-- Metadata collected from the <header> element.
local meta = {};

-- Output is suppressed until the header has been processed.
local no_write = true;

--- Write the arguments to stdout unless output is currently suppressed.
local function output(...)
	if no_write then return end
	io.write(...);
end

--- Event handler that emits a blank line and stops further dispatch.
local function print_empty_line()
	output("\n\n");
	return true;
end

local text_buf;
-- FIXME LuaExpat claims to not require this hack
-- Flush buffered character data as a single "#text" event.
local function CharacterDataDone()
	if text_buf then
		local text = table.concat(text_buf);
		if text ~= "" then
			events.fire_event("#text", { stack = stack, text = text });
		end
		text_buf = nil;
	end
end

-- Element opened: fires an event named after the tag. A namespaced name
-- ("ns\1local", \1 being the separator given to lxp.new) becomes "{ns}local".
function handler:StartElement(tagname, attr)
	CharacterDataDone();
	tagname = tagname:gsub("^([^\1]+)\1", "{%1}");
	table.insert(stack, tagname)
	events.fire_event(tagname, { stack = stack, attr = attr });
end

-- Character data may arrive in several pieces; buffer until the next tag.
function handler:CharacterData(data)
	if text_buf then
		table.insert(text_buf, data)
	else
		text_buf = { data };
	end
end

-- Element closed: fires "<tagname>/" after popping it from the stack.
function handler:EndElement()
	CharacterDataDone();
	events.fire_event(table.remove(stack) .. "/", { stack = stack });
end

-- Oh god oh god we're all gonna die!
-- Backslash-escape characters that are special in XML/HTML.
local function escape_text(event)
	event.text = event.text:gsub("['&<>\"]", "\\%1");
end

events.add_handler("#text", escape_text, 1000);

-- Re-dispatch text as "<current element>#text" so elements can handle
-- their own text content.
events.add_handler("#text", function (event)
	local stack = event.stack;
	return events.fire_event(stack[#stack].."#text", event);
end, 10);

-- Fallback: print any text containing non-whitespace as-is.
events.add_handler("#text", function (event)
	if event.text:find"%S" then
		output(event.text);
	end
	return true;
end);

-- Child elements of <header>; only the names (%w+ runs) are used.
local header_schema = [[
(title , abstract , legal , number , status , lastcall* , interim* , type , sig , approver* , dependencies , supersedes , supersededby , shortname , schemaloc* , registry? , discuss? , expires? , author+ , revision+ , councilnote?)
]];

for field in header_schema:gmatch("%w+") do
	events.add_handler(field.."#text", function (event)
		-- Store the trimmed text. Whitespace-only text (the indentation
		-- between children of container elements such as <author> or
		-- <dependencies>) must not overwrite what was collected so far.
		local trimmed = event.text:match("^%s*(.-)%s*$");
		if trimmed ~= "" then
			meta[field] = trimmed;
		end
		return true;
	end);
end

do
	-- The <author> currently being read, or nil outside of one.
	local author;
	events.add_handler("author", function (event)
		author = { };
		return true;
	end);
	for _, field in pairs{"firstname", "surname", "email", "jid"} do
		events.add_handler(field.."#text", function (event)
			-- Ignore these fields when they appear outside an <author>.
			if author then
				author[field] = event.text;
			end
			return true;
		end);
	end
	-- On </author>, flatten the collected fields into one string and
	-- append it to meta.author.
	events.add_handler("author/", function (event)
		if author.email and author.jid then
			-- NOTE(review): the original format string had no placeholder
			-- for the JID (four arguments, three specifiers); the
			-- "<xmpp:...>" form is an assumption about the intended output.
			author = string.format("%s %s <%s> <xmpp:%s>", author.firstname, author.surname, author.email, author.jid);
		elseif author.email then
			author = string.format("%s %s <%s>", author.firstname, author.surname, author.email);
		else
			author = string.format("%s %s", author.firstname, author.surname);
		end
		local authors = meta.author;
		if type(authors) ~= "table" then
			meta.author = { author; };
		else
			table.insert(authors, author);
		end
		author = nil;
		return true;
	end);
end

do
	-- The <revision> currently being read, or nil outside of one.
	local revision;
	for _, field in pairs{"version", "date", "initials"} do
		events.add_handler(field.."#text", function (event)
			if revision then
				revision[field] = event.text;
				return true;
			end
		end);
	end
	-- Collect non-blank remark text into the current revision.
	local function handle_remark(event)
		if revision and event.text and event.text:match("%S") then
			table.insert(revision.remark, event.text);
		end
	end
	events.add_handler("remark#text", handle_remark, 100);
	-- While inside <remark>, also collect text of nested <p> elements.
	events.add_handler("remark", function (event)
		events.add_handler("p#text", handle_remark, 100);
	end);
	events.add_handler("remark/", function (event)
		events.remove_handler("p#text", handle_remark);
	end);
	events.add_handler("revision", function (event)
		revision = {remark={}};
		return true;
	end);
	local revisions = {};
	events.add_handler("revision/", function (event)
		table.insert(revisions, revision);
		meta.revision = revisions;
		revision = nil;
		return true;
	end);
end

-- The first <date> seen becomes the document date.
events.add_handler("date#text", function (event)
	if meta and not meta.date then
		meta.date = event.text;
	end
end, 1);

-- <spec> values are grouped in a list under the name of the parent element.
events.add_handler("spec#text", function (event)
	if not meta then return end
	local kind = stack[#stack-1];
	if type(meta[kind]) == "table" then
		table.insert(meta[kind], event.text);
	else
		meta[kind] = { event.text };
	end
end);

-- End of <header>: enable output and emit the metadata block.
events.add_handler("header/", function (event)
	no_write = false;
	if next(meta) ~= nil then
		if meta.title and meta.number then
			meta.title = "XEP-"..meta.number..": "..meta.title;
		end
		if have_yaml and yaml.dump then
			output(yaml.dump({meta}));
		else
			-- Three "% " lines: title, author(s), date.
			print("% "..(meta.title or ""));
			if type(meta.author) == "table" then
				print("% "..table.concat(meta.author, "; "));
			elseif meta.author then
				print("% "..meta.author);
			else
				print("% ");
			end
			if meta.date then
				print("% "..meta.date);
			end
		end
	end
	return true;
end);

for i = 1, 6 do
	events.add_handler("section"..i, function ()
		output("\n");
	end, 10);
	events.add_handler("section"..i.."/", function ()
		output("\n");
		return true;
	end, 10);
	-- ATX-style heading ("### Topic {#anchor}"). For levels 1 and 2 this is
	-- pre-empted by the priority-1 handlers registered after this loop.
	events.add_handler("section"..i, function (event)
		assert(event.attr.topic, "no @topic");
		output(string.rep("#", i), " ", event.attr.topic);
		if event.attr.anchor and event.attr.anchor ~= "" then
			output(" {#", event.attr.anchor, "}\n")
		else
			output("\n");
		end
		return true;
	end);
end

-- Level 1 headings are written as the topic underlined with "=".
events.add_handler("section1", function (event)
	output(event.attr.topic);
	if event.attr.anchor and event.attr.anchor ~= "" then
		output(" {#", event.attr.anchor, "}");
	end
	output("\n", string.rep("=", #event.attr.topic), "\n\n");
	return true;
end, 1);

-- Level 2 headings are written as the topic underlined with "-".
events.add_handler("section2", function (event)
	output(event.attr.topic);
	if event.attr.anchor and event.attr.anchor ~= "" then
		output(" {#", event.attr.anchor, "}");
	end
	output("\n", string.rep("-", #event.attr.topic), "\n\n");
	return true;
end, 1);

-- Collapse every run of whitespace into a single space.
local function normalize_whitespace(event)
	event.text = event.text:gsub("%s+", " ")
	-- event.text = event.text:match("^%s*(.-)%s*$")
end

events.add_handler("p#text", normalize_whitespace, 10);
events.add_handler("li#text", normalize_whitespace, 10);
events.add_handler("dt#text", normalize_whitespace, 10);
events.add_handler("dd#text", normalize_whitespace, 10);

-- Running number used for example headings and anchors.
local example_count = 1;
-- <example>: numbered heading followed by a fenced XML block. Escaping is
-- switched off while inside the fence and restored on </example>.
events.add_handler("example", function (event)
	output("\n#### Example ", example_count, ". ");
	if event.attr.caption then
		output(event.attr.caption, " ")
	end
	output("{#example-", example_count, " .unnumbered}\n\n")
	example_count = example_count + 1;
	output("``` {.xml .example}\n");
	events.remove_handler("#text", escape_text);
end);

events.add_handler("example#text", function (event)
	local example_text = event.text:match("^%s*(.-)%s*$");
	output(example_text, "\n");
	return true;
end);

events.add_handler("example/", function ()
	events.add_handler("#text", escape_text, 1000);
	output("```\n\n");
	return true;
end);

-- <note> is wrapped in " ^[" ... "]".
events.add_handler("note", function (event)
	output(" ^[");
	return true;
end);

events.add_handler("note/", function (event)
	output("]");
	return true;
end);

-- TODO magically import citation data
events.add_handler("cite#text", function (event)
	output("**", event.text, "**");
	if meta.references then
		-- Reference id: the cite text lowercased, non-alphanumerics removed.
		local refid = event.text:gsub("%W", ""):lower();
		if meta.references[refid] then
			output("[@", refid, "]");
		end
	end
	return true;
end);

-- URL of the <link> currently open, if it has one.
local url;
events.add_handler("link", function (event)
	url = event.attr.url;
	if url then
		output("[");
	end
	return true;
end);

events.add_handler("link/", function (event)
	if url then
		output("](", url, ")");
		url = nil;
	end
	return true;
end);

-- Nesting depth and kind ("ul" or "ol") of the innermost open list.
local list_depth, list_type = 0, "ul";

-- Track list nesting for both list kinds. These handlers are registered
-- above print_empty_line (priority 1, returns true) and return nothing, so
-- they run first and the blank-line handler still fires afterwards.
for _, kind in ipairs{"ul", "ol"} do
	events.add_handler(kind, function ()
		list_depth = list_depth + 1;
		list_type = kind;
	end, 2);

	events.add_handler(kind.."/", function (event)
		local stack = event.stack;
		list_depth = list_depth - 1;
		-- Restore the kind of the closest enclosing list, if there is one.
		for i = #stack, 1, -1 do
			local element = stack[i]
			if element == "ul" or element == "ol" then
				list_type = element;
				break;
			end
		end
	end, 2);
end

-- List item marker: "- " in unordered lists, "#. " in ordered lists.
events.add_handler("li", function (event)
	for i = 2, list_depth do
		output(" ");
	end
	if list_type == "ul" then
		output("- ");
	elseif list_type == "ol" then
		output("#. ");
	end
	return true;
end);

events.add_handler("li#text", function (event)
	local text = event.text:gsub("%s+", " ");
	output(text);
	return true;
end);

-- Prefix for <dd> text; returns nothing so the text itself is still
-- printed by the fallback "#text" handler.
events.add_handler("dd#text", function (event)
	output("\n: ");
end);

events.add_handler("li/", print_empty_line, 1);
events.add_handler("ul", print_empty_line, 1);
events.add_handler("ul/", print_empty_line, 1);
events.add_handler("ol", print_empty_line, 1);
events.add_handler("ol/", print_empty_line, 1);
events.add_handler("p/", print_empty_line, 1);
events.add_handler("dd/", print_empty_line, 1);

-- Tables: "|" before every cell and at the end of every row.
local function printcell(event)
	output("|");
end

events.add_handler("th", printcell, 1);
events.add_handler("td", printcell, 1);
events.add_handler("tr/", printcell, 3);
events.add_handler("tr", function () output(" ") end, 1);
events.add_handler("tr/", function () output("\n") end, 1);

-- Number of <th> cells seen so far; nil once the separator row was written.
local th;
events.add_handler("table", function ()
	th = 0;
end);
events.add_handler("table/", function ()
	th = 0;
end);

events.add_handler("th", function ()
	if th then
		th = th + 1;
	end
end);
-- After the first row, write a "---|" separator with one column per <th>.
events.add_handler("tr/", function ()
	if th then
		output("\n");output(" |");
		output(string.rep("---|", th));
		th = nil
	end
end, 2);

-- Non-example code blocks, like schemas
events.add_handler("code", function (event)
	output("```xml\n");
	events.remove_handler("#text", escape_text);
	return true;
end);

events.add_handler("code#text", function (event)
	local example_text = event.text:match("^%s*(.-)%s*$");
	output(example_text, "\n");
	return true;
end);

events.add_handler("code/", function ()
	events.add_handler("#text", escape_text, 1000);
	output("```\n\n");
	return true;
end);

-- meta is still empty while handlers are being registered, so whether
-- references exist has to be checked when the document ends.
events.add_handler("xep/", function ()
	if meta.references then
		output("\n\n# References {#references}\n\n");
	end
end);

if arg[1] == "--debug" then
	-- Log every event name and the element stack to stderr.
	events.add_wrapper(false, function (fire_event, event_name, event_data)
		io.stderr:write("D: "..event_name.."\n");
		io.stderr:write("D: /"..table.concat(event_data.stack, "/").."\n");
		return fire_event(event_name, event_data);
	end);
	-- Log lookups of parser callbacks this script does not implement,
	-- together with the arguments they are called with.
	setmetatable(handler, {
		__index = function (_, missinghandler)
			io.stderr:write("D: Missing handler: "..missinghandler.."\n");
			return function (parser, ...)
				io.stderr:write("D: ", missinghandler, "(");
				local count = select('#', ...);
				for i = 1, count do
					local value = select(i, ...);
					local value_t = type(value);
					io.stderr:write(value_t, ":");
					if value_t == "string" then
						io.stderr:write(string.format("%q", value));
					else
						io.stderr:write(tostring(value));
					end
					if i ~= count then
						io.stderr:write(", ");
					end
				end
				io.stderr:write(")\n");
				return "";
			end;
		end;
	});
end

-- "\1" separates namespace and local name in names passed to StartElement.
local parser = lxp.new(handler, "\1");
parser:setbase(".");

--- Iterator yielding successive reads of up to `size` bytes from `file`.
local function chunks(file, size)
	return function ()
		return file:read(size);
	end
end

if not have_yaml then
	io.stderr:write("lua-yaml missing, header metadata will be incomplete\n");
end

-- Report a parse error on stderr and exit with a non-zero status.
local function parse_failed(err, line, col)
	io.stderr:write("E: "..tostring(err).." on line "..tostring(line)..", col "..tostring(col).."\n");
	os.exit(1);
end

for chunk in chunks(io.stdin, 4096) do
	local ok, err, line, col = parser:parse(chunk);
	if not ok then
		parse_failed(err, line, col);
	end
end

-- Calling parse() without arguments ends the document, so that truncated
-- input is reported instead of being silently accepted.
do
	local ok, err, line, col = parser:parse();
	if not ok then
		parse_failed(err, line, col);
	end
end
parser:close();