1
0
mirror of https://github.com/moparisthebest/xeps synced 2025-01-10 13:28:11 -05:00
xeps/tools/xep2md.lua
Kim Alvefur 318e25bd6c xep2md: fix spaces around links or spans
Don't remember why it did this here. There's another spot where it
removes longer stretches of whitespace. Doesn't trim leading or trailing
whitespace tho, but that's what breaks it here.
2021-04-03 14:19:54 +02:00

627 lines
15 KiB
Lua

#!/usr/bin/env lua5.3
-- XEP to Markdown converter
--
-- Copyright (C) 2021 Kim Alvefur
--
-- This file is released under the MIT license.
--
-- Invoke with:
-- xmllint --nonet --noent --loaddtd "$@" | lua5.3 -lluarocks.loader xep2md.lua
-- Inlined util.events from Prosody, you may wanna skip ahead ~160 lines
-- or so to the main script.
package.preload["util.events"] = (function()
	-- Prosody IM
	-- Copyright (C) 2008-2010 Matthew Wild
	-- Copyright (C) 2008-2010 Waqas Hussain
	--
	-- This project is MIT/X11 licensed. Please see the
	-- COPYING file in the source package for more information.
	--

	local pairs = pairs;
	local t_insert = table.insert;
	local t_remove = table.remove;
	local t_sort = table.sort;
	local setmetatable = setmetatable;
	local next = next;

	-- Everything this module needs was captured in the locals above. From here
	-- on there is no global environment, so an accidental global access raises
	-- an error instead of silently creating a global.
	local _ENV = nil;

	--- Create a new, independent event dispatcher.
	-- Handlers for an event run in descending priority order. The first
	-- handler that returns a non-nil value stops the dispatch, and that value
	-- becomes the result of fire_event().
	-- @return table with the dispatcher's functions
	local function new()
		-- event name -> array of handlers sorted by priority. This is a cache:
		-- missing entries are rebuilt on demand from event_map (see __index).
		local handlers = {};
		-- array of wrappers applied to every event, or nil if there are none
		local global_wrappers;
		-- event name -> array of wrappers for that event only
		local wrappers = {};
		-- event name -> { [handler function] = priority }
		local event_map = {};

		-- __index metamethod of `handlers`: builds and caches the sorted
		-- handler array for `event`, or returns nothing if it has no handlers.
		local function _rebuild_index(handlers, event)
			local _handlers = event_map[event];
			if not _handlers or next(_handlers) == nil then return; end
			local index = {};
			for handler in pairs(_handlers) do
				t_insert(index, handler);
			end
			t_sort(index, function(a, b) return _handlers[a] > _handlers[b]; end);
			handlers[event] = index;
			return index;
		end;
		setmetatable(handlers, { __index = _rebuild_index });

		--- Register `handler` for `event` with the given priority (default 0).
		-- Registering the same function again only updates its priority.
		local function add_handler(event, handler, priority)
			local map = event_map[event];
			if map then
				map[handler] = priority or 0;
			else
				map = {[handler] = priority or 0};
				event_map[event] = map;
			end
			-- drop the cached sorted array so that it is rebuilt on next use
			handlers[event] = nil;
		end;

		--- Unregister `handler` from `event`; unknown handlers are ignored.
		local function remove_handler(event, handler)
			local map = event_map[event];
			if map then
				map[handler] = nil;
				handlers[event] = nil;
				if next(map) == nil then
					event_map[event] = nil;
				end
			end
		end;

		--- Return the sorted handler array for `event`, or nil if there is none.
		local function get_handlers(event)
			return handlers[event];
		end;

		--- Register every `event = handler` pair of the table, at priority 0.
		local function add_handlers(handlers)
			for event, handler in pairs(handlers) do
				add_handler(event, handler);
			end
		end;

		--- Unregister every `event = handler` pair of the table.
		local function remove_handlers(handlers)
			for event, handler in pairs(handlers) do
				remove_handler(event, handler);
			end
		end;

		-- Run the handlers of `event_name` without involving any wrappers.
		local function _fire_event(event_name, event_data)
			local h = handlers[event_name];
			if h then
				for i=1,#h do
					local ret = h[i](event_data);
					if ret ~= nil then return ret; end
				end
			end
		end;

		--- Fire an event, passing it through the wrappers first.
		-- Wrappers of the event itself run first (most recently added first),
		-- then the global wrappers, then the handlers. Each wrapper receives
		-- a continuation function as first argument and decides whether and
		-- how to call it.
		local function fire_event(event_name, event_data)
			local w = wrappers[event_name] or global_wrappers;
			if w then
				local curr_wrapper = #w;
				local function c(event_name, event_data)
					curr_wrapper = curr_wrapper - 1;
					if curr_wrapper == 0 then
						if global_wrappers == nil or w == global_wrappers then
							return _fire_event(event_name, event_data);
						end
						-- per-event wrappers are done, continue with the global ones
						w, curr_wrapper = global_wrappers, #global_wrappers;
						return w[curr_wrapper](c, event_name, event_data);
					else
						return w[curr_wrapper](c, event_name, event_data);
					end
				end
				return w[curr_wrapper](c, event_name, event_data);
			end
			return _fire_event(event_name, event_data);
		end

		--- Add a wrapper for `event_name`, or a global one if it is `false`.
		local function add_wrapper(event_name, wrapper)
			local w;
			if event_name == false then
				w = global_wrappers;
				if not w then
					w = {};
					global_wrappers = w;
				end
			else
				w = wrappers[event_name];
				if not w then
					w = {};
					wrappers[event_name] = w;
				end
			end
			w[#w+1] = wrapper;
		end

		--- Remove a wrapper previously added with add_wrapper().
		-- Pass `false` as event name to remove a global wrapper.
		local function remove_wrapper(event_name, wrapper)
			local w;
			if event_name == false then
				w = global_wrappers;
			else
				w = wrappers[event_name];
			end
			if not w then return; end
			-- Walk backwards (explicit -1 step) so that removing an entry does
			-- not shift the entries that still have to be checked.
			for i = #w, 1, -1 do
				if w[i] == wrapper then
					t_remove(w, i);
				end
			end
			if #w == 0 then
				-- An empty wrapper list must not stay around: fire_event()
				-- would try to call its non-existent entry 0.
				if event_name == false then
					global_wrappers = nil;
				else
					wrappers[event_name] = nil;
				end
			end
		end

		return {
			add_handler = add_handler;
			remove_handler = remove_handler;
			add_handlers = add_handlers;
			remove_handlers = remove_handlers;
			get_handlers = get_handlers;
			wrappers = {
				add_handler = add_wrapper;
				remove_handler = remove_wrapper;
			};
			add_wrapper = add_wrapper;
			remove_wrapper = remove_wrapper;
			fire_event = fire_event;
			_handlers = handlers;
			_event_map = event_map;
		};
	end

	return {
		new = new;
	};
end);
local lxp = require "lxp";
local lom = require "lxp.lom";
local events = require"util.events".new();
local have_yaml, yaml = pcall(require, "lyaml");
-- Callback table handed to the XML parser; the StartElement, CharacterData
-- and EndElement callbacks are attached to it further down.
local handler = {};
-- Names of the currently open elements, outermost first. StartElement pushes
-- onto it and EndElement pops from it.
local stack = {};
-- Header metadata collected while parsing, keyed by field name.
local meta = {};
-- While true, output() discards everything. It is set to false by the
-- "header/" handler, i.e. once the header element has been closed.
local no_write = true;
--- Write the given values to standard output.
-- Nothing is written while the `no_write` flag is set.
local function output(...)
	if not no_write then
		io.write(...);
	end
end
--- Event handler that emits two newlines, ending the current block.
-- It returns true, which stops any lower-priority handler of the same event.
local function print_empty_line()
	local block_separator = "\n\n";
	output(block_separator);
	return true;
end
-- Pieces of character data received since the last element boundary, or nil
-- when nothing is pending.
local text_buf;

-- FIXME LuaExpat claims to not require this hack
--- Flush the pending character data as one "#text" event.
-- The pieces are joined first, so handlers see a text node as a whole even
-- if the parser delivered it in several calls. Empty text fires no event.
local function CharacterDataDone()
	if not text_buf then
		return;
	end
	local text = table.concat(text_buf);
	if text ~= "" then
		events.fire_event("#text", { stack = stack, text = text });
	end
	text_buf = nil;
end
--- Parser callback for an opening tag.
-- Flushes pending text, pushes the element name onto the stack and fires an
-- event named after the element, carrying the stack and the attributes.
function handler:StartElement(tagname, attr)
	CharacterDataDone();
	-- A name of the form "<prefix>\1<rest>" is rewritten to "{<prefix>}<rest>".
	local name = tagname:gsub("^([^\1]+)\1", "{%1}");
	stack[#stack + 1] = name;
	events.fire_event(name, { stack = stack, attr = attr });
end
--- Parser callback for character data.
-- The data is only buffered here; CharacterDataDone() turns the buffer into
-- a single "#text" event at the next element boundary.
function handler:CharacterData(data)
	if not text_buf then
		text_buf = {};
	end
	text_buf[#text_buf + 1] = data;
end
--- Parser callback for a closing tag.
-- Flushes pending text, pops the element from the stack and fires the event
-- "<element name>/". The stack in the event no longer contains the element.
function handler:EndElement()
	CharacterDataDone();
	local closed = table.remove(stack);
	events.fire_event(closed .. "/", { stack = stack });
end
-- Oh god oh god we're all gonna die!
--- Put a backslash in front of each of the characters ' & < > " in the
-- event's text. The event is modified in place and nothing is returned, so
-- the dispatch continues with the next handler.
local function escape_text(event)
	local escaped = event.text:gsub("['&<>\"]", "\\%1");
	event.text = escaped;
end
-- Highest priority: escape the text before any other handler looks at it.
events.add_handler("#text", escape_text, 1000);

-- Next, offer the text to the handlers of "<innermost element>#text". If one
-- of them returns a value the text counts as handled.
events.add_handler("#text", function (event)
	local open_elements = event.stack;
	local innermost = open_elements[#open_elements];
	return events.fire_event(innermost .. "#text", event);
end, 10);

-- Fallback: print the text as it is, unless it consists of whitespace only.
events.add_handler("#text", function (event)
	local text = event.text;
	if text:find("%S") then
		output(text);
	end
	return true;
end);
-- Content model of the header element; every word in it is an element name
-- whose text is stored as a metadata field of the same name.
local header_schema = [[
(title , abstract , legal , number , status , lastcall* ,
interim* , type , sig , approver* , dependencies , supersedes ,
supersededby , shortname , schemaloc* , registry? , discuss? ,
expires? , author+ , revision+ , councilnote?)
]];
for field in header_schema:gmatch("%w+") do
	events.add_handler(field.."#text", function (event)
		-- Strip surrounding whitespace. Unlike a "%S.*%S" match this also
		-- keeps values that are a single character long.
		local value = event.text:match("^%s*(.-)%s*$");
		-- Whitespace-only text is just indentation between child elements.
		-- It must not overwrite what has been collected for this field
		-- already, e.g. the author list or the lists built from <spec/>.
		if value ~= "" then
			meta[field] = value;
		end
		-- Header text is metadata and is never printed directly.
		return true;
	end);
end
do
	-- Fields of the author currently being read, or nil outside of <author/>.
	local author;
	-- All authors seen so far, as formatted strings. The list is kept here,
	-- not only in meta.author, because the generic "author#text" handler
	-- overwrites meta.author whenever text appears directly inside <author/>,
	-- which would otherwise discard the authors collected earlier.
	local authors = {};

	events.add_handler("author", function (event)
		author = { };
		return true;
	end);

	for _, field in pairs{"firstname", "surname", "email", "jid"} do
		events.add_handler(field.."#text", function (event)
			-- Outside of <author/> the text is left to other handlers.
			if author then
				author[field] = event.text;
				return true;
			end
		end);
	end

	-- Format the finished author as "First Last <email> <xmpp:jid>", leaving
	-- out the parts that are not available, and publish the updated list.
	events.add_handler("author/", function (event)
		local formatted;
		if author.email and author.jid then
			formatted = string.format("%s %s <%s> <xmpp:%s>", author.firstname, author.surname, author.email, author.jid);
		elseif author.email then
			formatted = string.format("%s %s <%s>", author.firstname, author.surname, author.email);
		else
			formatted = string.format("%s %s", author.firstname, author.surname);
		end
		table.insert(authors, formatted);
		meta.author = authors;
		author = nil;
		return true;
	end);
end
do
	-- Revision currently being read, or nil outside of <revision/>.
	local current;
	-- Every finished revision, in document order; published as meta.revision.
	local revisions = {};

	-- Build a handler that stores the text as `field` of the current
	-- revision. Outside of a revision it returns nothing, so that other
	-- handlers of the same event get their turn.
	local function store_field(field)
		return function (event)
			if not current then
				return;
			end
			current[field] = event.text;
			return true;
		end
	end
	for _, field in pairs{"version", "date", "initials"} do
		events.add_handler(field.."#text", store_field(field));
	end

	-- Append non-blank remark text to the current revision. Returns nothing,
	-- so the remaining handlers for the text still run.
	local function handle_remark(event)
		local text = event.text;
		if current and text and text:match("%S") then
			table.insert(current.remark, text);
		end
	end
	events.add_handler("remark#text", handle_remark, 100);

	-- Paragraphs count as remark text only while a <remark/> is open.
	events.add_handler("remark", function ()
		events.add_handler("p#text", handle_remark, 100);
	end);
	events.add_handler("remark/", function ()
		events.remove_handler("p#text", handle_remark);
	end);

	events.add_handler("revision", function ()
		current = {remark={}};
		return true;
	end);
	events.add_handler("revision/", function ()
		table.insert(revisions, current);
		meta.revision = revisions;
		current = nil;
		return true;
	end);
end
-- Remember the first date found in the document as the document date.
-- Nothing is returned, so other "date#text" handlers still see the text.
events.add_handler("date#text", function (event)
	if not meta or meta.date then
		return;
	end
	meta.date = event.text;
end, 1);
-- Collect the text of each <spec/> in a list stored under the name of the
-- element that contains it.
events.add_handler("spec#text", function (event)
	if not meta then return end
	-- the element just below the innermost one on the stack, i.e. the parent
	local kind = stack[#stack-1];
	local specs = meta[kind];
	if specs then
		table.insert(specs, event.text);
	else
		meta[kind] = { event.text };
	end
end);
-- End of the header: enable output and print the collected metadata.
-- With a usable YAML library the metadata table is dumped through it.
-- Otherwise up to three lines starting with "% " are printed: the title, the
-- authors and, if known, the date.
events.add_handler("header/", function (event)
	no_write = false;
	if next(meta) ~= nil then
		if meta.title and meta.number then
			meta.title = "XEP-"..meta.number..": "..meta.title;
		end
		if have_yaml and yaml.dump then
			output(yaml.dump({meta}));
		else
			-- A missing title must not abort the conversion; print an empty
			-- title line, just as a missing author yields an empty line below.
			print("% "..(meta.title or ""));
			if type(meta.author) == "table" then
				print("% "..table.concat(meta.author, "; "));
			elseif meta.author then
				print("% "..meta.author);
			else
				print("% ");
			end
			if meta.date then
				print("% "..meta.date);
			end
		end
	end
	return true;
end);
-- Headings for <section1/> to <section6/>.
for level = 1, 6 do
	local element = "section"..level;
	local hashes = string.rep("#", level);

	-- A newline before the heading; returns nothing so that the handlers
	-- which print the heading itself still run.
	events.add_handler(element, function ()
		output("\n");
	end, 10);

	-- A newline after the section.
	events.add_handler(element.."/", function ()
		output("\n");
		return true;
	end, 10);

	-- "#"-style heading with one "#" per level, plus the anchor if present.
	events.add_handler(element, function (event)
		local attr = event.attr;
		assert(attr.topic, "no @topic");
		output(hashes, " ", attr.topic);
		local anchor = attr.anchor;
		if anchor and anchor ~= "" then
			output(" {#", anchor, "}\n")
		else
			output("\n");
		end
		return true;
	end);
end
-- The two top heading levels are printed as underlined headings instead:
-- the topic on one line and a row of "=" (section1) or "-" (section2) below
-- it. These handlers have priority 1 and return true, so the "#"-style
-- handler registered at the default priority is not reached for them.
for element, underline in pairs({ section1 = "=", section2 = "-" }) do
	events.add_handler(element, function (event)
		local attr = event.attr;
		local topic, anchor = attr.topic, attr.anchor;
		output(topic);
		if anchor and anchor ~= "" then
			output(" {#", anchor, "}");
		end
		output("\n", string.rep(underline, #topic), "\n\n");
		return true;
	end, 1);
end
--- Replace every run of whitespace in the event's text by a single space.
-- Leading and trailing whitespace is not trimmed, it is only collapsed to
-- one space. The event is modified in place and nothing is returned.
local function normalize_whitespace(event)
	local collapsed = event.text:gsub("%s+", " ");
	event.text = collapsed;
end
-- Collapse whitespace in the text of paragraphs, list items and definition
-- list terms and descriptions before it is printed.
for _, element in ipairs({ "p", "li", "dt", "dd" }) do
	events.add_handler(element.."#text", normalize_whitespace, 10);
end
-- Number of the next example; examples are numbered in document order.
local example_count = 1;

-- Start of an example: a numbered heading with the optional caption,
-- followed by the opening fence. Escaping is turned off for the content.
events.add_handler("example", function (event)
	local number = example_count;
	output("\n#### Example ", number, ". ");
	local caption = event.attr.caption;
	if caption then
		output(caption, " ")
	end
	output("{#example-", number, " .unnumbered}\n\n")
	example_count = number + 1;
	output("``` {.xml .example}\n");
	events.remove_handler("#text", escape_text);
end);

-- Example content, without surrounding whitespace, on lines of its own.
events.add_handler("example#text", function (event)
	local trimmed = event.text:match("^%s*(.-)%s*$");
	output(trimmed, "\n");
	return true;
end);

-- End of an example: turn escaping back on and close the fence.
events.add_handler("example/", function ()
	events.add_handler("#text", escape_text, 1000);
	output("```\n\n");
	return true;
end);
-- A <note/> is wrapped in " ^[" and "]".
for event_name, markup in pairs({ ["note"] = " ^[", ["note/"] = "]" }) do
	events.add_handler(event_name, function ()
		output(markup);
		return true;
	end);
end
-- TODO magically import citation data
-- Citations are printed in bold. If the metadata has a `references` table
-- with an entry for the citation, a "[@id]" reference is appended; the id is
-- the cited text in lower case with everything but letters and digits removed.
events.add_handler("cite#text", function (event)
	local text = event.text;
	output("**", text, "**");
	local references = meta.references;
	if references then
		local stripped = text:gsub("%W", "");
		local refid = stripped:lower();
		if references[refid] then
			output("[@", refid, "]");
		end
	end
	return true;
end);
-- Target of the link that is currently open, or nil.
local url;

-- Links with a url attribute become "[text](url)"; links without one
-- produce no markup of their own.
events.add_handler("link", function (event)
	url = event.attr.url;
	if url then
		output("[");
	end
	return true;
end);

events.add_handler("link/", function ()
	local target = url;
	if target then
		output("](", target, ")");
		url = nil;
	end
	return true;
end);
-- Nesting depth of the list currently open (0 outside of lists) and the kind
-- of the innermost list, "ul" or "ol".
local list_depth, list_type = 0, "ul";

-- Track entering and leaving lists, for ordered lists as well as unordered
-- ones. The blank-line handlers registered for these same events have
-- priority 1 and return true, which ends the dispatch. The tracking handlers
-- therefore need a higher priority, and they return nothing so that the
-- blank line is still printed afterwards.
for _, kind in ipairs({ "ul", "ol" }) do
	events.add_handler(kind, function ()
		list_depth = list_depth + 1;
		list_type = kind;
	end, 2);

	events.add_handler(kind.."/", function (event)
		local stack = event.stack;
		list_depth = list_depth - 1;
		-- The closed list is no longer on the stack; restore the kind of the
		-- nearest list that is still open, if there is one.
		for i = #stack, 1, -1 do
			local element = stack[i]
			if element == "ul" or element == "ol" then
				list_type = element;
				break;
			end
		end
	end, 2);
end

-- Start of a list item: indentation according to the depth, then the marker.
events.add_handler("li", function (event)
	-- NOTE(review): one space per nesting level is what this copy of the
	-- file shows. Markdown processors generally need wider indentation to
	-- treat an item as nested; confirm against the upstream file.
	for i = 2, list_depth do
		output(" ");
	end
	if list_type == "ul" then
		output("- ");
	elseif list_type == "ol" then
		output("#. ");
	end
	return true;
end);
-- Text of a list item is printed with runs of whitespace collapsed.
events.add_handler("li#text", function (event)
	local collapsed = event.text:gsub("%s+", " ");
	output(collapsed);
	return true;
end);

-- Text of a definition is introduced by a line starting with ": ". Nothing
-- is returned, so the text itself is left to the remaining handlers.
events.add_handler("dd#text", function ()
	output("\n: ");
end);
-- Events after which a blank line is printed. print_empty_line returns true,
-- so handlers with a priority below 1 do not run for these events.
for _, event_name in ipairs({ "li/", "ul", "ul/", "ol", "ol/", "p/", "dd/" }) do
	events.add_handler(event_name, print_empty_line, 1);
end
-- Tables: every cell is preceded by "|" and every row is closed by one.
local function printcell()
	output("|");
end
for _, cell in ipairs({ "th", "td" }) do
	events.add_handler(cell, printcell, 1);
end

-- Rows start with a space.
events.add_handler("tr", function () output(" ") end, 1);

-- Number of header cells counted so far in the current table, or nil once
-- the separator row below the first row has been printed.
local th;
local function reset_header_count()
	th = 0;
end
events.add_handler("table", reset_header_count);
events.add_handler("table/", reset_header_count);
events.add_handler("th", function ()
	if th then
		th = th + 1;
	end
end);

-- End of a row, in dispatch order: the closing "|" (priority 3), then after
-- the first row of a table the separator row with one "---|" per header
-- cell (priority 2), and finally the newline (priority 1).
events.add_handler("tr/", printcell, 3);
events.add_handler("tr/", function ()
	if not th then
		return;
	end
	output("\n");
	output(" |");
	output(string.rep("---|", th));
	th = nil;
end, 2);
events.add_handler("tr/", function () output("\n") end, 1);
-- Non-example code blocks, like schemas

-- Start of a code block: open the fence and turn escaping off for the content.
events.add_handler("code", function ()
	output("```xml\n");
	events.remove_handler("#text", escape_text);
	return true;
end);

-- The content is printed without surrounding whitespace, on lines of its own.
events.add_handler("code#text", function (event)
	local trimmed = event.text:match("^%s*(.-)%s*$");
	output(trimmed, "\n");
	return true;
end);

-- End of a code block: turn escaping back on and close the fence.
events.add_handler("code/", function ()
	events.add_handler("#text", escape_text, 1000);
	output("```\n\n");
	return true;
end);
-- Print a "References" heading at the end of the document when reference
-- data is available. This statement runs before any input has been parsed,
-- when the metadata is still empty, so the check has to happen when the
-- document ends, not when the handler is registered.
events.add_handler("xep/", function ()
	if meta.references then
		output("\n\n# References {#references}\n\n");
	end
end);
-- With "--debug" as first argument, trace the conversion on standard error.
if arg[1] == "--debug" then
	local log = io.stderr;

	-- Log the name of every event and the path of open elements.
	events.add_wrapper(false, function (fire_event, event_name, event_data)
		log:write("D: "..event_name.."\n");
		log:write("D: /"..table.concat(event_data.stack, "/").."\n");
		return fire_event(event_name, event_data);
	end);

	-- Render a value as "<type>:<value>", with strings quoted.
	local function describe(value)
		local value_type = type(value);
		if value_type == "string" then
			return value_type..":"..string.format("%q", value);
		end
		return value_type..":"..tostring(value);
	end

	-- Callbacks that the parser looks up but that are not defined are
	-- reported, and replaced by a stub that logs its arguments.
	setmetatable(handler, {
		__index = function (_, missinghandler)
			log:write("D: Missing handler: "..missinghandler.."\n");
			return function (parser, ...)
				log:write("D: ", missinghandler, "(");
				local count = select('#', ...);
				for i = 1, count do
					log:write(describe((select(i, ...))));
					if i ~= count then
						log:write(", ");
					end
				end
				log:write(")\n");
				return "";
			end;
		end;
	});
end
-- Create the parser with the callback table defined above. "\1" is the
-- second argument; StartElement rewrites element names of the form
-- "<prefix>\1<rest>" to "{<prefix>}<rest>", so this is presumably the
-- separator the parser puts between namespace and local name (confirm
-- against the LuaExpat documentation).
local parser = lxp.new(handler, "\1");
-- NOTE(review): sets the parser's base to "." -- presumably the base for
-- resolving relative references; confirm against the LuaExpat documentation.
parser:setbase(".");
--- Iterator over the contents of `file` in pieces of at most `size` bytes.
-- Each step returns whatever `file:read(size)` returns, so a `for` loop over
-- it ends as soon as the read yields nil.
local function chunks(file, size)
	local function next_chunk()
		return file:read(size);
	end
	return next_chunk;
end
if not have_yaml then
	io.stderr:write("lua-yaml missing, header metadata will be incomplete\n");
end

-- Check the results of a parser:parse() call. On failure the error and its
-- position are reported on standard error and the program exits with status 1.
local function check_parse(ok, err, line, col)
	if not ok then
		io.stderr:write("E: "..tostring(err).." on line "..tostring(line)..", col "..tostring(col).."\n");
		os.exit(1);
	end
end

-- Feed standard input to the parser in pieces of 4096 bytes.
for chunk in chunks(io.stdin, 4096) do
	check_parse(parser:parse(chunk));
end

-- Calling parse() without arguments tells the parser that the document is
-- complete. Without this call, input that ends in the middle of the document
-- would not be reported as an error.
check_parse(parser:parse());
parser:close();