mirror of
https://github.com/moparisthebest/xeps
synced 2024-11-21 08:45:04 -05:00
tooling: Tool to extract a XEP metadata list from the repository
This commit is contained in:
parent
a4f01fabf3
commit
1a679824b2
211
tools/extract-metadata.py
Executable file
211
tools/extract-metadata.py
Executable file
@ -0,0 +1,211 @@
|
||||
#!/usr/bin/env python3
|
||||
import pathlib
|
||||
import xml.dom.minidom
|
||||
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
|
||||
DESCRIPTION = """\
|
||||
Extract a list of XEPs with metadata from the xeps repository."""
|
||||
|
||||
EPILOG = """"""
|
||||
|
||||
|
||||
def open_xml(f):
|
||||
return xml.dom.minidom.parse(f)
|
||||
|
||||
|
||||
def find_child(elem, child_tag):
|
||||
for child in elem.childNodes:
|
||||
if hasattr(child, "tagName") and child.tagName == child_tag:
|
||||
return child
|
||||
return None
|
||||
|
||||
|
||||
def find_header(document):
|
||||
header = find_child(document.documentElement, "header")
|
||||
if header is None:
|
||||
raise ValueError("cannot find <header/>")
|
||||
return header
|
||||
|
||||
|
||||
def get_text(elem):
|
||||
return "".join(
|
||||
child.nodeValue
|
||||
for child in elem.childNodes
|
||||
if isinstance(child, (xml.dom.minidom.Text,
|
||||
xml.dom.minidom.CDATASection))
|
||||
)
|
||||
|
||||
|
||||
def children(elem):
|
||||
return [
|
||||
child for child in elem.childNodes
|
||||
if isinstance(child, (xml.dom.minidom.Element))
|
||||
]
|
||||
|
||||
|
||||
def extract_xep_metadata(document):
|
||||
header = find_header(document)
|
||||
|
||||
latest_revision = find_child(header, "revision")
|
||||
if latest_revision is not None:
|
||||
last_revision_version = get_text(find_child(latest_revision, "version"))
|
||||
last_revision_date = get_text(find_child(latest_revision, "date"))
|
||||
remark_el = find_child(latest_revision, "remark")
|
||||
last_revision_remark = None
|
||||
if remark_el is not None:
|
||||
remark_children = children(remark_el)
|
||||
if len(remark_children) == 1 and remark_children[0].tagName == "p":
|
||||
last_revision_remark = get_text(remark_children[0])
|
||||
|
||||
if last_revision_remark is not None:
|
||||
initials_el = find_child(latest_revision, "initials")
|
||||
last_revision_initials = initials_el and get_text(initials_el)
|
||||
else:
|
||||
last_revision_initials = None
|
||||
else:
|
||||
last_revision_version = None
|
||||
last_revision_date = None
|
||||
last_revision_remark = None
|
||||
last_revision_initials = None
|
||||
|
||||
status = get_text(find_child(header, "status"))
|
||||
type_ = get_text(find_child(header, "type"))
|
||||
abstract = " ".join(get_text(find_child(header, "abstract")).split())
|
||||
sig_el = find_child(header, "sig")
|
||||
if sig_el is None:
|
||||
sig = None
|
||||
else:
|
||||
sig = get_text(sig_el)
|
||||
shortname = get_text(find_child(header, "shortname"))
|
||||
if shortname.replace("-", " ").replace("_", " ").lower() in [
|
||||
"not yet assigned", "n/a", "none", "to be assigned",
|
||||
"to be issued"]:
|
||||
shortname = None
|
||||
title = get_text(find_child(header, "title"))
|
||||
|
||||
approver_el = find_child(header, "approver")
|
||||
if approver_el is not None:
|
||||
approver = get_text(approver_el)
|
||||
else:
|
||||
approver = "Board" if type_ == "Procedural" else "Council"
|
||||
|
||||
return {
|
||||
"last_revision": {
|
||||
"version": last_revision_version,
|
||||
"date": last_revision_date,
|
||||
"initials": last_revision_initials,
|
||||
"remark": last_revision_remark,
|
||||
},
|
||||
"status": status,
|
||||
"type": type_,
|
||||
"sig": sig,
|
||||
"abstract": abstract,
|
||||
"shortname": shortname,
|
||||
"title": title,
|
||||
"approver": approver,
|
||||
}
|
||||
|
||||
|
||||
def text_element(tag, text):
|
||||
el = etree.Element(tag)
|
||||
el.text = text
|
||||
return el
|
||||
|
||||
|
||||
def make_metadata_element(number, metadata, accepted, *, protoname=None):
|
||||
result = etree.Element("xep")
|
||||
result.append(text_element("number", number))
|
||||
result.append(text_element("title", metadata["title"]))
|
||||
result.append(text_element("abstract", metadata["abstract"]))
|
||||
result.append(text_element("type", metadata["type"]))
|
||||
result.append(text_element("status", metadata["status"]))
|
||||
result.append(text_element("approver", metadata["approver"]))
|
||||
|
||||
if metadata["shortname"] is not None:
|
||||
result.append(text_element("shortname", metadata["shortname"]))
|
||||
|
||||
if metadata["last_revision"]["version"] is not None:
|
||||
last_revision = metadata["last_revision"]
|
||||
revision_el = etree.Element("last-revision")
|
||||
revision_el.append(text_element("date", last_revision["date"]))
|
||||
revision_el.append(text_element("version", last_revision["version"]))
|
||||
if last_revision["initials"]:
|
||||
revision_el.append(text_element("initials",
|
||||
last_revision["initials"]))
|
||||
if last_revision["remark"]:
|
||||
revision_el.append(text_element("remark",
|
||||
last_revision["remark"]))
|
||||
result.append(revision_el)
|
||||
|
||||
if metadata["sig"] is not None:
|
||||
result.append(
|
||||
text_element("sig", metadata["sig"])
|
||||
)
|
||||
|
||||
if accepted:
|
||||
result.set("accepted", "true")
|
||||
else:
|
||||
result.set("accepted", "false")
|
||||
|
||||
if protoname is not None:
|
||||
result.append(text_element("proto-name", protoname))
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def main():
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description=DESCRIPTION,
|
||||
epilog=EPILOG,
|
||||
)
|
||||
parser.add_argument(
|
||||
"xepdir",
|
||||
nargs="?",
|
||||
type=pathlib.Path,
|
||||
default=pathlib.Path.cwd(),
|
||||
help="Directory where the XEP XMLs are. Defaults to current directory."
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
tree = etree.Element("xep-infos")
|
||||
|
||||
for xepfile in args.xepdir.glob("xep-*.xml"):
|
||||
number = xepfile.name.split("-", 1)[1].split(".", 1)[0]
|
||||
try:
|
||||
number = str(int(number))
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
with xepfile.open("rb") as f:
|
||||
parsed = open_xml(f)
|
||||
|
||||
tree.append(make_metadata_element(
|
||||
number,
|
||||
extract_xep_metadata(parsed),
|
||||
True,
|
||||
))
|
||||
|
||||
for xepfile in (args.xepdir / "inbox").glob("*.xml"):
|
||||
protoname = xepfile.name.rsplit(".", 1)[0]
|
||||
|
||||
with xepfile.open("rb") as f:
|
||||
parsed = open_xml(f)
|
||||
|
||||
tree.append(make_metadata_element(
|
||||
"xxxx",
|
||||
extract_xep_metadata(parsed),
|
||||
False,
|
||||
protoname=protoname
|
||||
))
|
||||
|
||||
sys.stdout.buffer.raw.write(etree.tostring(tree))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
Loading…
Reference in New Issue
Block a user