import calendar
import sys
import time
import xml.dom.minidom as mini
class C:
    """Namespace for the constants shared by the feed-merging functions."""

    # strptime format used by parsetime(); note the literal "+00:00" suffix,
    # so only timestamps written with exactly that offset text will match.
    TIMEFMT = "%Y-%m-%dT%H:%M:%S+00:00"
    # Skeleton Atom document parsed by merge_docs() as the container that the
    # merged entries are appended to. The backslash after the opening quotes
    # keeps the XML declaration on the very first line of the string.
    EMPTYFEED = """\
<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns:yt="http://www.youtube.com/xml/schemas/2015" xmlns:media="http://search.yahoo.com/mrss/" xmlns="http://www.w3.org/2005/Atom">
<title>merged feed</title>
<author>
<name>comrade</name>
</author>
</feed>"""
def nodetext(node):
    """Return the concatenated data of the direct text children of *node*.

    Only immediate children whose ``nodeType`` is ``TEXT_NODE`` contribute;
    text inside nested elements and any other node kinds are skipped.
    An element without text children yields the empty string.
    """
    return "".join(
        child.data
        for child in node.childNodes
        if child.nodeType == child.TEXT_NODE
    )
def parsetime(s):
    """Convert the timestamp string *s* to integer seconds since the epoch.

    *s* is parsed with ``C.TIMEFMT`` and the resulting struct is interpreted
    as UTC by ``calendar.timegm``. ``time.strptime`` raises ``ValueError``
    when *s* does not match the format.
    """
    return calendar.timegm(time.strptime(s, C.TIMEFMT))
def entrytime(entry):
    """Return the timestamp of *entry* as produced by ``parsetime``.

    The first ``updated`` element found under *entry* is preferred; when
    there is none, the first ``published`` element is used instead. If the
    entry carries neither, 0 is returned.
    """
    for tag in ("updated", "published"):
        found = entry.getElementsByTagName(tag)
        if found:
            stamp = nodetext(found[0]).strip()
            return parsetime(stamp)
    return 0
def merge_docs(docs):
    """Build one feed document holding the entries of all *docs*, newest first.

    Every ``entry`` element of each parsed document in *docs* is collected,
    ordered by descending ``entrytime`` (ties broken by descending position
    in the collected list) and appended to a fresh document created from
    ``C.EMPTYFEED``.

    The entry nodes themselves are appended, not copies of them; per the
    DOM ``appendChild`` contract a node that already has a parent is
    detached from it first, so the input documents lose their entries.

    Returns the new merged document.
    """
    entries = []
    for doc in docs:
        entries += doc.documentElement.getElementsByTagName("entry")

    # Timestamps are computed for every entry before any node is moved.
    stamps = [entrytime(e) for e in entries]
    order = sorted(
        range(len(entries)),
        key=lambda i: (stamps[i], i),
        reverse=True,
    )

    merged = mini.parseString(C.EMPTYFEED)
    feed = merged.documentElement
    for i in order:
        feed.appendChild(entries[i])
    return merged
def main_merge(paths):
    """Merge the feed files at *paths* and write the result to ``merged.xml``.

    Each path is parsed with minidom, the documents are combined by
    ``merge_docs`` and the merged document is serialised, without any added
    indentation or newlines, to ``merged.xml`` (a relative path, so it is
    resolved against the current working directory).

    All DOM trees are unlinked afterwards, including when parsing, merging
    or writing raises; the exception itself still propagates to the caller.
    """
    docs = []
    merged = None
    try:
        for p in paths:
            docs.append(mini.parse(p))
        merged = merge_docs(docs)
        out = "merged.xml"
        # writexml's ``encoding`` argument only sets the encoding named in
        # the XML declaration; the actual bytes are produced by the file
        # object. Open the file as UTF-8 explicitly so the content matches
        # the declaration instead of depending on the locale default.
        with open(out, "w", encoding="utf-8") as f:
            merged.writexml(
                f,
                indent="",
                addindent="",
                newl="",
                encoding="UTF-8",
            )
    finally:
        if merged is not None:
            merged.unlink()
        for d in docs:
            d.unlink()
def main():
    """Script entry point: merge the feed files named on the command line."""
    paths = sys.argv[1:]
    main_merge(paths)
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()