"""Merge the <entry> elements of several XML feed files into one feed.

Entries are ordered newest first by their <published> timestamp and the
result is written to a single output file.
"""

import calendar
import sys
import time
import xml.dom.minidom as mini


class C:
    """Namespace for module-wide constants."""

    # Timestamp layout accepted by parsetime(); only a literal "+00:00"
    # offset matches, so every parsed value is treated as UTC.
    TIMEFMT = "%Y-%m-%dT%H:%M:%S+00:00"

    # Skeleton document the merged entries are appended to.
    # NOTE(review): the template found in the file contained only the text
    # "merged feed comrade" with no markup, which minidom cannot parse (no
    # root element). It has been rebuilt as a minimal Atom skeleton; confirm
    # the element names and namespace against the intended template.
    EMPTYFEED = """\
<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>merged feed</title>
<author><name>comrade</name></author>
</feed>
"""


def nodetext(node):
    """Return the concatenated data of *node*'s direct text children.

    Text nested inside child elements is not included.
    """
    return ''.join(
        child.data
        for child in node.childNodes
        if child.nodeType == child.TEXT_NODE
    )


def parsetime(s):
    """Convert a timestamp string in C.TIMEFMT to seconds since the epoch.

    Raises:
        ValueError: if *s* does not match C.TIMEFMT.
    """
    return calendar.timegm(time.strptime(s, C.TIMEFMT))


def entrytime(entry):
    """Return the epoch time of *entry*'s first <published> element.

    Returns 0 when the entry has no <published> descendant, which places
    such entries last in the newest-first ordering used by merge_docs().
    """
    published = entry.getElementsByTagName("published")
    if not published:
        return 0
    return parsetime(nodetext(published[0]).strip())


def merge_docs(docs):
    """Build a new document holding every <entry> of *docs*, newest first.

    Args:
        docs: iterable of parsed minidom documents.

    Returns:
        A new minidom Document based on C.EMPTYFEED. The caller is
        responsible for calling unlink() on it.

    The input documents are left unmodified: entries are copied into the
    merged document rather than moved out of their source trees.
    """
    merged = mini.parseString(C.EMPTYFEED)
    feed = merged.documentElement

    entries = []
    for doc in docs:
        entries.extend(doc.documentElement.getElementsByTagName("entry"))

    # Decorate with the list position so the sort never compares DOM nodes.
    # Under reverse=True, entries with equal timestamps come out with the
    # later-collected one first.
    order = sorted(
        ((entrytime(entry), pos) for pos, entry in enumerate(entries)),
        reverse=True,
    )
    for _stamp, pos in order:
        # importNode makes a deep copy owned by the merged document, so the
        # source trees keep their entries and ownerDocument stays correct.
        feed.appendChild(merged.importNode(entries[pos], True))
    return merged


def main_merge(paths, out="merged.xml"):
    """Parse the feed files in *paths* and write the merged feed to *out*.

    Args:
        paths: iterable of XML file paths to merge.
        out: path of the file to write; defaults to "merged.xml".
    """
    docs = []
    merged = None
    try:
        for path in paths:
            docs.append(mini.parse(path))
        merged = merge_docs(docs)
        # The XML declaration written below claims UTF-8, so the file must
        # really be encoded that way instead of using the locale default.
        with open(out, "w", encoding="utf-8") as f:
            merged.writexml(f, indent="", addindent="", newl="",
                            encoding="UTF-8")
    finally:
        # Release the DOM trees even when parsing or writing fails.
        if merged is not None:
            merged.unlink()
        for doc in docs:
            doc.unlink()


def main():
    """Merge the feed files named on the command line."""
    main_merge(sys.argv[1:])


if __name__ == "__main__":
    main()