#!/usr/bin/python -tt
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2006 Duke University
from yum.misc import cElementTree_iterparse as iterparse
from yum.misc import _available_compression, stat_f
from Errors import RepoMDError
import sys
import types
from misc import AutoFileChecksums, to_xml
def ns_cleanup(qn):
    """Strip the ``{namespace}`` qualifier from an element tag name.

    Returns the text that sits between the first and the second ``}`` of
    *qn* (for a normal ``{uri}name`` tag that is simply ``name``), or *qn*
    itself when it contains no ``}`` at all.
    """
    pieces = qn.split('}')
    if len(pieces) < 2:
        # Unqualified name: nothing to strip.
        return qn
    return pieces[1]
class RepoData:
    """represents anything beneath a <data> tag

    The same class is reused for the <delta> children of a <data> element;
    those are collected in ``deltas``.

    Attributes set by __init__/parse:
        type         -- the element's ``type`` attribute, or None
        location     -- (base, href) taken from <location>
        checksum     -- (type, value) taken from <checksum>
        openchecksum -- (type, value) taken from <open-checksum>
        timestamp    -- text of <timestamp>
        dbversion    -- text of <database_version>
        size         -- text of <size>
        opensize     -- text of <open-size>
        deltas       -- list of RepoData built from <delta> children

    All element texts are stored as the parser delivers them (strings or
    None); nothing is converted to a number here.
    """

    # ElementTree stores attributes using the reserved "xml:" prefix under
    # this qualified name, so that is how an xml:base written by dump_xml()
    # shows up again when the document is parsed.
    _XML_BASE_ATTR = '{http://www.w3.org/XML/1998/namespace}base'

    # Child tags whose text is copied verbatim onto the named attribute.
    _TEXT_FIELDS = {
        'timestamp': 'timestamp',
        'database_version': 'dbversion',
        'size': 'size',
        'open-size': 'opensize',
    }

    def __init__(self, elem=None):
        """Create an empty record and, if *elem* is given, fill it from it."""
        self.type = None
        self.location = (None, None)
        self.checksum = (None, None)  # type,value
        self.openchecksum = (None, None)  # type,value
        self.timestamp = None
        self.dbversion = None
        self.size = None
        self.opensize = None
        self.deltas = []

        # Test against None explicitly: an Element with no children is
        # falsy, so a plain truth test would silently ignore a childless
        # element and lose its "type" attribute.
        if elem is not None:
            self.type = elem.attrib.get('type')
            self.parse(elem)

    def parse(self, elem):
        """Fill this record from the direct children of *elem*.

        Unknown child tags are ignored.  Each <delta> child becomes a
        nested RepoData that inherits this record's ``type``.
        """
        for child in elem:
            child_name = ns_cleanup(child.tag)
            if child_name == 'location':
                base = child.attrib.get('base')
                if base is None:
                    # dump_xml() emits the base as xml:base; accept that
                    # spelling too so a dumped document round-trips.
                    base = child.attrib.get(self._XML_BASE_ATTR)
                self.location = (base, child.attrib.get('href'))
            elif child_name == 'checksum':
                self.checksum = (child.attrib.get('type'), child.text)
            elif child_name == 'open-checksum':
                self.openchecksum = (child.attrib.get('type'), child.text)
            elif child_name in self._TEXT_FIELDS:
                setattr(self, self._TEXT_FIELDS[child_name], child.text)
            elif child_name == 'delta':
                delta = RepoData(child)
                delta.type = self.type
                self.deltas.append(delta)

    def dump_xml(self):
        """Return this record serialised as a <data>...</data> XML string."""
        out = ["""<data type="%s">\n""" % to_xml(self.type, attrib=True)]

        for (attr, xmlname) in (('checksum', 'checksum'),
                                ('openchecksum', 'open-checksum')):
            if hasattr(self, attr):
                val = getattr(self, attr)
                if val[0]:
                    out.append(""" <%s type="%s">%s</%s>\n""" % (
                        xmlname, to_xml(val[0], attrib=True),
                        to_xml(val[1]), xmlname))

        if hasattr(self, 'location'):
            val = getattr(self, 'location')
            if val[1]:
                if val[0]:
                    out.append(""" <location xml:base="%s" href="%s"/>\n""" % (
                        to_xml(val[0], attrib=True),
                        to_xml(val[1], attrib=True)))
                else:
                    out.append(""" <location href="%s"/>\n"""
                               % to_xml(val[1], attrib=True))

        for (attr, xmlname) in (('timestamp', 'timestamp'),
                                ('dbversion', 'database_version'),
                                ('size', 'size'), ('opensize', 'open-size')):
            val = getattr(self, attr)
            if val:
                out.append(""" <%s>%s</%s>\n""" % (xmlname, to_xml(val),
                                                   xmlname))

        for delta in self.deltas:
            # change tag to "delta" and increase indent: drop the opening
            # <data ...> line plus the closing </data> line and the empty
            # string that follows the final newline.
            body = '\n '.join(delta.dump_xml().split('\n')[1:-2])
            out.append(' <delta>\n %s\n </delta>\n' % body)

        out.append("""</data>\n""")
        return ''.join(out)

    def getDelta(self, old_timestamp):
        """Return the first delta whose timestamp is <= *old_timestamp*.

        Both sides are converted with int().  Returns None when no delta
        qualifies.
        """
        old_timestamp = int(old_timestamp)
        for deltamd in self.deltas:
            if int(deltamd.timestamp) <= old_timestamp:
                return deltamd
        return None
def _int_or_none(value):
    """Return int(value), or None when *value* is missing or not an integer."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return None


class RepoMD:
    """represents the repomd xml file

    Attributes:
        repoid    -- identifier passed to the constructor
        timestamp -- largest integer <timestamp> seen among the data entries
        repoData  -- dict mapping a data type to its RepoData
        checksums -- hex digests of the parsed file, as reported by the
                     checksumming wrapper used in parse()
        length    -- len() of that wrapper's checksum object
        revision  -- text of <revision>, or None
        tags      -- {'content': set, 'distro': {cpeid: set}, 'repo': set}
    """

    def __init__(self, repoid, srcfile=None):
        """takes a repoid and a filename for the repomd.xml"""
        self.timestamp = 0
        self.repoid = repoid
        self.repoData = {}
        self.checksums = {}
        self.length = 0
        self.revision = None
        self.tags = {'content' : set(), 'distro' : {}, 'repo': set()}

        if srcfile:
            self.parse(srcfile)

    def parse(self, srcfile):
        """Parse a repomd.xml given as a filename or an open file object.

        Raises RepoMDError when the named file cannot be opened or when the
        XML is damaged.
        """
        opened = None
        if isinstance(srcfile, types.StringTypes):
            # srcfile is a filename string
            try:
                infile = opened = open(srcfile, 'rt')
            except IOError:
                raise RepoMDError("Unable to open %s" % (srcfile,))
        else:
            # srcfile is a file object
            infile = srcfile
            srcfile = None

        try:
            self._parse_stream(infile, srcfile)
        finally:
            # Only close what was opened here; a caller-supplied file
            # object stays under the caller's control.
            if opened is not None:
                opened.close()

    def _parse_stream(self, infile, srcfile):
        """Read every element from *infile* and record it on this object."""
        # We trust any of these to mean the repomd.xml is valid.
        infile = AutoFileChecksums(infile, ['sha256', 'sha512'],
                                   ignore_missing=True, ignore_none=True)
        parser = iterparse(infile)

        try:
            for event, elem in parser:
                elem_name = ns_cleanup(elem.tag)
                if elem_name == "data":
                    self._add_data(RepoData(elem=elem), srcfile)
                elif elem_name == "revision":
                    self.revision = elem.text
                elif elem_name == "tags":
                    self._add_tags(elem)

            self.checksums = infile.checksums.hexdigests()
            self.length = len(infile.checksums)
        except SyntaxError:
            raise RepoMDError("Damaged repomd.xml file")

    def _add_data(self, thisdata, srcfile):
        """Store one parsed <data> entry and update the newest timestamp."""
        old = self.repoData.get(thisdata.type)
        if old is not None and self._keep_previous(old, thisdata, srcfile):
            # previous is smaller, can unzip it, and next is not cached
            thisdata = old
        self.repoData[thisdata.type] = thisdata

        # NOTE: This will fail on float timestamps, this is required for
        #       compatibility. Fix is to not generate float timestamps in
        #       repomd.xml.
        nts = _int_or_none(thisdata.timestamp)
        if nts is not None and nts > self.timestamp:
            self.timestamp = nts

    def _keep_previous(self, old, new, srcfile):
        """Decide whether the earlier entry *old* should win over *new*.

        True only when all of these hold: the file was given by name, *old*
        is strictly smaller than *new*, *old*'s file extension is listed in
        _available_compression, and stat_f() returns None for *new*'s file
        looked up next to *srcfile*.
        """
        if not srcfile:
            return False

        # Sizes come from element text; compare them as numbers, since
        # comparing the raw strings would rank '9' above '10'.
        old_size = _int_or_none(old.size)
        new_size = _int_or_none(new.size)
        if old_size is None or new_size is None or old_size >= new_size:
            return False

        old_name = old.location[1]
        new_name = new.location[1]
        if not old_name or not new_name:
            return False

        (_, dot, ext) = old_name.rpartition('.')
        if not dot or ext not in _available_compression:
            return False

        # Look for the new file beside srcfile.  rpartition() yields an
        # empty directory part for a bare filename, so the lookup is then
        # relative to the current directory rather than "<srcfile>/<name>".
        (dirname, slash, _) = srcfile.rpartition('/')
        cached = dirname + slash + new_name.rsplit('/', 1)[-1]
        return stat_f(cached) is None

    def _add_tags(self, elem):
        """Record the <content>, <repo> and <distro> children of <tags>."""
        for child in elem:
            child_name = ns_cleanup(child.tag)
            if child_name == 'content':
                self.tags['content'].add(child.text)
            elif child_name == 'repo':
                # dump_xml() writes these, so read them back as well.
                self.tags['repo'].add(child.text)
            elif child_name == 'distro':
                cpeid = child.attrib.get('cpeid', '')
                self.tags['distro'].setdefault(cpeid, set()).add(child.text)

    def fileTypes(self):
        """return list of metadata file types available"""
        return list(self.repoData)

    def getData(self, type):
        """Return the RepoData stored for *type*.

        Raises RepoMDError when no entry of that type was parsed.
        """
        if type in self.repoData:
            return self.repoData[type]
        raise RepoMDError("requested datatype %s not available" % (type,))

    def dump(self):
        """dump fun output"""
        # Each print() call takes a single pre-formatted string, which
        # produces the same output whether print is a statement or a
        # function.
        print("file timestamp: %s" % self.timestamp)
        print("file length : %s" % self.length)
        for csum in sorted(self.checksums):
            print("file checksum : %s/%s" % (csum, self.checksums[csum]))
        if self.revision is not None:
            print('revision: %s' % self.revision)
        if self.tags['content']:
            print('tags content: %s' % ", ".join(sorted(self.tags['content'])))
        if self.tags['distro']:
            for distro in sorted(self.tags['distro']):
                print('tags distro: %s' % distro)
                tags = self.tags['distro'][distro]
                print(' tags: %s' % ", ".join(sorted(tags)))
        print('\n---- Data ----')
        for ft in sorted(self.fileTypes()):
            thisdata = self.repoData[ft]
            print(' datatype: %s' % thisdata.type)
            print(' location : %s %s' % thisdata.location)
            print(' timestamp : %s' % thisdata.timestamp)
            print(' size : %s' % thisdata.size)
            print(' open size : %s' % thisdata.opensize)
            print(' checksum : %s - %s' % thisdata.checksum)
            print(' open checksum: %s - %s' % thisdata.openchecksum)
            print(' dbversion : %s' % thisdata.dbversion)
            print('')

    def dump_xml(self):
        """Return the whole repomd document serialised as an XML string."""
        out = ['<?xml version="1.0" encoding="UTF-8"?>\n'
               '<repomd xmlns="http://linux.duke.edu/metadata/repo" '
               'xmlns:rpm="http://linux.duke.edu/metadata/rpm">\n']

        if self.revision:
            out.append(""" <revision>%s</revision>\n"""
                       % to_xml(self.revision))

        if self.tags['content'] or self.tags['distro'] or self.tags['repo']:
            out.append(""" <tags>\n""")
            for item in self.tags['content']:
                out.append(""" <content>%s</content>\n""" % (to_xml(item)))
            for item in self.tags['repo']:
                out.append(""" <repo>%s</repo>\n""" % (to_xml(item)))

            distro = self.tags['distro']
            if isinstance(distro, dict):
                # Flatten {cpeid: set(names)} into sorted (cpeid, name)
                # pairs; anything else is assumed to already be an
                # iterable of such pairs.
                distro = [(cpeid, item)
                          for cpeid in sorted(distro)
                          for item in sorted(distro[cpeid])]
            for (cpeid, item) in distro:
                if cpeid:
                    out.append(""" <distro cpeid="%s">%s</distro>\n""" % (
                        to_xml(cpeid, attrib=True), to_xml(item)))
                else:
                    out.append(""" <distro>%s</distro>\n""" % (to_xml(item)))
            out.append(""" </tags>\n""")

        for md in self.repoData.values():
            out.append(md.dump_xml())

        out.append("""</repomd>\n""")
        return ''.join(out)
def main():
    """Parse the repomd.xml named by sys.argv[1] and print a summary of it.

    Exits with status 1 when no path is given on the command line, or when
    the file cannot be read or parsed.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s <repomd.xml>\n" % sys.argv[0])
        sys.exit(1)

    srcfile = sys.argv[1]
    try:
        print("file : %s" % srcfile)
        p = RepoMD('repoid', srcfile)
        p.dump()
    except IOError:
        sys.stderr.write("newcomps.py: No such file:'%s'\n" % srcfile)
        sys.exit(1)
    except RepoMDError:
        # RepoMD.parse() reports an unopenable or damaged file as
        # RepoMDError rather than IOError, so it has to be handled here to
        # avoid a traceback.  sys.exc_info() fetches the exception without
        # depending on a version-specific "except ... as/," spelling.
        sys.stderr.write("%s\n" % (sys.exc_info()[1],))
        sys.exit(1)


if __name__ == '__main__':
    main()
|