#!/usr/bin/env python
#
# svn-restore-dumps.py -- Load dumpfiles back into a subversion repository.
#
# ====================================================================
# Copyright (c) 2006-2009 CollabNet.  All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.  The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals.  For exact contribution history, see the revision
# history and logs, available at http://subversion.tigris.org/.
# ====================================================================
#
# This script loads dump files (like those created by svn-backup-dumps.py)
# into a subversion repository - particularly intended for single
# incremental & relative incremental dumps.
#
#
# The basic operation modes are:
#    1. Restore all dump files (revisions HEAD to latest).
#
# All dump files are prefixed with the basename of the repository. All
# examples below assume that the repository '/srv/svn/repos/src' is
# dumped so all dumpfiles start with 'src'.
#
# Optional functionality:
#    2. Create the repository if it doesn't exist
#

__version = "0.3"

import sys
import os
if os.name != "nt":
    import fcntl
    import select
import gzip
import os.path
import re
from optparse import OptionParser
from subprocess import Popen, PIPE

try:
    import bz2
    have_bz2 = True
except ImportError:
    have_bz2 = False


def _to_text(data):
    """Return `data` as a native string.

    Pipe output arrives as bytes on Python 3; it is decoded as UTF-8 with
    replacement of undecodable bytes.  Anything that already is a native
    string (all pipe output on Python 2) is returned unchanged.
    """
    if isinstance(data, bytes) and not isinstance(data, str):
        return data.decode("utf-8", "replace")
    return data


def _popen_failure_text(cmd):
    """Build the error text for a Popen() call that just raised.

    Must be called from inside the `except` block so that sys.exc_info()
    still refers to the Popen failure.
    """
    return "Popen failed (%s ...):\n %s" % (cmd[0], str(sys.exc_info()[1]))


class SvnRestoreInput:
    """Base class for dump file readers.

    Subclasses implement open(), read(size) and close(); read() returns an
    empty string at end of input.
    """

    def __init__(self, abspath, filename):
        self.__filename = filename
        self.__absfilename = os.path.join(abspath, filename)

    def open(self):
        pass

    def read(self, size):
        pass

    def close(self):
        pass

    def get_filename(self):
        return self.__filename

    def get_absfilename(self):
        return self.__absfilename


class SvnRestoreInputPlain(SvnRestoreInput):
    """Reader for uncompressed dump files."""

    def __init__(self, abspath, filename):
        SvnRestoreInput.__init__(self, abspath, filename)

    def open(self):
        self.__ifd = open(self.get_absfilename(), "rb")

    def read(self, size):
        return self.__ifd.read(size)

    def close(self):
        self.__ifd.close()


class SvnRestoreInputGzip(SvnRestoreInput):
    """Reader for gzip-compressed dump files."""

    def __init__(self, abspath, filename):
        SvnRestoreInput.__init__(self, abspath, filename)

    def open(self):
        self.__compressor = gzip.GzipFile(filename=self.get_absfilename(),
                                          mode="rb")

    def read(self, size):
        return self.__compressor.read(size)

    def close(self):
        self.__compressor.close()


class SvnRestoreInputBzip2(SvnRestoreInput):
    """Reader for bzip2-compressed dump files.

    Only usable when the bz2 module could be imported (see have_bz2).
    """

    def __init__(self, abspath, filename):
        SvnRestoreInput.__init__(self, abspath, filename)

    def open(self):
        self.__decompressor = bz2.BZ2Decompressor()
        self.__ifd = open(self.get_absfilename(), "rb")

    def read(self, size):
        # `size` bounds the compressed bytes read per step, not the amount
        # of decompressed data returned.
        while True:
            data = self.__ifd.read(size)
            if len(data) == 0:
                break   # end of file
            data = self.__decompressor.decompress(data)
            if len(data) > 0:
                break   # got some data to return to the caller
            # the decompressor needs more input before it can emit anything
        return data

    def close(self):
        self.__ifd.close()


class SvnRestoreException(Exception):
    """Error raised for any user-reportable failure of the restore."""

    def __init__(self, errortext):
        Exception.__init__(self, errortext)
        self.errortext = errortext

    def __str__(self):
        return self.errortext


class SvnRestore:
    """Loads a chain of dump files from a directory into a repository."""

    def __init__(self, options, args):
        """Validate the command line and the repository/dump directories.

        options -- parsed options providing `create` and `verbose`.
        args    -- [progname, repospath, dumpdir].

        Raises SvnRestoreException when the arguments are wrong, when the
        repository is missing (and could not be created) or is not a
        repository, or when dumpdir is missing or not a directory.
        """
        # need 3 args: progname, reposname, dumpdir
        if len(args) != 3:
            if len(args) < 3:
                raise SvnRestoreException("too few arguments, specify repospath and dumpdir.")
            else:
                raise SvnRestoreException("too many arguments, specify repospath and dumpdir only.")
        self.__repospath = args[1]
        self.__dumpdir = args[2]
        # set options
        self.__create = options.create
        self.__verbose = options.verbose
        # check repospath
        rpathparts = os.path.split(self.__repospath)
        if len(rpathparts[1]) == 0:
            # path ended in a separator: continue with repospath without
            # the trailing slash
            self.__repospath = rpathparts[0]
        if not os.path.exists(self.__repospath):
            if not self.__create:
                raise SvnRestoreException("repos '%s' does not exist." % self.__repospath)
            rc = self.create_new_repository()
            if not rc:
                raise SvnRestoreException("cannot create repos '%s'." % self.__repospath)
        if not os.path.isdir(self.__repospath):
            raise SvnRestoreException("repos '%s' is not a directory." % self.__repospath)
        for subdir in ["db", "conf", "hooks"]:
            subpath = os.path.join(self.__repospath, subdir)
            if not os.path.isdir(subpath):
                raise SvnRestoreException("repos '%s' is not a repository." % self.__repospath)
        rpathparts = os.path.split(self.__repospath)
        self.__reposname = rpathparts[1]
        if self.__reposname in ["", ".", ".."]:
            raise SvnRestoreException("couldn't extract repos name from '%s'." % self.__repospath)

        # check dumpdir
        if not os.path.exists(self.__dumpdir):
            raise SvnRestoreException("dumpdir '%s' does not exist." % self.__dumpdir)
        elif not os.path.isdir(self.__dumpdir):
            raise SvnRestoreException("dumpdir '%s' is not a directory." % self.__dumpdir)

    def set_nonblock(self, fileobj):
        """Switch the descriptor behind `fileobj` to non-blocking mode (unix)."""
        fd = fileobj.fileno()
        n = fcntl.fcntl(fd, fcntl.F_GETFL)
        fcntl.fcntl(fd, fcntl.F_SETFL, n | os.O_NONBLOCK)

    def exec_input_cmd(self, cmd, input):
        """Run `cmd`, feeding it everything `input.read()` yields on stdin.

        Returns the exit code of the process (256 if it could not be
        started).
        """
        if os.name == "nt":
            return self.exec_input_cmd_nt(cmd, input)
        else:
            return self.exec_input_cmd_unix(cmd, input)

    def _feed_process(self, cmd, input):
        """Start `cmd` and stream `input` to its stdin in 16 KiB chunks.

        Only stdin is piped (stdout/stderr are inherited), so plain
        blocking writes cannot deadlock against unread child output.
        A Popen failure is printed and reported as exit code 256 so that
        the return value is always an integer.
        """
        try:
            proc = Popen(cmd, stdin=PIPE, shell=False)
        except Exception:
            print(_popen_failure_text(cmd))
            return 256
        stdin = proc.stdin
        buf = input.read(16384)
        while len(buf) > 0:
            stdin.write(buf)
            buf = input.read(16384)
        stdin.close()
        return proc.wait()

    def exec_input_cmd_unix(self, cmd, input):
        """Unix variant of exec_input_cmd(); returns the exit code."""
        # A non-blocking buffered write here could fail or truncate data
        # once the pipe filled up, so blocking writes are used instead.
        return self._feed_process(cmd, input)

    def exec_input_cmd_nt(self, cmd, input):
        """Windows variant of exec_input_cmd(); returns the exit code."""
        return self._feed_process(cmd, input)

    def exec_cmd(self, cmd, output=None, printerr=False):
        """Run `cmd` and collect what it prints.

        output   -- optional object with write(); receives the raw stdout
                    chunks instead of them being collected.
        printerr -- unix only: echo stderr chunks to sys.stdout instead of
                    collecting them.

        Returns (exit code, stdout text, stderr text).  If the process
        cannot be started the result is (256, "", error text).
        """
        if os.name == "nt":
            return self.exec_cmd_nt(cmd, output, printerr)
        else:
            return self.exec_cmd_unix(cmd, output, printerr)

    def exec_cmd_unix(self, cmd, output=None, printerr=False):
        """Unix variant of exec_cmd(): multiplexes stdout and stderr."""
        try:
            proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=False)
        except Exception:
            return (256, "", _popen_failure_text(cmd))
        stdout = proc.stdout
        stderr = proc.stderr
        readfds = [stdout, stderr]
        out_chunks = []
        err_chunks = []
        while len(readfds) > 0:
            ready = select.select(readfds, [], [])[0]
            for fd in ready:
                # os.read() on a descriptor select() reported as ready
                # returns the available bytes (or nothing at end of file)
                # without waiting for a full 16 KiB.
                buf = os.read(fd.fileno(), 16384)
                if len(buf) == 0:
                    readfds.remove(fd)      # end of file on this pipe
                elif fd is stdout:
                    if output:
                        output.write(buf)
                    else:
                        out_chunks.append(buf)
                else:
                    if printerr:
                        sys.stdout.write("%s " % _to_text(buf))
                    else:
                        err_chunks.append(buf)
        rc = proc.wait()
        stdout.close()
        stderr.close()
        if printerr:
            print("")
        # Decode once at the end so multi-byte characters split across
        # chunk boundaries are not damaged.
        return (rc, _to_text(b"".join(out_chunks)), _to_text(b"".join(err_chunks)))

    def exec_cmd_nt(self, cmd, output=None, printerr=False):
        """Windows variant of exec_cmd(): stderr is not captured."""
        try:
            proc = Popen(cmd, stdout=PIPE, stderr=None, shell=False)
        except Exception:
            return (256, "", _popen_failure_text(cmd))
        stdout = proc.stdout
        out_chunks = []
        buf = stdout.read(16384)
        while len(buf) > 0:
            if output:
                output.write(buf)
            else:
                out_chunks.append(buf)
            buf = stdout.read(16384)
        rc = proc.wait()
        stdout.close()
        return (rc, _to_text(b"".join(out_chunks)), "")

    def get_head_rev(self):
        """Return the youngest revision of the repository, or -1 on error."""
        cmd = ["svnlook", "youngest", self.__repospath]
        r = self.exec_cmd(cmd)
        if r[0] == 0 and len(r[2]) == 0:
            return int(r[1].strip())
        else:
            print(r[2])
        return -1

    def create_new_repository(self):
        """Run 'svnadmin create' on repospath; return True on success."""
        cmd = ["svnadmin", "create", self.__repospath]
        r = self.exec_cmd(cmd)
        rc = r[0] == 0
        if not rc:
            print(r[2])
        return rc

    class DumpInfo:
        """One dump file: its name, last revision and extension (or None)."""

        def __init__(self, filename, rev_end, file_extension):
            self.filename = filename
            self.rev_end = rev_end
            self.file_extension = file_extension

    def scan_all_dump_files(self):
        """Index the dump files of this repository found in dumpdir.

        File names must look like '<reposname>.<start>-<end>.svndmp[.<ext>]'.

        Returns (table, highest_rev) where table maps each start revision
        to the DumpInfo covering the largest range from that start, and
        highest_rev is the largest end revision seen.

        Raises SvnRestoreException if no matching dump file exists.
        """
        prog = re.compile(r"(.+)\.(\d+)-(\d+)\.svndmp(.*)")
        highest_dump_rev = -1
        dump_info_table = dict()    # keyed on the rev_start
        # sorted so that the choice between duplicate ranges does not
        # depend on the directory listing order
        for filename in sorted(os.listdir(self.__dumpdir)):
            m = prog.match(filename)
            if not m:
                # a non-matching filename
                continue
            if m.group(1) != self.__reposname:
                if self.__verbose:
                    print("Ignoring dump file '%s' - this is for another repos" % filename)
                continue
            rev_start = int(m.group(2))
            rev_end = int(m.group(3))
            dump_ext = None
            if m.group(4):
                if m.group(4)[0] == ".":
                    dump_ext = m.group(4)[1:]   # grab the filename extension
            dump_info = SvnRestore.DumpInfo(filename, rev_end, dump_ext)
            if rev_start in dump_info_table:
                # only overwrite if a larger range of revisions is covered
                if rev_end <= dump_info_table[rev_start].rev_end:
                    if (rev_end == dump_info_table[rev_start].rev_end) and self.__verbose:
                        # this would indicate multiple 'matching' files -
                        # each with a unique extension
                        print("Ignoring dump file '%s' for duplicate revision range %s-%s" % (filename, rev_start, rev_end))
                    continue
            # add or overwrite the existing entry
            dump_info_table[rev_start] = dump_info
            if rev_end > highest_dump_rev:
                highest_dump_rev = rev_end
        if highest_dump_rev == -1:
            raise SvnRestoreException("There are no matching dump files in this directory")
        return dump_info_table, highest_dump_rev

    def load_dumps(self):
        """Load every dump needed to bring the repository up to date.

        Starts at the revision following the repository's youngest one
        (or at 0 for a repository at revision 0) and follows the chain of
        dump files up to the highest dumped revision.

        Returns True on success and False if the head revision cannot be
        determined or 'svnadmin load' fails.  Raises SvnRestoreException
        for missing, unsupported or inconsistent dump files.
        """
        dump_info_table, highest_dump_rev = self.scan_all_dump_files()
        headrev = self.get_head_rev()
        if headrev == -1:
            return False
        if self.__verbose:
            print("Repository revision is %s" % headrev)
        if headrev > highest_dump_rev:
            raise SvnRestoreException("The repository revision (%s) is later than the highest dump (%s)" % (headrev, highest_dump_rev))
        if headrev == 0:
            # special case to ensure revision 0 is loaded against the repository
            revision_required = 0
        else:
            revision_required = headrev + 1
        while revision_required <= highest_dump_rev:
            try:
                dump_info = dump_info_table[revision_required]
            except KeyError:
                # must be a gap in the dumps - we should be able to go up
                # to highest_dump_rev
                raise SvnRestoreException("Cannot find dump file for revision %s yet later dumps exist" % revision_required)
            if self.__verbose:
                print("Processing dump file %s, for rev %s" % (dump_info.filename, revision_required))
            dump_input = None
            if dump_info.file_extension:
                if dump_info.file_extension == "gz":
                    dump_input = SvnRestoreInputGzip(self.__dumpdir, dump_info.filename)
                elif dump_info.file_extension == "bz2":
                    if not have_bz2:
                        raise SvnRestoreException("bz2 decompression unavailable on this system")
                    dump_input = SvnRestoreInputBzip2(self.__dumpdir, dump_info.filename)
                else:
                    raise SvnRestoreException("unsupported extension for dump_file '%s'." % dump_info.filename)
            else:
                dump_input = SvnRestoreInputPlain(self.__dumpdir, dump_info.filename)
            cmd = ["svnadmin", "load", self.__repospath]
            dump_input.open()
            try:
                rc = self.exec_input_cmd(cmd, dump_input)
            finally:
                # release the dump file even if feeding the process fails
                dump_input.close()
            if rc:
                return False
            revision_required = dump_info.rev_end + 1
        return True


if __name__ == "__main__":
    usage = "usage: svn-restore-dumps.py [options] repospath dumpdir"
    parser = OptionParser(usage=usage, version="%prog " + __version)
    parser.add_option("-c",
                      action="store_true",
                      dest="create", default=False,
                      help="create repository if it doesn't exist.")
    parser.add_option("-v",
                      action="store_true",
                      dest="verbose", default=False,
                      help="verbose logging.")
    # sys.argv is passed explicitly, so args[0] is the program name
    (options, args) = parser.parse_args(sys.argv)
    rc = False
    try:
        restore = SvnRestore(options, args)
        rc = restore.load_dumps()
    except SvnRestoreException as e:
        print("svn-restore-dumps.py: %s" % e)
    if rc:
        print("Everything OK.")
        sys.exit(0)
    else:
        print("An error occured!")
        sys.exit(1)