#!/usr/bin/env python
#
# svn-restore-dumps.py -- Load dumpfiles back into a subversion repository.
#
# ====================================================================
# Copyright (c) 2006-2009 CollabNet.  All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.  The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals.  For exact contribution history, see the revision
# history and logs, available at http://subversion.tigris.org/.
# ====================================================================
#
# This script loads dump files from a subversion repository (like those
# created by svn-backup-dumps.py) - particularly intended for single
# incremental & relative incremental dumps.
#
#
# The basic operation modes are:
#    1.  Restore all dump files (revisions HEAD to latest).
#
# All dump files are prefixed with the basename of the repository. All
# examples below assume that the repository '/srv/svn/repos/src' is
# dumped so all dumpfiles start with 'src'.
#
# Optional functionality:
#    2.  Create the repository if it doesn't exist
#

__version = "0.3"

import sys
import os
if os.name != "nt":
    import fcntl
import select
import gzip
import os.path
import re
from optparse import OptionParser
from subprocess import Popen, PIPE

try:
    import bz2
    have_bz2 = True
except ImportError:
    have_bz2 = False


def _to_text(data):
    """Return subprocess output as a native ``str``.

    On Python 2 pipe data already is ``str`` and is returned unchanged; on
    Python 3 it is ``bytes`` and is decoded (undecodable bytes are replaced
    rather than raising, since this text is only used for diagnostics and
    for parsing a revision number).
    """
    if isinstance(data, str):
        return data
    return data.decode("utf-8", "replace")


class SvnRestoreInput:
    """Base class for a readable dump file located in a dump directory.

    Subclasses implement open()/read()/close() for one storage format.
    """

    def __init__(self, abspath, filename):
        self.__filename = filename
        self.__absfilename = os.path.join(abspath, filename)

    def open(self):
        pass

    def read(self, size):
        pass

    def close(self):
        pass

    def get_filename(self):
        """Return the bare file name as passed to the constructor."""
        return self.__filename

    def get_absfilename(self):
        """Return the file name joined onto the directory path."""
        return self.__absfilename


class SvnRestoreInputPlain(SvnRestoreInput):
    """Uncompressed dump file."""

    def open(self):
        self.__ifd = open(self.get_absfilename(), "rb")

    def read(self, size):
        return self.__ifd.read(size)

    def close(self):
        self.__ifd.close()


class SvnRestoreInputGzip(SvnRestoreInput):
    """gzip-compressed dump file."""

    def open(self):
        self.__compressor = gzip.GzipFile(filename=self.get_absfilename(),
                                          mode="rb")

    def read(self, size):
        return self.__compressor.read(size)

    def close(self):
        self.__compressor.close()


class SvnRestoreInputBzip2(SvnRestoreInput):
    """bzip2-compressed dump file (requires the bz2 module)."""

    def open(self):
        self.__decompressor = bz2.BZ2Decompressor()
        self.__ifd = open(self.get_absfilename(), "rb")

    def read(self, size):
        """Return the next chunk of decompressed data, empty at end of file.

        ``size`` bounds the amount of *compressed* data read per step, so the
        returned chunk may be larger than ``size``.
        """
        while True:
            data = self.__ifd.read(size)
            if len(data) == 0:
                break  # end of file
            data = self.__decompressor.decompress(data)
            if len(data) > 0:
                break  # got some data to return to the caller
            # The decompressor needs more input before it can emit anything.
        return data

    def close(self):
        self.__ifd.close()


class SvnRestoreException(Exception):
    """Error raised for every user-reportable failure of this script."""

    def __init__(self, errortext):
        self.errortext = errortext

    def __str__(self):
        return self.errortext


class SvnRestore:
    """Loads a chain of dump files from a directory into a repository."""

    def __init__(self, options, args):
        """Validate the command line and the repository/dump directories.

        ``options`` must provide ``create`` and ``verbose``; ``args`` is
        ``[progname, repospath, dumpdir]``.  Raises SvnRestoreException when
        the arguments are wrong, the repository is missing (and cannot be
        created), or the dump directory is missing.
        """
        # need 3 args: progname, reposname, dumpdir
        if len(args) != 3:
            if len(args) < 3:
                raise SvnRestoreException("too few arguments, specify repospath and dumpdir.")
            else:
                raise SvnRestoreException("too many arguments, specify repospath and dumpdir only.")
        self.__repospath = args[1]
        self.__dumpdir = args[2]
        # set options
        self.__create = options.create
        self.__verbose = options.verbose
        # check repospath
        rpathparts = os.path.split(self.__repospath)
        if len(rpathparts[1]) == 0:
            # repospath was given with a trailing slash: strip it
            self.__repospath = rpathparts[0]
        if not os.path.exists(self.__repospath):
            if not self.__create:
                raise SvnRestoreException("repos '%s' does not exist." % self.__repospath)
            rc = self.create_new_repository()
            if not rc:
                raise SvnRestoreException("cannot create repos '%s'." % self.__repospath)
        if not os.path.isdir(self.__repospath):
            raise SvnRestoreException("repos '%s' is not a directory." % self.__repospath)
        for subdir in ["db", "conf", "hooks"]:
            subpath = os.path.join(self.__repospath, subdir)
            if not os.path.isdir(subpath):
                raise SvnRestoreException("repos '%s' is not a repository." % self.__repospath)
        rpathparts = os.path.split(self.__repospath)
        self.__reposname = rpathparts[1]
        if self.__reposname in ["", ".", ".."]:
            raise SvnRestoreException("couldn't extract repos name from '%s'." % self.__repospath)

        # check dumpdir
        if not os.path.exists(self.__dumpdir):
            raise SvnRestoreException("dumpdir '%s' does not exist." % self.__dumpdir)
        elif not os.path.isdir(self.__dumpdir):
            raise SvnRestoreException("dumpdir '%s' is not a directory." % self.__dumpdir)

    def set_nonblock(self, fileobj):
        """Switch ``fileobj`` to non-blocking mode (POSIX only).

        Kept for callers that rely on it; the exec_* methods of this class
        no longer need it.
        """
        fd = fileobj.fileno()
        n = fcntl.fcntl(fd, fcntl.F_GETFL)
        fcntl.fcntl(fd, fcntl.F_SETFL, n | os.O_NONBLOCK)

    def exec_input_cmd(self, cmd, input):
        """Run ``cmd`` feeding it everything ``input.read()`` yields.

        Returns the exit code of the command, or 256 if it could not be
        started or its stdin broke while the command still reported success.
        """
        if os.name == "nt":
            return self.exec_input_cmd_nt(cmd, input)
        else:
            return self.exec_input_cmd_unix(cmd, input)

    def exec_input_cmd_unix(self, cmd, input):
        """POSIX variant of exec_input_cmd(); see _pipe_input_to_cmd()."""
        return self._pipe_input_to_cmd(cmd, input)

    def exec_input_cmd_nt(self, cmd, input):
        """Windows variant of exec_input_cmd(); see _pipe_input_to_cmd()."""
        return self._pipe_input_to_cmd(cmd, input)

    def _pipe_input_to_cmd(self, cmd, input):
        """Copy ``input`` to the stdin of ``cmd`` using blocking writes.

        Only stdin is a pipe (stdout/stderr are inherited), so the child can
        never block on us and plain blocking writes cannot deadlock.  Writing
        16 KiB chunks to a *non-blocking* pipe, on the other hand, fails with
        EAGAIN as soon as the child is slower than we are.
        """
        try:
            proc = Popen(cmd, stdin=PIPE, shell=False)
        except Exception:
            # Best effort: report the failure and signal it via the exit code.
            print("Popen failed (%s ...):\n %s" % (cmd[0], str(sys.exc_info()[1])))
            return 256
        stdin = proc.stdin
        write_failed = False
        try:
            while True:
                buf = input.read(16384)
                if len(buf) == 0:
                    break
                try:
                    stdin.write(buf)
                except (IOError, OSError):
                    # The child closed its stdin (most likely it exited with
                    # an error); stop feeding and collect its exit code.
                    write_failed = True
                    break
        finally:
            try:
                stdin.close()  # may flush buffered data into a dead pipe
            except (IOError, OSError):
                write_failed = True
            rc = proc.wait()
        if write_failed and rc == 0:
            # Never report success when not all data reached the command.
            rc = 256
        return rc

    def exec_cmd(self, cmd, output=None, printerr=False):
        """Run ``cmd`` and collect its output.

        If ``output`` is given, raw stdout chunks are written to it instead
        of being collected.  If ``printerr`` is true, stderr is printed
        instead of being collected (POSIX only; on Windows stderr is always
        inherited).  Returns ``(exit code, stdout text, stderr text)``; the
        exit code is 256 and the stderr text describes the problem when the
        command could not be started.
        """
        if os.name == "nt":
            return self.exec_cmd_nt(cmd, output, printerr)
        else:
            return self.exec_cmd_unix(cmd, output, printerr)

    def exec_cmd_unix(self, cmd, output=None, printerr=False):
        """POSIX variant of exec_cmd(): multiplexes stdout and stderr."""
        try:
            proc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=False)
        except Exception:
            return (256, "", "Popen failed (%s ...):\n %s" % (cmd[0],
                    str(sys.exc_info()[1])))
        stdout = proc.stdout
        stderr = proc.stderr
        readfds = [stdout, stderr]
        outchunks = []
        errchunks = []
        try:
            while readfds:
                ready = select.select(readfds, [], [])[0]
                for fd in ready:
                    # os.read() returns whatever is available once select()
                    # reported the descriptor readable, so it never blocks.
                    buf = os.read(fd.fileno(), 16384)
                    if len(buf) == 0:
                        readfds.remove(fd)  # end of stream
                    elif fd is stdout:
                        if output:
                            output.write(buf)
                        else:
                            outchunks.append(buf)
                    else:
                        if printerr:
                            sys.stdout.write("%s " % _to_text(buf))
                        else:
                            errchunks.append(buf)
        finally:
            stdout.close()
            stderr.close()
        rc = proc.wait()
        if printerr:
            print("")
        return (rc, _to_text(b"".join(outchunks)), _to_text(b"".join(errchunks)))

    def exec_cmd_nt(self, cmd, output=None, printerr=False):
        """Windows variant of exec_cmd(): stderr is inherited, not captured."""
        try:
            proc = Popen(cmd, stdout=PIPE, stderr=None, shell=False)
        except Exception:
            return (256, "", "Popen failed (%s ...):\n %s" % (cmd[0],
                    str(sys.exc_info()[1])))
        stdout = proc.stdout
        outchunks = []
        try:
            buf = stdout.read(16384)
            while len(buf) > 0:
                if output:
                    output.write(buf)
                else:
                    outchunks.append(buf)
                buf = stdout.read(16384)
        finally:
            stdout.close()
        rc = proc.wait()
        return (rc, _to_text(b"".join(outchunks)), "")

    def get_head_rev(self):
        """Return the youngest revision of the repository, or -1 on error."""
        cmd = ["svnlook", "youngest", self.__repospath]
        r = self.exec_cmd(cmd)
        if r[0] == 0 and len(r[2]) == 0:
            try:
                return int(r[1].strip())
            except ValueError:
                print("unexpected output from svnlook: %r" % r[1])
                return -1
        else:
            print(r[2])
            return -1

    def create_new_repository(self):
        """Run 'svnadmin create' for the repository; return True on success."""
        cmd = ["svnadmin", "create", self.__repospath]
        r = self.exec_cmd(cmd)
        rc = r[0] == 0
        if not rc:
            print(r[2])
        return rc

    class DumpInfo:
        """One dump file and its links to adjacent dump files.

        ``chain_forward`` lists the dumps starting at ``rev_end + 1`` and
        ``chain_backward`` the dumps ending at ``rev_start - 1``.  Either is
        set to None to mark the start/end of the whole chain.
        """

        def __init__(self, filename, rev_start, rev_end, file_extension):
            self.filename = filename
            self.rev_start = rev_start
            self.rev_end = rev_end
            self.file_extension = file_extension
            self.chain_forward = []
            self.chain_backward = []

    def scan_all_dump_files(self, headrev):
        """Return the ordered list of DumpInfo objects that must be loaded.

        Scans the dump directory for files named
        ``<reposname>.<start>-<end>.svndmp[.<ext>]``, ignores files that only
        contain revisions at or below ``headrev``, and selects a gap-free
        chain from the lowest start revision to the highest end revision,
        preferring at each step the file that reaches furthest.

        Raises SvnRestoreException if no file matches or if no gap-free
        chain exists.
        """
        prog = re.compile(r"(.+)\.(\d+)-(\d+)\.svndmp(\..*)?")
        lowest_dump_rev = 999999999
        highest_dump_rev = -1

        # dictionary with start rev as key, containing lists of dump infos
        dump_info_by_start = {}
        # dictionary with start/end rev tuple as key, containing dump infos
        dump_info_by_range = {}

        # Sorted so that the choice between duplicate ranges (e.g. plain and
        # compressed copy of the same dump) does not depend on listdir order.
        for filename in sorted(os.listdir(self.__dumpdir)):
            m = prog.match(filename)
            if not m:
                # a non-matching filename
                continue
            if m.group(1) != self.__reposname:
                if self.__verbose:
                    print("Ignoring dump file '%s' - this is for another repos" % filename)
                continue
            rev_start = int(m.group(2))
            rev_end = int(m.group(3))
            if rev_start < headrev or (rev_start == headrev and headrev != 0):
                if self.__verbose:
                    print("Ignoring dump file '%s' - this contains older revisions" % filename)
                continue
            if rev_end < rev_start:
                # An inverted range can never be part of a forward chain and
                # would break the start/end-of-chain invariants below.
                if self.__verbose:
                    print("Ignoring dump file '%s' - invalid revision range" % filename)
                continue
            dump_ext = None
            if m.group(4):
                # grab the filename extension (group 4 always starts with '.')
                dump_ext = m.group(4)[1:]
            if (rev_start, rev_end) in dump_info_by_range:
                if self.__verbose:
                    print("Ignoring dump file '%s' - duplicate revision range" % filename)
                continue
            dump_info = SvnRestore.DumpInfo(filename, rev_start, rev_end, dump_ext)
            # add infos
            dump_info_by_range[(rev_start, rev_end)] = dump_info
            dump_info_by_start.setdefault(rev_start, []).append(dump_info)
            if rev_start < lowest_dump_rev:
                lowest_dump_rev = rev_start
            if rev_end > highest_dump_rev:
                highest_dump_rev = rev_end

        if highest_dump_rev == -1:
            raise SvnRestoreException("There are no matching dump files in this directory")

        for dump_info in dump_info_by_range.values():
            # link infos
            next_start = dump_info.rev_end + 1
            for next_info in dump_info_by_start.get(next_start, []):
                dump_info.chain_forward.append(next_info)
                next_info.chain_backward.append(dump_info)
            # mark start/end of chain
            if dump_info.rev_start == lowest_dump_rev:
                dump_info.chain_backward = None
            if dump_info.rev_end == highest_dump_rev:
                dump_info.chain_forward = None

        # Remove infos with missing forward/backward connection until
        # no infos have been removed. What remains is either a directed
        # graph from the lowest start revision to the highest revision,
        # or nothing.
        infos_removed = True
        while infos_removed:
            to_remove = []
            for dump_key, dump_info in dump_info_by_range.items():
                forward = dump_info.chain_forward
                backward = dump_info.chain_backward
                forward_empty = forward is not None and len(forward) == 0
                backward_empty = backward is not None and len(backward) == 0
                if forward_empty or backward_empty:
                    to_remove.append(dump_key)
                    # Unlink from the neighbours so that they in turn become
                    # candidates for removal in this or the next pass.
                    if forward is not None:
                        for next_info in forward:
                            next_info.chain_backward.remove(dump_info)
                    if backward is not None:
                        for prev_info in backward:
                            prev_info.chain_forward.remove(dump_info)
            infos_removed = len(to_remove) > 0
            for dump_key in to_remove:
                dump_info = dump_info_by_range[dump_key]
                if self.__verbose:
                    print("Ignoring dump file '%s' - single or no connection in chain" % dump_info.filename)
                dump_info_by_start[dump_info.rev_start].remove(dump_info)
                del dump_info_by_range[dump_key]

        # dump_info_by_start keeps its (now possibly empty) lists, so the
        # emptiness test has to look at the surviving infos themselves.
        chain_forward = dump_info_by_start.get(lowest_dump_rev)
        if not dump_info_by_range or not chain_forward:
            raise SvnRestoreException("Chain is empty after removing files")

        # build list of needed infos
        dump_info_list = []
        while chain_forward is not None:
            # find dump file with highest end revision
            dump_info = chain_forward[0]
            for info in chain_forward[1:]:
                if info.rev_end > dump_info.rev_end:
                    dump_info = info
            # add it to the list
            dump_info_list.append(dump_info)
            del dump_info_by_range[(dump_info.rev_start, dump_info.rev_end)]
            chain_forward = dump_info.chain_forward

        if self.__verbose:
            for dump_info in dump_info_by_range.values():
                print("Ignoring dump file '%s' - redundant sub-chain" % dump_info.filename)

        return dump_info_list

    def _make_input(self, dump_info):
        """Return the SvnRestoreInput matching the extension of a dump file."""
        extension = dump_info.file_extension
        if not extension:
            return SvnRestoreInputPlain(self.__dumpdir, dump_info.filename)
        if extension == "gz":
            return SvnRestoreInputGzip(self.__dumpdir, dump_info.filename)
        if extension == "bz2":
            if not have_bz2:
                raise SvnRestoreException("bz2 decompression unavailable on this system")
            return SvnRestoreInputBzip2(self.__dumpdir, dump_info.filename)
        raise SvnRestoreException("unsupported extension for dump_file '%s'." % dump_info.filename)

    def load_dumps(self):
        """Load all needed dump files with 'svnadmin load'.

        Returns True if every file was loaded, False if the head revision
        could not be determined or a load command failed.  Raises
        SvnRestoreException for unusable dump files.
        """
        headrev = self.get_head_rev()
        if headrev == -1:
            return False
        dump_info_list = self.scan_all_dump_files(headrev)
        if self.__verbose:
            print("Repository revision is %s" % headrev)

        cmd = ["svnadmin", "load", self.__repospath]
        for dump_info in dump_info_list:
            if self.__verbose:
                print("Processing dump file %s, for rev %s" % (dump_info.filename, dump_info.rev_start))
            input = self._make_input(dump_info)
            input.open()
            try:
                rc = self.exec_input_cmd(cmd, input)
            finally:
                input.close()
            if rc:
                # stop at the first failure; later dumps depend on this one
                return False
        return True


def main(argv):
    """Parse ``argv``, run the restore and return the process exit code."""
    usage = "usage: svn-restore-dumps.py [options] repospath dumpdir"
    parser = OptionParser(usage=usage, version="%prog " + __version)
    parser.add_option("-c",
                      action="store_true",
                      dest="create", default=False,
                      help="create repository if it doesn't exist.")
    parser.add_option("-v",
                      action="store_true",
                      dest="verbose", default=False,
                      help="verbose logging.")
    (options, args) = parser.parse_args(argv)
    rc = False
    try:
        restore = SvnRestore(options, args)
        rc = restore.load_dumps()
    except SvnRestoreException as e:
        print("svn-restore-dumps.py: %s" % e)
    if rc:
        print("Everything OK.")
        return 0
    else:
        print("An error occured!")
        return 1


if __name__ == "__main__":
    sys.exit(main(sys.argv))