#!/usr/bin/env python2.4

# $Id: svn-fast-backup 8631 2005-08-27 02:06:21Z quarl $

# svn-fast-backup: use rsync snapshots for very fast FSFS repository backup.
#    Multiple FSFS backups share data via hardlinks, meaning old backups are
#    almost free, since a newer revision of a repository is almost a complete
#    superset of an older revision.

## quarl 2005-08-17 initial version

# Originally based on svn-hot-backup.py, whose copyright notice states:

# ====================================================================
# Copyright (c) 2000-2004 CollabNet.  All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.  The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals.  For exact contribution history, see the revision
# history and logs, available at http://subversion.tigris.org/.
# ====================================================================

######################################################################

import sys, os, re
import subprocess                                   # python2.4

######################################################################
# Global Settings

# Path to svnlook utility
svnlook = "/usr/bin/svnlook"

# Path to svnadmin utility
svnadmin = "/usr/bin/svnadmin"

# Number of backups to keep around (0 for "keep them all").
# Taken from $SVN_FAST_BACKUPS_NUM when that is set to a non-empty value,
# otherwise defaults to 64.  The default is applied only when the variable
# is absent/empty, so an explicit 0 really does mean "keep them all".
_num_backups_env = os.environ.get('SVN_FAST_BACKUPS_NUM', '').strip()
if _num_backups_env:
    num_backups = int(_num_backups_env)
else:
    num_backups = 64

######################################################################
# Command line arguments

args = sys.argv[1:]

# An optional leading -q requests quiet operation.
quiet = False
if args and args[0] == '-q':
    del args[0]
    quiet = True

if len(args) != 2:
    raise SystemExit("""Syntax: %s [-q] repos_path backup_dir

Makes a hot backup of a FSFS repository at REPOS_PATH to BACKUP_DIR/repos-rev.

If a previous version exists, make hard links of its files.
Maintains up to $SVN_FAST_BACKUPS_NUM (or 64) backups.
""" %sys.argv[0])

# Path to repository
repo_dir = args[0]
repo = os.path.basename(os.path.abspath(repo_dir))

# Where to store the repository backup.  The backup will be placed in
# a *subdirectory* of this location, named after the youngest
# revision.
#
# This is made absolute *before* changing into the repository: every later
# use of backup_dir happens with repo_dir as the working directory, so a
# relative path would otherwise be resolved inside the repository itself.
backup_dir = os.path.abspath(args[1])

os.chdir(repo_dir)

######################################################################
# Helper functions

# Matches the trailing "-REVISION" or "-REVISION-INCREMENT" of a backup name.
# Compiled once here rather than on every comparison.
_BACKUP_SUFFIX_RE = re.compile("-(?P<revision>[0-9]+)(-(?P<increment>[0-9]+))?$")

def _backup_sort_key(name):
    '''Return the (revision, increment) sort key for backup name NAME.

    A name without an increment gets increment -1 so that it sorts before
    every incremented backup of the same revision.'''
    match = _BACKUP_SUFFIX_RE.search(name)
    revision = int(match.group('revision'))
    increment = match.group('increment')
    if increment is None:
        increment = -1
    else:
        increment = int(increment)
    return (revision, increment)

def comparator(a, b):
    '''cmp-style ordering of two backup directory names.

    Names are ordered by revision number, then by increment; a name with no
    increment sorts before one that has an increment.  Returns -1 if A sorts
    before B, 1 if it sorts after, and 0 if both carry the same revision and
    increment.  Both names must end in "-REV" or "-REV-INC"; anything else
    raises AttributeError because the suffix pattern does not match.'''
    key_a = _backup_sort_key(a)
    key_b = _backup_sort_key(b)
    if key_a < key_b:
        return -1
    if key_a > key_b:
        return 1
    # Distinct filenames can still compare equal (e.g. "repo-5" and
    # "repo-05"), so report equality instead of claiming an order.
    return 0

def pipe(command):
    '''Run COMMAND (an argument list) and return what it wrote to standard
    output, with leading and trailing whitespace removed.  The exit status
    of the command is not examined.'''
    child = subprocess.Popen(command, stdout=subprocess.PIPE)
    captured = child.communicate()[0]
    return captured.strip()

def readfile(filename):
    '''Return the contents of FILENAME with surrounding whitespace stripped.

    If the file cannot be opened or read, return the placeholder string 'x'
    instead of raising.  Only I/O errors are treated this way; anything else
    (e.g. KeyboardInterrupt) propagates.'''
    try:
        f = open(filename)
    except (IOError, OSError):
        return 'x'
    # Nested try blocks: a combined try/except/finally is not available in
    # Python 2.4.
    try:
        try:
            return f.read().strip()
        except (IOError, OSError):
            return 'x'
    finally:
        # Close explicitly rather than relying on garbage collection.
        f.close()

def list_repo_backups(repo):
    '''Return a list of backups for this repository, sorted.

    Scans the global backup_dir for entries named "REPO-REV" or
    "REPO-REV-INC" and returns their names (not full paths), ordered with
    comparator() so that the most recent backup is last.'''
    # Escape the repository name so regex metacharacters in it ('.', '+',
    # ...) are matched literally and cannot select unrelated directories or
    # make the pattern invalid.
    regexp = re.compile("^" + re.escape(repo) + "-[0-9]+(-[0-9]+)?$")
    directory_list = [name for name in os.listdir(backup_dir)
                      if regexp.search(name)]
    directory_list.sort(comparator)
    return directory_list

######################################################################
# Main

### Step 1: get the youngest revision.

if readfile(os.path.join('db', 'fs-type')) != 'fsfs':
    raise SystemExit("Path '%s' doesn't contain a FSFS repository"%repo_dir)

youngest = pipe(["svnlook","youngest","."])

print "Beginning hot backup of '%s'; youngest revision is %s..." %(repo, youngest),

### Step 2: Find next available backup path

backup_subdir = os.path.join(backup_dir, repo + "-" + youngest)
backup_tmpdir = backup_subdir + '.tmp'

if os.path.exists(backup_tmpdir):
    raise SystemExit("%s: Backup in progress?  '%s' exists -- aborting."%(sys.argv[0],backup_tmpdir))

if os.path.exists(backup_subdir):
    # if not quiet:
    print "Backup already exists at",backup_subdir
    raise SystemExit


previous_backups = list_repo_backups(repo)

### Step 3: use rsync to make a copy.
# We need to copy the 'current' file first.
# Don't copy the transactions/ directory.
#   see bottom of http://svn.collab.net/repos/svn/trunk/notes/fsfs

# Joining with '' appends a trailing separator so rsync treats the
# destination as a directory.
rsync_dest = os.path.join(backup_tmpdir,'')

# copy db/current.  -R tells rsync to use relative pathnames.
# Failures are raised as SystemExit: string exceptions are rejected by
# Python 2.6 and later, and SystemExit still prints the message and exits
# with a non-zero status.
if subprocess.call(['rsync', '-aR', 'db/current', rsync_dest]):
    raise SystemExit("%s: rsync failed" %sys.argv[0])

# Now copy everything else.

cmd = ['rsync', '-a',
       '--exclude', 'db/current',
       '--exclude', 'db/transactions/*',
       '--exclude', 'db/log.*']
# If there's a previous backup, make hard links against the latest.
# rsync resolves a relative --link-dest against the *destination*
# directory, not the current directory, so always hand it an absolute path.
if previous_backups:
    cmd += ['--link-dest',
            os.path.abspath(os.path.join(backup_dir, previous_backups[-1]))]
cmd += ['.', rsync_dest]

# print cmd
if subprocess.call(cmd):
    raise SystemExit("%s: rsync failed" %sys.argv[0])

# Rename to final name.
os.rename(backup_tmpdir, backup_subdir)

### Step 4: finally, remove all repository backups other than the last
###         NUM_BACKUPS.

print "Finished backup to", backup_subdir

if num_backups > 0:
    old_list = list_repo_backups(repo)
    del old_list[max(0,len(old_list)-num_backups):]
    for item in old_list:
        old_backup_subdir = os.path.join(backup_dir, item)
        print "  Removing old backup: ", old_backup_subdir
        subprocess.call(['rm', '-r', old_backup_subdir])

