Attached is a sample run of this script. Some interesting data!
Cheers,
-g
On Thu, Apr 16, 2009 at 18:29, Greg Stein <gstein_at_gmail.com> wrote:
> Author: gstein
> Date: Thu Apr 16 09:29:08 2009
> New Revision: 37306
>
> Log:
> Add an interesting little script to analyze what each committer has
> touched over all time.
>
> * tools/dev:
>   (svn:ignore): ignore logdata.py
>
> * tools/dev/analyze-svnlogs.py: new analysis script
>
> Added:
>  trunk/tools/dev/analyze-svnlogs.py  (contents, props changed)
> Modified:
>  trunk/tools/dev/  (props changed)
>
> Added: trunk/tools/dev/analyze-svnlogs.py
> URL: http://svn.collab.net/viewvc/svn/trunk/tools/dev/analyze-svnlogs.py?pathrev=37306
> ==============================================================================
> --- /dev/null  00:00:00 1970  (empty, because file is newly added)
> +++ trunk/tools/dev/analyze-svnlogs.py  Thu Apr 16 09:29:08 2009     (r37306)
> @@ -0,0 +1,147 @@
> +#!/usr/bin/python
> +#
> +# Generate a report of each area each committer has touched over all time.
> +#
> +# $ svn log -v ^/ > svnlogdata
> +# $ ./analyze-svnlogs.py < svnlogdata > report.txt
> +#
> +# NOTE: ./logdata.py is written with a cached version of the data extracted
> +#       from 'svnlogdata'. That data can be analyzed in many ways, beyond
> +#       what this script is reporting.
> +#
> +
> +import sys
> +import re
> +
> +
> +RE_LOG_HEADER = re.compile('^(r[0-9]+) '
> +                           '\| ([^|]+) '
> +                           '\| ([^|]+) '
> +                           '\| ([0-9]+) line')
> +RE_PATH = re.compile(r'   [MARD] (.*?)( \(from .*\))?$')
> +SEPARATOR = '-' * 72
> +
> +
> +def parse_one_commit(logfile):
> +  line = logfile.readline().strip()
> +  if line != SEPARATOR:
> +    raise ParseError('missing separator: %s' % line)
> +
> +  line = logfile.readline()
> +  if not line:
> +    # end of file!
> +    return None, None
> +
> +  m = RE_LOG_HEADER.match(line)
> +  if not m:
> +    raise ParseError('could not match log header')
> +  revision = m.group(1)
> +  author = m.group(2)
> +  num_lines = int(m.group(4))
> +  paths = set()
> +
> +  # skip "Changed paths:"
> +  line = logfile.readline().strip()
> +  if not line:
> +    # there were no paths. just a blank before the log message. continue on.
> +    sys.stderr.write('Funny revision: %s\n' % revision)
> +  else:
> +    if not line.startswith('Changed'):
> +      raise ParseError('log not run with -v. paths missing in %s' % revision)
> +
> +    # gather all the affected paths
> +    while 1:
> +      line = logfile.readline().rstrip()
> +      if not line:
> +        # just hit end of the changed paths
> +        break
> +      m = RE_PATH.match(line)
> +      if not m:
> +        raise ParseError('bad path in %s: %s' % (revision, line))
> +      paths.add(m.group(1))
> +
> +  # suck up the log message
> +  for i in range(num_lines):
> +    logfile.readline()
> +
> +  return author, paths
> +
> +
> +def parse_file(logfile):
> +  authors = { }
> +
> +  while True:
> +    author, paths = parse_one_commit(logfile)
> +    if author is None:
> +      return authors
> +
> +    if author in authors:
> +      authors[author] = authors[author].union(paths)
> +    else:
> +      authors[author] = paths
> +
> +
> +def write_logdata(authors):
> +  out = open('logdata.py', 'w')
> +  out.write('authors = {\n')
> +  for author, paths in authors.items():
> +    out.write("  '%s': set([\n" % author)
> +    for path in paths:
> +      out.write('    %s,\n' % repr(path))
> +    out.write('  ]),\n')
> +  out.write('}\n')
> +
> +
> +def print_report(authors):
> +  for author, paths in sorted(authors.items()):
> +    topdirs = { }
> +    for path in paths:
> +      key = tuple(path.split('/', 3)[1:3])
> +      if key in topdirs:
> +        topdirs[key] += 1
> +      else:
> +        topdirs[key] = 1
> +
> +    print author
> +    tags = [ ]
> +    branches = [ ]
> +    for topdir in sorted(topdirs):
> +      if len(topdir) == 1:
> +        assert topdirs[topdir] == 1
> +        print '  %s  (ROOT)' % topdir[0]
> +      else:
> +        if topdir[0] == 'tags':
> +          tags.append(topdir[1])
> +        elif topdir[0] == 'branches':
> +          branches.append(topdir[1])
> +        else:
> +          print '  %s (%d items)' % ('/'.join(topdir), topdirs[topdir])
> +    if tags:
> +      print '  TAGS:', ', '.join(tags)
> +    if branches:
> +      print '  BRANCHES:', ', '.join(branches)
> +
> +    print
> +
> +
> +def run(logfile):
> +  try:
> +    import logdata
> +    authors = logdata.authors
> +  except ImportError:
> +    authors = parse_file(logfile)
> +    write_logdata(authors)
> +
> +  print_report(authors)
> +
> +
> +class ParseError(Exception):
> +  pass
> +
> +
> +if __name__ == '__main__':
> +  if len(sys.argv) > 1:
> +    logfile = open(sys.argv[1])
> +  else:
> +    logfile = sys.stdin
> +  run(logfile)
>
> ------------------------------------------------------
> http://subversion.tigris.org/ds/viewMessage.do?dsForumId=495&dsMessageId=1750326
>
------------------------------------------------------
http://subversion.tigris.org/ds/viewMessage.do?dsForumId=462&dsMessageId=1750355
Received on 2009-04-16 18:33:09 CEST