[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

Re: svn commit: r37306 - trunk/tools/dev

From: Greg Stein <gstein_at_gmail.com>
Date: Thu, 16 Apr 2009 18:32:09 +0200

Attached is a sample run of this script. Some interesting data!

Cheers,
-g

On Thu, Apr 16, 2009 at 18:29, Greg Stein <gstein_at_gmail.com> wrote:
> Author: gstein
> Date: Thu Apr 16 09:29:08 2009
> New Revision: 37306
>
> Log:
> Add an interesting little script to analyze what each committer has
> touched over all time.
>
> * tools/dev:
>  (svn:ignore): ignore logdata.py
>
> * tools/dev/analyze-svnlogs.py: new analysis script
>
> Added:
>   trunk/tools/dev/analyze-svnlogs.py   (contents, props changed)
> Modified:
>   trunk/tools/dev/   (props changed)
>
> Added: trunk/tools/dev/analyze-svnlogs.py
> URL: http://svn.collab.net/viewvc/svn/trunk/tools/dev/analyze-svnlogs.py?pathrev=37306
> ==============================================================================
> --- /dev/null   00:00:00 1970   (empty, because file is newly added)
> +++ trunk/tools/dev/analyze-svnlogs.py  Thu Apr 16 09:29:08 2009        (r37306)
> @@ -0,0 +1,147 @@
> +#!/usr/bin/python
> +#
> +# Generate a report of each area each committer has touched over all time.
> +#
> +# $ svn log -v ^/ > svnlogdata
> +# $ ./analyze-svnlogs.py < svnlogdata > report.txt
> +#
> +# NOTE: ./logdata.py is written with a cached version of the data extracted
> +#       from 'svnlogdata'. That data can be analyzed in many ways, beyond
> +#       what this script is reporting.
> +#
> +
> +import sys
> +import re
> +
> +
> +RE_LOG_HEADER = re.compile('^(r[0-9]+) '
> +                           '\| ([^|]+) '
> +                           '\| ([^|]+) '
> +                           '\| ([0-9]+) line')
> +RE_PATH = re.compile(r'   [MARD] (.*?)( \(from .*\))?$')
> +SEPARATOR = '-' * 72
> +
> +
> +def parse_one_commit(logfile):
> +  line = logfile.readline().strip()
> +  if line != SEPARATOR:
> +    raise ParseError('missing separator: %s' % line)
> +
> +  line = logfile.readline()
> +  if not line:
> +    # end of file!
> +    return None, None
> +
> +  m = RE_LOG_HEADER.match(line)
> +  if not m:
> +    raise ParseError('could not match log header')
> +  revision = m.group(1)
> +  author = m.group(2)
> +  num_lines = int(m.group(4))
> +  paths = set()
> +
> +  # skip "Changed paths:"
> +  line = logfile.readline().strip()
> +  if not line:
> +    # there were no paths. just a blank before the log message. continue on.
> +    sys.stderr.write('Funny revision: %s\n' % revision)
> +  else:
> +    if not line.startswith('Changed'):
> +      raise ParseError('log not run with -v. paths missing in %s' % revision)
> +
> +    # gather all the affected paths
> +    while 1:
> +      line = logfile.readline().rstrip()
> +      if not line:
> +        # just hit end of the changed paths
> +        break
> +      m = RE_PATH.match(line)
> +      if not m:
> +        raise ParseError('bad path in %s: %s' % (revision, line))
> +      paths.add(m.group(1))
> +
> +  # suck up the log message
> +  for i in range(num_lines):
> +    logfile.readline()
> +
> +  return author, paths
> +
> +
> +def parse_file(logfile):
> +  authors = { }
> +
> +  while True:
> +    author, paths = parse_one_commit(logfile)
> +    if author is None:
> +      return authors
> +
> +    if author in authors:
> +      authors[author] = authors[author].union(paths)
> +    else:
> +      authors[author] = paths
> +
> +
> +def write_logdata(authors):
> +  out = open('logdata.py', 'w')
> +  out.write('authors = {\n')
> +  for author, paths in authors.items():
> +    out.write("  '%s': set([\n" % author)
> +    for path in paths:
> +      out.write('    %s,\n' % repr(path))
> +    out.write('  ]),\n')
> +  out.write('}\n')
> +
> +
> +def print_report(authors):
> +  for author, paths in sorted(authors.items()):
> +    topdirs = { }
> +    for path in paths:
> +      key = tuple(path.split('/', 3)[1:3])
> +      if key in topdirs:
> +        topdirs[key] += 1
> +      else:
> +        topdirs[key] = 1
> +
> +    print author
> +    tags = [ ]
> +    branches = [ ]
> +    for topdir in sorted(topdirs):
> +      if len(topdir) == 1:
> +        assert topdirs[topdir] == 1
> +        print '  %s  (ROOT)' % topdir[0]
> +      else:
> +        if topdir[0] == 'tags':
> +          tags.append(topdir[1])
> +        elif topdir[0] == 'branches':
> +          branches.append(topdir[1])
> +        else:
> +          print '  %s (%d items)' % ('/'.join(topdir), topdirs[topdir])
> +    if tags:
> +      print '  TAGS:', ', '.join(tags)
> +    if branches:
> +      print '  BRANCHES:', ', '.join(branches)
> +
> +    print
> +
> +
> +def run(logfile):
> +  try:
> +    import logdata
> +    authors = logdata.authors
> +  except ImportError:
> +    authors = parse_file(logfile)
> +    write_logdata(authors)
> +
> +  print_report(authors)
> +
> +
> +class ParseError(Exception):
> +  pass
> +
> +
> +if __name__ == '__main__':
> +  if len(sys.argv) > 1:
> +    logfile = open(sys.argv[1])
> +  else:
> +    logfile = sys.stdin
> +  run(logfile)
>
> ------------------------------------------------------
> http://subversion.tigris.org/ds/viewMessage.do?dsForumId=495&dsMessageId=1750326
>

------------------------------------------------------
http://subversion.tigris.org/ds/viewMessage.do?dsForumId=462&dsMessageId=1750355

Received on 2009-04-16 18:33:09 CEST

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.