[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

[PATCH] rssfeed.py (RSS feed for commit / rev-propchange hooks)

From: Stein Roger Skafløtten <srsmail_at_hotmail.com>
Date: 2004-07-25 00:09:46 CEST

The attached rssfeed.py provides a way to create RSS feeds from
commit/propchange hooks. It is based on mailer.py so the diff report will be
quite familiar ;-)

As discussed on #svn, I'm posting it here in case you want to consider it
for the contrib area. Hopefully the script header contains enough
information for using it, but let me know if it doesn't.

Best regards,
Stein Roger Skafløtten

------ rssfeed.py follows -------

#!/usr/bin/env python
#
# rssfeed.py: Creates RSS feeds; typically called from post-commit
#
# Author: Stein Roger Skafløtten <srsmail at hotmail dot com>
#
# Based on mailer.py
#
# $HeadURL$
# $LastChangedDate$
# $LastChangedBy$
# $LastChangedRevision$
#
# USAGE: rssfeed.py commit REPOS-DIR REVISION WWWDIR URL
# rssfeed.py propchange REPOS-DIR REVISION AUTHOR PROPNAME WWWDIR URL
#
# Creates an RSS feed describing the changes between REV and REV-1 for the
# repository REPOS. The feed contains the latest 20 items.
#
# To access the RSS files, use
#
# http://myserver/rss/<myrepos>.rss
#
# assuming WWWDIR is something like /var/www/html/rss, but that's your
# choice.
#
# There is also a feed containing all commits/propchanges from all
# repositories using this script in the hooks:
#
# http://myserver/rss/all.rss
#
# which also contains the latest 20 items.
#
# The RSS item title is formed like this:
# <repos name> Rev. <rev>: <50 first characters of the log message, if
# any>
#
# The RSS item subject/category is either 'Commit' or 'Property Change'
#
# Sample:
# rssfeed.py commit /myrepos/myproject 413 /var/www/html/rss \
#     http://myserver/rss
#
# Troubleshooting:
#
# Nothing happens in the RSS reader:
# - RSS feeds are created on the first commit after the commit-hook
# using this script is installed. You can, of course, run rssfeed.py
# manually to create an initial RSS file
#
# - Make sure Apache (or whatever) has rw permission to WWWDIR
#
# Weird characters in non-ascii title/log:
# - Make sure the RSS reader is using UTF-8 encoding.
#

import os
import sys
import string
import time
import popen2
import cStringIO
import re
import svn.fs
import svn.delta
import svn.repos
import svn.core
import codecs
from xml.dom import minidom, Node
from xml.dom.minidom import Document

# Rule of 78 '=' characters; generate_diff() writes it on its own line
# right after the heading of each per-file section.
SEPARATOR = '=' * 78

def main(pool, cmd, repos_dir, rev, author, propname, wwwdir, url):
  """Publish revision REV of the repository at REPOS_DIR as feed items.

  CMD selects the generator: 'commit' uses Commit, 'propchange' uses
  PropChange (the only one that needs AUTHOR and PROPNAME).  Any other
  value raises UnknownSubcommand.  WWWDIR and URL are handed through to
  the generator unchanged.
  """
  repository = Repository(repos_dir, rev, pool)

  if cmd not in ('commit', 'propchange'):
    raise UnknownSubcommand(cmd)

  if cmd == 'commit':
    feed_writer = Commit(pool, repository, wwwdir, url)
  else:
    # a property change is attributed to whoever made it, so the author
    # read from the revision is replaced by the one passed in
    repository.author = author
    feed_writer = PropChange(pool, repository, author, propname, wwwdir, url)

  feed_writer.generate()

class RSSOutput:
  """Buffers the report for one revision and publishes it as RSS.

  Text written between start() and finish() is saved as a plain-text log
  file in WWWDIR.  finish() then adds an item linking to that file to the
  repository's own feed (<repos name>.rss) and to the shared all.rss.
  """

  # number of items kept in each feed file
  MAX_ITEMS = 20

  # longest log-message excerpt (in characters) shown in an item title
  MAX_TITLE_LOG = 50

  def __init__(self, repos, wwwdir, url, action):
    """Remember where and how to publish.

    repos  -- Repository object; repos_dir, rev, author and get_rev_prop()
              are read from it
    wwwdir -- directory the feed and log files are written to
    url    -- URL prefix under which the files in WWWDIR are served
    action -- category text of the items ('Commit' or 'Property Change'
              in this script)
    """
    self.repos = repos
    self.wwwdir = wwwdir
    self.url = url
    self.action = action

  def start(self, **args):
    "Begin a new report: create the buffer and expose its write() as ours."
    self.buffer = cStringIO.StringIO()
    self.write = self.buffer.write

  def run(self, cmd):
    """Run CMD and append everything it prints to the report buffer.

    CMD is handed to popen2.Popen3 unchanged.
    """
    # we're holding everything in memory, so we may as well read the
    # entire diff into memory and stash that into the buffer
    pipe_ob = popen2.Popen3(cmd)
    self.write(pipe_ob.fromchild.read())

    # wait on the child so we don't end up with a billion zombies
    pipe_ob.wait()

  def _toUnicode(self, text):
    "Return TEXT as unicode; byte strings are decoded as UTF-8."
    if isinstance(text, unicode):
      return text
    # 'replace' keeps a single undecodable byte from aborting the feed update
    return unicode(text, 'utf-8', 'replace')

  def createDocument(self):
    """Create a new RSS 2.0 DOM and return its document node.

    The document holds one <channel> titled '<repos name> - <action>'.
    """
    newdoc = Document()
    newdoc.encoding = "UTF-8"
    rssNode = newdoc.createElement('rss')
    rssNode.setAttribute('version', '2.0')
    channelNode = newdoc.createElement('channel')
    channelTitleNode = newdoc.createElement('title')
    channelTitleNode.appendChild(newdoc.createTextNode(
      (os.path.basename(self.repos.repos_dir) or 'unknown')
      + ' - ' + self.action))

    # hook up nodes; the title has to be attached to the channel as well,
    # otherwise it never reaches the written feed
    channelNode.appendChild(channelTitleNode)
    rssNode.appendChild(channelNode)
    newdoc.appendChild(rssNode)

    # return doc node
    return newdoc

  def createItemNode(self, xmldoc, commitLogFile):
    """Build and return a new <item> element owned by XMLDOC.

    The item carries title, category, link (URL + '/' + COMMITLOGFILE),
    author, pubDate (current time in GMT) and a fixed description.
    """
    newItem = xmldoc.createElement('item')

    # title node: "<repos name> Rev. <rev>: <start of the log message>".
    # The message is decoded BEFORE it is cut, so the cut can never land in
    # the middle of a multi-byte UTF-8 sequence.
    logMsg = self._toUnicode(
      self.repos.get_rev_prop(svn.core.SVN_PROP_REVISION_LOG)
      or 'No log message')
    if len(logMsg) > self.MAX_TITLE_LOG:
      logMsg = logMsg[:self.MAX_TITLE_LOG] + '...'
    for whitespace in ('\n', '\r', '\t'):
      logMsg = logMsg.replace(whitespace, ' ')

    reposName = self._toUnicode(
      os.path.basename(self.repos.repos_dir) or 'unknown')
    titleText = reposName + ' Rev. ' + str(self.repos.rev) + ': ' + logMsg

    # a text node is escaped on output; a CDATA section would refuse to
    # serialize a message containing ']]>'
    titleNode = xmldoc.createElement('title')
    titleNode.appendChild(xmldoc.createTextNode(titleText))
    newItem.appendChild(titleNode)

    # category node
    catNode = xmldoc.createElement('category')
    catNode.appendChild(xmldoc.createTextNode(self.action))
    newItem.appendChild(catNode)

    # link node (the log file name contains the author, hence the decode)
    linkNode = xmldoc.createElement('link')
    linkNode.appendChild(xmldoc.createTextNode(
      self._toUnicode(self.url + '/' + commitLogFile)))
    newItem.appendChild(linkNode)

    # author node
    authorNode = xmldoc.createElement('author')
    authorNode.appendChild(xmldoc.createTextNode(
      self._toUnicode(self.repos.author or 'unknown')))
    newItem.appendChild(authorNode)

    # pubDate node
    pdNode = xmldoc.createElement('pubDate')
    pdNode.appendChild(xmldoc.createTextNode(
      time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())))
    newItem.appendChild(pdNode)

    # description node
    descNode = xmldoc.createElement('description')
    descNode.appendChild(xmldoc.createTextNode('View link for details'))
    newItem.appendChild(descNode)

    return newItem

  def getChannelNode(self, xmldoc):
    "Return the first <channel> element of XMLDOC (IndexError if none)."
    return xmldoc.getElementsByTagName("channel")[0]

  def finish(self):
    "Write the buffered report to a log file and add it to both feeds."

    # create commit log
    commitLogFile = ('feed' + (self.repos.author or '') + '-'
                     + time.strftime("%a%d%b%Y%H%M%S", time.gmtime())
                     + '.txt')
    try:
      logText = unicode(self.buffer.getvalue(), 'utf-8')
    except UnicodeError:
      # the report (typically a diff) is not valid UTF-8
      logText = 'No log available for this revision'

    commitLog = codecs.open(self.wwwdir + '/' + commitLogFile, 'w', 'utf-8')
    try:
      commitLog.write(logText)
    finally:
      commitLog.close()

    # update REPOS-SPECIFIC feed file
    feedFileName = (self.wwwdir + '/'
                    + os.path.basename(self.repos.repos_dir) + '.rss')
    self.updateOrCreateFeed(feedFileName, commitLogFile)

    # update ALL-REPOS feed file
    feedFileName = self.wwwdir + '/all.rss'
    self.updateOrCreateFeed(feedFileName, commitLogFile)

  def _dropOldItems(self, channel, feedFileName):
    "Trim CHANNEL so that one more item still fits within MAX_ITEMS."
    items = channel.getElementsByTagName("item")
    excess = len(items) - self.MAX_ITEMS + 1
    if excess <= 0:
      return

    # attempt to remove log files only when updating the repos-specific rss
    removeLogs = not feedFileName.endswith("/all.rss")

    # the oldest items come first in the file
    for oldItem in items[:excess]:
      try:
        link = oldItem.getElementsByTagName('link')[0].firstChild.data
        channel.removeChild(oldItem)
      except Exception:
        # best effort: a malformed item must not stop the feed update
        sys.stderr.write("error: could not remove xml item")
      else:
        if removeLogs:
          try:
            os.unlink(self.wwwdir + '/' + os.path.basename(link))
          except OSError:
            # no sweat - someone has probably just wiped the feed directory
            pass

  def updateOrCreateFeed(self, feedFileName, commitLogFile):
    """Append an item for COMMITLOGFILE to the feed file FEEDFILENAME.

    An existing feed is parsed and trimmed so that it holds at most
    MAX_ITEMS items after the append; for the repository-specific feed the
    log files linked from the dropped items are deleted as well (to prevent
    bloat in the feed directory).  When the file is missing or cannot be
    parsed a fresh feed is started.
    """
    try:
      # parse existing rss, if any
      xmldoc = minidom.parse(feedFileName)
      channel = self.getChannelNode(xmldoc)
    except Exception:
      # no luck parsing existing rss -> create new xml doc
      xmldoc = self.createDocument()
      channel = self.getChannelNode(xmldoc)
    else:
      self._dropOldItems(channel, feedFileName)

    # append new item
    channel.appendChild(self.createItemNode(xmldoc, commitLogFile))

    # open/truncate file feed
    f = codecs.open(feedFileName, 'w', 'utf-8')
    try:
      f.write(xmldoc.toxml())
    finally:
      f.close()

class Messenger:
  "Base of the feed generators: keeps the pool, the repos and an RSSOutput."

  def __init__(self, pool, repos, wwwdir, url, action, prefix_param):
    # prefix_param is accepted from the subclasses but not used here
    self.output = RSSOutput(repos, wwwdir, url, action)
    self.repos = repos
    self.pool = pool

class Commit(Messenger):
  """Feed generator for a committed revision.

  The constructor replays the revision to collect its changes (sorted by
  path) and derives a subject line from the directories they touch;
  generate() writes the report and publishes it through self.output.
  """

  def __init__(self, pool, repos, wwwdir, url):
    Messenger.__init__(self, pool, repos, wwwdir, url, 'Commit',
                       'commit_subject_prefix')

    # get all the changes and sort by path
    editor = svn.repos.RevisionChangeCollector(repos.fs_ptr, repos.rev,
                                               self.pool)
    e_ptr, e_baton = svn.delta.make_editor(editor, self.pool)
    svn.repos.svn_repos_replay(repos.root_this, e_ptr, e_baton, self.pool)

    self.changelist = editor.changes.items()
    self.changelist.sort()

    # figure out the changed directories (the dict serves as a set)
    dirs = { }
    for path, change in self.changelist:
      if change.item_kind == svn.core.svn_node_dir:
        dirs[path] = None
      else:
        idx = path.rfind('/')
        if idx == -1:
          dirs[''] = None
        else:
          dirs[path[:idx]] = None

    dirlist = dirs.keys()

    # figure out the common portion of all the dirs. note that there is
    # no "common" if only a single dir was changed, or the root was changed.
    # a revision without any changes has nothing in common either; testing
    # for "<= 1" keeps the pop() below away from an empty list.
    if len(dirs) <= 1 or dirs.has_key(''):
      commondir = ''
    else:
      common = dirlist.pop().split('/')
      for d in dirlist:
        parts = d.split('/')
        for i in range(len(common)):
          if i == len(parts) or common[i] != parts[i]:
            del common[i:]
            break
      commondir = '/'.join(common)
      if commondir:
        # strip the common portion from each directory
        l = len(commondir) + 1
        dirlist = [ ]
        for d in dirs.keys():
          if d == commondir:
            dirlist.append('.')
          else:
            dirlist.append(d[l:])
      else:
        # nothing in common, so reset the list of directories
        dirlist = dirs.keys()

    # compose the basic subject line. later, we can prefix it.
    dirlist.sort()
    dirlist = ' '.join(dirlist)
    if commondir:
      self.output.subject = 'r%d - in %s: %s' % (repos.rev, commondir,
                                                 dirlist)
    else:
      self.output.subject = 'r%d - %s' % (repos.rev, dirlist)

  def generate(self):
    "Write the report for the revision and publish it."

    subpool = svn.core.svn_pool_create(self.pool)
    try:
      self.output.start()

      # generate the content
      generate_content(self.output, self.repos, self.changelist, subpool)

      self.output.finish()
    finally:
      # release the subpool even when generating the report failed
      svn.core.svn_pool_clear(subpool)
      svn.core.svn_pool_destroy(subpool)

class PropChange(Messenger):
  "Feed generator for a change of a revision property."

  def __init__(self, pool, repos, author, propname, wwwdir, url):
    Messenger.__init__(self, pool, repos, wwwdir, url, 'Property Change',
                       'propchange_subject_prefix')
    self.propname = propname
    self.author = author
    self.output.subject = 'r%d - %s' % (repos.rev, propname)

  def generate(self):
    "Write who changed which property, plus its new value, and publish it."
    out = self.output
    out.start()

    header = ('Author: %s\nRevision: %s\nProperty Name: %s\n\n'
              % (self.author, self.repos.rev, self.propname))
    out.write(header)

    new_value = self.repos.get_rev_prop(self.propname)
    out.write('New Property Value:\n')
    out.write(str(new_value))

    out.finish()

def generate_content(output, repos, changelist, pool):
  """Write the complete report for one commit to OUTPUT.

  The report consists of a header (author, date, revision), the summary
  lists of added, removed and modified paths, the log message, and one
  section per entry of CHANGELIST produced by generate_diff().
  """
  svndate = repos.get_rev_prop(svn.core.SVN_PROP_REVISION_DATE)
  ### pick a different date format?
  date = time.ctime(svn.core.secs_from_timestr(svndate, pool))

  header = ('Author: %s\nDate: %s\nNew Revision: %s\n\n'
            % (repos.author, date, repos.rev))
  output.write(header)

  # print summary sections
  summaries = (
    ('Added', _select_adds),
    ('Removed', _select_deletes),
    ('Modified', _select_modifies),
    )
  for heading, selection in summaries:
    generate_list(output, heading, changelist, selection)

  log_message = repos.get_rev_prop(svn.core.SVN_PROP_REVISION_LOG) or ''
  output.write('\nLog:\n%s\n' % log_message)

  # these are sorted by path already
  for path, change in changelist:
    generate_diff(output, repos, date, change, pool)

def _select_adds(change):
  return change.added
def _select_deletes(change):
  return change.path is None
def _select_modifies(change):
  return not change.added and change.path is not None

def generate_list(output, header, changelist, selection):
  """Write one summary section to OUTPUT.

  CHANGELIST is a sequence of (path, change) pairs; the pairs whose change
  is accepted by SELECTION are listed under HEADER.  Nothing at all is
  written when no pair is accepted.
  """
  selected = [(path, change) for path, change in changelist
              if selection(change)]
  if not selected:
    return

  output.write('%s:\n' % header)
  for fname, change in selected:
    # directories are shown with a trailing slash
    is_dir = ''
    if change.item_kind == svn.core.svn_node_dir:
      is_dir = '/'

    props = ''
    if change.prop_changes:
      props = ' (props changed)'
      if change.text_changed:
        props = ' (contents, props changed)'

    output.write(' %s%s%s\n' % (fname, is_dir, props))

    if not (change.added and change.base_path):
      continue

    # an added path that has a base path is reported as a copy
    if is_dir:
      text = ''
    elif change.text_changed:
      text = ', changed'
    else:
      text = ' unchanged'
    # the leading slash of the copy source is dropped
    copy_source = change.base_path[1:]
    output.write(' - copied%s from r%d, %s%s\n'
                 % (text, change.base_rev, copy_source, is_dir))

def generate_diff(output, repos, date, change, pool):
  """Write the section for one changed file, including its diff, to OUTPUT.

  Nothing is written for directories, for copies without text changes and
  for modifications without text changes.  Otherwise a heading (Deleted,
  Copied, Added or Modified), the SEPARATOR line and then either a note
  that the file is binary or the output of "diff -u" are written.  DATE is
  the text used in the diff labels of the new (or deleted) file.
  """
  if change.item_kind == svn.core.svn_node_dir:
    # all changes were printed in the summary. nothing to do.
    return

  # make this configurable?
  diff_add = True
  diff_copy = True
  diff_delete = True
  diff_modify = True

  if not change.path:
    ### params is a bit silly here
    if not diff_delete:
      # a record of the deletion is in the summary. no need to write
      # anything further here.
      return

    output.write('\nDeleted: %s\n' % change.base_path)
    diff = svn.fs.FileDiff(repos.get_root(change.base_rev),
                           change.base_path, None, None, pool)

    label1 = '%s\t%s' % (change.base_path, date)
    label2 = '(empty file)'
    singular = True
  elif change.added:
    if change.base_path and (change.base_rev != -1):
      # this file was copied.

      if not change.text_changed:
        # copies with no changes are reported in the header, so we can just
        # skip them here.
        return

      if not diff_copy:
        # a record of the copy is in the summary, no need to write
        # anything further here.
        return

      # note that we strip the leading slash from the base (copyfrom) path
      output.write('\nCopied: %s (from r%d, %s)\n'
                   % (change.path, change.base_rev, change.base_path[1:]))
      diff = svn.fs.FileDiff(repos.get_root(change.base_rev),
                             change.base_path[1:],
                             repos.root_this, change.path,
                             pool)
      label1 = change.base_path[1:] + '\t(original)'
      label2 = '%s\t%s' % (change.path, date)
      singular = False
    else:
      if not diff_add:
        # a record of the addition is in the summary. no need to write
        # anything further here.
        return

      output.write('\nAdded: %s\n' % change.path)
      diff = svn.fs.FileDiff(None, None, repos.root_this, change.path, pool)
      label1 = '(empty file)'
      label2 = '%s\t%s' % (change.path, date)
      singular = True
  elif not change.text_changed:
    # don't bother to show an empty diff. prolly just a prop change.
    return
  else:
    if not diff_modify:
      # a record of the modification is in the summary, no need to write
      # anything further here.
      return

    output.write('\nModified: %s\n' % change.path)
    diff = svn.fs.FileDiff(repos.get_root(change.base_rev),
                           change.base_path[1:],
                           repos.root_this, change.path,
                           pool)
    label1 = change.base_path[1:] + '\t(original)'
    label2 = '%s\t%s' % (change.path, date)
    singular = False

  output.write(SEPARATOR + '\n')

  if diff.either_binary():
    if singular:
      output.write('Binary file. No diff available.\n')
    else:
      output.write('Binary files. No diff available.\n')
    return

  ### do something with change.prop_changes

  src_fname, dst_fname = diff.get_files()

  # append the diff to the output stream.  The command is passed as an
  # argument list, so it is executed directly instead of through a shell:
  # the labels contain path names taken from the repository, and quotes,
  # backticks or '$' in such a name must not be interpreted by a shell.
  output.run(['/usr/bin/diff', '-u',
              '-L', label1, src_fname,
              '-L', label2, dst_fname])

class Repository:
  "Hold roots and other information about the repository."

  def __init__(self, repos_dir, rev, pool):
    """Open the filesystem of REPOS_DIR and look up revision REV.

    The root of REV is kept as root_this and the revision's author
    property as author.
    """
    self.repos_dir = repos_dir
    self.rev = rev
    self.pool = pool

    # use the 'db' subdirectory when there is one, else REPOS_DIR itself
    candidate = os.path.join(repos_dir, 'db')
    if os.path.exists(candidate):
      db_path = candidate
    else:
      db_path = repos_dir

    self.fs_ptr = svn.fs.new(None, pool)
    svn.fs.open_berkeley(self.fs_ptr, db_path)

    # revision number -> root, filled on demand by get_root()
    self.roots = { }
    self.root_this = self.get_root(rev)
    self.author = self.get_rev_prop(svn.core.SVN_PROP_REVISION_AUTHOR)

  def get_rev_prop(self, propname):
    "Return the value of property PROPNAME of this object's revision."
    return svn.fs.revision_prop(self.fs_ptr, self.rev, propname, self.pool)

  def get_root(self, rev):
    "Return the root of revision REV, opening it on first use only."
    if rev not in self.roots:
      self.roots[rev] = svn.fs.revision_root(self.fs_ptr, rev, self.pool)
    return self.roots[rev]

class UnknownSubcommand(Exception):
  "Raised by main() for a subcommand other than 'commit' or 'propchange'."

# enable True/False in older versions of Python: where the names are not
# built in, looking up True raises NameError and both names are then
# defined here as the integers 1 and 0
try:
  _unused = True
except NameError:
  True = 1
  False = 0

if __name__ == '__main__':
  def usage():
    "Print the command-line synopsis to stderr and exit with status 1."
    sys.stderr.write(
'''USAGE: %s commit REPOS-DIR REVISION WWWDIR URL
       %s propchange REPOS-DIR REVISION AUTHOR PROPNAME WWWDIR URL
'''
                     % (sys.argv[0], sys.argv[0]))
    sys.exit(1)

  if len(sys.argv) < 4:
    usage()

  # get params not specific to command
  cmd = sys.argv[1]
  repos_dir = sys.argv[2]
  try:
    revision = int(sys.argv[3])
  except ValueError:
    # REVISION is not a number: show the synopsis rather than a traceback
    usage()
  wwwdir = None
  url = None
  author = None
  propname = None

  # get more params
  if cmd == 'commit':
    if len(sys.argv) != 6:
      usage()
    wwwdir = sys.argv[4]
    url = sys.argv[5]
  elif cmd == 'propchange':
    if len(sys.argv) != 8:
      usage()
    author = sys.argv[4]
    propname = sys.argv[5]
    wwwdir = sys.argv[6]
    url = sys.argv[7]
  else:
    usage()

  # run main as svn app
  svn.core.run_app(main, cmd, repos_dir, revision,
                   author, propname, wwwdir, url)

_________________________________________________________________
MSN Hotmail http://www.hotmail.com Med markedets beste SPAM-filter. Gratis!

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Sun Jul 25 01:01:53 2004

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.