[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

[PATCH] rssfeed.py (corrected)

From: Stein Roger Skafløtten <srsmail_at_hotmail.com>
Date: 2004-07-25 00:38:01 CEST

Ref. previous post... use following source instead as the first one had a
missing line.

-------- rssfeed.py --------
#!/usr/bin/env python
# rssfeed.py: Creates RSS feeds; typically called from commit/revpropchange
# Author: Stein Roger Skafløtten <srsmail at hotmail dot com>
# Based on mailer.py
# $HeadURL$
# $LastChangedDate$
# $LastChangedBy$
# $LastChangedRevision$
# Creates an RSS feed describing the changes between REV and REV-1 for the
# repository REPOS. The feed contains the latest 20 items.
# To access the RSS files, use
# http://myserver/rss/<myrepos>.rss
# assuming WWWDIR is something like /var/www/html/rss, but that's your
# There is also a feed containing all commits/propchanges from all
# repositories using this script in the hooks:
# http://myserver/rss/all.rss
# which also contains the latest 20 items.
# The RSS item title is formed like this:
# <repos name> Rev. <rev>: <first 50 characters of the log message, if any>
# The RSS item subject/category is either 'Commit' or 'Property Change'
# Sample:
# rssfeed.py commit /myrepos/myproject 413 /var/www/html/rss
# Troubleshooting:
# Nothing happens in the RSS reader:
# - RSS feeds are created on the first commit after the hook
# using this script is installed. You can, of course, run the script
# manually to create an initial RSS file
# - Make sure Apache (or whatever) has rw permission to WWWDIR
# Weird characters in non-ascii title/log:
# - Make sure the RSS reader is using UTF-8 encoding.

import os
import sys
import string
import time
import popen2
import cStringIO
import re
import svn.fs
import svn.delta
import svn.repos
import svn.core
import codecs
from xml.dom import minidom, Node
from xml.dom.minidom import Document

# Rule of 78 '=' characters; generate_diff writes it before each file's diff.
SEPARATOR = '=' * 78

def main(pool, cmd, repos_dir, rev, author, propname, wwwdir, url):
  """Produce the feed entry for one hook invocation.

  pool      -- memory pool handed in by svn.core.run_app
  cmd       -- 'commit' or 'propchange'
  repos_dir -- path of the repository
  rev       -- revision number the hook fired for
  author    -- author of the property change (propchange only)
  propname  -- name of the changed revision property (propchange only)
  wwwdir    -- directory the feed files are written to
  url       -- URL passed through to the feed writer

  Raises UnknownSubcommand when cmd is neither 'commit' nor 'propchange'.
  """
  repos = Repository(repos_dir, rev, pool)

  if cmd == 'commit':
    messenger = Commit(pool, repos, wwwdir, url)
  elif cmd == 'propchange':
    # Override the repos revision author with the author of the propchange
    repos.author = author
    messenger = PropChange(pool, repos, author, propname, wwwdir, url)
  else:
    # Without this branch the raise below sat inside the propchange case,
    # so every property change failed and unknown commands went unnoticed.
    raise UnknownSubcommand(cmd)

  # Constructing the messenger only collects data; generate() does the work.
  messenger.generate()


class RSSOutput:
  "Creates an XML feed."

  # NOTE(review): this class does not parse as posted.  Several statements
  # were evidently lost (truncated calls, string literals split across
  # lines, blocks indented without a visible try/if header).  The notes
  # below mark each spot; the missing text has to be recovered from the
  # author's original file rather than guessed.

  # Stores the repository object, the feed directory, the URL and the
  # action label for use by the other methods.
  def __init__(self, repos, wwwdir, url, action):
    self.repos = repos
    self.wwwdir = wwwdir
    self.url = url
    self.action = action

  # Begins a new entry: creates an in-memory buffer and exposes the
  # buffer's write method as self.write.  Keyword arguments are accepted
  # and ignored.
  def start(self, **args):

    self.buffer = cStringIO.StringIO()
    self.write = self.buffer.write

  # Starts CMD as a child process through popen2.Popen3.
  # NOTE(review): the comments describe reading the child's output into
  # the buffer and waiting on the child, but neither statement is present.
  def run(self, cmd):
    # we're holding everything in memory, so we may as well read the
    # entire diff into memory and stash that into the buffer
    pipe_ob = popen2.Popen3(cmd)

    # wait on the child so we don't end up with a billion zombies

  # creates new xml dom and returns the document node
  def createDocument(self):
    newdoc = Document()
    newdoc.encoding = "UTF-8"
    rssNode = newdoc.createElement('rss')
    channelNode = newdoc.createElement('channel')
    channelTitleNode = newdoc.createElement('title')
    # NOTE(review): the next line is the tail of a statement whose
    # beginning is missing (presumably the channel title text -- confirm).
or 'unknown')+' - '+self.action))

    # hook up nodes
    # NOTE(review): no appendChild calls follow the comment above, so the
    # rss/channel/title nodes are never attached to the document here.

    # return doc node
    return newdoc

  # creates a new RSS item
  # Builds title, category, link, author, pubDate and description elements
  # for the current revision and returns the <item> element.
  # NOTE(review): in the visible code only authorNode, pdNode and descNode
  # receive text, none of the child elements is appended to newItem, and
  # commitLogFile is never used.
  def createItemNode(self, xmldoc, commitLogFile):
    newItem = xmldoc.createElement('item')

    # title node
    titleNode = xmldoc.createElement('title')
    # NOTE(review): the string literal below is split across two lines
    # (mail line wrapping) and must be rejoined.
    logMsg = (self.repos.get_rev_prop(svn.core.SVN_PROP_REVISION_LOG) or 'No
log message')
    # Titles keep at most the first 50 characters of the log message.
    if len(logMsg) > 50:
      logMsg = logMsg[:50]+'...'

    # Flatten line breaks and tabs so the title stays on one line.
    logMsg = logMsg.replace('\n', ' ')
    logMsg = logMsg.replace('\r', ' ')
    logMsg = logMsg.replace('\t', ' ')

    # NOTE(review): another literal split by line wrapping.
    titleText = (os.path.basename(self.repos.repos_dir) or 'unknown')+' Rev.
'+str(self.repos.rev)+': '+logMsg
    # Decodes the title from UTF-8 bytes; u is not used afterwards in the
    # visible code.
    u = unicode(titleText, "utf-8")

    # category node
    catNode = xmldoc.createElement('category')

    # link node
    linkNode = xmldoc.createElement('link')

    # author node
    authorNode = xmldoc.createElement('author')
    # NOTE(review): the call below is truncated after "or".
    authorNode.appendChild(xmldoc.createTextNode((self.repos.author or

    # pubDate node
    pdNode = xmldoc.createElement('pubDate')
    # Publication date is the current time in GMT, not the revision date.
    # NOTE(review): the format string is split by line wrapping.
    pdNode.appendChild(xmldoc.createTextNode(time.strftime("%a, %d %b %Y
%H:%M:%S +0000", time.gmtime())))

    # description node
    descNode = xmldoc.createElement('description')
    descNode.appendChild(xmldoc.createTextNode('View link for details'))

    return newItem

  # Returns the first <channel> element of XMLDOC; raises IndexError when
  # the document has none.
  def getChannelNode(self, xmldoc):
    return xmldoc.getElementsByTagName("channel")[0]

  # Writes the buffered text to a log file under wwwdir, then updates the
  # repository-specific feed and the all-repositories feed (all.rss).
  def finish(self):

    # create commit log
    # NOTE(review): the assignment below is truncated after "or".
    commitLogFile= 'feed'+(self.repos.author or
    commitLog=codecs.open(self.wwwdir+'/'+commitLogFile, 'w', 'utf-8')
    # NOTE(review): the two writes are indented one level deeper than the
    # line above, so an enclosing if/else or try/except is missing.  The
    # file is also never closed in the visible code.
      commitLog.write(unicode(self.buffer.getvalue(), 'utf-8'))
      commitLog.write('No log available for this revision')


    # update REPOS-SPECIFIC feed file
    # NOTE(review): the right-hand side of this assignment is missing.
    feedFileName =
    self.updateOrCreateFeed(feedFileName, commitLogFile)

    # update ALL-REPOS feed file
    feedFileName = self.wwwdir+'/all.rss'
    self.updateOrCreateFeed(feedFileName, commitLogFile)

  # Updates or creates the RSS feed file
  # If there are more than 20 entries, the oldest
  # item will be removed; the log file referred to
  # in the link-node will be removed as well (to
  # prevent bloat in the feed directory)
  # NOTE(review): the indentation shows that try/except headers and the
  # statements that remove the item and the log file were lost.  In the
  # visible code the opened file is never written or closed, and newItem
  # is created but a second item is built for appendChild.
  def updateOrCreateFeed(self, feedFileName, commitLogFile):


      # parse existing rss, if any
      xmldoc = minidom.parse(feedFileName)

      # create new item
      newItem = self.createItemNode(xmldoc, commitLogFile)

      channel = self.getChannelNode(xmldoc)
      items = channel.getElementsByTagName("item")

        # remove oldest item
        if len(items) >= 20:
          # items[0] is treated as the oldest entry; its link text is read
          # into url.
          url = items[0].getElementsByTagName('link')[0].firstChild.toxml()

          # attempt to remove log file only when updating repos-specific rss
          if not feedFileName.endswith("/all.rss"):
              # no sweat - someone has probably just wiped the feed

        sys.stderr.write("error: could not remove xml item")

      # no luck parsing existing rss -> create new xml doc
      xmldoc = self.createDocument()
      channel = self.getChannelNode(xmldoc)

    # append new item
    channel.appendChild(self.createItemNode(xmldoc, commitLogFile))

    # open/truncate file feed
    f=codecs.open(feedFileName, 'w', 'utf-8')

class Messenger:
  """Base class of the feed generators.

  Keeps the memory pool and the repository, and creates the RSSOutput
  that subclasses write to.  prefix_param is accepted but not used.
  """

  def __init__(self, pool, repos, wwwdir, url, action, prefix_param):
    self.pool, self.repos = pool, repos
    feed_writer = RSSOutput(repos, wwwdir, url, action)
    self.output = feed_writer

class Commit(Messenger):
  """Feed generator for a committed revision.

  The constructor replays the revision to collect the changed paths and
  derives a subject line from the directories they live in; generate()
  writes the commit description through self.output.
  """

  def __init__(self, pool, repos, wwwdir, url):
    # Messenger does not use its last (prefix) argument, so None is passed.
    Messenger.__init__(self, pool, repos, wwwdir, url, 'Commit', None)

    # get all the changes and sort by path
    editor = svn.repos.RevisionChangeCollector(repos.fs_ptr, repos.rev,
                                               self.pool)
    e_ptr, e_baton = svn.delta.make_editor(editor, self.pool)
    svn.repos.svn_repos_replay(repos.root_this, e_ptr, e_baton, self.pool)

    self.changelist = editor.changes.items()
    self.changelist.sort()

    # figure out the changed directories (dict used as a set)
    dirs = { }
    for path, change in self.changelist:
      if change.item_kind == svn.core.svn_node_dir:
        dirs[path] = None
      else:
        # a file: record its parent directory ('' for the root)
        idx = path.rfind('/')
        if idx == -1:
          dirs[''] = None
        else:
          dirs[path[:idx]] = None

    dirlist = list(dirs.keys())

    # figure out the common portion of all the dirs. note that there is
    # no "common" if only a single dir was changed, or the root was changed.
    # An empty change set is treated the same way so pop() cannot fail.
    if len(dirs) <= 1 or '' in dirs:
      commondir = ''
    else:
      common = dirlist.pop().split('/')
      for d in dirlist:
        parts = d.split('/')
        for i in range(len(common)):
          if i == len(parts) or common[i] != parts[i]:
            # first mismatch: everything from here on is not shared
            del common[i:]
            break
      commondir = '/'.join(common)
      if commondir:
        # strip the common portion from each directory
        l = len(commondir) + 1
        dirlist = [ ]
        for d in dirs.keys():
          if d == commondir:
            dirlist.append('.')
          else:
            dirlist.append(d[l:])
      else:
        # nothing in common, so reset the list of directories
        dirlist = list(dirs.keys())

    # compose the basic subject line. later, we can prefix it.
    # Sorting makes the subject independent of dict ordering.
    dirlist.sort()
    dirlist = ' '.join(dirlist)
    if commondir:
      self.output.subject = 'r%d - in %s: %s' % (repos.rev, commondir,
                                                 dirlist)
    else:
      self.output.subject = 'r%d - %s' % (repos.rev, dirlist)

  def generate(self):
    "Write the commit description and diffs, then publish the feed."

    subpool = svn.core.svn_pool_create(self.pool)
    try:
      # start() creates the buffer behind output.write; finish() writes
      # the log file and updates the feeds.
      self.output.start()

      # generate the content
      generate_content(self.output, self.repos, self.changelist, subpool)

      self.output.finish()
    finally:
      svn.core.svn_pool_destroy(subpool)



class PropChange(Messenger):
  """Feed generator for a change to a revision property.

  author and propname identify who changed which property; the new value
  is read from the repository when generate() runs.
  """

  def __init__(self, pool, repos, author, propname, wwwdir, url):
    # Messenger does not use its last (prefix) argument, so None is passed.
    Messenger.__init__(self, pool, repos, wwwdir, url, 'Property Change',
                       None)
    self.author = author
    self.propname = propname
    self.output.subject = 'r%d - %s' % (repos.rev, propname)

  def generate(self):
    "Write the property-change description, then publish the feed."
    # start() creates the buffer behind output.write.
    self.output.start()
    self.output.write('Author: %s\nRevision: %s\nProperty Name: %s\n\n'
                      % (self.author, self.repos.rev, self.propname))

    propvalue = self.repos.get_rev_prop(self.propname)
    self.output.write('New Property Value:\n')
    # A missing property yields no value; write nothing rather than fail.
    self.output.write(propvalue or '')
    self.output.finish()

def generate_content(output, repos, changelist, pool):
  """Write the full description of a commit to output.

  output     -- object with write() and run() (see generate_diff)
  repos      -- Repository for the committed revision
  changelist -- list of (path, change) pairs, sorted by path
  pool       -- memory pool for the Subversion calls

  Emits a header (author, date, revision), the Added/Removed/Modified
  summaries, the log message and one diff per changed file.
  """

  svndate = repos.get_rev_prop(svn.core.SVN_PROP_REVISION_DATE)
  ### pick a different date format?
  date = time.ctime(svn.core.secs_from_timestr(svndate, pool))

  output.write('Author: %s\nDate: %s\nNew Revision: %s\n\n'
               % (repos.author, date, repos.rev))

  # print summary sections
  generate_list(output, 'Added', changelist, _select_adds)
  generate_list(output, 'Removed', changelist, _select_deletes)
  generate_list(output, 'Modified', changelist, _select_modifies)

  # a revision without a log message prints an empty log section
  output.write('Log:\n%s\n'
               % (repos.get_rev_prop(svn.core.SVN_PROP_REVISION_LOG) or ''))

  # these are sorted by path already
  for path, change in changelist:
    generate_diff(output, repos, date, change, pool)

def _select_adds(change):
  return change.added
def _select_deletes(change):
  return change.path is None
def _select_modifies(change):
  return not change.added and change.path is not None

def generate_list(output, header, changelist, selection):
  """Write one summary section (e.g. 'Added') to output.

  output     -- object with a write() method
  header     -- section title, printed followed by a colon
  changelist -- list of (path, change) pairs
  selection  -- predicate taking a change; only matching entries are listed

  Nothing is written when no entry matches.
  """
  items = [ ]
  for path, change in changelist:
    if selection(change):
      items.append((path, change))
  if items:
    output.write('%s:\n' % header)
    for fname, change in items:
      # directories are shown with a trailing slash
      if change.item_kind == svn.core.svn_node_dir:
        is_dir = '/'
      else:
        is_dir = ''
      if change.prop_changes:
        if change.text_changed:
          props = ' (contents, props changed)'
        else:
          props = ' (props changed)'
      else:
        props = ''
      output.write(' %s%s%s\n' % (fname, is_dir, props))
      if change.added and change.base_path:
        # an addition with a base path is a copy; say where it came from
        if is_dir:
          text = ''
        elif change.text_changed:
          text = ', changed'
        else:
          text = ' unchanged'
        # the leading slash is stripped from the copy source path
        output.write(' - copied%s from r%d, %s%s\n'
                     % (text, change.base_rev, change.base_path[1:],
                        is_dir))

def generate_diff(output, repos, date, change, pool):
  """Write the unified diff for one changed file to output.

  output -- object with write() and run(); run() receives the diff
            command as an argument list
  repos  -- Repository for the committed revision
  date   -- formatted commit date, used in the diff labels
  change -- change record for one path
  pool   -- memory pool for the Subversion calls

  Directories, unmodified copies and property-only changes produce no
  output.  Binary files get a one-line notice instead of a diff.
  """
  if change.item_kind == svn.core.svn_node_dir:
    # all changes were printed in the summary. nothing to do.
    return

  # make this configurable?
  diff_add = True
  diff_copy = True
  diff_delete = True
  diff_modify = True

  if not change.path:
    if not diff_delete:
      # a record of the deletion is in the summary. no need to write
      # anything further here.
      return

    output.write('\nDeleted: %s\n' % change.base_path)
    diff = svn.fs.FileDiff(repos.get_root(change.base_rev),
                           change.base_path, None, None, pool)

    label1 = '%s\t%s' % (change.base_path, date)
    label2 = '(empty file)'
    singular = True
  elif change.added:
    if change.base_path and (change.base_rev != -1):
      # this file was copied.

      if not change.text_changed:
        # copies with no changes are reported in the header, so we can just
        # skip them here.
        return

      if not diff_copy:
        # a record of the copy is in the summary, no need to write
        # anything further here.
        return

      # note that we strip the leading slash from the base (copyfrom) path
      output.write('\nCopied: %s (from r%d, %s)\n'
                   % (change.path, change.base_rev, change.base_path[1:]))
      diff = svn.fs.FileDiff(repos.get_root(change.base_rev),
                             change.base_path[1:],
                             repos.root_this, change.path,
                             pool)
      label1 = change.base_path[1:] + '\t(original)'
      label2 = '%s\t%s' % (change.path, date)
      singular = False
    else:
      if not diff_add:
        # a record of the addition is in the summary. no need to write
        # anything further here.
        return

      output.write('\nAdded: %s\n' % change.path)
      diff = svn.fs.FileDiff(None, None, repos.root_this, change.path, pool)
      label1 = '(empty file)'
      label2 = '%s\t%s' % (change.path, date)
      singular = True
  elif not change.text_changed:
    # don't bother to show an empty diff. prolly just a prop change.
    return
  else:
    if not diff_modify:
      # a record of the modification is in the summary, no need to write
      # anything further here.
      return

    output.write('\nModified: %s\n' % change.path)
    diff = svn.fs.FileDiff(repos.get_root(change.base_rev),
                           change.base_path[1:],
                           repos.root_this, change.path,
                           pool)
    label1 = change.base_path[1:] + '\t(original)'
    label2 = '%s\t%s' % (change.path, date)
    singular = False

  output.write(SEPARATOR + '\n')

  if diff.either_binary():
    if singular:
      output.write('Binary file. No diff available.\n')
    else:
      output.write('Binary files. No diff available.\n')
    return

  ### do something with change.prop_changes

  src_fname, dst_fname = diff.get_files()

  # append the diff to the output stream.  The command is passed as an
  # argument list so that quotes or shell metacharacters in repository
  # paths cannot alter it; popen2.Popen3 executes a sequence directly
  # instead of handing it to the shell.
  output.run(['/usr/bin/diff', '-u',
              '-L', label1, src_fname,
              '-L', label2, dst_fname])

class Repository:
  "Hold roots and other information about the repository."

  def __init__(self, repos_dir, rev, pool):
    """Open the filesystem of repos_dir and load data for revision rev.

    repos_dir -- repository directory, or the database directory itself
    rev       -- revision number to describe
    pool      -- memory pool for the Subversion calls
    """
    self.repos_dir = repos_dir
    self.rev = rev
    self.pool = pool

    # Use <repos_dir>/db when it exists; otherwise treat repos_dir itself
    # as the database directory.
    db_path = os.path.join(repos_dir, 'db')
    if not os.path.exists(db_path):
      db_path = repos_dir

    self.fs_ptr = svn.fs.new(None, pool)
    svn.fs.open_berkeley(self.fs_ptr, db_path)

    # revision number -> revision root, filled lazily by get_root()
    self.roots = { }
    self.root_this = self.get_root(rev)
    self.author = self.get_rev_prop(svn.core.SVN_PROP_REVISION_AUTHOR)

  def get_rev_prop(self, propname):
    "Return the value of revision property propname for self.rev."
    return svn.fs.revision_prop(self.fs_ptr, self.rev, propname, self.pool)

  def get_root(self, rev):
    "Return the root of revision rev, opening and caching it on first use."
    try:
      return self.roots[rev]
    except KeyError:
      pass
    root = self.roots[rev] = svn.fs.revision_root(self.fs_ptr, rev,
                                                  self.pool)
    return root

class UnknownSubcommand(Exception):
  "Signals a subcommand that main() does not handle."
  pass

# enable True/False in older vsns of Python
  _unused = True
except NameError:
  True = 1
  False = 0

if __name__ == '__main__':
  def usage():
    "Print the command-line synopsis to stderr and exit with status 1."
    sys.stderr.write('USAGE: %s commit REPOS-DIR REVISION WWWDIR URL\n'
                     '       %s propchange REPOS-DIR REVISION AUTHOR '
                     'PROPNAME WWWDIR URL\n'
                     % (sys.argv[0], sys.argv[0]))
    sys.exit(1)

  # every subcommand needs at least: script, command, repository, revision
  if len(sys.argv) < 4:
    usage()

  # get params not specific to command
  cmd = sys.argv[1]
  repos_dir = sys.argv[2]
  revision = int(sys.argv[3])
  wwwdir = None
  url = None
  author = None
  propname = None

  # get more params
  if cmd == 'commit':
    if len(sys.argv) != 6:
      usage()
    wwwdir = sys.argv[4]
    url = sys.argv[5]
  elif cmd == 'propchange':
    if len(sys.argv) != 8:
      usage()
    author = sys.argv[4]
    propname = sys.argv[5]
    wwwdir = sys.argv[6]
    url = sys.argv[7]
  else:
    # report an unrecognised subcommand before touching the repository
    usage()

  # run main as svn app
  svn.core.run_app(main, cmd, repos_dir, revision,
                   author, propname, wwwdir, url)

MSN Messenger http://www.msn.no/messenger Den korteste veien mellom deg og
dine venner

To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Sun Jul 25 00:38:15 2004

This is an archived mail posted to the Subversion Dev mailing list.