*** This is just a patch for experimentation. ***

Implement something like the --ignore-all-space option in libsvn_diff.
Currently, this option is hard-coded, just to play with it.  Before the
lines are compared, all space characters are stripped.  This works for
diff and diff3, so don't use svn update with this patch :-)

* subversion/libsvn_diff/diff_file.c
  (svn_diff__file_baton_t): Add filter_baton pointers.
  (struct filter_baton): New struct.
  (filter): New function that does the very advanced string normalization.
  (svn_diff__file_datasource_get_next_token): Normalize chunk before
   computing hash code.
  (svn_diff__file_token_compare): Normalize the chunks before comparing.
  (svn_diff_file_diff, svn_diff_file_diff3): Initialize filter batons.
  (svn_diff3__file_output_common): Output the line from the "modified"
   file instead of the "original" file.

Index: subversion/libsvn_diff/diff_file.c
===================================================================
--- subversion/libsvn_diff/diff_file.c	(revision 18321)
+++ subversion/libsvn_diff/diff_file.c	(working copy)
@@ -60,6 +60,9 @@
 
   svn_diff__file_token_t *tokens;
 
+  /* ### Filter function? Or maybe not that generality? */
+  void *filter_baton[4];
+
   apr_pool_t *pool;
 } svn_diff__file_baton_t;
 
@@ -242,6 +245,37 @@
 }
 
 
+/* Baton used by the filter function. */
+struct filter_baton {
+  svn_stringbuf_t *buf;
+};
+
+/* Normalize *LEN bytes pointed to by *BUF for comparison.
+   Currently, this removes all spaces.
+   *BUF and *LEN may be replaced with a new buffer/length pair.
+   BATON stores the state of the operation.
+   A call with *LEN set to 0 indicates the end of the chunk and
+   resets the state stored in BATON. */
+static void
+filter(char **buf, apr_off_t *len, void *baton)
+{
+  struct filter_baton *fb = baton;
+  char *p = *buf;
+
+  svn_stringbuf_setempty (fb->buf);
+
+  while (p < *buf + *len)
+    {
+      /* ### Note that I have a pending patent on this efficient code
+         snippet... */
+      if (*p != ' ')
+        svn_stringbuf_appendbytes (fb->buf, p, 1);
+      ++p;
+    }
+  *buf = fb->buf->data;
+  *len = fb->buf->len;
+}
+
 static svn_error_t *
 svn_diff__file_datasource_get_next_token(apr_uint32_t *hash, void **token,
                                          void *baton,
@@ -258,6 +292,9 @@
   apr_uint32_t h = 0;
   /* Did the last chunk end in a CR character? */
   svn_boolean_t had_cr = FALSE;
+  /* Filtered buf and length. */
+  char *buf;
+  apr_off_t len;
 
   *token = NULL;
 
@@ -314,7 +351,10 @@
 
       length = endp - curp;
       file_token->length += length;
-      h = svn_diff__adler32(h, curp, length);
+      buf = curp;
+      len = length;
+      filter(&buf, &len, file_baton->filter_baton[idx]);
+      h = svn_diff__adler32(h, buf, len);
 
       curp = endp = file_baton->buffer[idx];
       file_baton->chunk[idx]++;
@@ -340,7 +380,15 @@
 
   length = eol - curp;
   file_token->length += length;
-  *hash = svn_diff__adler32(h, curp, length);
+  buf = curp;
+  len = length;
+  filter(&buf, &len, file_baton->filter_baton[idx]);
+  h = svn_diff__adler32(h, buf, len);
+  /* Signal end of token to the filter, and let it finish its work. */
+  buf = NULL;
+  len = 0;
+  filter(&buf, &len, file_baton->filter_baton[idx]);
+  *hash = svn_diff__adler32(h, buf, len);
   file_baton->curp[idx] = eol;
 
   *token = file_token;
@@ -364,29 +412,37 @@
   apr_off_t offset[2];
   int idx[2];
   apr_off_t length[2];
-  apr_off_t total_length;
+  apr_off_t total_length[2];
   apr_off_t len;
   int i;
   int chunk[2];
 
-  if (file_token1->length < file_token2->length)
+#if 0
+  /* Some easy outs. */
+  if (! filter)
     {
-      *compare = -1;
-      return SVN_NO_ERROR;
-    }
+      apr_off_t tot_length;
+      if (file_token1->length < file_token2->length)
+        {
+          *compare = -1;
+          return SVN_NO_ERROR;
+        }
 
-  if (file_token1->length > file_token2->length)
-    {
-      *compare = 1;
-      return SVN_NO_ERROR;
-    }
+      if (file_token1->length > file_token2->length)
+        {
+          *compare = 1;
+          return SVN_NO_ERROR;
+        }
 
-  total_length = file_token1->length;
-  if (total_length == 0)
-    {
-      *compare = 0;
-      return SVN_NO_ERROR;
+      tot_length = file_token1->length;
+
+      if (tot_length == 0)
+        {
+          *compare = 0;
+          return SVN_NO_ERROR;
+        }
     }
+#endif
 
   idx[0] = svn_diff__file_datasource_to_index(file_token1->datasource);
   idx[1] = svn_diff__file_datasource_to_index(file_token2->datasource);
@@ -394,51 +450,92 @@
   offset[1] = file_token2->offset;
   chunk[0] = file_baton->chunk[idx[0]];
   chunk[1] = file_baton->chunk[idx[1]];
+  total_length[0] = file_token1->length;
+  total_length[1] = file_token2->length;
 
+  for (i = 0; i < 2; ++i)
+    {
+      if (offset_to_chunk(offset[i]) == chunk[i])
+        {
+          /* If the start of the token is in memory, the entire token is
+           * in memory.
+           */
+          bufp[i] = file_baton->buffer[idx[i]];
+          bufp[i] += offset_in_chunk(offset[i]);
+
+          length[i] = total_length[i];
+          filter(&bufp[i], &length[i], file_baton->filter_baton[idx[i]]);
+          total_length[i] = 0;
+        }
+      else
+        length[i] = 0;
+    }
+
   do
     {
       for (i = 0; i < 2; i++)
         {
-          if (offset_to_chunk(offset[i]) == chunk[i])
+          while (length[i] == 0 && total_length[i] > 0)
             {
-              /* If the start of the token is in memory, the entire token is
-               * in memory.
-               */
-              bufp[i] = file_baton->buffer[idx[i]];
-              bufp[i] += offset_in_chunk(offset[i]);
-
-              length[i] = total_length;
-            }
-          else
-            {
               /* Read a chunk from disk into a buffer */
               bufp[i] = buffer[i];
-              length[i] = total_length > COMPARE_CHUNK_SIZE ?
-                COMPARE_CHUNK_SIZE : total_length;
+              length[i] = total_length[i] > COMPARE_CHUNK_SIZE ?
+                COMPARE_CHUNK_SIZE : total_length[i];
 
               SVN_ERR(read_chunk(file_baton->file[idx[i]],
                                  file_baton->path[idx[i]],
                                  bufp[i], length[i], offset[i],
                                  file_baton->pool));
+              offset[i] += length[i];
+              total_length[i] -= length[i];
+              filter(&bufp[i], &length[i], file_baton->filter_baton[idx[i]]);
             }
+
+          /* If we have no more data, let the filter know. */
+          if (length[i] == 0 && total_length[i] == 0)
+            {
+              filter(&bufp[i], &length[i], file_baton->filter_baton[idx[i]]);
+              total_length[i] = -1;
+            }
+
         }
 
       len = length[0] > length[1] ? length[1] : length[0];
 
-      offset[0] += len;
-      offset[1] += len;
-
+      if (len == 0)
+        {
+          *compare = 0;
+          break;
+        }
       /* Compare two chunks (that could be entire tokens if they both reside
       * in memory).
       */
       *compare = memcmp(bufp[0], bufp[1], len);
       if (*compare != 0)
-        return SVN_NO_ERROR;
+        break;
 
-      total_length -= len;
+      length[0] -= len;
+      length[1] -= len;
     }
-  while(total_length > 0);
+  while(1);
 
-  *compare = 0;
+  if (*compare == 0)
+    {
+      if (length[0] > 0)
+        *compare = -1;
+      else if (length[1] > 0)
+        *compare = 1;
+    }
+
+  /* Clean up filters. Note that we don't waste time on reading the whole
+     chunk just to make the filter happy, but who cares? */
+  for (i = 0; i < 2; ++i)
+    {
+      if (total_length[i] >= 0)
+        {
+          length[i] = 0;
+          filter(&bufp[i], &length[i], file_baton->filter_baton[idx[i]]);
+        }
+    }
+
   return SVN_NO_ERROR;
 }
 
@@ -482,11 +579,18 @@
   apr_pool_t *pool)
 {
   svn_diff__file_baton_t baton;
+  struct filter_baton filter_baton[2];
+  int i;
 
   memset(&baton, 0, sizeof(baton));
   baton.path[0] = original;
   baton.path[1] = modified;
   baton.pool = svn_pool_create(pool);
+  for (i = 0; i < 2; ++i)
+    {
+      baton.filter_baton[i] = &filter_baton[i];
+      filter_baton[i].buf = svn_stringbuf_create("", pool);
+    }
 
   SVN_ERR(svn_diff_diff(diff, &baton, &svn_diff__file_vtable, pool));
 
@@ -502,12 +606,19 @@
   apr_pool_t *pool)
 {
   svn_diff__file_baton_t baton;
+  struct filter_baton fb[3];
+  int i;
 
   memset(&baton, 0, sizeof(baton));
   baton.path[0] = original;
   baton.path[1] = modified;
   baton.path[2] = latest;
   baton.pool = svn_pool_create(pool);
+  for (i = 0; i < 3; ++i)
+    {
+      baton.filter_baton[i] = &fb[i];
+      fb[i].buf = svn_stringbuf_create("", pool);
+    }
 
   SVN_ERR(svn_diff_diff3(diff, &baton, &svn_diff__file_vtable, pool));
 
@@ -1088,8 +1199,8 @@
                            apr_off_t modified_start, apr_off_t modified_length,
                            apr_off_t latest_start, apr_off_t latest_length)
 {
-  return svn_diff3__file_output_hunk(baton, 0,
-                                     original_start, original_length);
+  return svn_diff3__file_output_hunk(baton, 1,
+                                     modified_start, modified_length);
 }
 
 static svn_error_t *
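
For anyone who wants to play with the idea outside the library, here is a
minimal standalone sketch (not part of the patch) of the normalization the
filter performs: drop every space character from a line before comparing.
The strip_spaces helper and the sample lines are made up for illustration;
the patch itself streams chunked buffers through the filter baton rather
than whole NUL-terminated lines.

#include <stdio.h>
#include <string.h>

/* Copy SRC into DST, dropping all ' ' characters.  DST must have room
   for strlen(SRC) + 1 bytes.  Returns the normalized length. */
static size_t
strip_spaces(char *dst, const char *src)
{
  size_t len = 0;

  for (; *src; ++src)
    if (*src != ' ')
      dst[len++] = *src;
  dst[len] = '\0';

  return len;
}

int
main(void)
{
  char a[64], b[64];

  strip_spaces(a, "int  x = 1;");
  strip_spaces(b, "int x=1;");

  /* With --ignore-all-space semantics these two lines compare equal,
     so a diff would not report a change here. */
  printf("%s\n", strcmp(a, b) == 0 ? "equal" : "different");
  return 0;
}

Like the patch, this strips only ' ' (0x20); tabs and other whitespace
still count as differences.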