[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

RE: svn commit: r1451129 - in /subversion/branches/fsfs-format7/subversion: include/private/svn_string_private.h libsvn_delta/xdelta.c libsvn_subr/string.c

From: Bert Huijben <bert_at_qqmail.nl>
Date: Thu, 28 Feb 2013 08:39:59 +0000

[retry to the right list]
If you are not going to use these functions I don’t see how this is going
to help performance. Before this these functions could be inlined in the
delta code and now they are part of a different library.

Maybe there is no difference if you are compiling a static library with
whole program optimization, but with a shared library build there
certainly will be a performance difference in the delta calculation, which
is already cpu bound in many real world scenarios.

Bert

Sent from Windows Mail

 *From:* stefan2_at_apache.org
*Sent:* February 28, 2013 8:46 AM
*To:* commits_at_subversion.apache.org
*Subject:* svn commit: r1451129 - in
/subversion/branches/fsfs-format7/subversion:
include/private/svn_string_private.h libsvn_delta/xdelta.c
libsvn_subr/string.c

Author: stefan2
Date: Thu Feb 28 07:45:50 2013
New Revision: 1451129

URL: http://svn.apache.org/r1451129
Log:
On the fsfs-format7 branch: Move the string matching functions
from xdelta to libsvn_subr/string.

* subversion/include/private/svn_string_private.h
  (svn_cstring__match_length,
   svn_cstring__reverse_match_length): declare new private API

* subversion/libsvn_subr/string.c
  (svn_cstring__match_length,
   svn_cstring__reverse_match_length): take implementation from xdelta.c

* subversion/libsvn_delta/xdelta.c
  (match_length,
   reverse_match_length): remove here
  (find_match,
   store_delta_trailer,
   compute_delta): update callers

Modified:

subversion/branches/fsfs-format7/subversion/include/private/svn_string_private.h
    subversion/branches/fsfs-format7/subversion/libsvn_delta/xdelta.c
    subversion/branches/fsfs-format7/subversion/libsvn_subr/string.c

Modified:
subversion/branches/fsfs-format7/subversion/include/private/svn_string_private.h
URL:
http://svn.apache.org/viewvc/subversion/branches/fsfs-format7/subversion/include/private/svn_string_private.h?rev=1451129&r1=1451128&r2=1451129&view=diff
==============================================================================

---
subversion/branches/fsfs-format7/subversion/include/private/svn_string_private.h
(original)
+++
subversion/branches/fsfs-format7/subversion/include/private/svn_string_private.h
Thu Feb 28 07:45:50 2013
@@ -210,6 +210,24 @@ svn_string__similarity(const svn_string_
                        svn_membuf_t *buffer, apr_size_t *rlcs);
+/* Return the lowest position at which A and B differ. If no difference
+ * can be found in the first MAX_LEN characters, MAX_LEN will be returned.
+ */
+apr_size_t
+svn_cstring__match_length(const char *a,
+                          const char *b,
+                          apr_size_t max_len);
+
+/* Return the number of bytes before A and B that don't differ.  If no
+ * difference can be found in the first MAX_LEN characters,  MAX_LEN will
+ * be returned.  Please note that A-MAX_LEN and B-MAX_LEN must both be
+ * valid addresses.
+ */
+apr_size_t
+svn_cstring__reverse_match_length(const char *a,
+                                  const char *b,
+                                  apr_size_t max_len);
+
 /** @} */
 /** @} */
Modified: subversion/branches/fsfs-format7/subversion/libsvn_delta/xdelta.c
URL:
http://svn.apache.org/viewvc/subversion/branches/fsfs-format7/subversion/libsvn_delta/xdelta.c?rev=1451129&r1=1451128&r2=1451129&view=diff
==============================================================================
--- subversion/branches/fsfs-format7/subversion/libsvn_delta/xdelta.c
(original)
+++ subversion/branches/fsfs-format7/subversion/libsvn_delta/xdelta.c Thu
Feb 28 07:45:50 2013
@@ -28,6 +28,7 @@
 #include <apr_hash.h>
 #include "svn_delta.h"
+#include "private/svn_string_private.h"
 #include "delta.h"
 /* This is pseudo-adler32. It is adler32 without the prime modulus.
@@ -222,73 +223,6 @@ init_blocks_table(const char *data,
     add_block(blocks, init_adler32(data + i), i);
 }
-/* Return the lowest position at which A and B differ. If no difference
- * can be found in the first MAX_LEN characters, MAX_LEN will be returned.
- */
-static apr_size_t
-match_length(const char *a, const char *b, apr_size_t max_len)
-{
-  apr_size_t pos = 0;
-
-#if SVN_UNALIGNED_ACCESS_IS_OK
-
-  /* Chunky processing is so much faster ...
-   *
-   * We can't make this work on architectures that require aligned access
-   * because A and B will probably have different alignment. So, skipping
-   * the first few chars until alignment is reached is not an option.
-   */
-  for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t))
-    if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos))
-      break;
-
-#endif
-
-  for (; pos < max_len; ++pos)
-    if (a[pos] != b[pos])
-      break;
-
-  return pos;
-}
-
-/* Return the number of bytes before A and B that don't differ.  If no
- * difference can be found in the first MAX_LEN characters,  MAX_LEN will
- * be returned.  Please note that A-MAX_LEN and B-MAX_LEN must both be
- * valid addresses.
- */
-static apr_size_t
-reverse_match_length(const char *a, const char *b, apr_size_t max_len)
-{
-  apr_size_t pos = 0;
-
-#if SVN_UNALIGNED_ACCESS_IS_OK
-
-  /* Chunky processing is so much faster ...
-   *
-   * We can't make this work on architectures that require aligned access
-   * because A and B will probably have different alignment. So, skipping
-   * the first few chars until alignment is reached is not an option.
-   */
-  for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t))
-    if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos))
-      break;
-
-  pos -= sizeof(apr_size_t);
-
-#endif
-
-  /* If we find a mismatch at -pos, pos-1 characters matched.
-   */
-  while (++pos <= max_len)
-    if (a[0-pos] != b[0-pos])
-      return pos - 1;
-
-  /* No mismatch found -> at least MAX_LEN matching chars.
-   */
-  return max_len;
-}
-
-
 /* Try to find a match for the target data B in BLOCKS, and then
    extend the match as long as data in A and B at the match position
    continues to match.  We set the position in A we ended up in (in
@@ -322,9 +256,9 @@ find_match(const struct blocks *blocks,
   max_delta = asize - apos - MATCH_BLOCKSIZE < bsize - bpos -
MATCH_BLOCKSIZE
             ? asize - apos - MATCH_BLOCKSIZE
             : bsize - bpos - MATCH_BLOCKSIZE;
-  delta = match_length(a + apos + MATCH_BLOCKSIZE,
-                       b + bpos + MATCH_BLOCKSIZE,
-                       max_delta);
+  delta = svn_cstring__match_length(a + apos + MATCH_BLOCKSIZE,
+                                    b + bpos + MATCH_BLOCKSIZE,
+                                    max_delta);
   /* See if we can extend backwards (max MATCH_BLOCKSIZE-1 steps because
A's
      content has been sampled only every MATCH_BLOCKSIZE positions).  */
@@ -361,7 +295,8 @@ store_delta_trailer(svn_txdelta__ops_bat
   if (max_len == 0)
     return;
-  end_match = reverse_match_length(a + asize, b + bsize, max_len);
+  end_match = svn_cstring__reverse_match_length(a + asize, b + bsize,
+                                                max_len);
   if (end_match <= 4)
     end_match = 0;
@@ -413,7 +348,7 @@ compute_delta(svn_txdelta__ops_baton_t *
   /* Optimization: directly compare window starts. If more than 4
    * bytes match, we can immediately create a matching windows.
    * Shorter sequences result in a net data increase. */
-  lo = match_length(a, b, asize > bsize ? bsize : asize);
+  lo = svn_cstring__match_length(a, b, asize > bsize ? bsize : asize);
   if ((lo > 4) || (lo == bsize))
     {
       svn_txdelta__insert_op(build_baton, svn_txdelta_source,
@@ -467,7 +402,8 @@ compute_delta(svn_txdelta__ops_baton_t *
             {
               /* the match borders on the previous op. Maybe, we found a
                * match that is better than / overlapping the previous one.
*/
-              apr_size_t len = reverse_match_length(a + apos, b + lo, apos
< lo ? apos : lo);
+              apr_size_t len = svn_cstring__reverse_match_length
+                                 (a + apos, b + lo, apos < lo ? apos : lo);
               if (len > 0)
                 {
                   len = svn_txdelta__remove_copy(build_baton, len);
Modified: subversion/branches/fsfs-format7/subversion/libsvn_subr/string.c
URL:
http://svn.apache.org/viewvc/subversion/branches/fsfs-format7/subversion/libsvn_subr/string.c?rev=1451129&r1=1451128&r2=1451129&view=diff
==============================================================================
--- subversion/branches/fsfs-format7/subversion/libsvn_subr/string.c
(original)
+++ subversion/branches/fsfs-format7/subversion/libsvn_subr/string.c Thu
Feb 28 07:45:50 2013
@@ -1282,3 +1282,67 @@ svn_string__similarity(const svn_string_
   else
     return 1000;
 }
+
+apr_size_t
+svn_cstring__match_length(const char *a,
+                          const char *b,
+                          apr_size_t max_len)
+{
+  apr_size_t pos = 0;
+
+#if SVN_UNALIGNED_ACCESS_IS_OK
+
+  /* Chunky processing is so much faster ...
+   *
+   * We can't make this work on architectures that require aligned access
+   * because A and B will probably have different alignment. So, skipping
+   * the first few chars until alignment is reached is not an option.
+   */
+  for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t))
+    if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos))
+      break;
+
+#endif
+
+  for (; pos < max_len; ++pos)
+    if (a[pos] != b[pos])
+      break;
+
+  return pos;
+}
+
+apr_size_t
+svn_cstring__reverse_match_length(const char *a,
+                                  const char *b,
+                                  apr_size_t max_len)
+{
+  apr_size_t pos = 0;
+
+#if SVN_UNALIGNED_ACCESS_IS_OK
+
+  /* Chunky processing is so much faster ...
+   *
+   * We can't make this work on architectures that require aligned access
+   * because A and B will probably have different alignment. So, skipping
+   * the first few chars until alignment is reached is not an option.
+   */
+  for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t))
+    if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos))
+      break;
+
+  pos -= sizeof(apr_size_t);
+
+#endif
+
+  /* If we find a mismatch at -pos, pos-1 characters matched.
+   */
+  while (++pos <= max_len)
+    if (a[0-pos] != b[0-pos])
+      return pos - 1;
+
+  /* No mismatch found -> at least MAX_LEN matching chars.
+   */
+  return max_len;
+}
+
+
Received on 2013-02-28 09:40:40 CET

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.