[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

Subversion Performance Benchmark

From: Brandon Ehle <azverkan_at_yahoo.com>
Date: 2002-09-23 03:38:50 CEST
This weekend I wrote up a Subversion benchmark comparison versus CVS.

CVS version 1.11.1p1 was used.
Subversion build 3206 was used.
Done on 1.5Ghz AMD Athlon on a ATA66 drive pulling about 34MB/s with hdparm -t.
Linux 2.4.18-3 (RedHat 7.3) and ext3 filesystem

All the tests are done using local disk access (ra_local and local cvs) and averaged over 3 runs.  The repository directory is in its initial empty state (revision 0 for subversion) when the tests were run.


The first results are from importing an empty directory.
CVS (real 0.00533333s  user 0s  sys 0.00333333s)
SVN (real 0.0806667s  user 0.0133333s  sys 0.00333333s)


These results are from importing 100 small text files ranging from 10 to 100 lines. (283 - 2.9k)
CVS (real 0.0973333s  user 0.00333333s  sys 0.0133333s)
SVN (real 2.01767s  user 0.163333s  sys 0.0533333s)


These results are from importing 1000 small text files ranging from 10 to 100 lines. (141 - 3.1k)
CVS (real 0.963s  user 0.0533333s  sys 0.09s)
SVN (real 27.834s  user 8.1s  sys 0.863333s)


These results are from importing 100 large text files ranging from 1000 to 100000 lines. (33k - 2.6M)
CVS (real 1.43667s  user 0.196667s  sys 1.16s)
SVN (real 62.93s  user 44.9333s  sys 2.86s)


These results are from importing 100 binary files ranging from 3.3k to 1M.
CVS (real 0.700667s  user 0.15s  sys 0.45s)
SVN (real 34.638s  user 24.74s  sys 1.13667s)



As you can see, CVS outperformed Subversion by quite a bit.  A little more investigation with oprofile revealed that 62% of the time was spent in libsvn_delta.  (Retired instructions)
[...]
888        0.0899 0.0000 /usr/local/lib/libsvn_subr-1.so.0.0.0
1227       0.1242 0.0000 /usr/X11R6/bin/xterm
1777       0.1799 0.0000 /lib/libpthread-0.9.so
2149       0.2175 0.0000 /usr/local/bin/op_time
4358       0.4411 0.0000 /lib/ext3.o
4505       0.4560 0.0000 /lib/jbd.o
10500      1.0629 0.0000 /usr/bin/python2
12272      1.2422 0.0000 /usr/local/BerkeleyDB.4.0/lib/libdb-4.0.so
13856      1.4026 0.0000 /usr/local/apache2/lib/libapr-0.so.0.9.2
14908      1.5091 0.0000 /lib/libc-2.2.5.so
17673      1.7889 0.0000 /usr/local/lib/libsvn_fs-1.so.0.0.0
280367    28.3801 0.0000 /boot/vmlinux-2.4.18-3
618874    62.6454 0.0000 /usr/local/lib/libsvn_delta-1.so.0.0.0


Showing functions shows that ~60% of time was spent in 3 functions in libsvn_delta: vdelta, find_match_len, & get_bucket. (Retired instructions)
[...]
000165a4 3420     0.351684    apr_palloc              /usr/local/apache2/lib/libapr-0.so.0.9.2
0007b980 5497     0.565265    memcpy                  /lib/libc-2.2.5.so
0000aa7c 5753     0.591589    find_entry              /usr/local/apache2/lib/libapr-0.so.0.9.2
0001a488 5803     0.596731    implicit_atom           /usr/local/lib/libsvn_fs-1.so.0.0.0
0004bfe8 9566     0.983686    __ham_func4             /usr/local/BerkeleyDB.4.0/lib/libdb-4.0.so
00009128 10426    1.07212     store_mapping           /usr/local/lib/libsvn_delta-1.so.0.0.0
c0173dd0 17730    1.8232      SHATransform            /boot/vmlinux-2.4.18-3
00008ca4 20589    2.1172      create_hash_table       /usr/local/lib/libsvn_delta-1.so.0.0.0
000084ac 21116    2.17139     svn_txdelta__apply_instructions /usr/local/lib/libsvn_delta-1.so.0.0.0
000090d8 80701    8.2986      get_bucket              /usr/local/lib/libsvn_delta-1.so.0.0.0
000091c0 237090   24.3803     find_match_len          /usr/local/lib/libsvn_delta-1.so.0.0.0
00008d94 246770   25.3757     vdelta                  /usr/local/lib/libsvn_delta-1.so.0.0.0
c0106de0 249588   25.6655     default_idle            /boot/vmlinux-2.4.18-3


Using oprofile's "place timings in source" revealed the following (Retired instructions, Retired Ops, Instruction cache misses, Data cache misses).  I am not exactly sure what the vdelta for loop that calls find_match_len is doing, but its spending more time in that function than it takes CVS to complete the entire operation.  Is there any way I can disable the delta stuff and retry these benchmarks?

[...]

static APR_INLINE apr_uint32_t
get_bucket (const hash_table_t *table, const char *key)
/* get_bucket 80701 13.04% 48035 41.14% 0 0% 34 4.187% */
/* 769 0.1243% 488 0.4179% 0 0% 17 2.094% */
{
  int i;
  apr_uint32_t hash = 0;
  /* 15159 2.449% 7312 6.262% 0 0% 7 0.8621% */
  for (i = 0; i < VD_KEY_SIZE; ++i)
    /* 24980 4.036% 12209 10.46% 0 0% 8 0.9852% */
    hash = hash * 127 + *key++;
  /* 38588 6.235% 26019 22.28% 0 0% 2 0.2463% */
  return hash % table->num_buckets;
/* 1205 0.1947% 2007 1.719% 0 0% 0 0% */
}

[...]

store_mapping (hash_table_t *table, const char* key, apr_off_t idx)
/* store_mapping 10426 1.685% 10147 8.689% 1 50% 78 9.606% */
/* 1232 0.1991% 642 0.5498% 0 0% 8 0.9852% */
{
  /* 1499 0.2422% 1477 1.265% 0 0% 44 5.419% */
  apr_uint32_t bucket = get_bucket (table, key);;
  /* 828 0.1338% 1336 1.144% 0 0% 0 0% */
  assert (table->slots[idx].next == NULL);
  /* 1027 0.1659% 1780 1.524% 1 50% 0 0% */
  table->slots[idx].next = table->buckets[bucket];
  /* 2847 0.46% 3231 2.767% 0 0% 0 0% */
  table->buckets[bucket] = &table->slots[idx];
/* 2993 0.4836% 1681 1.44% 0 0% 26 3.202% */
}

[...]

static APR_INLINE int
find_match_len (const char *match, const char *from, const char *end)
/* find_match_len 237090 38.31% 13501 11.56% 0 0% 63 7.759% */
/* 17495 2.827% 1033 0.8846% 0 0% 0 0% */
{
  /* 6039 0.9758% 385 0.3297% 0 0% 0 0% */
  const char *here = from;
  /* 78643 12.71% 4340 3.717% 0 0% 18 2.217% */
  while (here < end && *match == *here)
    {
      /* 25197 4.071% 1282 1.098% 0 0% 11 1.355% */
      ++match;
      /* 54382 8.787% 2846 2.437% 0 0% 24 2.956% */
      ++here;
    }
  /* 46280 7.478% 2906 2.489% 0 0% 10 1.232% */
  return here - from;
/* 9054 1.463% 709 0.6072% 0 0% 0 0% */
}

[...]

static void
vdelta (svn_txdelta__ops_baton_t *build_baton,
        const char *data,
        const char *start,
        const char *end,
        svn_boolean_t outputflag,
        hash_table_t *table,
        apr_pool_t *pool)
/* vdelta 246770 39.87% 39884 34.15% 0 0% 608 74.88% */
{
  const char *here = start;     /* Current position in the buffer. */
  const char *insert_from = NULL; /* Start of byte range for insertion. */

  for (;;)
    {
      const char *current_match, *key;
      /* 316 0.05106% 24 0.02055% 0 0% 0 0% */
      apr_size_t current_match_len = 0;
      hash_slot_t *slot;
      svn_boolean_t progress;

      /* If we're near the end, just insert the last few bytes. */
      /* 723 0.1168% 554 0.4744% 0 0% 0 0% */
      if (end - here < VD_KEY_SIZE)
        {
          /* 0 0% 1 0.0008564% 0 0% 0 0% */
          const char *from = ((insert_from != NULL) ? insert_from : here);

          if (outputflag && from < end)
            svn_txdelta__insert_op (build_baton, svn_txdelta_new, 0,
                                    end - from, from, pool);
          return;
        }

      /* Search for the longest match.  */
      current_match = NULL;
      /* 637 0.1029% 446 0.3819% 0 0% 0 0% */
      current_match_len = 0;
      /* 503 0.08128% 388 0.3323% 0 0% 0 0% */
      key = here;
      do
        {
          /* Try to extend the current match.  Our key is the last
             three matched bytes plus one unmatched byte if we already
             have a current match, or just the four bytes where we are
             if we don't have a current match yet.  See which mapping
             yields the longest extension.  */
          /* 25 0.00404% 7 0.005994% 0 0% 0 0% */
          progress = FALSE;
          /* 1748 0.2825% 2362 2.023% 0 0% 0 0% */
          for (slot = table->buckets[get_bucket (table, key)];
               /* 13653 2.206% 2019 1.729% 0 0% 0 0% */
               slot != NULL;
               /* 36991 5.977% 4944 4.234% 0 0% 0 0% */
               slot = slot->next)
            {
              const char *match;
              apr_size_t match_len;

              /* 37457 6.052% 6633 5.68% 0 0% 145 17.86% */
              if (slot - table->slots < key - here) /* Too close to start */
                continue;
              /* 52564 8.494% 6357 5.444% 0 0% 155 19.09% */
              match = data + (slot - table->slots) - (key - here);
              /* 66193 10.7% 5189 4.444% 0 0% 137 16.87% */
              match_len = find_match_len (match, here, end);

              /* We can only copy from the source or from the target, so
                 don't let the match cross START.  */
              /* 17097 2.763% 1675 1.434% 0 0% 0 0% */
              if (match < start && match + match_len > start)
                match_len = start - match;

              /* 1585 0.2561% 84 0.07193% 0 0% 1 0.1232% */
              if (match_len >= VD_KEY_SIZE && match_len > current_match_len)
                {
                  /* We have a longer match; record it.  */
                  /* 325 0.05252% 20 0.01713% 0 0% 0 0% */
                  current_match = match;
                  /* 3 0.0004848% 2 0.001713% 0 0% 0 0% */
                  current_match_len = match_len;
                  /* 0 0% 1 0.0008564% 0 0% 0 0% */
                  progress = TRUE;
                }
            }
          /* 6502 1.051% 3487 2.986% 0 0% 126 15.52% */
          if (progress)
            /* 49 0.007918% 17 0.01456% 0 0% 0 0% */
            key = here + current_match_len - (VD_KEY_SIZE - 1);
        }
      /* 284 0.04589% 206 0.1764% 0 0% 0 0% */
      while (progress && end - key >= VD_KEY_SIZE);

      /* 415 0.06706% 336 0.2877% 0 0% 1 0.1232% */
      if (current_match_len < VD_KEY_SIZE)
        {
          /* There is no match here; store a mapping and insert this byte. */
          /* 1480 0.2391% 1452 1.243% 0 0% 31 3.818% */
          store_mapping (table, here, here - data);
          /* 3994 0.6454% 2997 2.566% 0 0% 0 0% */
          if (insert_from == NULL)
            /* 24 0.003878% 9 0.007707% 0 0% 0 0% */
            insert_from = here;
          /* 523 0.08451% 314 0.2689% 0 0% 0 0% */
          here++;
          continue;
        }
      /* 78 0.0126% 12 0.01028% 0 0% 0 0% */
      else if (outputflag)
        {
          if (insert_from != NULL)
            {
              /* Commit the pending insert. */
              /* 22 0.003555% 18 0.01541% 0 0% 0 0% */
              svn_txdelta__insert_op (build_baton, svn_txdelta_new,
                                      0, here - insert_from,
                                      insert_from, pool);
              /* 8 0.001293% 8 0.006851% 0 0% 0 0% */
              insert_from = NULL;
            }
          /* 18 0.002909% 4 0.003425% 0 0% 0 0% */
          if (current_match < start) /* Copy from source. */
            svn_txdelta__insert_op (build_baton, svn_txdelta_source,
                                    current_match - data,
                                    current_match_len,
                                    NULL, pool);
          else                       /* Copy from target */
            /* 84 0.01357% 28 0.02398% 0 0% 1 0.1232% */
            svn_txdelta__insert_op (build_baton, svn_txdelta_target,
                                    current_match - start,
                                    current_match_len,
                                    NULL, pool);
        }

      /* Adjust the current position and insert mappings for the
         last three bytes of the match. */
      /* 698 0.1128% 33 0.02826% 0 0% 3 0.3695% */
      here += current_match_len;
      /* 63 0.01018% 10 0.008564% 0 0% 0 0% */
      if (end - here >= VD_KEY_SIZE)
        {
          /* 2 0.0003232% 1 0.0008564% 0 0% 0 0% */
          char const *last = here - (VD_KEY_SIZE - 1);
          /* 1962 0.317% 141 0.1207% 0 0% 5 0.6158% */
          for (; last < here; ++last)
            /* 744 0.1202% 105 0.08992% 0 0% 3 0.3695% */
            store_mapping (table, last, last - data);
        }
    }
}


[...]
--------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org For additional commands, e-mail: dev-help@subversion.tigris.org Received on Mon Sep 23 03:39:48 2002

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.