[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

[PATCH] mime-type detection "improvement"

From: SLOGEN <jensen_at_slog.dk>
Date: 2003-07-09 17:27:37 CEST

Proposed log message:
---------------------
- Added support for /etc/mime.types and ~/.mime.types in mime-type detection
- Added printout of mime-type for binary files

Comments:
---------
When importing source to be served using mod_dav_svn, it's tedious to
use svn propset to set the mime-type of the imported files.

I propose that svn uses the existing de-facto standard of the
/etc/mime.types file to deduce a mime-type for imported files, before
falling back to the old behaviour.

This is my first patch to subversion, so I perhaps have something to
learn about the coding standards and use of the apr_pool_t. especially,
i'm concerned with the use of a static apr_pool_t for storage of an
apr_hash_t that will last the lifetime of the svn process. Please
comment on the solution used in the patch.

I also have some concern about how the names of the considered
mime.types files should be generated. right now "/etc/mime.types" and
"~/.mime-types" (of course with proper user-home-dir subst), works for
me, but it may not be what people want. Some other suggestions are:

        - ~/.svn/mime-types
        - have a svn_config option with a list of files to use

Please suggest some solutions.

BTW: the patch is against rev. 6416

Index: subversion/include/svn_mime.h
===================================================================
--- subversion/include/svn_mime.h (working copy)
+++ subversion/include/svn_mime.h (working copy)
@@ -0,0 +1,17 @@
+
+#ifndef SVN_MIME_H
+#define SVN_MIME_H
+
+struct svn_error_t;
+struct apr_pool_t;
+
+svn_error_t*
+svn_mime_guess_type_ext(const char **mimetype,
+ const char *file,
+ apr_pool_t *cont);
+svn_error_t*
+svn_mime_guess_type_by_content(const char **mimetype,
+ const char *file,
+ apr_pool_t *pool);
+
+#endif /* SVN_MIME_H */
Index: subversion/libsvn_subr/io.c
===================================================================
--- subversion/libsvn_subr/io.c (revision 6416)
+++ subversion/libsvn_subr/io.c (working copy)
@@ -39,6 +39,7 @@
  #include "svn_pools.h"
  #include "svn_utf.h"
  #include "svn_config.h"
+#include "svn_mime.h"

  #include "svn_private_config.h" /* For SVN_WIN32 */

@@ -1319,78 +1320,13 @@
                          const char *file,
                          apr_pool_t *pool)
  {
- static const char * const generic_binary = "application/octet-stream";
-
- svn_node_kind_t kind;
- apr_file_t *fh;
- apr_status_t apr_err;
- unsigned char block[1024];
- apr_size_t amt_read = sizeof (block);
-
- /* Default return value is NULL. */
- *mimetype = NULL;
-
- /* See if this file even exists, and make sure it really is a file. */
- SVN_ERR (svn_io_check_path (file, &kind, pool));
- if (kind != svn_node_file)
- return svn_error_createf (SVN_ERR_BAD_FILENAME, NULL,
- "svn_io_detect_mimetype: "
- "Can't detect mimetype of non-file '%s'",
- file);
-
- SVN_ERR (svn_io_file_open (&fh, file, APR_READ, 0, pool));
-
- /* Read a block of data from FILE. */
- apr_err = apr_file_read (fh, block, &amt_read);
- if (apr_err && ! APR_STATUS_IS_EOF(apr_err))
- return svn_error_createf (apr_err, NULL,
- "svn_io_detect_mimetype: error reading '%s'",
- file);
-
- /* Now close the file. No use keeping it open any more. */
- apr_file_close (fh);
-
-
- /* Right now, this function is going to be really stupid. It's
- going to examine the first block of data, and make sure that 85%
- of the bytes are such that their value is in the ranges 0x07-0x0D
- or 0x20-0x7F, and that 100% of those bytes is not 0x00.
-
- If those criteria are not met, we're calling it binary. */
- if (amt_read > 0)
- {
- apr_size_t i;
- int binary_count = 0;
-
- /* Run through the data we've read, counting the 'binary-ish'
- bytes. HINT: If we see a 0x00 byte, we'll set our count to its
- max and stop reading the file. */
- for (i = 0; i < amt_read; i++)
- {
- if (block[i] == 0)
- {
- binary_count = amt_read;
- break;
- }
- if ((block[i] < 0x07)
- || ((block[i] > 0x0D) && (block[i] < 0x20))
- || (block[i] > 0x7F))
- {
- binary_count++;
- }
- }
-
- if (((binary_count * 1000) / amt_read) > 850)
- {
- *mimetype = generic_binary;
- return SVN_NO_ERROR;
- }
- }
-
+ SVN_ERR( svn_mime_guess_type_ext( mimetype, file, pool) );
+ if (*mimetype != NULL) /* found mime-type? */
+ return SVN_NO_ERROR;
+ SVN_ERR( svn_mime_guess_type_by_content(mimetype, file, pool) );
    return SVN_NO_ERROR;
  }

-
  svn_error_t *
  svn_io_file_open (apr_file_t **new_file, const char *fname,
                    apr_int32_t flag, apr_fileperms_t perm,
Index: subversion/libsvn_subr/mime.c
===================================================================
--- subversion/libsvn_subr/mime.c (working copy)
+++ subversion/libsvn_subr/mime.c (working copy)
@@ -0,0 +1,349 @@
+/* @file mime.c
+ mime.c: guessing mime types
+ */
+
+
+
+#include "apr_pools.h"
+#include "apr_user.h"
+#include "svn_io.h"
+
+#include "svn_mime.h"
+
+/** @brief set @ª *cont to the staticly allocated pool used for
+ svn_mime_hash(...) */
+static svn_error_t*
+svn_mime__cache_pool(apr_pool_t **cont)
+{
+ static apr_pool_t* p = NULL;
+ if (p == NULL)
+ {
+ apr_status_t apr_status;
+ /* may leak if multiple threads enter at same time, not really a
+ problem */
+ apr_status = apr_pool_create(&p, NULL);
+ if ( !APR_STATUS_IS_SUCCESS(apr_status) )
+ return svn_error_create(apr_status,
+ 0,
+ "failed to allocate pool for mime
caching");
+ }
+ *cont = p;
+ return SVN_NO_ERROR;
+}
+
+/** @brief set @ª *cont to the staticly allocated pool used for
+ svn_mime_hash(...) */
+static svn_error_t*
+svn_mime__hash(apr_hash_t** ht)
+{
+ static apr_hash_t* h = NULL;
+ if (h == NULL)
+ {
+ apr_pool_t* pool;
+ /* may leak if multiple threads enter at same time, not really a
+ problem */
+ SVN_ERR( svn_mime__cache_pool(&pool) );
+ h = apr_hash_make(pool);
+ }
+ *ht = h;
+ return SVN_NO_ERROR;
+}
+
+/** @*/
+static void
+svn_mime__debug_print_hash()
+{
+ apr_hash_t* h = NULL;
+ apr_hash_index_t *e = NULL;
+ apr_pool_t *p = NULL;
+
+ if( svn_mime__cache_pool(&p) != SVN_NO_ERROR )
+ abort();
+
+ if( svn_mime__hash(&h) != SVN_NO_ERROR )
+ abort();
+
+ for( e = apr_hash_first(p, h);
+ e != NULL;
+ e = apr_hash_next(e))
+ {
+ const char *key;
+ size_t key_len;
+ const char *val;
+ apr_hash_this(e, (const void **)&key, &key_len, (void **)&val);
+ }
+}
+
+/** @brief parse a mime.types file's content from @a buf into @a ht */
+static svn_error_t*
+svn_mime__parse_file_entries(apr_hash_t* ht,
+ svn_stringbuf_t* buf,
+ apr_pool_t* cont)
+{
+ enum PARSE_STATE
+ {
+ PARSE_MIME_TYPE,
+ PARSE_EXT
+ };
+ enum PARSE_STATE state = PARSE_MIME_TYPE;
+ char *p, *end, *type_begin = 0, *ext_begin = 0;
+ for ( p = buf->data,
+ end = buf->data + buf->len;
+ p < end;
+ ++p )
+ {
+ switch ( *p ) {
+ case '\n':
+ /* newline => next entry */
+ *p = 0;
+ if ( state == PARSE_EXT && ext_begin != 0 )
+ apr_hash_set(ht, ext_begin, APR_HASH_KEY_STRING, type_begin);
+ /* start over */
+ type_begin = 0;
+ ext_begin = 0;
+ state = PARSE_MIME_TYPE;
+ break;
+ case '#':
+ /** comment, eat to '\n' or end */
+ for( ; *p != '\n' && p < end; ++p)
+ ;
+ if (p < end)
+ --p;
+ /* start over */
+ type_begin = 0;
+ ext_begin = 0;
+ state = PARSE_MIME_TYPE;
+ break;
+ case ' ':
+ case '\t':
+ /* white-space => possibly change state, and possibly found
+ entry */
+ *p = 0;
+ switch ( state ) {
+ case PARSE_MIME_TYPE:
+ state = PARSE_EXT;
+ break;
+ case PARSE_EXT:
+ if ( ext_begin != 0 )
+ {
+ apr_hash_set(ht, ext_begin, APR_HASH_KEY_STRING, type_begin);
+ ext_begin = 0;
+ }
+ break;
+ }
+ break;
+ default:
+ switch ( state )
+ {
+ case PARSE_MIME_TYPE:
+ if ( type_begin == 0 )
+ type_begin = p;
+ break;
+ case PARSE_EXT:
+ if ( ext_begin == 0 )
+ ext_begin = p;
+ break;
+ }
+ break;
+ }
+ }
+ return SVN_NO_ERROR;
+}
+
+static char *
+svn_mime__calculate_homedir_path(apr_pool_t *cont, apr_pool_t
*static_pool)
+{
+ /* get path to homedir */
+ apr_uid_t userid;
+ apr_gid_t groupid;
+ char *username;
+ char *homedir;
+
+ if( !(APR_STATUS_IS_SUCCESS( apr_uid_current( &userid, &groupid, cont ) )
+ && APR_STATUS_IS_SUCCESS( apr_uid_name_get(&username, userid,
cont) )
+ && APR_STATUS_IS_SUCCESS( apr_uid_homepath_get(&homedir,
+ username,
+ cont)) ) )
+ return 0;
+ else
+ return apr_pstrcat(static_pool, homedir, "/.mime.types", NULL);
+}
+
+/** @brief insert the mime-types from @a file into @a ht, in pool @a
+ cache, allocation temporary storage in @a cont */
+static svn_error_t*
+svn_mime__set_types(apr_hash_t* ht,
+ const char *file,
+ apr_pool_t* cont)
+{
+ svn_stringbuf_t* buf;
+ svn_error_t* err;
+
+ SVN_ERR( svn_stringbuf_from_file (&buf, file, cont) );
+ SVN_ERR( svn_mime__parse_file_entries (ht, buf, cont) );
+}
+
+static svn_error_t*
+svn_mime__set_types_default(apr_pool_t* cont)
+{
+ apr_pool_t* cache;
+ apr_hash_t *ht;
+ svn_error_t* err;
+ apr_status_t apr_status;
+ static const char *mime_files[] = { "/etc/mime.types", 0, 0 };
+ const char *file;
+ size_t i;
+ SVN_ERR( svn_mime__cache_pool(&cache) );
+ SVN_ERR( svn_mime__hash(&ht) );
+
+ if ( mime_files[1] == 0 )
+ mime_files[1] = svn_mime__calculate_homedir_path(cont, cache);
+
+ for (i = 0; file = mime_files[i], file != 0; ++i)
+ {
+ svn_node_kind_t kind;
+ err = svn_io_check_path (file, &kind, cache);
+ if ( err != SVN_NO_ERROR )
+ {
+ svn_handle_warning(stderr, err);
+ svn_error_clear(err);
+ }
+ if (kind != svn_node_file)
+ continue;
+ err = svn_mime__set_types(ht, file, cache);
+ if ( err != SVN_NO_ERROR )
+ {
+ svn_handle_warning(stderr, err);
+ svn_error_clear(err);
+ }
+ }
+ return SVN_NO_ERROR;
+}
+
+static svn_error_t*
+svn_mime__ensure_types_loaded(apr_pool_t* cont)
+{
+ static int loaded = 0;
+ if ( !loaded )
+ {
+ SVN_ERR( svn_mime__set_types_default(cont) );
+ loaded = 1;
+ /* debug_print_mime_hash(); */
+ }
+ return SVN_NO_ERROR;
+}
+
+svn_error_t*
+svn_mime_guess_type_ext(const char **mimetype,
+ const char *file,
+ apr_pool_t *cont)
+{
+ apr_hash_t *ht;
+ size_t file_len;
+ const char *ext;
+ *mimetype = 0;
+
+ if ( file == NULL ) /* defensive */
+ return svn_error_create
+ (SVN_ERR_UNSUPPORTED_FEATURE, NULL,
+ "svn_guess_mime_type_ext: "
+ "file cannot be NULL");
+
+ file_len = strlen(file);
+ if ( file_len == 0 ) /* defensive */
+ return svn_error_create
+ (SVN_ERR_UNSUPPORTED_FEATURE, NULL,
+ "svn_guess_mime_type_ext: "
+ "filename cannot be the empty string");
+
+ /* find extension */
+ for (ext = file + file_len - 1; ext >= file; --ext)
+ if (*ext == '.')
+ break;
+ if ( *ext != '.' )
+ return SVN_NO_ERROR;
+ /* invariant: ext points to first char after last "." */
+ ++ext;
+
+ SVN_ERR( svn_mime__ensure_types_loaded(cont) );
+ SVN_ERR( svn_mime__hash(&ht) );
+ *mimetype = apr_hash_get(ht, ext, APR_HASH_KEY_STRING);
+ return SVN_NO_ERROR;
+}
+
+svn_error_t*
+svn_mime_guess_type_by_content(const char **mimetype,
+ const char *file,
+ apr_pool_t *pool)
+{
+ static const char * const generic_binary = "application/octet-stream";
+
+ svn_node_kind_t kind;
+ apr_file_t *fh;
+ apr_status_t apr_err;
+ unsigned char block[1024];
+ apr_size_t amt_read = sizeof (block);
+
+ /* Default return value is NULL. */
+ *mimetype = NULL;
+
+ /* See if this file even exists, and make sure it really is a file. */
+ SVN_ERR (svn_io_check_path (file, &kind, pool));
+ if (kind != svn_node_file)
+ return svn_error_createf (SVN_ERR_BAD_FILENAME, NULL,
+ "svn_io_detect_mimetype: "
+ "Can't detect mimetype of non-file '%s'",
+ file);
+
+ SVN_ERR (svn_io_file_open (&fh, file, APR_READ, 0, pool));
+
+ /* Read a block of data from FILE. */
+ apr_err = apr_file_read (fh, block, &amt_read);
+ if (apr_err && ! APR_STATUS_IS_EOF(apr_err))
+ return svn_error_createf (apr_err, NULL,
+ "svn_io_detect_mimetype: error reading '%s'",
+ file);
+
+ /* Now close the file. No use keeping it open any more. */
+ apr_file_close (fh);
+
+
+ /* Right now, this function is going to be really stupid. It's
+ going to examine the first block of data, and make sure that 85%
+ of the bytes are such that their value is in the ranges 0x07-0x0D
+ or 0x20-0x7F, and that 100% of those bytes is not 0x00.
+
+ If those criteria are not met, we're calling it binary. */
+ if (amt_read > 0)
+ {
+ apr_size_t i;
+ int binary_count = 0;
+
+ /* Run through the data we've read, counting the 'binary-ish'
+ bytes. HINT: If we see a 0x00 byte, we'll set our count to its
+ max and stop reading the file. */
+ for (i = 0; i < amt_read; i++)
+ {
+ if (block[i] == 0)
+ {
+ binary_count = amt_read;
+ break;
+ }
+ if ((block[i] < 0x07)
+ || ((block[i] > 0x0D) && (block[i] < 0x20))
+ || (block[i] > 0x7F))
+ {
+ binary_count++;
+ }
+ }
+
+ if (((binary_count * 1000) / amt_read) > 850)
+ {
+ *mimetype = generic_binary;
+ return SVN_NO_ERROR;
+ }
+ }
+
+ return SVN_NO_ERROR;
+}
+
Index: subversion/clients/cmdline/feedback.c
===================================================================
--- subversion/clients/cmdline/feedback.c (revision 6416)
+++ subversion/clients/cmdline/feedback.c (working copy)
@@ -112,7 +112,7 @@
           do get it, and the mime-type is not textual, note that this
           is a binary addition. */
        if (mime_type && (svn_mime_type_is_binary (mime_type)))
- printf ("A (bin) %s\n", path_native);
+ printf ("A (bin) %s (%s)\n", path_native, mime_type);
        else
          printf ("A %s\n", path_native);
        break;
@@ -204,7 +204,7 @@

      case svn_wc_notify_commit_added:
        if (mime_type && svn_mime_type_is_binary (mime_type))
- printf ("Adding (bin) %s\n", path_native);
+ printf ("Adding (bin) %s (%s)\n", path_native, mime_type);
        else
          printf ("Adding %s\n", path_native);
        break;

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Wed Jul 9 17:28:40 2003

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.