[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

Re: [PATCH] mime-type detection "improvement"

From: <kfogel_at_collab.net>
Date: 2003-07-09 17:57:16 CEST

I haven't reviewed the patch (sorry, just lack of time), but have you
seen these issues?

   http://subversion.tigris.org/issues/show_bug.cgi?id=1002
   http://subversion.tigris.org/issues/show_bug.cgi?id=1233

-Karl

SLOGEN <jensen@slog.dk> writes:
> Proposed log message:
> ---------------------
> - Added support for /etc/mime.types and ~/.mime.types in mime-type detection
> - Added printout of mime-type for binary files
>
> Comments:
> ---------
> When importing source to be served using mod_dav_svn, it's tedious to
> use svn propset to set the mime-type of the imported files.
>
> I propose that svn uses the existing de-facto standard of the
> /etc/mime.types file to deduce a mime-type for imported files, before
> falling back to the old behaviour.
>
> This is my first patch to subversion, so I perhaps have something to
> learn about the coding standards and use of the apr_pool_t. especially,
> i'm concerned with the use of a static apr_pool_t for storage of an
> apr_hash_t that will last the lifetime of the svn process. Please
> comment on the solution used in the patch.
>
> I also have some concern about how the names of the considered
> mime.types files should be generated. right now "/etc/mime.types" and
> "~/.mime-types" (of course with proper user-home-dir subst), works for
> me, but it may not be what people want. Some other suggestions are:
>
> - ~/.svn/mime-types
> - have a svn_config option with a list of files to use
>
> Please suggest some solutions.
>
> BTW: the patch is against rev. 6416
>
> Index: subversion/include/svn_mime.h
> ===================================================================
> --- subversion/include/svn_mime.h (working copy)
> +++ subversion/include/svn_mime.h (working copy)
> @@ -0,0 +1,17 @@
> +
> +#ifndef SVN_MIME_H
> +#define SVN_MIME_H
> +
> +struct svn_error_t;
> +struct apr_pool_t;
> +
> +svn_error_t*
> +svn_mime_guess_type_ext(const char **mimetype,
> + const char *file,
> + apr_pool_t *cont);
> +svn_error_t*
> +svn_mime_guess_type_by_content(const char **mimetype,
> + const char *file,
> + apr_pool_t *pool);
> +
> +#endif /* SVN_MIME_H */
> Index: subversion/libsvn_subr/io.c
> ===================================================================
> --- subversion/libsvn_subr/io.c (revision 6416)
> +++ subversion/libsvn_subr/io.c (working copy)
> @@ -39,6 +39,7 @@
> #include "svn_pools.h"
> #include "svn_utf.h"
> #include "svn_config.h"
> +#include "svn_mime.h"
>
> #include "svn_private_config.h" /* For SVN_WIN32 */
>
> @@ -1319,78 +1320,13 @@
> const char *file,
> apr_pool_t *pool)
> {
> - static const char * const generic_binary = "application/octet-stream";
> -
> - svn_node_kind_t kind;
> - apr_file_t *fh;
> - apr_status_t apr_err;
> - unsigned char block[1024];
> - apr_size_t amt_read = sizeof (block);
> -
> - /* Default return value is NULL. */
> - *mimetype = NULL;
> -
> - /* See if this file even exists, and make sure it really is a file. */
> - SVN_ERR (svn_io_check_path (file, &kind, pool));
> - if (kind != svn_node_file)
> - return svn_error_createf (SVN_ERR_BAD_FILENAME, NULL,
> - "svn_io_detect_mimetype: "
> - "Can't detect mimetype of non-file '%s'",
> - file);
> -
> - SVN_ERR (svn_io_file_open (&fh, file, APR_READ, 0, pool));
> -
> - /* Read a block of data from FILE. */
> - apr_err = apr_file_read (fh, block, &amt_read);
> - if (apr_err && ! APR_STATUS_IS_EOF(apr_err))
> - return svn_error_createf (apr_err, NULL,
> - "svn_io_detect_mimetype: error reading '%s'",
> - file);
> -
> - /* Now close the file. No use keeping it open any more. */
> - apr_file_close (fh);
> -
> -
> - /* Right now, this function is going to be really stupid. It's
> - going to examine the first block of data, and make sure that 85%
> - of the bytes are such that their value is in the ranges 0x07-0x0D
> - or 0x20-0x7F, and that 100% of those bytes is not 0x00.
> -
> - If those criteria are not met, we're calling it binary. */
> - if (amt_read > 0)
> - {
> - apr_size_t i;
> - int binary_count = 0;
> -
> - /* Run through the data we've read, counting the 'binary-ish'
> - bytes. HINT: If we see a 0x00 byte, we'll set our count to its
> - max and stop reading the file. */
> - for (i = 0; i < amt_read; i++)
> - {
> - if (block[i] == 0)
> - {
> - binary_count = amt_read;
> - break;
> - }
> - if ((block[i] < 0x07)
> - || ((block[i] > 0x0D) && (block[i] < 0x20))
> - || (block[i] > 0x7F))
> - {
> - binary_count++;
> - }
> - }
> -
> - if (((binary_count * 1000) / amt_read) > 850)
> - {
> - *mimetype = generic_binary;
> - return SVN_NO_ERROR;
> - }
> - }
> -
> + SVN_ERR( svn_mime_guess_type_ext( mimetype, file, pool) );
> + if (*mimetype != NULL) /* found mime-type? */
> + return SVN_NO_ERROR;
> + SVN_ERR( svn_mime_guess_type_by_content(mimetype, file, pool) );
> return SVN_NO_ERROR;
> }
>
> -
> svn_error_t *
> svn_io_file_open (apr_file_t **new_file, const char *fname,
> apr_int32_t flag, apr_fileperms_t perm,
> Index: subversion/libsvn_subr/mime.c
> ===================================================================
> --- subversion/libsvn_subr/mime.c (working copy)
> +++ subversion/libsvn_subr/mime.c (working copy)
> @@ -0,0 +1,349 @@
> +/* @file mime.c
> + mime.c: guessing mime types
> + */
> +
> +
> +
> +#include "apr_pools.h"
> +#include "apr_user.h"
> +#include "svn_io.h"
> +
> +#include "svn_mime.h"
> +
> +/** @brief set @ª *cont to the staticly allocated pool used for
> + svn_mime_hash(...) */
> +static svn_error_t*
> +svn_mime__cache_pool(apr_pool_t **cont)
> +{
> + static apr_pool_t* p = NULL;
> + if (p == NULL)
> + {
> + apr_status_t apr_status;
> + /* may leak if multiple threads enter at same time, not really a
> + problem */
> + apr_status = apr_pool_create(&p, NULL);
> + if ( !APR_STATUS_IS_SUCCESS(apr_status) )
> + return svn_error_create(apr_status,
> + 0,
> + "failed to allocate pool for mime
> caching");
> + }
> + *cont = p;
> + return SVN_NO_ERROR;
> +}
> +
> +/** @brief set @ª *cont to the staticly allocated pool used for
> + svn_mime_hash(...) */
> +static svn_error_t*
> +svn_mime__hash(apr_hash_t** ht)
> +{
> + static apr_hash_t* h = NULL;
> + if (h == NULL)
> + {
> + apr_pool_t* pool;
> + /* may leak if multiple threads enter at same time, not really a
> + problem */
> + SVN_ERR( svn_mime__cache_pool(&pool) );
> + h = apr_hash_make(pool);
> + }
> + *ht = h;
> + return SVN_NO_ERROR;
> +}
> +
> +/** @*/
> +static void
> +svn_mime__debug_print_hash()
> +{
> + apr_hash_t* h = NULL;
> + apr_hash_index_t *e = NULL;
> + apr_pool_t *p = NULL;
> +
> + if( svn_mime__cache_pool(&p) != SVN_NO_ERROR )
> + abort();
> +
> + if( svn_mime__hash(&h) != SVN_NO_ERROR )
> + abort();
> +
> + for( e = apr_hash_first(p, h);
> + e != NULL;
> + e = apr_hash_next(e))
> + {
> + const char *key;
> + size_t key_len;
> + const char *val;
> + apr_hash_this(e, (const void **)&key, &key_len, (void **)&val);
> + }
> +}
> +
> +/** @brief parse a mime.types file's content from @a buf into @a ht */
> +static svn_error_t*
> +svn_mime__parse_file_entries(apr_hash_t* ht,
> + svn_stringbuf_t* buf,
> + apr_pool_t* cont)
> +{
> + enum PARSE_STATE
> + {
> + PARSE_MIME_TYPE,
> + PARSE_EXT
> + };
> + enum PARSE_STATE state = PARSE_MIME_TYPE;
> + char *p, *end, *type_begin = 0, *ext_begin = 0;
> + for ( p = buf->data,
> + end = buf->data + buf->len;
> + p < end;
> + ++p )
> + {
> + switch ( *p ) {
> + case '\n':
> + /* newline => next entry */
> + *p = 0;
> + if ( state == PARSE_EXT && ext_begin != 0 )
> + apr_hash_set(ht, ext_begin, APR_HASH_KEY_STRING, type_begin);
> + /* start over */
> + type_begin = 0;
> + ext_begin = 0;
> + state = PARSE_MIME_TYPE;
> + break;
> + case '#':
> + /** comment, eat to '\n' or end */
> + for( ; *p != '\n' && p < end; ++p)
> + ;
> + if (p < end)
> + --p;
> + /* start over */
> + type_begin = 0;
> + ext_begin = 0;
> + state = PARSE_MIME_TYPE;
> + break;
> + case ' ':
> + case '\t':
> + /* white-space => possibly change state, and possibly found
> + entry */
> + *p = 0;
> + switch ( state ) {
> + case PARSE_MIME_TYPE:
> + state = PARSE_EXT;
> + break;
> + case PARSE_EXT:
> + if ( ext_begin != 0 )
> + {
> + apr_hash_set(ht, ext_begin, APR_HASH_KEY_STRING, type_begin);
> + ext_begin = 0;
> + }
> + break;
> + }
> + break;
> + default:
> + switch ( state )
> + {
> + case PARSE_MIME_TYPE:
> + if ( type_begin == 0 )
> + type_begin = p;
> + break;
> + case PARSE_EXT:
> + if ( ext_begin == 0 )
> + ext_begin = p;
> + break;
> + }
> + break;
> + }
> + }
> + return SVN_NO_ERROR;
> +}
> +
> +static char *
> +svn_mime__calculate_homedir_path(apr_pool_t *cont, apr_pool_t
> *static_pool)
> +{
> + /* get path to homedir */
> + apr_uid_t userid;
> + apr_gid_t groupid;
> + char *username;
> + char *homedir;
> +
> + if( !(APR_STATUS_IS_SUCCESS( apr_uid_current( &userid, &groupid, cont ) )
> + && APR_STATUS_IS_SUCCESS( apr_uid_name_get(&username, userid,
> cont) )
> + && APR_STATUS_IS_SUCCESS( apr_uid_homepath_get(&homedir,
> + username,
> + cont)) ) )
> + return 0;
> + else
> + return apr_pstrcat(static_pool, homedir, "/.mime.types", NULL);
> +}
> +
> +/** @brief insert the mime-types from @a file into @a ht, in pool @a
> + cache, allocation temporary storage in @a cont */
> +static svn_error_t*
> +svn_mime__set_types(apr_hash_t* ht,
> + const char *file,
> + apr_pool_t* cont)
> +{
> + svn_stringbuf_t* buf;
> + svn_error_t* err;
> +
> + SVN_ERR( svn_stringbuf_from_file (&buf, file, cont) );
> + SVN_ERR( svn_mime__parse_file_entries (ht, buf, cont) );
> +}
> +
> +static svn_error_t*
> +svn_mime__set_types_default(apr_pool_t* cont)
> +{
> + apr_pool_t* cache;
> + apr_hash_t *ht;
> + svn_error_t* err;
> + apr_status_t apr_status;
> + static const char *mime_files[] = { "/etc/mime.types", 0, 0 };
> + const char *file;
> + size_t i;
> + SVN_ERR( svn_mime__cache_pool(&cache) );
> + SVN_ERR( svn_mime__hash(&ht) );
> +
> + if ( mime_files[1] == 0 )
> + mime_files[1] = svn_mime__calculate_homedir_path(cont, cache);
> +
> + for (i = 0; file = mime_files[i], file != 0; ++i)
> + {
> + svn_node_kind_t kind;
> + err = svn_io_check_path (file, &kind, cache);
> + if ( err != SVN_NO_ERROR )
> + {
> + svn_handle_warning(stderr, err);
> + svn_error_clear(err);
> + }
> + if (kind != svn_node_file)
> + continue;
> + err = svn_mime__set_types(ht, file, cache);
> + if ( err != SVN_NO_ERROR )
> + {
> + svn_handle_warning(stderr, err);
> + svn_error_clear(err);
> + }
> + }
> + return SVN_NO_ERROR;
> +}
> +
> +static svn_error_t*
> +svn_mime__ensure_types_loaded(apr_pool_t* cont)
> +{
> + static int loaded = 0;
> + if ( !loaded )
> + {
> + SVN_ERR( svn_mime__set_types_default(cont) );
> + loaded = 1;
> + /* debug_print_mime_hash(); */
> + }
> + return SVN_NO_ERROR;
> +}
> +
> +svn_error_t*
> +svn_mime_guess_type_ext(const char **mimetype,
> + const char *file,
> + apr_pool_t *cont)
> +{
> + apr_hash_t *ht;
> + size_t file_len;
> + const char *ext;
> + *mimetype = 0;
> +
> + if ( file == NULL ) /* defensive */
> + return svn_error_create
> + (SVN_ERR_UNSUPPORTED_FEATURE, NULL,
> + "svn_guess_mime_type_ext: "
> + "file cannot be NULL");
> +
> + file_len = strlen(file);
> + if ( file_len == 0 ) /* defensive */
> + return svn_error_create
> + (SVN_ERR_UNSUPPORTED_FEATURE, NULL,
> + "svn_guess_mime_type_ext: "
> + "filename cannot be the empty string");
> +
> + /* find extension */
> + for (ext = file + file_len - 1; ext >= file; --ext)
> + if (*ext == '.')
> + break;
> + if ( *ext != '.' )
> + return SVN_NO_ERROR;
> + /* invariant: ext points to first char after last "." */
> + ++ext;
> +
> + SVN_ERR( svn_mime__ensure_types_loaded(cont) );
> + SVN_ERR( svn_mime__hash(&ht) );
> + *mimetype = apr_hash_get(ht, ext, APR_HASH_KEY_STRING);
> + return SVN_NO_ERROR;
> +}
> +
> +svn_error_t*
> +svn_mime_guess_type_by_content(const char **mimetype,
> + const char *file,
> + apr_pool_t *pool)
> +{
> + static const char * const generic_binary = "application/octet-stream";
> +
> + svn_node_kind_t kind;
> + apr_file_t *fh;
> + apr_status_t apr_err;
> + unsigned char block[1024];
> + apr_size_t amt_read = sizeof (block);
> +
> + /* Default return value is NULL. */
> + *mimetype = NULL;
> +
> + /* See if this file even exists, and make sure it really is a file. */
> + SVN_ERR (svn_io_check_path (file, &kind, pool));
> + if (kind != svn_node_file)
> + return svn_error_createf (SVN_ERR_BAD_FILENAME, NULL,
> + "svn_io_detect_mimetype: "
> + "Can't detect mimetype of non-file '%s'",
> + file);
> +
> + SVN_ERR (svn_io_file_open (&fh, file, APR_READ, 0, pool));
> +
> + /* Read a block of data from FILE. */
> + apr_err = apr_file_read (fh, block, &amt_read);
> + if (apr_err && ! APR_STATUS_IS_EOF(apr_err))
> + return svn_error_createf (apr_err, NULL,
> + "svn_io_detect_mimetype: error reading '%s'",
> + file);
> +
> + /* Now close the file. No use keeping it open any more. */
> + apr_file_close (fh);
> +
> +
> + /* Right now, this function is going to be really stupid. It's
> + going to examine the first block of data, and make sure that 85%
> + of the bytes are such that their value is in the ranges 0x07-0x0D
> + or 0x20-0x7F, and that 100% of those bytes is not 0x00.
> +
> + If those criteria are not met, we're calling it binary. */
> + if (amt_read > 0)
> + {
> + apr_size_t i;
> + int binary_count = 0;
> +
> + /* Run through the data we've read, counting the 'binary-ish'
> + bytes. HINT: If we see a 0x00 byte, we'll set our count to its
> + max and stop reading the file. */
> + for (i = 0; i < amt_read; i++)
> + {
> + if (block[i] == 0)
> + {
> + binary_count = amt_read;
> + break;
> + }
> + if ((block[i] < 0x07)
> + || ((block[i] > 0x0D) && (block[i] < 0x20))
> + || (block[i] > 0x7F))
> + {
> + binary_count++;
> + }
> + }
> +
> + if (((binary_count * 1000) / amt_read) > 850)
> + {
> + *mimetype = generic_binary;
> + return SVN_NO_ERROR;
> + }
> + }
> +
> + return SVN_NO_ERROR;
> +}
> +
> Index: subversion/clients/cmdline/feedback.c
> ===================================================================
> --- subversion/clients/cmdline/feedback.c (revision 6416)
> +++ subversion/clients/cmdline/feedback.c (working copy)
> @@ -112,7 +112,7 @@
> do get it, and the mime-type is not textual, note that this
> is a binary addition. */
> if (mime_type && (svn_mime_type_is_binary (mime_type)))
> - printf ("A (bin) %s\n", path_native);
> + printf ("A (bin) %s (%s)\n", path_native, mime_type);
> else
> printf ("A %s\n", path_native);
> break;
> @@ -204,7 +204,7 @@
>
> case svn_wc_notify_commit_added:
> if (mime_type && svn_mime_type_is_binary (mime_type))
> - printf ("Adding (bin) %s\n", path_native);
> + printf ("Adding (bin) %s (%s)\n", path_native, mime_type);
> else
> printf ("Adding %s\n", path_native);
> break;
>
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
> For additional commands, e-mail: dev-help@subversion.tigris.org

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Wed Jul 9 18:49:23 2003

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.