[svn.haxx.se] · SVN Dev · SVN Users · SVN Org · TSVN Dev · TSVN Users · Subclipse Dev · Subclipse Users · this month's index

Faster autodetect of mime-type upon "svn add"

From: <junkio_at_cox.net>
Date: 2003-04-24 10:59:25 CEST

The attached patch teaches svn_io_detect_mimetype to guess mime
types from suffixes of filenames. This allows the user to say
"svn add foo.html bar.jpg" and svn:mime-type is set for these
files. The original code in r5718 would leave foo.html typeless
and give bar.jpg application/octet-stream.

It currently uses a table of suffixes and corresponding mime
types (built from the /etc/mime.types file that Debian GNU/Linux ships
as part of the mime-support package). Although the patch should be
usable as it is, one improvement I would like to see is to get
this table out of the executable binary, and instead read from
an external file. Further, maybe the user's configuration file
should be able to name which file to use as the table. I can
surely implement that for Unix, but the problem I am having is
that I do not know how the Subversion project arranges this kind
of thing portably (I am new to the Subversion code). I do not
see a supplemental file that is read at runtime in the current
code to model this after (well, the Subversion configuration file
is such a file, but that is kind of special), so I do not know how
to make the resulting code portable across platforms, or how to make
the modified build process install that mime.types file in the correct place.

Help, suggestions, and/or code takeovers are all welcome.

Index: subversion/libsvn_subr/io.c
===================================================================
--- subversion/libsvn_subr/io.c (revision 5718)
+++ subversion/libsvn_subr/io.c (working copy)
@@ -19,6 +19,7 @@
 
 
 #include <stdio.h>
+#include <stdlib.h>
 #include <assert.h>
 
 #include <apr_lib.h>
@@ -1301,7 +1302,285 @@
   return SVN_NO_ERROR;
 }
 
+/* Built-in table mapping filename suffixes to mime types (an external
+ * file may replace it later).  Entries *MUST* stay sorted by suffix in
+ * strcmp() byte order: the lookup uses bsearch(3) from <stdlib.h> with a
+ * case-sensitive strcmp() comparison.  There is deliberately no NULL
+ * sentinel; the caller derives the element count from the array size.
+ * The table is const: the visible code only ever reads it.
+ * NOTE(review): the caller looks up the text after the *last* '.', so the
+ * "pcf.Z" entry can never match as written -- confirm and drop or fix. */
+static const struct mime_types_table {
+ char const *suffix;
+ char const *mime_type;
+} mime_types[] = {
+ {"323", "text/h323"},
+ {"ai", "application/postscript"},
+ {"aif", "audio/x-aiff"},
+ {"aifc", "audio/x-aiff"},
+ {"aiff", "audio/x-aiff"},
+ {"art", "image/x-jg"},
+ {"asc", "text/plain"},
+ {"asf", "video/x-ms-asf"},
+ {"asx", "video/x-ms-asf"},
+ {"au", "audio/basic"},
+ {"avi", "video/x-msvideo"},
+ {"bat", "application/x-msdos-program"},
+ {"bcpio", "application/x-bcpio"},
+ {"bin", "application/octet-stream"},
+ {"bmp", "image/x-ms-bmp"},
+ {"book", "application/x-maker"},
+ {"cat", "application/vnd.ms-pki.seccat"},
+ {"cdf", "application/x-cdf"},
+ {"cdr", "image/x-coreldraw"},
+ {"cdt", "image/x-coreldrawtemplate"},
+ {"cdy", "application/vnd.cinderella"},
+ {"chrt", "application/x-kchart"},
+ {"class", "application/x-java-vm"},
+ {"cls", "text/x-tex"},
+ {"com", "application/x-msdos-program"},
+ {"cpio", "application/x-cpio"},
+ {"cpt", "image/x-corelphotopaint"},
+ {"crl", "application/x-pkcs7-crl"},
+ {"crt", "application/x-x509-ca-cert"},
+ {"csm", "application/cu-seeme"},
+ {"css", "text/css"},
+ {"csv", "text/comma-separated-values"},
+ {"cu", "application/cu-seeme"},
+ {"dcr", "application/x-director"},
+ {"deb", "application/x-debian-package"},
+ {"dif", "video/x-dv"},
+ {"diff", "text/plain"},
+ {"dir", "application/x-director"},
+ {"djv", "image/x-djvu"},
+ {"djvu", "image/x-djvu"},
+ {"dl", "video/dl"},
+ {"dll", "application/x-msdos-program"},
+ {"dms", "application/x-dms"},
+ {"doc", "application/msword"},
+ {"dot", "application/msword"},
+ {"dv", "video/x-dv"},
+ {"dvi", "application/x-dvi"},
+ {"dxr", "application/x-director"},
+ {"eps", "application/postscript"},
+ {"etx", "text/x-setext"},
+ {"exe", "application/x-msdos-program"},
+ {"ez", "application/andrew-inset"},
+ {"fb", "application/x-maker"},
+ {"fbdoc", "application/x-maker"},
+ {"fig", "application/x-xfig"},
+ {"fli", "video/fli"},
+ {"fm", "application/x-maker"},
+ {"frame", "application/x-maker"},
+ {"frm", "application/x-maker"},
+ {"gcf", "application/x-graphing-calculator"},
+ {"gf", "application/x-tex-gf"},
+ {"gif", "image/gif"},
+ {"gl", "video/gl"},
+ {"gnumeric", "application/x-gnumeric"},
+ {"gsf", "application/x-font"},
+ {"gsm", "audio/x-gsm"},
+ {"gtar", "application/x-gtar"},
+ {"hdf", "application/x-hdf"},
+ {"hqx", "application/mac-binhex40"},
+ {"hta", "application/hta"},
+ {"htm", "text/html"},
+ {"html", "text/html"},
+ {"ica", "application/x-ica"},
+ {"ice", "x-conference/x-cooltalk"},
+ {"ief", "image/ief"},
+ {"iges", "model/iges"},
+ {"igs", "model/iges"},
+ {"iii", "application/x-iphone"},
+ {"ins", "application/x-internet-signup"},
+ {"isp", "application/x-internet-signup"},
+ {"jar", "application/x-java-archive"},
+ {"jng", "image/x-jng"},
+ {"jnlp", "application/x-java-jnlp-file"},
+ {"jpe", "image/jpeg"},
+ {"jpeg", "image/jpeg"},
+ {"jpg", "image/jpeg"},
+ {"js", "application/x-javascript"},
+ {"kar", "audio/midi"},
+ {"kil", "application/x-killustrator"},
+ {"kpr", "application/x-kpresenter"},
+ {"kpt", "application/x-kpresenter"},
+ {"ksp", "application/x-kspread"},
+ {"kwd", "application/x-kword"},
+ {"kwt", "application/x-kword"},
+ {"latex", "application/x-latex"},
+ {"lha", "application/x-lha"},
+ {"lsf", "video/x-la-asf"},
+ {"lsx", "video/x-la-asf"},
+ {"ltx", "text/x-tex"},
+ {"lzh", "application/x-lzh"},
+ {"lzx", "application/x-lzx"},
+ {"m3u", "audio/x-mpegurl"},
+ {"maker", "application/x-maker"},
+ {"man", "application/x-troff-man"},
+ {"mdb", "application/msaccess"},
+ {"me", "application/x-troff-me"},
+ {"mesh", "model/mesh"},
+ {"mid", "audio/midi"},
+ {"midi", "audio/midi"},
+ {"mif", "application/x-mif"},
+ {"mml", "text/mathml"},
+ {"mng", "video/x-mng"},
+ {"moc", "text/x-moc"},
+ {"mov", "video/quicktime"},
+ {"movie", "video/x-sgi-movie"},
+ {"mp2", "audio/mpeg"},
+ {"mp3", "audio/mpeg"},
+ {"mpe", "video/mpeg"},
+ {"mpeg", "video/mpeg"},
+ {"mpega", "audio/mpeg"},
+ {"mpg", "video/mpeg"},
+ {"mpga", "audio/mpeg"},
+ {"ms", "application/x-troff-ms"},
+ {"msh", "model/mesh"},
+ {"msi", "application/x-msi"},
+ {"mxu", "video/vnd.mpegurl"},
+ {"nb", "application/mathematica"},
+ {"nc", "application/x-netcdf"},
+ {"o", "application/x-object"},
+ {"oda", "application/oda"},
+ {"ogg", "application/x-ogg"},
+ {"old", "application/x-trash"},
+ {"oza", "application/x-oz-application"},
+ {"p7r", "application/x-pkcs7-certreqresp"},
+ {"pac", "application/x-ns-proxy-autoconfig"},
+ {"pat", "image/x-coreldrawpattern"},
+ {"pbm", "image/x-portable-bitmap"},
+ {"pcf", "application/x-font"},
+ {"pcf.Z", "application/x-font"},
+ {"pcx", "image/pcx"},
+ {"pdb", "chemical/x-pdb"},
+ {"pdf", "application/pdf"},
+ {"pfa", "application/x-font"},
+ {"pfb", "application/x-font"},
+ {"pgm", "image/x-portable-graymap"},
+ {"pgn", "application/x-chess-pgn"},
+ {"pgp", "application/pgp-signature"},
+ {"php", "application/x-httpd-php"},
+ {"php3", "application/x-httpd-php3"},
+ {"php3p", "application/x-httpd-php3-preprocessed"},
+ {"php4", "application/x-httpd-php4"},
+ {"phps", "application/x-httpd-php-source"},
+ {"pht", "application/x-httpd-php"},
+ {"phtml", "application/x-httpd-php"},
+ {"pk", "application/x-tex-pk"},
+ {"pl", "application/x-perl"},
+ {"pls", "audio/x-scpls"},
+ {"pm", "application/x-perl"},
+ {"png", "image/png"},
+ {"pnm", "image/x-portable-anymap"},
+ {"pot", "application/vnd.ms-powerpoint"},
+ {"ppm", "image/x-portable-pixmap"},
+ {"pps", "application/vnd.ms-powerpoint"},
+ {"ppt", "application/vnd.ms-powerpoint"},
+ {"prf", "application/pics-rules"},
+ {"ps", "application/postscript"},
+ {"psd", "image/x-photoshop"},
+ {"qt", "video/quicktime"},
+ {"qtl", "application/x-quicktimeplayer"},
+ {"ra", "audio/x-realaudio"},
+ {"ram", "audio/x-pn-realaudio"},
+ {"ras", "image/x-cmu-raster"},
+ {"rgb", "image/x-rgb"},
+ {"rm", "audio/x-pn-realaudio"},
+ {"roff", "application/x-troff"},
+ {"rpm", "audio/x-pn-realaudio-plugin"},
+ {"rtf", "text/rtf"},
+ {"rtx", "text/richtext"},
+ {"sct", "text/scriptlet"},
+ {"sd2", "audio/x-sd2"},
+ {"sda", "application/vnd.stardivision.draw"},
+ {"sdc", "application/vnd.stardivision.calc"},
+ {"sdd", "application/vnd.stardivision.impress"},
+ {"sdp", "application/vnd.stardivision.impress-packed"},
+ {"sds", "application/vnd.stardivision.chart"},
+ {"sdw", "application/vnd.stardivision.writer"},
+ {"ser", "application/x-java-serialized-object"},
+ {"sgl", "application/vnd.stardivision.writer-global"},
+ {"shar", "application/x-shar"},
+ {"sid", "audio/prs.sid"},
+ {"sik", "application/x-trash"},
+ {"silo", "model/mesh"},
+ {"sit", "application/x-stuffit"},
+ {"skd", "application/x-koan"},
+ {"skm", "application/x-koan"},
+ {"skp", "application/x-koan"},
+ {"skt", "application/x-koan"},
+ {"smd", "application/vnd.stardivision.mail"},
+ {"smf", "application/vnd.stardivision.math"},
+ {"smi", "application/smil"},
+ {"smil", "application/smil"},
+ {"snd", "audio/basic"},
+ {"spl", "application/x-futuresplash"},
+ {"src", "application/x-wais-source"},
+ {"stl", "application/vnd.ms-pki.stl"},
+ {"sty", "text/x-tex"},
+ {"sv4cpio", "application/x-sv4cpio"},
+ {"sv4crc", "application/x-sv4crc"},
+ {"svg", "image/svg+xml"},
+ {"svgz", "image/svg+xml"},
+ {"swf", "application/x-shockwave-flash"},
+ {"swfl", "application/x-shockwave-flash"},
+ {"t", "application/x-troff"},
+ {"tar", "application/x-tar"},
+ {"taz", "application/x-gtar"},
+ {"tex", "text/x-tex"},
+ {"texi", "application/x-texinfo"},
+ {"texinfo", "application/x-texinfo"},
+ {"text", "text/plain"},
+ {"tgz", "application/x-gtar"},
+ {"tif", "image/tiff"},
+ {"tiff", "image/tiff"},
+ {"tr", "application/x-troff"},
+ {"tsp", "application/dsptype"},
+ {"tsv", "text/tab-separated-values"},
+ {"txt", "text/plain"},
+ {"uls", "text/iuls"},
+ {"ustar", "application/x-ustar"},
+ {"vcd", "application/x-cdlink"},
+ {"vcf", "text/x-vcard"},
+ {"vcs", "text/x-vcalendar"},
+ {"vor", "application/vnd.stardivision.writer"},
+ {"vrm", "x-world/x-vrml"},
+ {"vrml", "x-world/x-vrml"},
+ {"wad", "application/x-doom"},
+ {"wav", "audio/x-wav"},
+ {"wbmp", "image/vnd.wap.wbmp"},
+ {"wbxml", "application/vnd.wap.wbxml"},
+ {"wk", "application/x-123"},
+ {"wml", "text/vnd.wap.wml"},
+ {"wmlc", "application/vnd.wap.wmlc"},
+ {"wmls", "text/vnd.wap.wmlscript"},
+ {"wmlsc", "application/vnd.wap.wmlscriptc"},
+ {"wp5", "application/wordperfect5.1"},
+ {"wrl", "x-world/x-vrml"},
+ {"wsc", "text/scriptlet"},
+ {"wz", "application/x-wingz"},
+ {"xbm", "image/x-xbitmap"},
+ {"xhtml", "text/html"},
+ {"xlb", "application/vnd.ms-excel"},
+ {"xls", "application/vnd.ms-excel"},
+ {"xml", "text/xml"},
+ {"xpm", "image/x-xpixmap"},
+ {"xsl", "text/xml"},
+ {"xwd", "image/x-xwindowdump"},
+ {"xyz", "chemical/x-xyz"},
+ {"zip", "application/zip"},
+};
 
+/* bsearch(3) callback: strcmp() the suffix string KEY_ against one table entry. */
+static int
+suffix_compare(const void *key_, const void *mime_types_table_elem_)
+{
+ const struct mime_types_table *elem = mime_types_table_elem_; /* keep const */
+ return strcmp(key_, elem->suffix);
+}
+
 svn_error_t *
 svn_io_detect_mimetype (const char **mimetype,
                         const char *file,
@@ -1314,6 +1593,7 @@
   apr_status_t apr_err;
   unsigned char block[1024];
   apr_size_t amt_read = sizeof (block);
+ char const *suffix = strrchr(file, '.');
 
   /* Default return value is NULL. */
   *mimetype = NULL;
@@ -1326,6 +1606,19 @@
                               "Can't detect mimetype of non-file '%s'",
                               file);
 
+ /* The filename check can be done without opening it, so do that first. */
+ if (suffix) {
+ struct mime_types_table *t;
+ suffix++; /* skip over the '.' */
+ t = bsearch(suffix, mime_types,
+ sizeof(mime_types) / sizeof(mime_types[0]),
+ sizeof(mime_types[0]), suffix_compare);
+ if (t) {
+ *mimetype = t->mime_type;
+ return SVN_NO_ERROR;
+ }
+ }
+
   SVN_ERR (svn_io_file_open (&fh, file, APR_READ, 0, pool));
 
   /* Read a block of data from FILE. */

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Thu Apr 24 11:00:19 2003

This is an archived mail posted to the Subversion Dev mailing list.

This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.