
davi at haxent
Jul 20, 2006, 7:58 AM
Post #1 of 4
(638 views)
Permalink
|
|
[PATCH] revamped mod_disk_cache directory structure
|
|
Hi, This patch converts the mod_disk_cache cache directory structure to a uniformly distributed two-level hierarchy. The admin specifies the number of level-1 and level-2 directories, and the files are scattered across the level-2 directories. Also, with this patch it is possible to place individual cache directories on separate partitions, because the temporary files are now created in the destination directory rather than in the cache root. For example, running Apache/proxy+cache for a small network: [root@cache cache]# sh files-per-directory.sh dir: 00/ subs: 139 files: 632 size: 4.8M dir: 01/ subs: 156 files: 765 size: 5.7M dir: 02/ subs: 144 files: 626 size: 4.8M dir: 03/ subs: 160 files: 714 size: 6.1M dir: 04/ subs: 169 files: 820 size: 5.9M dir: 05/ subs: 131 files: 590 size: 4.1M dir: 06/ subs: 148 files: 677 size: 5.3M dir: 07/ subs: 142 files: 644 size: 5.8M dir: 08/ subs: 148 files: 749 size: 5.8M dir: 09/ subs: 158 files: 711 size: 6.3M dir: 0A/ subs: 146 files: 666 size: 5.1M dir: 0B/ subs: 157 files: 701 size: 5.1M dir: 0C/ subs: 157 files: 671 size: 5.2M dir: 0D/ subs: 157 files: 711 size: 5.7M dir: 0E/ subs: 149 files: 704 size: 5.6M dir: 0F/ subs: 158 files: 742 size: 5.8M -- Davi Arnaut Index: modules/cache/cache_util.c =================================================================== --- modules/cache/cache_util.c (revision 423984) +++ modules/cache/cache_util.c (working copy) @@ -19,6 +19,7 @@ #include "mod_cache.h" #include <ap_provider.h> +#include <util_md5.h> /* -------------------------------------------------------------- */ @@ -489,54 +490,31 @@ y[sizeof(j) * 2] = '\0'; } -static void cache_hash(const char *it, char *val, int ndepth, int nlength) +static unsigned int cdb_string_hash(const char *str) { - apr_md5_ctx_t context; - unsigned char digest[16]; - char tmp[22]; - int i, k, d; - unsigned int x; - static const char enc_table[64] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_@"; + unsigned int hash = 5381; - apr_md5_init(&context); - 
apr_md5_update(&context, (const unsigned char *) it, strlen(it)); - apr_md5_final(digest, &context); + while (*str) + hash = 33 * hash + *str++; - /* encode 128 bits as 22 characters, using a modified uuencoding - * the encoding is 3 bytes -> 4 characters* i.e. 128 bits is - * 5 x 3 bytes + 1 byte -> 5 * 4 characters + 2 characters - */ - for (i = 0, k = 0; i < 15; i += 3) { - x = (digest[i] << 16) | (digest[i + 1] << 8) | digest[i + 2]; - tmp[k++] = enc_table[x >> 18]; - tmp[k++] = enc_table[(x >> 12) & 0x3f]; - tmp[k++] = enc_table[(x >> 6) & 0x3f]; - tmp[k++] = enc_table[x & 0x3f]; - } - - /* one byte left */ - x = digest[15]; - tmp[k++] = enc_table[x >> 2]; /* use up 6 bits */ - tmp[k++] = enc_table[(x << 4) & 0x3f]; - - /* now split into directory levels */ - for (i = k = d = 0; d < ndepth; ++d) { - memcpy(&val[i], &tmp[k], nlength); - k += nlength; - val[i + nlength] = '/'; - i += nlength + 1; - } - memcpy(&val[i], &tmp[k], 22 - k); - val[i + 22 - k] = '\0'; + return hash; } -CACHE_DECLARE(char *)ap_cache_generate_name(apr_pool_t *p, int dirlevels, - int dirlength, const char *name) +CACHE_DECLARE(char *)ap_cache_generate_name(apr_pool_t *p, unsigned int L1, + unsigned int L2, const char *name) { - char hashfile[66]; - cache_hash(name, hashfile, dirlevels, dirlength); - return apr_pstrdup(p, hashfile); + char *key; + char *md5_hash; + unsigned int cdb_hash; + + md5_hash = ap_md5_binary(p, (unsigned char *) name, (int) strlen(name)); + + cdb_hash = cdb_string_hash(md5_hash) / L2; + + key = apr_psprintf(p, "%02X/%02X/%s", (cdb_hash / L2) % L1, + cdb_hash % L2, md5_hash); + + return key; } /* Create a new table consisting of those elements from an input Index: modules/cache/mod_cache.h =================================================================== --- modules/cache/mod_cache.h (revision 423984) +++ modules/cache/mod_cache.h (working copy) @@ -274,8 +274,8 @@ CACHE_DECLARE(apr_time_t) ap_cache_hex2usec(const char *x); CACHE_DECLARE(void) 
ap_cache_usec2hex(apr_time_t j, char *y); -CACHE_DECLARE(char *) ap_cache_generate_name(apr_pool_t *p, int dirlevels, - int dirlength, +CACHE_DECLARE(char *) ap_cache_generate_name(apr_pool_t *p, unsigned int L1, + unsigned int L2, const char *name); CACHE_DECLARE(cache_provider_list *)ap_cache_get_providers(request_rec *r, cache_server_conf *conf, apr_uri_t uri); CACHE_DECLARE(int) ap_cache_liststr(apr_pool_t *p, const char *list, Index: modules/cache/mod_disk_cache.c =================================================================== --- modules/cache/mod_disk_cache.c (revision 423984) +++ modules/cache/mod_disk_cache.c (working copy) @@ -66,17 +66,38 @@ * Local static functions */ +static apr_status_t disk_mktemp(apr_file_t **fp, const char *dest, char **tempfile, + apr_int32_t flags, apr_size_t cache_root_len, + apr_pool_t *p) +{ + apr_status_t rv; + struct iovec iov[2]; + + iov[0].iov_base = (char *) dest; + iov[0].iov_len = cache_root_len + DIR_LEVELS_LEN; + + iov[1].iov_base = AP_TEMPFILE; + iov[1].iov_len = sizeof AP_TEMPFILE; + + *tempfile = apr_pstrcatv(p, iov, 2, NULL); + + rv = apr_file_mktemp(fp, *tempfile, flags, p); + + return rv; +} + static char *header_file(apr_pool_t *p, disk_cache_conf *conf, disk_cache_object_t *dobj, const char *name) { if (!dobj->hashfile) { - dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels, - conf->dirlength, name); + dobj->hashfile = ap_cache_generate_name(p, conf->dirlevel1, + conf->dirlevel2, name); } if (dobj->prefix) { return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/", - dobj->hashfile, CACHE_HEADER_SUFFIX, NULL); + dobj->hashfile + DIR_LEVELS_LEN, + CACHE_HEADER_SUFFIX, NULL); } else { return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile, @@ -88,13 +109,14 @@ disk_cache_object_t *dobj, const char *name) { if (!dobj->hashfile) { - dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels, - conf->dirlength, name); + dobj->hashfile = ap_cache_generate_name(p, conf->dirlevel1, + 
conf->dirlevel2, name); } if (dobj->prefix) { return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/", - dobj->hashfile, CACHE_DATA_SUFFIX, NULL); + dobj->hashfile + DIR_LEVELS_LEN, + CACHE_DATA_SUFFIX, NULL); } else { return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile, @@ -359,7 +381,6 @@ dobj->root_len = conf->cache_root_len; dobj->datafile = data_file(r->pool, conf, dobj, key); dobj->hdrsfile = header_file(r->pool, conf, dobj, key); - dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL); return OK; } @@ -467,7 +488,6 @@ dobj->key = nkey; dobj->name = key; dobj->datafile = data_file(r->pool, conf, dobj, nkey); - dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL); /* Open the data file */ flags = APR_READ|APR_BINARY; @@ -843,9 +863,9 @@ mkdir_structure(conf, dobj->hdrsfile, r->pool); - rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile, - APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL, - r->pool); + rv = disk_mktemp(&dobj->tfd, dobj->hdrsfile, &dobj->tempfile, + APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL, + conf->cache_root_len, r->pool); if (rv != APR_SUCCESS) { return rv; @@ -876,7 +896,6 @@ return rv; } - dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL); tmp = regen_key(r->pool, r->headers_in, varray, dobj->name); dobj->prefix = dobj->hdrsfile; dobj->hashfile = NULL; @@ -885,11 +904,10 @@ } } + rv = disk_mktemp(&dobj->hfd, dobj->hdrsfile, &dobj->tempfile, + APR_CREATE | APR_WRITE | APR_BINARY | APR_BUFFERED | + APR_EXCL, conf->cache_root_len, r->pool); - rv = apr_file_mktemp(&dobj->hfd, dobj->tempfile, - APR_CREATE | APR_WRITE | APR_BINARY | - APR_BUFFERED | APR_EXCL, r->pool); - if (rv != APR_SUCCESS) { return rv; } @@ -969,8 +987,6 @@ return rv; } - dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL); - ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, "disk_cache: Stored headers for URL %s", dobj->name); return APR_SUCCESS; @@ -989,9 +1005,10 @@ * in 
file_cache_el_final(). */ if (!dobj->tfd) { - rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile, - APR_CREATE | APR_WRITE | APR_BINARY | - APR_BUFFERED | APR_EXCL, r->pool); + rv = disk_mktemp(&dobj->tfd, dobj->datafile, &dobj->tempfile, + APR_CREATE | APR_WRITE | APR_BINARY | + APR_BUFFERED | APR_EXCL, conf->cache_root_len, + r->pool); if (rv != APR_SUCCESS) { return rv; } @@ -1072,8 +1089,8 @@ disk_cache_conf *conf = apr_pcalloc(p, sizeof(disk_cache_conf)); /* XXX: Set default values */ - conf->dirlevels = DEFAULT_DIRLEVELS; - conf->dirlength = DEFAULT_DIRLENGTH; + conf->dirlevel1 = DEFAULT_DIRLEVEL1; + conf->dirlevel2 = DEFAULT_DIRLEVEL2; conf->maxfs = DEFAULT_MAX_FILE_SIZE; conf->minfs = DEFAULT_MIN_FILE_SIZE; @@ -1105,33 +1122,22 @@ * filename = "/key % prime1 /key %prime2/key %prime3" */ static const char -*set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr, const char *arg) +*set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr, const char *arg1, + const char *arg2) { disk_cache_conf *conf = ap_get_module_config(parms->server->module_config, &disk_cache_module); - int val = atoi(arg); - if (val < 1) + int val1 = atoi(arg1); + int val2 = atoi(arg2); + + if (val1 < 1 || val2 < 1) return "CacheDirLevels value must be an integer greater than 0"; - if (val * conf->dirlength > CACHEFILE_LEN) - return "CacheDirLevels*CacheDirLength value must not be higher than 20"; - conf->dirlevels = val; - return NULL; -} -static const char -*set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr, const char *arg) -{ - disk_cache_conf *conf = ap_get_module_config(parms->server->module_config, - &disk_cache_module); - int val = atoi(arg); - if (val < 1) - return "CacheDirLength value must be an integer greater than 0"; - if (val * conf->dirlevels > CACHEFILE_LEN) - return "CacheDirLevels*CacheDirLength value must not be higher than 20"; - conf->dirlength = val; + conf->dirlevel1 = val1; + conf->dirlevel2 = val2; + return NULL; } - static const char 
*set_cache_minfs(cmd_parms *parms, void *in_struct_ptr, const char *arg) { @@ -1153,10 +1159,8 @@ { AP_INIT_TAKE1("CacheRoot", set_cache_root, NULL, RSRC_CONF, "The directory to store cache files"), - AP_INIT_TAKE1("CacheDirLevels", set_cache_dirlevels, NULL, RSRC_CONF, + AP_INIT_TAKE2("CacheDirLevels", set_cache_dirlevels, NULL, RSRC_CONF, "The number of levels of subdirectories in the cache"), - AP_INIT_TAKE1("CacheDirLength", set_cache_dirlength, NULL, RSRC_CONF, - "The number of characters in subdirectory names"), AP_INIT_TAKE1("CacheMinFileSize", set_cache_minfs, NULL, RSRC_CONF, "The minimum file size to cache a document"), AP_INIT_TAKE1("CacheMaxFileSize", set_cache_maxfs, NULL, RSRC_CONF, Index: modules/cache/mod_disk_cache.h =================================================================== --- modules/cache/mod_disk_cache.h (revision 423984) +++ modules/cache/mod_disk_cache.h (working copy) @@ -24,6 +24,8 @@ #define VARY_FORMAT_VERSION 3 #define DISK_FORMAT_VERSION 4 +#define DIR_LEVELS_LEN 6 + #define CACHE_HEADER_SUFFIX ".header" #define CACHE_DATA_SUFFIX ".data" #define CACHE_VDIR_SUFFIX ".vary" @@ -78,16 +80,16 @@ */ /* TODO: Make defaults OS specific */ #define CACHEFILE_LEN 20 /* must be less than HASH_LEN/2 */ -#define DEFAULT_DIRLEVELS 3 -#define DEFAULT_DIRLENGTH 2 +#define DEFAULT_DIRLEVEL1 16 +#define DEFAULT_DIRLEVEL2 256 #define DEFAULT_MIN_FILE_SIZE 1 #define DEFAULT_MAX_FILE_SIZE 1000000 typedef struct { const char* cache_root; apr_size_t cache_root_len; - int dirlevels; /* Number of levels of subdirectories */ - int dirlength; /* Length of subdirectory names */ + unsigned int dirlevel1; /* Number of level 1 directories */ + unsigned int dirlevel2; /* Number of level 2 subdirectories */ apr_size_t minfs; /* minumum file size for cached files */ apr_size_t maxfs; /* maximum file size for cached files */ } disk_cache_conf;
|