/* 
 * copyright 2010-2012 Edscott Wilson Garcia (GPL-license)
 *
 *
 * Tests on a 4 GB ram box.
 * Step 1: Create a database with over 4M records. Total key size > 4GB. Table size > 40GB
 *         Check code, can DBH handle this?
 *
 *
 * This is a very simple example program to test the 64-bit
 * functions of the Disk Based Hash (DBH) and to
 * verify correct handling of dbh files greater than
 * 2 GB in size (up to 256^8/2).

 * A dbh file is created from a specified filesystem.
 * Paths are indexed with g_string hash key
 * Hash key collisions are noted in dbh file COLLISIONS
 * path => Hash key<->path associations are noted in dbh file INDEX
 * Hash key => file are noted in dbh file TABLE 
 *
 * usage: ./filesystem path option
 * Option can be:
 *    "index" (create INDEX, COLLISIONS and TABLE dbh files)
 *    "dump"  (do a foreach on all records and print summary)
 *    "regen" (recreate TABLE dbh file with optimized physical structure)
 *    "compare" (compare each file in TABLE with actual file on disk)
 *    "parallel"
 *    "thread"
 *    "fulltest" (all of the above)

 */
#include "config.h"
#define _GNU_SOURCE             /* See feature_test_macros(7) */
#include <features.h>
#include <pthread.h>

#ifdef HAVE_LSTAT
# define LSTAT lstat
#else
# define LSTAT stat
#endif
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <dbh.h>
#include <dirent.h>
#include <sys/types.h>
#include <inttypes.h>

#ifdef HAVE_GDBM_H
# include <gdbm.h>
#endif

#ifdef HAVE_SYS_WAIT_H
# include <sys/wait.h>
#endif

#ifdef HAVE_SYS_RESOURCE_H
#include <sys/time.h>
#include <sys/resource.h>
#endif

#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#ifdef HAVE_WINDOWS_H
#include <windows.h>
#endif
#ifndef O_BINARY
#define O_BINARY 0x0
#endif
       

#include <glib.h>

// A directory whose full path equals SKIP_DIR is not descended into by the
// filesystem walkers in this file.
#define SKIP_DIR "/home"
// Output directory for every generated file (created by mkdir_output()).
// The trailing slash matters: the file names below are appended by literal
// string concatenation.
#define DIRECTORY "/home/edscott/testfiles/"
const gchar *directory=DIRECTORY;

#define RANDOM_LIST DIRECTORY"randomlist.txt"

// Hash-keyed DBH table and its collision table (see create_dbh_table()).
#define COLLISIONS DIRECTORY"performance.collisions.dbh"
#define TABLE DIRECTORY"performance.table.dbh"

// Sequence-keyed DBH table and its path index (see create_qdbh_table()).
#define QINDEX DIRECTORY"performance.qindex.dbh"
#define QTABLE DIRECTORY"performance.qtable.dbh"

// GDBM equivalents of TABLE/COLLISIONS (see create_gdbm_table()).
#define GCOLLISIONS DIRECTORY"performance.gcollisions.dbf"
#define GTABLE DIRECTORY"performance.gtable.dbf"

#define REBUILT DIRECTORY"performance.index.rebuilt.dbh"
#define TEST_INDEX DIRECTORY"parperformance.index.dbh"

// Usage text listing the command-line options.
// NOTE(review): the option names here differ from those in the header comment
// at the top of the file -- confirm which list is current.
#define HELP \
"     Options:\n"\
"       create: Create table files of items within the specified \"path\"\n"\
"       random: Create a random list for tests\n"\
"       regen: Regenerate the DBH table (sweep/fanout)\n"\
"       test:  Random r/w tests\n"\
" *To test a DBH table larger than 4 GB, choose a \"path\" with more than 4GB.\n"\
"  Do not alter any item within \"path\" during the test or error will occur."


// Running totals shared between a table-creation routine and the
// filesystem walker it drives.
typedef struct dump_t{
    int original_count;     // item count returned by the walker when the table was created
    long long original_sum; // copy of "sum" taken right after the table was created
    long long sum;          // accumulated st_size of every file stored so far
    int which;              // non-zero selects sweep, zero selects fanout (disabled dump())
    int count;              // records visited by the disabled operate() callback
    gint natural;           // next sequence number passed to dbh_genkey() by qread_filesystem()
}dump_t;


static
gchar *get_hash_key(unsigned char bucket, const char *pre_key){
    GString *gs = g_string_new(pre_key);
    gchar *key;
    key=g_strdup_printf("%c%10u", bucket, g_string_hash(gs));
    g_string_free(gs, TRUE);
    return key;
}

/*
 * Report on stderr that "path/file" is being skipped, then return NULL so the
 * caller can write "return skip_msg(...)".  Nothing is printed when reason is
 * NULL.
 */
static gchar *
skip_msg(const gchar *path, const gchar *file, const gchar *reason)
{
    if (reason != NULL) {
        fprintf(stderr, "skipping \"%s/%s\" (%s)\n", path, file, reason);
    }
    return NULL;
}

/*
 * Print one tab-separated line of resource usage statistics to stdout.
 *
 * count    - first column; when zero only the column header line is printed.
 * walltime - second column, printed as given.
 *
 * The remaining columns come from getrusage(RUSAGE_SELF).  Without
 * HAVE_SYS_RESOURCE_H the function does nothing.
 */
void out(long count, long walltime){
#ifdef HAVE_SYS_RESOURCE_H
    struct rusage ru;

    if (getrusage(RUSAGE_SELF, &ru) != 0) {
        fprintf(stderr, "rusage(): %s\n", strerror(errno));
        return;
    }
    if (count == 0) {
        fprintf(stdout, "# count walltime usertime systime resident shared data stack page_r page_f swaps block_i block_o\n");
        return;
    }
    // Same column order as the header line above.
    fprintf(stdout,
            "%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\n",
            count,
            walltime,
            (long)ru.ru_utime.tv_sec,
            (long)ru.ru_stime.tv_sec,
            (long)ru.ru_maxrss,
            (long)ru.ru_ixrss,
            (long)ru.ru_idrss,
            (long)ru.ru_isrss,
            (long)ru.ru_minflt,
            (long)ru.ru_majflt,
            (long)ru.ru_nswap,
            (long)ru.ru_inblock,
            (long)ru.ru_oublock);
    fflush(stdout);
#endif
}
/*
 * Decide whether directory entry "d" of "path" takes part in the test.
 *
 * path - directory being scanned.
 * d    - entry returned by readdir() for that directory.
 * st   - filled in by LSTAT() for the entry when the entry can be stat'ed.
 *
 * Returns the newly allocated full path (caller g_free()s it), or NULL when
 * the entry is skipped.  Entries are skipped when they are "." or "..", when
 * the name contains ".dbh" or ".dbf", when they cannot be stat'ed, when their
 * size is zero or above 200000 bytes, or when they are neither a regular file
 * nor a directory.  Only the stat failure is reported on stderr.
 */
static gchar *
get_fullpath(const gchar *path, struct dirent *d, struct stat *st){
    const char *name = d->d_name;

    if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) return NULL;
    // Substring match, not a suffix match.
    if (strstr(name, ".dbh") || strstr(name, ".dbf")) return NULL;

    gchar *fullpath = g_build_filename(path, name, NULL);
    if (LSTAT(fullpath, st) < 0) {
        g_free(fullpath);
        return skip_msg(path, name, "cannot stat");
    }

    // Let's put a 0.2 MB limit for recordsize in the test (this is arbitrary).
    const int bad_size = (st->st_size == 0 || st->st_size > 200000LL);
    const int bad_type = (!S_ISREG(st->st_mode) && !S_ISDIR(st->st_mode));
    if (bad_size || bad_type) {
        g_free(fullpath);
        return NULL;
    }
    return fullpath;
}
//////////////////////////////////////////////////////////////////////


/*
 * Recursively measure what a table built from "path" would contain.
 *
 * path          - directory to scan.
 * limit         - files larger than this many bytes are not counted.
 * records_p     - incremented once per counted file.
 * size_p        - incremented by the size of each counted file.
 * key_storage_p - incremented by strlen() of each counted file's full path.
 *
 * Entries are filtered through get_fullpath(); a directory whose full path
 * equals SKIP_DIR is not descended into.
 *
 * Returns 1 on success, -1 when this directory or any directory below it
 * cannot be opened.  The scan stops at the first failure, and the path
 * string and directory handle are released on that path too.
 */
static int 
check_filesystem(const char *path, size_t limit, size_t *records_p, size_t *size_p, size_t *key_storage_p)
{
    errno = 0;
    DIR *dir = opendir(path);
    if (!dir) {
        fprintf(stderr,"Cannot open %s (%s)\n" ,path, strerror(errno));
        return -1;
    }

    int status = 1;
    struct dirent *d;
    while (status > 0 && (d = readdir(dir)) != NULL) {
        struct stat st;
        gchar *fullpath = get_fullpath(path, d, &st);
        if (!fullpath) continue;

        if (S_ISDIR(st.st_mode)) {
            if (strcmp(fullpath, SKIP_DIR) != 0 &&
                check_filesystem(fullpath, limit, records_p, size_p, key_storage_p) < 0) {
                status = -1;
            }
        } else if ((size_t)st.st_size <= limit) {
            // get_fullpath() only returns entries with a positive size.
            (*records_p)++;
            (*size_p) += st.st_size;
            (*key_storage_p) += strlen(fullpath);
        }

        g_free(fullpath);
    }
    closedir(dir);
    return status;
}

//////////////////////////////////////////////////////////////////////

// Recursively walk "path" and store every accepted file in two DBH tables:
//   index: key = full path (zero padded / truncated to the index key length),
//          data = the generated table key as a NUL terminated string.
//   dbh:   key = the generated table key, data = the file contents.
// The table key is produced by dbh_genkey() from dump_p->natural, which is
// incremented once per stored file.  dump_p->sum accumulates the stored sizes.
//
// Returns the number of files stored (including subdirectories), or -1 when
// "path" cannot be opened.  A negative result from a recursive call is ignored.
static int 
qread_filesystem(DBHashTable *dbh, DBHashTable *index, const char *path, dump_t *dump_p)
{
    DIR *directory; 
    int count = 0;
    struct dirent *d;
    

    directory = opendir(path);
    if(!directory) {
	fprintf(stderr,"Cannot open %s\n" ,path);
	return -1;
    }

    // We add 1 to q_number to save the null terminating char.
    unsigned char q_number[DBH_KEYLENGTH(dbh)+1];
    unsigned char q_key[DBH_KEYLENGTH(index)+1];
    //fprintf(stderr, "keylength=%d\n", DBH_KEYLENGTH(index));

// NOTE(review): defined after the system headers were included, so it
// presumably has no effect here -- confirm before relying on it.
#define     _BSD_SOURCE 1
while((d = readdir(directory)) != NULL)
    {
        // get_fullpath() allocates fullpath and fills st; NULL means "skip".
        struct stat st;
        
        gchar *fullpath=get_fullpath(path, d, &st);
        if (!fullpath) continue;
                
        if (S_ISDIR(st.st_mode)) {
             // Descend unless this is exactly SKIP_DIR.
             if (strcmp(fullpath, SKIP_DIR)){
                int retval;
		retval = qread_filesystem(dbh, index, fullpath, dump_p);
		if (retval > 0) count += retval;
             }
	} else { // not a dir.
             // This is useful if our data size grows over 1024 B:
             if (DBH_MAXIMUM_RECORD_SIZE(dbh) < st.st_size) {
                 dbh_set_size(dbh,st.st_size);
                 fprintf(stderr, "dbh_set_size set to %lld (%s)\n",(long long)st.st_size, fullpath);
             }
             // This is binary mode in unix. 
             int fd=open(fullpath,O_RDONLY|O_BINARY);
             if (fd < 0) {
                fprintf(stderr, "cannot open %s for read\n",fullpath);
                g_free(fullpath); continue;
             }
             // This works instead of dbh_set_data():
             // read data directly into DBH_DATA(dbh)
             // NOTE(review): only a negative return is treated as an error; a
             // short read is not detected.
             dbh_set_recordsize(dbh,st.st_size);
             if (read(fd,DBH_DATA(dbh),st.st_size) < 0){
                fprintf(stderr, "problem reading %lld bytes from %s\n",
                        (long long)st.st_size,fullpath);
                close(fd);
                g_free(fullpath); continue;
             }
             close(fd);
             
             // index...
             // set the index key: the path, zero padded, truncated to the
             // index key length
             memset(q_key, 0, DBH_KEYLENGTH(index)+1);
             strncpy((gchar *)q_key, fullpath, DBH_KEYLENGTH(index));
             dbh_set_key(index, q_key);
             // set the index data: the generated key for the next sequence number
             memset(q_number, 0, DBH_KEYLENGTH(dbh)+1);
             dbh_genkey(q_number, DBH_KEYLENGTH(dbh), dump_p->natural++);
             gint recordsize = strlen((gchar *)q_number)+1;
             //fprintf(stderr,"Recordsize: %d\n", recordsize);
             dbh_set_recordsize(index, recordsize);
             dbh_set_data(index, q_number, recordsize);
             // update the index        
             //fprintf(stderr, "key: %s, data %s\n", (gchar *)DBH_KEY(index), (gchar *)DBH_DATA(index));
             dbh_update(index);
             //fprintf(stderr, "index q_number:\"%s\" data:\"%s\"\n", q_number, (char *)DBH_DATA(index));
            
             // table...
             // use the q number as the access key
             dbh_set_key(dbh, q_number);
             dbh_update(dbh);
             dump_p->sum += st.st_size;
             count++;
             //fprintf(stdout,"%s: adding %ld bytes of data to table\n",fullpath, st.st_size);
	 //
	}
 
	g_free(fullpath);
    }
    closedir(directory);
//	printf ("%s -> %d files\n",path,count);
    return (count);
}

/*
 * Recursively walk "path" and store every accepted file in the hash-keyed
 * DBH table.
 *
 * dbh        - table receiving the file contents, keyed by get_hash_key().
 * collisions - table recording, for each path that needed a bucket beyond
 *              'A', the key that was finally used (keyed by the first 254
 *              bytes of the path).
 * path       - directory to scan.
 * dump_p     - dump_p->sum is increased by the size of each stored file.
 *
 * Key probing and collision recording are done for directories as well as
 * files, although only files are stored in "dbh".
 *
 * Files are opened with O_BINARY, as qread_filesystem() does, so the stored
 * bytes are not translated on platforms where text mode differs.
 *
 * Returns the number of files stored (including subdirectories), or -1 when
 * "path" cannot be opened.  A negative result from a recursive call is ignored.
 */
static int 
read_filesystem(DBHashTable *dbh, DBHashTable *collisions, const char *path, dump_t *dump_p)
{
    DIR *dir = opendir(path);
    if (!dir) {
        fprintf(stderr,"Cannot open %s\n" ,path);
        return -1;
    }

    int count = 0;
    struct dirent *d;
    while ((d = readdir(dir)) != NULL) {
        struct stat st;
        gchar *fullpath = get_fullpath(path, d, &st);
        if (!fullpath) continue;

        // While the hash key is already used, keep moving to the next bucket.
        unsigned char bucket = 'A';
        gchar *key;
        for (;;) {
            key = get_hash_key(bucket, fullpath);
            dbh_set_key(dbh, (unsigned char *)key);
            if (!dbh_load(dbh)) break;
            fprintf(stderr, "HASH colision: %s -> %s\n", key,fullpath);
            bucket++;
            g_free(key);
        }
        g_free(key);

        // If we are beyond the first bucket, a collision has occurred:
        // remember which key this path ended up with.
        if (bucket > 'A') {
            char collision_key[255];
            memset(collision_key, 0, 255);
            strncpy(collision_key, fullpath, 254);

            dbh_set_key(collisions, (unsigned char *)collision_key);
            dbh_set_size(collisions, DBH_KEYLENGTH(dbh));
            dbh_set_data(collisions, (void *)DBH_KEY(dbh), DBH_KEYLENGTH(dbh));
            dbh_update(collisions);
        }

        if (S_ISDIR(st.st_mode)) {
            // Descend unless this is exactly SKIP_DIR.
            if (strcmp(fullpath, SKIP_DIR) != 0) {
                int retval = read_filesystem(dbh, collisions, fullpath, dump_p);
                if (retval > 0) count += retval;
            }
            g_free(fullpath);
            continue;
        }

        // This is useful if our data size grows over 1024 B:
        if (DBH_MAXIMUM_RECORD_SIZE(dbh) < st.st_size) {
            dbh_set_size(dbh, st.st_size);
            fprintf(stderr, "dbh_set_size set to %lld\n",(long long)st.st_size);
        }
        int fd = open(fullpath, O_RDONLY|O_BINARY);
        if (fd < 0) {
            fprintf(stderr, "cannot open %s for read\n",fullpath);
            g_free(fullpath);
            continue;
        }
        // Reading straight into DBH_DATA(dbh) works instead of dbh_set_data().
        if (read(fd, DBH_DATA(dbh), st.st_size) < 0) {
            fprintf(stderr, "problem reading %lld bytes from %s\n",
                    (long long)st.st_size,fullpath);
            close(fd);
            g_free(fullpath);
            continue;
        }
        close(fd);
        dbh_set_recordsize(dbh, st.st_size);
        dbh_update(dbh);
        dump_p->sum += st.st_size;
        count++;

        g_free(fullpath);
    }
    closedir(dir);
    return count;
}


// Keep these variables global so that recursion won't gobble up all memory...
// data_ptr is the read buffer used by gread_filesystem(); it is allocated on
// first use and replaced by a larger one when a file exceeds data_size bytes.
    size_t data_size = 1024;
    void *data_ptr = NULL;

/*
 * GDBM counterpart of read_filesystem(): recursively walk "path" and store
 * every accepted file in "dbf".
 *
 * dbf        - database receiving the file contents; keys are the 11 bytes
 *              produced by get_hash_key().
 * collisions - database recording, for each path that needed a bucket beyond
 *              'A', the key finally used.  Its keys are 254 bytes: the path,
 *              zero padded or truncated.
 * path       - directory to scan.
 * dump_p     - dump_p->sum is increased by the size of each stored file.
 *
 * File contents are read into the global data_ptr buffer, which is grown as
 * needed.  The buffer comes from malloc() and is therefore released with
 * free().  Files are opened with O_BINARY, as qread_filesystem() does.
 *
 * Returns the number of files stored (including subdirectories), or -1 when
 * "path" cannot be opened.  A negative result from a recursive call is ignored.
 */
static int 
gread_filesystem(GDBM_FILE dbf, GDBM_FILE collisions, const char *path, dump_t *dump_p)
{
    DIR *dir = opendir(path);
    if (!dir) {
        fprintf(stderr,"Cannot open %s\n" ,path);
        return -1;
    }
    if (!data_ptr) data_ptr = malloc(data_size);
    if (!data_ptr) g_error("should not happen, malloc 1024\n");

    int count = 0;
    gchar *key_s = NULL;    // key of the entry being processed; freed lazily
    struct dirent *d;
    while ((d = readdir(dir)) != NULL) {
        struct stat st;
        gchar *fullpath = get_fullpath(path, d, &st);
        if (!fullpath) continue;

        datum key;
        datum content;
        unsigned char bucket = 'A';

        // Release the key left over from the previous entry, then probe
        // buckets until an unused key is found.
        g_free(key_s);
        for (;;) {
            key_s = get_hash_key(bucket, fullpath);
            key.dptr = key_s;
            key.dsize = 11;
            // Does the key collide?
            if (!gdbm_exists(dbf, key)) break;
            fprintf(stderr, "HASH colision: %s -> %s\n", key_s, fullpath);
            bucket++;
            g_free(key_s);
        }

        // If we are beyond the first bucket, a collision has occurred.
        if (bucket > 'A') {
            char collision_key[255];
            memset(collision_key, 0, 255);
            strncpy(collision_key, fullpath, 254);

            // XXX we could probably do better here, using variable key size...
            key.dptr = collision_key;
            key.dsize = 254;

            content.dptr = key_s;
            content.dsize = 11;

            gdbm_store(collisions, key, content, GDBM_INSERT);
        }

        if (S_ISDIR(st.st_mode)) {
            // Descend unless this is exactly SKIP_DIR.
            if (strcmp(fullpath, SKIP_DIR) != 0) {
                int retval = gread_filesystem(dbf, collisions, fullpath, dump_p);
                if (retval > 0) count += retval;
            }
            g_free(fullpath);
            continue;
        }

        // Grow the shared read buffer if this file does not fit.
        if (data_size < st.st_size) {
            free(data_ptr);
            data_size = st.st_size;
            data_ptr = malloc(data_size);
            if (!data_ptr) {
                fprintf(stderr, "Cannot allocate %lld bytes for gdbm data_ptr. Terminating now...\n", (long long)data_size);
                exit(1);
            }
            fprintf(stderr, "gdbm size set to %lld\n",(long long)st.st_size);
        }
        int fd = open(fullpath, O_RDONLY|O_BINARY);
        if (fd < 0) {
            fprintf(stderr, "cannot open %s for read\n",fullpath);
            g_free(fullpath);
            continue;
        }
        if (read(fd, data_ptr, st.st_size) < 0) {
            fprintf(stderr, "problem reading %lld bytes from %s\n",
                    (long long)st.st_size,fullpath);
            close(fd);
            g_free(fullpath);
            continue;
        }
        close(fd);

        // Store the file contents under the key found above.
        key.dptr = key_s;
        key.dsize = 11;
        content.dptr = data_ptr;
        content.dsize = st.st_size;
        gdbm_store(dbf, key, content, GDBM_INSERT);

        dump_p->sum += st.st_size;
        count++;

        g_free(fullpath);
    }
    closedir(dir);
    // free final leftover
    g_free(key_s);

    return count;
}

#if 0

// dbh_foreach callback: count the record and add its size to the totals
// kept in the dump_t attached to dbh->user_data.
static void
operate(DBHashTable *dbh)
{
    dump_t *totals = dbh->user_data;

    totals->count += 1;
    totals->sum += DBH_RECORD_SIZE(dbh);
}

// Table that maps a record key to the path of the file it was read from.
static DBHashTable *dbh_key;

/*
 * dbh_foreach callback: look up the path for the current record's key in
 * dbh_key, read that file from disk and compare its bytes with the record.
 *
 * A mismatch is reported on stdout; every 1000th match prints a dot.
 * Returns early, with a message, when the path cannot be loaded, stat'ed,
 * opened or read.  Exits the program when the comparison buffer cannot be
 * allocated.
 */
static void  compare (DBHashTable *dbh){
    dbh_set_key (dbh_key,(unsigned char *)DBH_KEY(dbh));
    if (!dbh_load(dbh_key)) {
        // Without a loaded record DBH_DATA(dbh_key) does not hold this
        // record's path.
        printf("cannot load path for key %s\n", (char *)DBH_KEY(dbh));
        return;
    }
    char *path = DBH_DATA(dbh_key);

    struct stat st;
    if (LSTAT(path, &st) < 0) {
        // st would be uninitialised below.
        printf("cannot stat %s: %s\n", path, strerror(errno));
        return;
    }

    int fd = open(path, O_RDONLY);
    if (fd < 0) {
        printf("cannot open %s for read\n",path);
        return;
    }

    void *p = malloc(st.st_size);
    if (p == NULL) {
        fprintf(stderr, "malloc: %s\n", strerror(errno));
        exit(1);
    }
    if (read(fd, p, st.st_size) < 0) {
        printf("problem reading %lld bytes from %s\n",
               (long long)st.st_size,path);
        close(fd);
        free(p);
        return;
    }
    close(fd);

    if (memcmp(p, DBH_DATA(dbh), st.st_size) != 0) {
        printf("%s does not compare!\n",path);
    } else {
        static int count=0;
        if (count++ % 1000 == 0) {
            printf ("."); fflush(stdout);
        }
    }
    free(p);
}
#endif

#if 0
/*
 * Visit every record of TABLE, by sweep when dump_p->which is non-zero and by
 * fanout otherwise, and print the record count and total size gathered by
 * operate().  dump_p->count and dump_p->sum are reset first.
 *
 * A "fulltest" verification against dump_p->original_sum and
 * dump_p->original_count used to live here and is currently disabled.
 *
 * Always returns 1.
 */
static int
dump(dump_t *dump_p)
{
    dump_p->count = 0;
    dump_p->sum = 0;

    const char *text = dump_p->which ? "Sweep" : "Fanout";
    fprintf(stdout,"%s is now being performed by pid %d\n", text, getpid());

    // PARALLEL SAFE need not be specified on READ_ONLY
    DBHashTable *table = dbh_new(TABLE, NULL, DBH_READ_ONLY);
    table->user_data = dump_p;
    if (dump_p->which) {
        dbh_foreach_sweep(table, operate);
    } else {
        dbh_foreach_fanout(table, operate);
    }
    dbh_close(table);

    fprintf(stdout,
            "  Sweep data:\n"
            "    Items in the DBH table (filesystem count) = %d\n"
            "    Sum of data items size saved in DBH table = %lld\n",
            dump_p->count, dump_p->sum);
    return 1;
}
#endif


#if 0
static void
check_files(void){
    if (!g_file_test(TABLE, G_FILE_TEST_EXISTS)){
      g_warning("Index file %s has not yet been created\n", 
	      TABLE);
      exit(1);
    }
    if (!g_file_test(COLLISIONS, G_FILE_TEST_EXISTS)){
      g_warning("DBH table %s has not yet been created\n", 
	      COLLISIONS);
      exit(1);
    }
}
#endif

 /*
  * Copy the current record (key, record size and data) of dbh_thread into
  * the table stored in dbh_thread->user_data and write it there.  The whole
  * copy is done while holding the destination table's mutex.
  */
 void  rebuild (DBHashTable *dbh_thread){
    DBHashTable *target = dbh_thread->user_data;

    dbh_mutex_lock(target);
    {
        dbh_set_key(target, DBH_KEY(dbh_thread));
        dbh_set_recordsize(target, DBH_RECORD_SIZE(dbh_thread));
        dbh_set_data(target, DBH_DATA(dbh_thread), DBH_RECORD_SIZE(dbh_thread));
        dbh_update(target);
    }
    dbh_mutex_unlock(target);
}

// State filled in by get_random_list().
long long checksum=0;              // running sum of the per-call counter in get_random_list()
GSList *random_list = NULL;        // g_strdup()ed record keys, in shuffled order
GSList *random_numlist = NULL;     // matching 1-based insertion numbers (GINT_TO_POINTER)
GSList *random_qlist = NULL;       // not used in the visible part of the file
GSList *random_glist = NULL;       // not used in the visible part of the file
gint random_count=0;               // number of keys added to random_list so far
/*
 * Add the key of the current record of "dbh" to random_list, at the front or
 * at the back depending on a drand48() coin flip, and add the matching
 * insertion number to random_numlist at the same end.
 *
 * The generator is seeded from the clock while random_list is still empty.
 * Two values are drawn per call and only the second is used; the first draw
 * is kept so the random sequence stays the same.
 *
 * data is unused.
 */
void get_random_list(DBHashTable *dbh, void *data){
    static unsigned int weight = 7;

    if (!random_list) {
        srand48(time(NULL));
    }

    (void)drand48();
    const double coin = drand48();

    gchar *key_copy = g_strdup((const gchar *)DBH_KEY(dbh));
    gpointer ordinal = GINT_TO_POINTER(++random_count);

    if (coin > 0.5) {
        random_list = g_slist_prepend(random_list, key_copy);
        random_numlist = g_slist_prepend(random_numlist, ordinal);
    } else {
        random_list = g_slist_append(random_list, key_copy);
        random_numlist = g_slist_append(random_numlist, ordinal);
    }

    checksum += weight++;
}
#if 0
// Debug callback: print the key and data of the current record, both
// treated as C strings, on stderr.
void dump_qindex(DBHashTable *xdbh){
    const gchar *key = (gchar *)DBH_KEY(xdbh);
    const gchar *value = (gchar *)DBH_DATA(xdbh);

    fprintf(stderr, "DUMP key: %s, data: %s\n", key, value);
}
#endif


/*
 * Report how many records, how much data and how much key storage a table
 * built from the directory argv[2] would need, counting files of up to
 * 2000000 bytes.
 *
 * A path that is not a directory is reported (naming argv[2], the path that
 * was actually tested); the scan is still attempted and reports its own
 * error.
 *
 * Always returns 1.
 */
static int score(char **argv){
        const char *root = argv[2];
        struct stat st;
        if (stat(root, &st) < 0 || !S_ISDIR(st.st_mode)){
            fprintf(stderr, "%s is not a directory.\n", root);
        }
        size_t records=0, size=0, limit=2000000, key_storage=0;
        fprintf(stderr, "Checking %s for table creation with files <= %ld Mbytes...\n", root, (long)(limit/1000000));
        check_filesystem(root, limit, &records, &size, &key_storage);
        fprintf(stderr, "%s has %ld records for a total of %ld Mb\n", root, (long)records, (long)(size/1000000));
        // Fixed-size estimate assumes 256 bytes of key per record.
        fprintf(stderr, "full variable key storage=%ld KB versus fixed size = %ld KB \n",(long)key_storage/1000, (long)(256*records/1000));
        return (1);
}

static int
create_gdbm_table(const gchar *path){
    time_t gdbm_creation_time;
    time_t start = time(NULL);

    dump_t dump_v;
    memset(&dump_v, 0, sizeof(dump_t));

    fprintf(stderr, "///////////////////  GDBM g_hash key table generation /////////////////\n");
    fprintf(stderr,"Creating index now, process 0x%x recursively reading %s\n", getpid(), path);
    // This table is the bucket index file. The index file is also
    // the data table.
    //
    GDBM_FILE dbf = gdbm_open(GTABLE, 0, GDBM_NEWDB, 0770, NULL);
    // This table handles collisions. If a path is indexed here (first 254
    // bytes of the string), then the data element is the actual hash table
    // key. This avoids a collision with a path that has already been indexed.
    GDBM_FILE collisions = gdbm_open(GCOLLISIONS, 0, GDBM_NEWDB, 0770, NULL);
    // Read the filesystem data into the DBH table.
    dump_v.sum = 0;
    dump_v.original_count=gread_filesystem(dbf, collisions, path, &dump_v);
    dump_v.original_sum = dump_v.sum;
    gdbm_close(dbf);
    gdbm_close(collisions);
    gdbm_creation_time = time(NULL) - start;
    fprintf(stderr,
"  Index created:\n"\
"    Items in the GDBM table (filesystem count) = %d\n"\
"    Sum of data items size saved in GDBM table = %lld\ntime = %lld s.",
	    dump_v.original_count, dump_v.original_sum,
            (long long) gdbm_creation_time);

   fprintf(stderr, "gdbm creation time = %lld s.\n", (long long)gdbm_creation_time);
   return (1);

}

  // This DBH uses more than one bucket in order to handle hashtable
  // key collisions.
/*
 * Create the hash-keyed DBH table (TABLE) and its collision table
 * (COLLISIONS) from the files below "path", then print the item count, the
 * total data size and the elapsed time on stderr.
 *
 * Returns 1 on success, -1 when either table cannot be created.
 */
static int
create_dbh_table(gchar *path){
    dump_t dump_v;
    memset(&dump_v, 0, sizeof(dump_t));
    time_t start = time(NULL);

    fprintf(stderr, "///////////////////  DBH g_hash key table generation /////////////////\n");
    fprintf(stderr,"Creating index now, process 0x%x recursively reading %s\n", getpid(), path);

    // This table is the bucket index file. The index file is also
    // the data table.
    //
    // Our key length here is one extra byte to handle bucket id for collisions.
    unsigned char key_length = 11;
    DBHashTable *dbh = dbh_new(TABLE, &key_length, DBH_CREATE);
    if (!dbh) {
        fprintf(stderr, "Cannot create DBH table %s\n", TABLE);
        return -1;
    }
    // This table handles collisions. If a path is indexed here (first 254
    // bytes of the string), then the data element is the actual hash table
    // key. This avoids a collision with a path that has already been indexed.
    key_length = 254;
    DBHashTable *collisions = dbh_new(COLLISIONS, &key_length, DBH_CREATE);
    if (!collisions) {
        fprintf(stderr, "Cannot create DBH table %s\n", COLLISIONS);
        dbh_close(dbh);
        return -1;
    }

    // Read the filesystem data into the DBH table.
    dump_v.sum = 0;
    dump_v.original_count = read_filesystem(dbh, collisions, path, &dump_v);
    dump_v.original_sum = dump_v.sum;
    dbh_close(dbh);
    dbh_close(collisions);

    fprintf(stderr,
            "  Index created:\n"
            "    Items in the DBH table (filesystem count) = %d\n"
            "    Sum of data items size saved in DBH table = %lld\n",
            dump_v.original_count, dump_v.original_sum);
    time_t dbh_creation_time = time(NULL) - start;
    fprintf(stderr, "dbh creation time = %lld s.\n", (long long)dbh_creation_time);
    return 1;
}


// quantified key index.
/*
 * Create the sequence-keyed DBH table (QTABLE) and its path index (QINDEX)
 * from the files below "path", then print both tables' statistics, the item
 * count, the total data size and the elapsed time on stderr.
 *
 * The statistics are read through the DBH_* macros, which dereference the
 * table handles, so they are printed before the tables are closed.
 *
 * Returns 1 on success, -1 when either table cannot be created.
 */
static int
create_qdbh_table(gchar *path)   {
    dump_t dump_v;
    memset(&dump_v, 0, sizeof(dump_t));
    time_t start = time(NULL);

    fprintf(stderr, "///////////////////  DBH quantified key table generation ///////////////////////\n");
    fprintf(stderr,"Creating qindex now, process 0x%x recursively reading %s\n", getpid(), path);

    // Data table, keyed by a generated 10 byte sequence key.
    unsigned char key_length = 10;
    DBHashTable *dbh = dbh_new(QTABLE, &key_length, DBH_CREATE);
    if (!dbh) {
        fprintf(stderr, "Cannot create DBH table %s\n", QTABLE);
        return -1;
    }
    // Index table, keyed by the first 254 bytes of the path.
    key_length = 254;
    DBHashTable *index = dbh_new(QINDEX, &key_length, DBH_CREATE);
    if (!index) {
        fprintf(stderr, "Cannot create DBH table %s\n", QINDEX);
        dbh_close(dbh);
        return -1;
    }

    // Read the filesystem data into the DBH table.
    dump_v.natural = 1;
    dump_v.sum = 0;
    dump_v.original_count = qread_filesystem(dbh, index, path, &dump_v);
    dump_v.original_sum = dump_v.sum;

    fprintf(stderr,
            "  Q Index created: (%lld records, %lld data, %lld erased, %lld format)\n"
            "  Q Table created: (%lld records, %lld data, %lld erased, %lld format)\n"
            "    Items in the DBH table (filesystem count) = %d\n"
            "    Sum of data items size saved in DBH table = %lld\n",
            (long long)DBH_RECORDS(index), (long long)DBH_DATA_SPACE(index),
            (long long)DBH_ERASED_SPACE(index), (long long)DBH_FORMAT_SPACE(index),
            (long long)DBH_RECORDS(dbh), (long long)DBH_DATA_SPACE(dbh),
            (long long)DBH_ERASED_SPACE(dbh), (long long)DBH_FORMAT_SPACE(dbh),
            dump_v.original_count, dump_v.original_sum);

    dbh_close(dbh);
    dbh_close(index);

    time_t q_creation_time = time(NULL) - start;
    fprintf(stderr, "qdbh creation time = %lld s.\n", (long long)q_creation_time);
    return 1;
  }

static int
mkdir_output(void){
    if (g_mkdir_with_parents(DIRECTORY, 0770) < 0){
	if (!g_file_test(DIRECTORY, G_FILE_TEST_IS_DIR)){
	    g_warning("mkdir(%s): %s\n", DIRECTORY, strerror(errno));
	    exit(1);
	}
    }
    if (!g_file_test(DIRECTORY, G_FILE_TEST_IS_DIR)){
	g_warning("Failed test: g_file_test(%s, G_FILE_TEST_IS_DIR)\n",
		DIRECTORY );
	exit(1);
    }
    return 1;
}

/*
 * Load the record with the given key from a "grow" DBH table.
 *
 * in_table - table to use; when NULL, "grow.dbh" is opened read-only for the
 *            duration of the call.
 * key      - key to look up.
 *
 * Returns 0 when the record loads, -1 when it does not or when "grow.dbh"
 * cannot be opened.
 */
static gint load_grow_dbh(DBHashTable *in_table, const gchar *key){
    DBHashTable *table = in_table;
    if (!table) {
        table = dbh_new("grow.dbh", NULL, DBH_READ_ONLY);
        if (!table) {
            fprintf(stderr, "cannot open table \"%s\"\n", "grow.dbh");
            return -1;
        }
    }

    int retval = 0;
    dbh_set_key(table, (unsigned char *)key);
    if (!dbh_load(table)) {
        fprintf(stderr, "cannot load table key  \"%s\"\n", key);
        retval = -1;
    }

    // Only close what was opened here.
    if (in_table == NULL) dbh_close(table);
    return retval;
}
/*
 * Fetch the record with the given key from a "grow" GDBM database.
 *
 * in_gtable - database to use; when NULL, "grow.dbf" is opened for reading
 *             for the duration of the call.
 * inkey     - decimal text of the key; the stored key is the binary int, as
 *             written by grow_write_f().
 *
 * Returns 0 when the record is found, -1 when it is not or when "grow.dbf"
 * cannot be opened.
 */
static gint load_grow_dbf(GDBM_FILE in_gtable, const gchar *inkey){
    GDBM_FILE gtable = in_gtable;
    if (!gtable) {
        gtable = gdbm_open("grow.dbf", 0, GDBM_READER, 0770, NULL);
        if (!gtable) {
            fprintf(stderr, "gdbm_open(%s): %s\n", "grow.dbf", gdbm_strerror(gdbm_errno));
            return -1;
        }
    }

    int ik = atoi(inkey);
    datum key;
    key.dptr = (void *)&ik;
    key.dsize = sizeof(int);

    gint retval = 0;
    datum record = gdbm_fetch(gtable, key);
    if (!record.dptr) {
        fprintf(stderr, "cannot load gdbm table key  \"%s\"\n", inkey);
        retval = -1;
    }
    // gdbm_fetch() hands back malloc()ed memory, so release it with free().
    free(record.dptr);

    // Only close what was opened here.
    if (in_gtable == NULL) gdbm_close(gtable);
    return retval;
}

/*
 * Load the record stored for path "item" from the hash-keyed DBH table.
 *
 * The collision table is consulted first.  When it has an entry for the
 * path, its data is the table key to use; otherwise the key is the bucket
 * 'A' hash key of the path.
 *
 * The path is zero padded, or truncated, to the collision table's key
 * length before the lookup, so a long path cannot overrun the key buffer.
 *
 * Returns 0 when the record loads, -1 when it does not or when a table
 * cannot be opened.
 */
static gint load_dbh_item(const gchar *item){
    DBHashTable *collisions = dbh_new(COLLISIONS, NULL, DBH_READ_ONLY);
    DBHashTable *table = dbh_new(TABLE, NULL, DBH_READ_ONLY);
    if (!collisions || !table) {
        fprintf(stderr, "cannot open table \"%s\"\n", collisions ? TABLE : COLLISIONS);
        if (collisions) dbh_close(collisions);
        if (table) dbh_close(table);
        return -1;
    }

    gchar collision_key[256];
    memset(collision_key, 0, sizeof collision_key);
    size_t key_length = DBH_KEYLENGTH(collisions);
    if (key_length > sizeof collision_key) key_length = sizeof collision_key;
    size_t copy_length = strlen(item);
    if (copy_length > key_length) copy_length = key_length;
    memcpy(collision_key, item, copy_length);

    dbh_set_key(collisions, (unsigned char *)collision_key);
    if (!dbh_load(collisions)){
        // get regular hash key in 'A' bucket.
        gchar *k = get_hash_key('A', item);
        dbh_set_key(table, (unsigned char *) k);
        g_free(k);
    } else {
        dbh_set_key(table, DBH_DATA(collisions));
    }

    gint retval = 0;
    if (!dbh_load(table)) {
        fprintf(stderr, "cannot load table key  \"%s\"\n", item);
        retval = -1;
    }

    dbh_close(table);
    dbh_close(collisions);
    return retval;
}

/*
 * Load the record stored for path "item" from the sequence-keyed DBH table.
 *
 * The path is looked up in the index table; the index record's data is the
 * key of the data table.
 *
 * The path is zero padded, or truncated, to the index key length before the
 * lookup, so a long path cannot overrun the key buffer.
 *
 * Returns 0 when both lookups succeed, -1 when either fails or when a table
 * cannot be opened.
 */
static gint load_qdbh_item(const gchar *item){
    DBHashTable *table = dbh_new(QTABLE, NULL, DBH_READ_ONLY);
    DBHashTable *index = dbh_new(QINDEX, NULL, DBH_READ_ONLY);
    if (!table || !index) {
        fprintf(stderr, "cannot open table \"%s\"\n", table ? QINDEX : QTABLE);
        if (index) dbh_close(index);
        if (table) dbh_close(table);
        return -1;
    }

    const size_t key_length = DBH_KEYLENGTH(index);
    gchar index_key[key_length];
    memset(index_key, 0, key_length);
    size_t copy_length = strlen(item);
    if (copy_length > key_length) copy_length = key_length;
    memcpy(index_key, item, copy_length);

    gint retval = 0;
    dbh_set_key(index, (unsigned char *)index_key);
    if (!dbh_load(index)){
        fprintf(stderr, "Cannot load index key %s\n", item);
        retval = -1;
    } else {
        dbh_set_key(table, DBH_DATA(index));
        if (!dbh_load(table)) {
            fprintf(stderr, "cannot load qtable key \"%s\"\n", item);
            retval = -1;
        }
    }

    dbh_close(index);
    dbh_close(table);
    return retval;
}

static gint load_gdbm_item(const gchar *item){
    gint retval = 0;
      GDBM_FILE gcollisions = gdbm_open(GCOLLISIONS, 0, GDBM_READER, 0770, NULL);
      GDBM_FILE gtable = gdbm_open(GTABLE, 0, GDBM_READER, 0770, NULL);
          
      gchar collision_key[256];
      gint c_key_length=11;
      memset(collision_key, 0, c_key_length+1);
      memcpy(collision_key, item, strlen(item));
      datum gkey;
          
      gkey.dptr = collision_key;
      gkey.dsize = c_key_length;
          
      datum record = gdbm_fetch(gcollisions, gkey);
      if (record.dptr == NULL){
          // get regular hash key in 'A' bucket.
          gchar *k=get_hash_key('A', item);
          gkey.dptr = g_strdup(k);
          gkey.dsize = 11;
          g_free(k);
      } else {
          gkey.dptr = record.dptr;
          gkey.dsize = 11;
      }
      record = gdbm_fetch(gtable, gkey);
      if (!record.dptr) {
          fprintf(stderr, "cannot load gdbm table key  \"%s\"\n", item);
          retval=-1;
      }
      g_free(record.dptr);
      g_free(gkey.dptr);
      gdbm_close(gcollisions);
      gdbm_close(gtable);
      return retval;
}

#if 0
// State passed to the (disabled) grow_f() callback.
typedef struct grow_t {
        DBHashTable *dbh;   // table whose data buffer grow_f() fills
        long records;       // number of callback invocations so far
        long size;          // sum of the payload sizes generated so far
        time_t start;       // reference time for the progress message
} grow_t;
/*
 * Callback for the (disabled) grow test: set a payload of random length
 * (0..255 bytes) as the data of grow_p->dbh and update the running totals.
 * A progress line goes to stderr every 1000 records.  The record is not
 * written; the key generation and update calls are currently disabled.
 *
 * dbh is unused; data points at the grow_t state.
 */
static void grow_f(DBHashTable *dbh, void *data){
    char junk[256];
    grow_t *state = data;

    double fraction = 1.0 * rand() /RAND_MAX;
    int payload = fraction * 255;

    dbh_set_data(state->dbh, junk, payload);
    state->size += payload;
    state->records++;

    if (state->records % 1000 != 0) return;

    fprintf(stderr, "%ld records done, size=%ld (%ld s)\n",
            state->records, state->size, (long)(time(NULL) - state->start));
}
#endif



// Tunables for the threaded "grow" test.
// KEY_LEN:      key length used for the generated keys (buffers hold KEY_LEN+1).
// V_PER_THREAD: number of keys generated per r2_t work unit.
// MAX_KEYS:     parenthesised so the product survives use inside a larger
//               expression such as "x / MAX_KEYS".
// MAX_THREADS:  number of worker threads started by grow().
// JUNK_LENGTH:  size of the payload buffer written for each key.
#define KEY_LEN 10
#define V_PER_THREAD 10000
#define MAX_KEYS (25*1000*1000)
#define MAX_THREADS 5
#define JUNK_LENGTH 64


// One work unit of the threaded "grow" test: filled in by w_grow_f() and
// consumed by grow_write_f().
typedef struct r2_t {
    time_t  start;                              // reference time for progress messages
    gint  index;                                // sequence number of this unit's first key
    gint process;                               // 1-based worker number, used in messages
    unsigned char key[V_PER_THREAD][KEY_LEN+1]; // generated keys, NUL terminated
    int size[V_PER_THREAD];                     // payload size chosen for each key
}r2_t;

void *w_grow_f(void *data){
    r2_t *r2_p = data;
//    fprintf(stderr, "process=%d index=%d\n", r2_p->process, r2_p->index);
//return data;
    gint i;
    unsigned char key[KEY_LEN+1];
    memset(key,0,KEY_LEN+1);
    for (i=0; i <  V_PER_THREAD; i++){
        dbh_genkey(key, KEY_LEN, i + r2_p->index);
        //strcpy(r2_p->key+i,key);
        memcpy(r2_p->key+i,key, KEY_LEN+1);
	double r = 1.0 * rand() /RAND_MAX;
        r2_p->size[i] = r * JUNK_LENGTH;
        if (!r2_p->size[i]) r2_p->size[i]=5;

    }

    int quantum=0; 
    unsigned char *p = key;
    for (;p && *p; p++) quantum += (*p-'0');
    i--;
    fprintf(stderr, "[%d] %d:%.2lfM: %s [%s](%ld s.)\n", r2_p->process, quantum, 1.0*(i+r2_p->index)/1000000.01, 
                key, (char *)(r2_p->key+(V_PER_THREAD-1)),
                (long)(time(NULL) - r2_p->start));
    
    return data;
}

/*
 * Thread-pool worker: write one generated batch to both databases.
 *
 * process_data: heap-allocated r2_t batch (allocated with malloc() by
 *               grow()); ownership is taken and the batch is released
 *               before returning.
 * data:         two-element void* array: [0] the DBHashTable to write,
 *               [1] the GDBM_FILE to write.
 *
 * For every record of the batch the DBH table receives key[k] with
 * size[k] bytes of payload, and the GDBM file receives the integer
 * (k + index) as key with the same payload.
 */
void grow_write_f(void *process_data, void *data){
    void **arg = data;
    r2_t *r2_p = process_data;
    DBHashTable *dbh = arg[0];
    GDBM_FILE dbf = arg[1];

    // The payload content is irrelevant to the benchmark, but it must be
    // initialised: otherwise indeterminate stack bytes end up on disk.
    char junk[JUNK_LENGTH];
    memset(junk, 0, sizeof junk);

    int k;
    for (k=0; k<V_PER_THREAD; k++){
        dbh_set_key(dbh, (unsigned char *)(r2_p->key+k));
        dbh_set_data(dbh, junk, r2_p->size[k]);
        dbh_update(dbh);

        // GDBM copies key and content during gdbm_store(), so pointing
        // at locals is fine.
        int ik = k+r2_p->index;
        datum key;
        datum content;
        key.dptr = (void *)&ik;
        key.dsize = sizeof(int);
        content.dptr = junk;
        content.dsize = r2_p->size[k];
        gdbm_store(dbf, key, content, GDBM_INSERT);
    }

    // The batch comes from malloc(), so it is released with free().
    free(process_data);
}

static void grow(void){
    gint max_threads = MAX_THREADS;
    gint current_threads = 0;
    pthread_t thread_id[max_threads];
    r2_t r2_v[max_threads];

    gint index = 0;
    time_t start = time(NULL);

    for (; current_threads < max_threads; current_threads++){
        if (!index) index = 1;
        else index += V_PER_THREAD;
        r2_v[current_threads].process = current_threads+1;
        r2_v[current_threads].index = index;
        r2_v[current_threads].start = start;

        pthread_create(thread_id+current_threads, NULL, 
                w_grow_f, (void *)(r2_v+current_threads));
    }

    unsigned char keylength = KEY_LEN;
    DBHashTable *dbh = 
        dbh_new("/home/edscott/testfiles/grow.dbh", &keylength, DBH_CREATE);
    GDBM_FILE dbf = gdbm_open("/home/edscott/testfiles/grow.dbf", 0, GDBM_NEWDB|GDBM_SYNC, 0770, NULL);
    void *arg[]={dbh, dbf};
    gint i=0;
    GThreadPool *writepool = g_thread_pool_new(grow_write_f, arg, 1, TRUE, NULL);
    while (1){
        void *return_data;
        if (thread_id[i] && pthread_tryjoin_np(thread_id[i], &return_data)==0){
            fprintf(stderr, "joined thread [%d], now writing\n", i);
            // process data, threadpool
	    r2_t *thread_r2_p = malloc(sizeof(r2_t));
	    if (!thread_r2_p) g_error("Cannot malloc thread_r2_p: %s\n", strerror(errno));
	    memcpy(thread_r2_p, return_data, sizeof(r2_t));

            g_thread_pool_push (writepool, thread_r2_p, NULL);
            r2_t *r2_p = return_data;
            // start new thread
            if (index < MAX_KEYS){
                index += V_PER_THREAD;
                r2_p->index = index;
                pthread_create(thread_id+i, NULL, w_grow_f, (void *)(r2_p));
            } else {
                fprintf(stderr, "Not starting new thread, limit reached: %d\n", index);
                thread_id[i] = 0;
                if (--current_threads == 0) {
                    fprintf(stderr, "All threads are done.\n");
                    break;
                }
            }
        }
        if (++i >= max_threads){
            sleep(1);
            i=0;
        }
    }
    g_thread_pool_free(writepool, FALSE, TRUE);
    fprintf(stderr, "Threadpool write is complete %ld s. for %d records\n",
            (long)(time(NULL)-start), index);                                                   
    fprintf(stderr, "Threadpool write is complete.\n");                                                   
    dbh_close(dbh);
    gdbm_close(dbf);
}



#if 0
    /*
     * Disabled fragment: statements at file scope, outside any function.
     * It drives the disabled grow_f() callback over every record of
     * QINDEX to fill grow.dbh.  It cannot be re-enabled as is; it would
     * have to be moved into a function body first.
     */
    grow_t grow_v;
    memset(&grow_v, 0, sizeof(grow_t));
    grow_v.start=time(NULL);
    // Go though q-index
    unsigned char keylength;
    DBHashTable *index = dbh_new(QINDEX, &keylength, 0);
   // unsigned char keylength = DBH_KEYLENGTH(index);
    grow_v.dbh = dbh_new("/home/edscott/testfiles/grow.dbh", &keylength, DBH_CREATE);

    fprintf(stderr, "growing file\n");
    dbh_foreach(index, grow_f, &grow_v);
    fprintf(stderr, "grow done: %ld seconds.\n",(long)(time(NULL) - grow_v.start));
    dbh_close(index);
    dbh_close(grow_v.dbh);
#endif
/*
static void grow_test(){
    time_t dbh_time=0;
    time_t dbf_time=0;
    DBHashTable *dbh = 
        dbh_new("/home/edscott/testfiles/grow.dbh", &keylength, DBH_READONLY);
    GDBM_FILE dbf = 
        gdbm_open("/home/edscott/testfiles/grow.dbf", 0, 0, 0770, NULL);
    double r = (1.0)*rand()/RAND_MAX;
    r *= MAX_KEYS;
    int item = r;
    unsigned char key[KEY_LEN+1];
    memset(key,0,KEY_LEN+1);
    time_t start=time(NULL);
        dbh_genkey(key, KEY_LEN, item);
        dbh_set_key(dbh, (unsigned char *)(r2_p->key+k));
        dbh_load
        dbh_time += (time(NULL) - start); 

}
*/


int main(int argc, char **argv){

  if (argc < 2) {
   fprintf(stderr,"insufficient arguments (%d < 2), usage: %s option [path] (option)\n%s\n",
	   argc, argv[0], HELP);
   exit(1);
  }

    if (argc == 3 && strcmp(argv[1], "create")==0){
        if (argc < 3) {
            fprintf(stderr, "option %s requires a path\n", argv[1]);
            exit (1);
        }
        mkdir_output();
        // First, take a look at what we got to build a test datafile
        score(argv);
        // Create gdbm table
        create_gdbm_table(argv[2]);
        // Create DBH table
        create_dbh_table(argv[2]);
        // Create double DBH table
        create_qdbh_table(argv[2]);
        exit (0);

    }



  if (strcmp(argv[1],"regen")==0 ) {
    DBHashTable *dbh = dbh_new(QTABLE, NULL, 0);
    DBHashTable *index = dbh_new(QINDEX, NULL, 0);
    fprintf(stderr, "regen QINDEX\n");
    dbh_regen_sweep(&index);
    fprintf(stderr, "regen QTABLE\n");
    dbh_regen_sweep(&dbh);
    dbh_close(dbh);
    dbh_close(index);

    DBHashTable *table = dbh_new(TABLE, NULL, 0);
    DBHashTable *collisions = dbh_new(COLLISIONS, NULL, 0);
    fprintf(stderr, "regen COLLISIONS\n");
    dbh_regen_sweep(&collisions);
    fprintf(stderr, "regen TABLE\n");
    dbh_regen_sweep(&table);
    dbh_close(table);
    dbh_close(collisions);
    exit(0);
  }

  GSList *list = NULL;
  if (strcmp(argv[1],"random")==0 ) {
      fprintf(stderr, "preparing random list...\n");
      // prepare a random access list with 25% of data records.
      do {
          unsigned char key_length;
          random_count = 0;
          DBHashTable *random_src = dbh_new(QINDEX, &key_length, DBH_READ_ONLY);
          if (!random_src) {
              fprintf(stderr, "cannot create random list until tables are generated\n");
              fprintf(stderr, "%s found -> %d\n", QINDEX, g_file_test(QINDEX, G_FILE_TEST_EXISTS));
              exit(1);
          }

          dbh_foreach(random_src, get_random_list, NULL);
          fprintf(stderr, "random list has %d/%d items (randomness=%lld)\n", 
                  g_slist_length(random_list), (int)DBH_RECORDS(random_src)/3, checksum);
          if (g_slist_length(random_list) < (int)DBH_RECORDS(random_src)/3){
              // free list data...
              GSList *tlist = random_list;
              for (;tlist && tlist->data; tlist = tlist->next) g_free(tlist->data);
              g_slist_free(random_list);
              g_slist_free(random_numlist);
              random_list=NULL;
              random_numlist=NULL;
          } 
          dbh_close(random_src);
          checksum = 0;
      } while (random_list == NULL);
      fprintf(stderr,"Random list is ready. Now writing out...\n");
      FILE *outlist = fopen(RANDOM_LIST,"w");
      if (!outlist){
          fprintf(stderr, "cannot open %s for write\n", RANDOM_LIST);
          exit(1);
      }
      for (list=random_list; list && list->data; list = list->next){
        fprintf(outlist, "%s\n", (gchar *)list->data);
      }
      fclose(outlist);
      fprintf(stderr,"Random list done.\n");
      exit(0);
  }


  if (strstr(argv[1],"grow")) {
      if (strstr(argv[1],"growlist")){
         unsigned char key[KEY_LEN+1];
         memset(key,0,KEY_LEN+1);
         int i;
         time_t start = time(NULL);
         double r;
         for (i=0; i<MAX_KEYS/2; i++){
retry:
            r = (1.0)*rand()/RAND_MAX;
            r *= MAX_KEYS;
            int item = r;
            if (!item) goto retry;
            unsigned char key[KEY_LEN+1];
            memset(key,0,KEY_LEN+1);
            
            dbh_genkey(key, KEY_LEN, item);
            fprintf(stdout, "%d:%s\n", item, key);
            if (i%100 == 0) {
                double t =  (double)(time(NULL) - start)/60.0;
                fprintf(stderr, "records: %d in %lf minutes\n", i, t);
            }
         }
              
      } 
      if (strstr(argv[1],"growtest")) {
          fprintf(stderr, "doing growtest\n");
          if (!g_file_test("growlist.txt", G_FILE_TEST_EXISTS)){
              fprintf(stderr, "%s does not exist. Run random option first\n", "growlist.txt");
              exit(1);
          } else {
              FILE *inlist = fopen("growlist.txt","r");
              if (!inlist){
                  fprintf(stderr, "cannot open %s for read\n", "growlist.txt");
                  exit(1);
              }
              gchar buffer[300];
              fprintf(stderr, "Reading random list file...\n");
              gint count=1;
              while (fgets(buffer, 300, inlist) && !feof(inlist)){
                  if (strchr(buffer, '\n')) *strchr(buffer, '\n')=0;
                  random_list = g_slist_prepend(random_list, g_strdup(buffer));
                  count++;
                  //if (count > 50)break;
                  //if (count %10000 == 0) fprintf(stderr, "read %d records...\n", count);

              }
              fclose(inlist);
              fprintf(stderr, "Finished reading random list file (%d records).\n", count);
          }
          GSList *list = random_list;
          time_t start=time(NULL);
          // test dbh
          int count;
          DBHashTable *g_dbh=NULL;
          if (strstr(argv[1],"growtest2")) {
              g_dbh = dbh_new("grow.dbh", NULL, DBH_READ_ONLY);
          }
          for (list = random_list,count=1; list && list->data; list=list->next, count++){
              gchar *key = strchr((gchar *)(list->data),':');
              if (!key) {
                  g_warning("key is null for %s\n",(gchar *)(list->data)); 
                  continue;
              }
              key++;
              //fprintf(stderr,"key=%s data=%s\n", key, (gchar *)(list->data));
              load_grow_dbh(g_dbh, key);
              if (count % 10000 == 0){
                  fprintf(stdout, "%d  %ld\n", count, (long)(time(NULL)-start));
              }
               if (count % 1000 == 0){
                  fprintf(stderr, "dbh: %d records in  %ld seconds\n", count, (long)(time(NULL)-start));
              }
         }
          if (g_dbh) dbh_close(g_dbh);
          start=time(NULL);

          GDBM_FILE g_dbf=NULL;
          if (strstr(argv[1],"growtest2")) {
              g_dbf = gdbm_open("grow.dbf", 0, GDBM_READER, 0770, NULL);
          }
          for (list = random_list,count=1; list && list->data; list=list->next, count++){
              gchar *key = (gchar *)(list->data);
              *strchr(key,':') = 0;
              //fprintf(stderr,"key=%s data=%s\n", key, (gchar *)(list->data));
              load_grow_dbf(g_dbf,key);
              if (count % 10000 == 0){
                  fprintf(stdout, "%d  %ld\n", count, (long)(time(NULL)-start));
              }
              if (count % 1000 == 0){
                  fprintf(stderr, "dbf: %d records in  %ld seconds\n", count, (long)(time(NULL)-start));
              }
          }
          if (g_dbf) gdbm_close(g_dbf);

      }
              

     
      //else grow();
      exit(0);
  }


  if (!strstr(argv[1],"test")) {
      fprintf(stderr, "unknown option: %s\n", argv[1]);
      exit(1);
  }


   {


      if (!g_file_test(RANDOM_LIST, G_FILE_TEST_EXISTS)){
          fprintf(stderr, "%s does not exist. Run random option first\n", RANDOM_LIST);
          exit(1);
      } else {
          FILE *inlist = fopen(RANDOM_LIST,"r");
          if (!inlist){
              fprintf(stderr, "cannot open %s for read\n", RANDOM_LIST);
              exit(1);
          }
          gchar buffer[300];
          fprintf(stderr, "Reading random list file...\n");
          gint count=1;
          while (fgets(buffer, 300, inlist) && !feof(inlist)){
              if (strchr(buffer, '\n')) *strchr(buffer, '\n')=0;
              random_list = g_slist_prepend(random_list, g_strdup(buffer));
              count++;
              //if (count %10000 == 0) fprintf(stderr, "read %d records...\n", count);

          }
          fclose(inlist);
          fprintf(stderr, "Finished reading random list file (%d records).\n", count);
      }
  }


  dump_t dump_v;
  memset(&dump_v, 0, sizeof(dump_t));

  time_t start;


    fprintf(stderr, "Starting test...\n");
    //gchar *tests[]={"test-g","test-d","test-q",NULL};
    gchar **p;
    gchar **q;

    gchar *strings[256];
    memset(strings, 0, 256*sizeof(gchar **));
      
    srand((int)time(NULL));

    //for (p=tests; p && *p; p++)
    p=argv+1;
    fprintf(stdout, "# GNUplot output for %s: %s\n", 
        *p,
        strstr(*p,"test-g")?"GDBM":
        strstr(*p,"test-q")?"QDBH":
        strstr(*p,"test-d")?"DBH":"wtf");
    {
      fprintf(stderr, "Testing %s...\n", *p);
      out(0, 0);
      start=time(NULL);
      long t_items=0;
      gint i;
      gint item = RAND_MAX;
      long items;
      long top_items = 100000;
      //long top_items = 100;
      gint k;

      for (items=10, k=0; items <= top_items; items *= 10, k++) {
        if (!strings[k]) strings[k] = g_strdup_printf("%ld", items); 
        for(i=0; i< items; i++){
          while (item > g_slist_length(random_list) || !item) {
              double r = 1.0 * rand() / RAND_MAX * g_slist_length(random_list);
              item=r;
          }
          if (strstr(*p,"test-g"))
              load_gdbm_item((gchar *) ((g_slist_nth(random_list, item))->data));
          if (strstr(*p,"test-q"))
              load_qdbh_item((gchar *) ((g_slist_nth(random_list, item))->data));
          else 
              load_dbh_item((gchar *) ((g_slist_nth(random_list, item))->data));

          item=0;
        

        }
        fprintf(stderr, "%s table loaded %ld items in %ld seconds\n", 
        strstr(*p,"test-g")?"GDBM":
        strstr(*p,"test-q")?"QDBH":
        strstr(*p,"test-d")?"DBH":"wtf",
        items, (long)(time(NULL)-start));
        t_items += items;
        out(t_items, (long)(time(NULL)-start));

        gchar *g = g_strdup_printf("%s\t%ld", strings[k],(long)(time(NULL)-start));
        g_free(strings[k]);
        strings[k] = g;        
        
        start=time(NULL);

      }
    }
    //fprintf(stdout, "# GNUplot output: column content:\n# records gdbm(s) dbh(s) qdbh(s)  \n");
    for (q=strings; q && *q; q++){
      //    fprintf(stdout, "%s\n", *q);fflush(stdout);
    }


  
      
      exit(0);
   
   for (list = random_list; list && list->data; list = list->next) g_free(list->data);
   g_slist_free(random_list);

  exit(0);
}



////////////////////////////////////

#if 0 
  /*
   * Disabled fragment: statements at file scope, outside any function.
   * It selects the tests through argv[2] and uses a dump_v variable, so
   * it would have to be moved into a function that provides both before
   * it could be re-enabled.
   */
  // Full or specific test follows. 
  check_files();
  // Dump test
  if (strcmp(argv[2],"dump")==0 || strcmp(argv[2],"fulltest")==0) {
      // Find out how many items and total size of data records
      // a sweep/fanout of DBH table will find
      int i; for(i=1; i>=0; i--) {dump_v.which = i; dump(&dump_v);}
  }

  // Regen tests
  if (strcmp(argv[2],"regen")==0 || strcmp(argv[2],"fulltest")==0) {
    fprintf(stderr, "///////////////////  Serial tests //////////////////////////\n");
    fprintf(stdout,"Performing regen_sweep now...\n");
    DBHashTable *dbh;
    dbh=dbh_new(TABLE, NULL, 0);
    dbh_regen_sweep(&dbh);
    dbh_close(dbh);
    // Find out how many items and total size of data records
    // a sweep of DBH table will find
    dump_v.which = 1;
    dump(&dump_v);
    fprintf(stdout,"Performing regen_fanout now...\n");
    dbh=dbh_new(TABLE, NULL, 0);
    dbh_regen_fanout(&dbh);
    dbh_close(dbh);
    // Find out how many items and total size of data records
    // a sweep of DBH table will find
    dump_v.which = 0;
    dump(&dump_v);
  }
#endif

      // DBH test with q number keys //////////////////////////////////
      // Disabled fragment at file scope (outside any function): loads
      // every number stored in random_numlist from QTABLE, generating
      // the key with dbh_genkey(), and reports the elapsed time.
      // NOTE(review): it uses `loaded` and `start`, which are not
      // declared in this fragment.
#if 0
      // this will mislead following test...
      start=time(NULL);
      DBHashTable *table = dbh_new(QTABLE, NULL, DBH_READ_ONLY);
      GSList *list = random_numlist;
      unsigned char k[DBH_KEYLENGTH(table)+1];
      memset(k, 0, DBH_KEYLENGTH(table)+1);
      for (;list && list->data; list = list->next){
          dbh_genkey(k, DBH_KEYLENGTH(table), GPOINTER_TO_INT(list->data));
          dbh_set_key(table, k);
          if (!dbh_load(table)) fprintf(stderr, "q-numload cannot load item %s (%d)\n", k, GPOINTER_TO_INT(list->data));
          else loaded++;
      }
      dbh_close(table);
    
      fprintf(stderr,"q-numload loaded %d items, random access time = %ld s.\n", loaded, (long)(time(NULL) - start));

#endif
