From 4c8e9a5a4cff71288247bb12eb025cd35dd3c7f9 Mon Sep 17 00:00:00 2001 From: Francois Fleuret Date: Sun, 27 Jan 2013 16:30:28 +0100 Subject: [PATCH] Embryo of indexing works (and goes pretty fast!) --- mymail.c | 104 +++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 78 insertions(+), 26 deletions(-) diff --git a/mymail.c b/mymail.c index da9bed1..f657306 100644 --- a/mymail.c +++ b/mymail.c @@ -37,16 +37,21 @@ #include #include #include +#include #define VERSION "0.1" #define BUFFER_SIZE 16384 struct parsable_field { - char *regexp; - char *db_filename; + char *name; + char *regexp_string; + regex_t regexp; + FILE *db_file; }; +char *db_filename_prefix; + /********************************************************************/ /* malloc with error checking. */ @@ -72,11 +77,11 @@ void usage(FILE *out) { } void read_file(const char *input_filename, - int nb_fields_to_parse, struct parsable_field *fields_to_parse, - FILE **db_files) { + int nb_fields_to_parse, struct parsable_field *fields_to_parse) { char raw_line[BUFFER_SIZE]; FILE *file; int in_header; + unsigned int position_in_file; file = fopen(input_filename, "r"); @@ -87,10 +92,15 @@ void read_file(const char *input_filename, in_header = 0; + position_in_file = 0; + while(fgets(raw_line, BUFFER_SIZE, file)) { if(strncmp(raw_line, "From ", 5) == 0) { if(in_header) { - fprintf(stderr, "Got a 'From ' in the header.\n"); + fprintf(stderr, + "Got a 'From ' in the header in %s:%u.\n", + input_filename, position_in_file); + fprintf(stderr, "%s", raw_line); exit(EXIT_FAILURE); } in_header = 1; @@ -98,11 +108,25 @@ void read_file(const char *input_filename, if(in_header) { in_header = 0; } } + /* if(in_header) { */ + /* printf("LINE.H %s", raw_line); */ + /* } else { */ + /* printf("LINE.B %s", raw_line); */ + /* } */ + if(in_header) { - printf("LINE.H %s", raw_line); - } else { - printf("LINE.B %s", raw_line); + int f; + regmatch_t matches; + for(f = 0; f < nb_fields_to_parse; f++) { + if(regexec(&fields_to_parse[f].regexp, raw_line, 1, &matches, 0) == 0) { + fprintf(fields_to_parse[f].db_file, "%s:%d %s", + input_filename, position_in_file, + raw_line + matches.rm_eo); + } + } } + + position_in_file += strlen(raw_line); } fclose(file); @@ -116,8 +140,7 @@ int ignore_entry(const char *name) { } void process_entry(const char *dir_name, - int nb_fields_to_parse, struct parsable_field *fields_to_parse, - FILE **db_files) { + int nb_fields_to_parse, struct parsable_field *fields_to_parse) { DIR *dir; struct dirent *dir_e; struct stat sb; @@ -143,14 +166,14 @@ void process_entry(const char *dir_name, while((dir_e = readdir(dir))) { if(!ignore_entry(dir_e->d_name)) { snprintf(subname, PATH_MAX, "%s/%s", dir_name, dir_e->d_name); - process_entry(subname, nb_fields_to_parse, fields_to_parse, db_files); + process_entry(subname, nb_fields_to_parse, fields_to_parse); } } closedir(dir); } else { if(S_ISREG(sb.st_mode)) { - printf("Processing regular file '%s'.\n", dir_name); - read_file(dir_name, nb_fields_to_parse, fields_to_parse, db_files); + /* printf("Processing regular file '%s'.\n", dir_name); */ + read_file(dir_name, nb_fields_to_parse, fields_to_parse); } } } @@ -166,11 +189,22 @@ enum static struct option long_options[] = { { "help", no_argument, 0, 'h' }, + { "db-prefix", 1, 0, 'p' }, { 0, 0, 0, 0 } }; static struct parsable_field fields_to_parse[] = { - { "^[Tt][Oo]:", "/tmp/mymail-to" } + { + "from", + "^[Ff][Rr][Oo][Mm]: *", + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 0 + }, + + { + "dest", + "^\\([Tt][Oo]\\|[Cc][Cc]\\|[Bb][Cc][Cc]\\): *", + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 0 + }, }; int main(int argc, char **argv) { @@ -178,11 +212,10 @@ int main(int argc, char **argv) { const int nb_fields_to_parse = sizeof(fields_to_parse) / sizeof(struct parsable_field); char c; int f; - FILE **db_files; setlocale(LC_ALL, ""); - while ((c = getopt_long(argc, argv, "h", + while ((c = getopt_long(argc, argv, "hp:", long_options, NULL)) != -1) { switch(c) { @@ -191,12 +224,20 @@ int main(int argc, char **argv) { show_help = 1; break; + case 'p': + db_filename_prefix = strdup(optarg); + break; + default: error = 1; break; } } + if(!db_filename_prefix) { + db_filename_prefix = strdup("/tmp/mymail_"); + } + if(error) { usage(stderr); exit(EXIT_FAILURE); @@ -207,30 +248,41 @@ int main(int argc, char **argv) { exit(EXIT_SUCCESS); } - db_files = safe_malloc(nb_fields_to_parse * sizeof(FILE *)); - for(f = 0; f < nb_fields_to_parse; f++) { - db_files[f] = fopen(fields_to_parse[f].db_filename, "w"); - if(!db_files[f]) { + char db_filename[BUFFER_SIZE]; + sprintf(db_filename, "%s%s", db_filename_prefix, fields_to_parse[f].name); + fields_to_parse[f].db_file = fopen(db_filename, "w"); + if(!fields_to_parse[f].db_file) { fprintf(stderr, "mymail: Can not open \"%s\" for writing: %s\n", - fields_to_parse[f].db_filename, + db_filename, strerror(errno)); + exit(EXIT_FAILURE); + } + + printf("Initialized %s.\n", db_filename); + + if(regcomp(&fields_to_parse[f].regexp, + fields_to_parse[f].regexp_string, + REG_ICASE)) { + fprintf(stderr, + "mymail: Syntax error in regexp \"%s\" for field \"%s\".\n", + fields_to_parse[f].regexp_string, + fields_to_parse[f].name); + exit(EXIT_FAILURE); } } while(optind < argc) { process_entry(argv[optind], - nb_fields_to_parse, fields_to_parse, - db_files); + nb_fields_to_parse, fields_to_parse); optind++; } for(f = 0; f < nb_fields_to_parse; f++) { - fclose(db_files[f]); + fclose(fields_to_parse[f].db_file); + regfree(&fields_to_parse[f].regexp); } - free(db_files); - exit(EXIT_SUCCESS); } -- 2.20.1