From ab41ec8cf3775e3204c5e472a1fa0c95ec433f1c Mon Sep 17 00:00:00 2001 From: Francois Fleuret Date: Thu, 31 Jan 2013 10:20:14 +0100 Subject: [PATCH] Now handles multi-criteria search. --- mymail.c | 183 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 110 insertions(+), 73 deletions(-) diff --git a/mymail.c b/mymail.c index c7d41d7..a241a86 100644 --- a/mymail.c +++ b/mymail.c @@ -47,6 +47,8 @@ #define MYMAIL_DB_MAGIC_TOKEN "mymail_index_file" #define VERSION "0.1" +#define MAX_NB_SEARCH_PATTERNS 10 + #define BUFFER_SIZE 65536 struct parsable_field { @@ -59,7 +61,6 @@ char *db_filename; char *db_root_path; int multi_db_files; -char *search_pattern; int paranoid; int action_index; @@ -84,7 +85,7 @@ void *safe_malloc(size_t n) { void *p = malloc(n); if(!p && n != 0) { fprintf(stderr, - "mymail: can not allocate memory: %s\n", strerror(errno)); + "mymail: cannot allocate memory: %s\n", strerror(errno)); exit(EXIT_FAILURE); } return p; @@ -123,78 +124,99 @@ int ignore_entry(const char *name) { (name[0] == '.' 
&& name[1] != '/'); } -void search_in_db(const char *search_name, const char *search_regexp_string, +void search_in_db(int nb_search_patterns, + char **search_name, char **search_regexp_string, FILE *db_file) { - - char raw_line[BUFFER_SIZE]; + int hits[MAX_NB_SEARCH_PATTERNS]; + char raw_db_line[BUFFER_SIZE]; + char raw_mbox_line[BUFFER_SIZE]; char current_mail_filename[PATH_MAX + 1]; unsigned long int current_position_in_mail; char *name, *value; - regex_t regexp; - int already_written; + regex_t regexp[MAX_NB_SEARCH_PATTERNS]; + int already_written, n; - if(regcomp(&regexp, - search_regexp_string, - REG_ICASE)) { - fprintf(stderr, - "mymail: Syntax error in regexp \"%s\" for field \"%s\".\n", - search_regexp_string, - search_name); - exit(EXIT_FAILURE); + for(n = 0; n < nb_search_patterns; n++) { + if(regcomp(&regexp[n], + search_regexp_string[n], + REG_ICASE)) { + fprintf(stderr, + "mymail: Syntax error in regexp \"%s\" for field \"%s\".\n", + search_regexp_string[n], + search_name[n]); + exit(EXIT_FAILURE); + } } current_position_in_mail = 0; already_written = 0; - while(fgets(raw_line, BUFFER_SIZE, db_file)) { - name = raw_line; - value = segment_next_field(raw_line); + for(n = 0; n < nb_search_patterns; n++) { hits[n] = 0; } + + while(fgets(raw_db_line, BUFFER_SIZE, db_file)) { + name = raw_db_line; + value = segment_next_field(raw_db_line); if(strcmp("mail", name) == 0) { - char *position_in_file_string = value; - char *mail_filename = segment_next_field(value); - current_position_in_mail = atol(position_in_file_string); - strcpy(current_mail_filename, mail_filename); - remove_eof(current_mail_filename); - already_written = 0; - } + char *position_in_file_string; + char *mail_filename; - else if(!already_written) { - if(strcmp(search_name, name) == 0 && regexec(&regexp, value, 0, 0, 0) == 0) { + for(n = 0; n < nb_search_patterns && hits[n]; n++); + + if(n == nb_search_patterns) { FILE *mail_file; mail_file = fopen(current_mail_filename, "r"); if(!mail_file) { - 
fprintf(stderr, "mymail: Can not open '%s'.\n", current_mail_filename); + fprintf(stderr, "mymail: Cannot open mbox '%s'.\n", current_mail_filename); exit(EXIT_FAILURE); } fseek(mail_file, current_position_in_mail, SEEK_SET); - if(fgets(raw_line, BUFFER_SIZE, mail_file)) { - printf("%s", raw_line); - while(fgets(raw_line, BUFFER_SIZE, mail_file) && - strncmp(raw_line, "From ", 5)) { - printf("%s", raw_line); + if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) { + printf("%s", raw_mbox_line); + while(fgets(raw_mbox_line, BUFFER_SIZE, mail_file) && + strncmp(raw_mbox_line, "From ", 5)) { + printf("%s", raw_mbox_line); } } fclose(mail_file); - already_written = 1; + } + + for(n = 0; n < nb_search_patterns; n++) { hits[n] = 0; } + + position_in_file_string = value; + mail_filename = segment_next_field(value); + current_position_in_mail = atol(position_in_file_string); + strcpy(current_mail_filename, mail_filename); + + remove_eof(current_mail_filename); + already_written = 0; + } + + else { + for(n = 0; n < nb_search_patterns; n++) { + hits[n] |= + (strcmp(search_name[n], name) == 0 && regexec(&regexp[n], value, 0, 0, 0) == 0); } } } - regfree(&regexp); + for(n = 0; n < nb_search_patterns; n++) { + regfree(&regexp[n]); + } } void recursive_search_in_db(const char *entry_name, - const char *search_name, const char *search_regexp_string) { + int nb_search_patterns, + char **search_name, char **search_regexp_string) { DIR *dir; struct dirent *dir_e; struct stat sb; - char raw_line[BUFFER_SIZE]; + char raw_db_line[BUFFER_SIZE]; char subname[PATH_MAX + 1]; if(lstat(entry_name, &sb) != 0) { fprintf(stderr, - "mymail: Can not stat \"%s\": %s\n", + "mymail: Cannot stat \"%s\": %s\n", entry_name, strerror(errno)); exit(EXIT_FAILURE); @@ -206,7 +228,9 @@ void recursive_search_in_db(const char *entry_name, while((dir_e = readdir(dir))) { if(!ignore_entry(dir_e->d_name)) { snprintf(subname, PATH_MAX, "%s/%s", entry_name, dir_e->d_name); - recursive_search_in_db(subname, search_name, 
search_regexp_string); + recursive_search_in_db(subname, + nb_search_patterns, + search_name, search_regexp_string); } } closedir(dir); @@ -219,14 +243,14 @@ void recursive_search_in_db(const char *entry_name, if(!db_file) { fprintf(stderr, - "mymail: Can not open \"%s\" for reading: %s\n", + "mymail: Cannot open \"%s\" for reading: %s\n", db_filename, strerror(errno)); exit(EXIT_FAILURE); } - if(fgets(raw_line, BUFFER_SIZE, db_file)) { - if(strncmp(raw_line, MYMAIL_DB_MAGIC_TOKEN, strlen(MYMAIL_DB_MAGIC_TOKEN))) { + if(fgets(raw_db_line, BUFFER_SIZE, db_file)) { + if(strncmp(raw_db_line, MYMAIL_DB_MAGIC_TOKEN, strlen(MYMAIL_DB_MAGIC_TOKEN))) { fprintf(stderr, "mymail: Header line in '%s' does not match the mymail db format.\n", entry_name); @@ -234,12 +258,13 @@ void recursive_search_in_db(const char *entry_name, } } else { fprintf(stderr, - "mymail: Can not read the header line in '%s'.\n", + "mymail: Cannot read the header line in '%s'.\n", entry_name); exit(EXIT_FAILURE); } - search_in_db(search_name, search_regexp_string, db_file); + search_in_db(nb_search_patterns, search_name, search_regexp_string, + db_file); fclose(db_file); } @@ -249,14 +274,14 @@ void recursive_search_in_db(const char *entry_name, /*********************************************************************/ void index_one_mbox_line(int nb_fields_to_parse, struct parsable_field *fields_to_parse, - char *raw_line, FILE *db_file) { + char *raw_mbox_line, FILE *db_file) { regmatch_t matches; int f; for(f = 0; f < nb_fields_to_parse; f++) { - if(regexec(&fields_to_parse[f].regexp, raw_line, 1, &matches, 0) == 0) { + if(regexec(&fields_to_parse[f].regexp, raw_mbox_line, 1, &matches, 0) == 0) { fprintf(db_file, "%s %s\n", fields_to_parse[f].name, - raw_line + matches.rm_eo); + raw_mbox_line + matches.rm_eo); } } } @@ -264,7 +289,7 @@ void index_one_mbox_line(int nb_fields_to_parse, struct parsable_field *fields_t void index_mbox(const char *mbox_filename, int nb_fields_to_parse, struct parsable_field 
*fields_to_parse, FILE *db_file) { - char raw_line[BUFFER_SIZE], full_line[BUFFER_SIZE]; + char raw_mbox_line[BUFFER_SIZE], full_line[BUFFER_SIZE]; char *end_of_full_line; FILE *file; int in_header, new_header; @@ -273,7 +298,7 @@ void index_mbox(const char *mbox_filename, file = fopen(mbox_filename, "r"); if(!file) { - fprintf(stderr, "mymail: Can not open '%s'.\n", mbox_filename); + fprintf(stderr, "mymail: Cannot open '%s'.\n", mbox_filename); if(paranoid) { exit(EXIT_FAILURE); } return; } @@ -285,18 +310,18 @@ void index_mbox(const char *mbox_filename, end_of_full_line = 0; full_line[0] = '\0'; - while(fgets(raw_line, BUFFER_SIZE, file)) { - if(strncmp(raw_line, "From ", 5) == 0) { + while(fgets(raw_mbox_line, BUFFER_SIZE, file)) { + if(strncmp(raw_mbox_line, "From ", 5) == 0) { if(in_header) { fprintf(stderr, "Got a ^\"From \" in the header in %s:%lu.\n", mbox_filename, position_in_file); - fprintf(stderr, "%s", raw_line); + fprintf(stderr, "%s", raw_mbox_line); if(paranoid) { exit(EXIT_FAILURE); } } in_header = 1; new_header = 1; - } else if(strncmp(raw_line, "\n", 1) == 0) { + } else if(strncmp(raw_mbox_line, "\n", 1) == 0) { if(in_header) { in_header = 0; } } @@ -306,8 +331,8 @@ void index_mbox(const char *mbox_filename, new_header = 0; } - if(raw_line[0] == ' ' || raw_line[0] == '\t') { - char *start = raw_line; + if(raw_mbox_line[0] == ' ' || raw_mbox_line[0] == '\t') { + char *start = raw_mbox_line; while(*start == ' ' || *start == '\t') start++; *(end_of_full_line++) = ' '; strcpy(end_of_full_line, start); @@ -318,12 +343,12 @@ void index_mbox(const char *mbox_filename, } else { - /* if(!((raw_line[0] >= 'a' && raw_line[0] <= 'z') || */ - /* (raw_line[0] >= 'A' && raw_line[0] <= 'Z'))) { */ + /* if(!((raw_mbox_line[0] >= 'a' && raw_mbox_line[0] <= 'z') || */ + /* (raw_mbox_line[0] >= 'A' && raw_mbox_line[0] <= 'Z'))) { */ /* fprintf(stderr, */ /* "Header line syntax error %s:%lu.\n", */ /* mbox_filename, position_in_file); */ - /* fprintf(stderr, "%s", 
raw_line); */ + /* fprintf(stderr, "%s", raw_mbox_line); */ /* } */ if(full_line[0]) { @@ -331,7 +356,7 @@ void index_mbox(const char *mbox_filename, } end_of_full_line = full_line; - strcpy(end_of_full_line, raw_line); + strcpy(end_of_full_line, raw_mbox_line); while(*end_of_full_line && *end_of_full_line != '\n') { end_of_full_line++; } @@ -340,7 +365,7 @@ void index_mbox(const char *mbox_filename, } - position_in_file += strlen(raw_line); + position_in_file += strlen(raw_mbox_line); } fclose(file); @@ -356,7 +381,7 @@ void recursive_index_mbox(FILE *db_file, if(lstat(entry_name, &sb) != 0) { fprintf(stderr, - "mymail: Can not stat \"%s\": %s\n", + "mymail: Cannot stat \"%s\": %s\n", entry_name, strerror(errno)); exit(EXIT_FAILURE); @@ -417,21 +442,26 @@ static struct parsable_field fields_to_parse[] = { }; +/*********************************************************************/ + int main(int argc, char **argv) { int error = 0, show_help = 0; const int nb_fields_to_parse = sizeof(fields_to_parse) / sizeof(struct parsable_field); char c; int f; + int nb_search_patterns; + char *search_pattern[MAX_NB_SEARCH_PATTERNS]; paranoid = 0; action_index = 0; - search_pattern = 0; db_filename = 0; db_root_path = 0; multi_db_files = 0; setlocale(LC_ALL, ""); + nb_search_patterns = 0; + while ((c = getopt_long(argc, argv, "hvimp:s:d:p:", long_options, NULL)) != -1) { @@ -462,11 +492,11 @@ int main(int argc, char **argv) { break; case 's': - if(search_pattern) { - fprintf(stderr, "mymail: Search pattern already defined.\n"); + if(nb_search_patterns == MAX_NB_SEARCH_PATTERNS) { + fprintf(stderr, "mymail: Too many search patterns.\n"); exit(EXIT_FAILURE); } - search_pattern = strdup(optarg); + search_pattern[nb_search_patterns++] = strdup(optarg); break; default: @@ -520,7 +550,7 @@ int main(int argc, char **argv) { if(!db_file) { fprintf(stderr, - "mymail: Can not open \"%s\" for writing: %s\n", + "mymail: Cannot open \"%s\" for writing: %s\n", db_filename, strerror(errno)); 
exit(EXIT_FAILURE); @@ -556,11 +586,15 @@ int main(int argc, char **argv) { else { - if(search_pattern) { - char *search_name; - char *search_regexp_string; - search_name = search_pattern; - search_regexp_string = segment_next_field(search_pattern); + if(nb_search_patterns > 0) { + char *search_name[MAX_NB_SEARCH_PATTERNS]; + char *search_regexp_string[MAX_NB_SEARCH_PATTERNS]; + int n; + + for(n = 0; n < nb_search_patterns; n++) { + search_name[n] = search_pattern[n]; + search_regexp_string[n] = segment_next_field(search_pattern[n]); + } if(!*search_regexp_string) { fprintf(stderr, @@ -568,9 +602,12 @@ int main(int argc, char **argv) { exit(EXIT_FAILURE); } - recursive_search_in_db(db_root_path, search_name, search_regexp_string); + recursive_search_in_db(db_root_path, + nb_search_patterns, search_name, search_regexp_string); - free(search_pattern); + for(n = 0; n < nb_search_patterns; n++) { + free(search_pattern[n]); + } } } -- 2.20.1