From e40b536f28d3d6af2e49a48dc830f15a2fd88ab0 Mon Sep 17 00:00:00 2001 From: Francois Fleuret Date: Thu, 31 Jan 2013 22:47:32 +0100 Subject: [PATCH] Starting to convert the ID from strings to integers to speed things up. --- mymail.c | 120 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 40 deletions(-) diff --git a/mymail.c b/mymail.c index 313b8d5..dcb44bf 100644 --- a/mymail.c +++ b/mymail.c @@ -51,8 +51,25 @@ #define BUFFER_SIZE 65536 +enum { + ID_MAIL, + ID_FROM, + ID_DEST, + ID_SUBJECT, + ID_FROMDEST, + MAX_ID +}; + +static char *field_names[] = { + "mail", + "from", + "dest", + "subj", + "fromdest" +}; + struct parsable_field { - char *name; + int id; char *regexp_string; regex_t regexp; }; @@ -124,26 +141,32 @@ int ignore_entry(const char *name) { (name[0] == '.' && name[1] != '/'); } +int mbox_line_match_search(int search_id, regex_t *search_regexp, + int mbox_id, char *mbox_value) { + return search_id == mbox_id && regexec(search_regexp, mbox_value, 0, 0, 0) == 0; +} + void search_in_db(int nb_search_patterns, - char **search_name, char **search_regexp_string, + int *search_ids, char **search_regexp_strings, FILE *db_file) { int hits[MAX_NB_SEARCH_PATTERNS]; char raw_db_line[BUFFER_SIZE]; char raw_mbox_line[BUFFER_SIZE]; char current_mail_filename[PATH_MAX + 1]; unsigned long int current_position_in_mail; - char *name, *value; - regex_t regexp[MAX_NB_SEARCH_PATTERNS]; - int already_written, n; + char *mbox_name, *mbox_value; + int mbox_id; + regex_t search_regexps[MAX_NB_SEARCH_PATTERNS]; + int already_written, m, n; for(n = 0; n < nb_search_patterns; n++) { - if(regcomp(&regexp[n], - search_regexp_string[n], + if(regcomp(&search_regexps[n], + search_regexp_strings[n], REG_ICASE)) { fprintf(stderr, "mymail: Syntax error in regexp \"%s\" for field \"%s\".\n", - search_regexp_string[n], - search_name[n]); + search_regexp_strings[n], + field_names[search_ids[n]]); exit(EXIT_FAILURE); } } @@ -154,10 +177,10 @@ void 
search_in_db(int nb_search_patterns, for(n = 0; n < nb_search_patterns; n++) { hits[n] = 0; } while(fgets(raw_db_line, BUFFER_SIZE, db_file)) { - name = raw_db_line; - value = segment_next_field(raw_db_line); + mbox_name = raw_db_line; + mbox_value = segment_next_field(raw_db_line); - if(strcmp("mail", name) == 0) { + if(strcmp("mail", mbox_name) == 0) { char *position_in_file_string; char *mail_filename; @@ -165,12 +188,16 @@ void search_in_db(int nb_search_patterns, if(n == nb_search_patterns) { FILE *mail_file; + mail_file = fopen(current_mail_filename, "r"); + if(!mail_file) { fprintf(stderr, "mymail: Cannot open mbox '%s'.\n", current_mail_filename); exit(EXIT_FAILURE); } + fseek(mail_file, current_position_in_mail, SEEK_SET); + if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) { printf("%s", raw_mbox_line); while(fgets(raw_mbox_line, BUFFER_SIZE, mail_file) && @@ -178,13 +205,14 @@ void search_in_db(int nb_search_patterns, printf("%s", raw_mbox_line); } } + fclose(mail_file); } for(n = 0; n < nb_search_patterns; n++) { hits[n] = 0; } - position_in_file_string = value; - mail_filename = segment_next_field(value); + position_in_file_string = mbox_value; + mail_filename = segment_next_field(mbox_value); current_position_in_mail = atol(position_in_file_string); strcpy(current_mail_filename, mail_filename); @@ -193,22 +221,27 @@ void search_in_db(int nb_search_patterns, } else { + mbox_id = -1; + for(m = 0; (m < MAX_ID) && mbox_id == -1; m++) { + if(strncmp(field_names[m], mbox_name, strlen(mbox_name)) == 0) { + mbox_id = m; + } + } for(n = 0; n < nb_search_patterns; n++) { - hits[n] |= - (strncmp(search_name[n], name, strlen(search_name[n])) == 0 && - regexec(&regexp[n], value, 0, 0, 0) == 0); + hits[n] |= mbox_line_match_search(search_ids[n], &search_regexps[n], + mbox_id, mbox_value); } } } for(n = 0; n < nb_search_patterns; n++) { - regfree(&regexp[n]); + regfree(&search_regexps[n]); } } void recursive_search_in_db(const char *entry_name, int nb_search_patterns, - 
char **search_name, char **search_regexp_string) { + int *search_ids, char **search_regexp_strings) { DIR *dir; struct dirent *dir_e; struct stat sb; @@ -231,7 +264,7 @@ void recursive_search_in_db(const char *entry_name, snprintf(subname, PATH_MAX, "%s/%s", entry_name, dir_e->d_name); recursive_search_in_db(subname, nb_search_patterns, - search_name, search_regexp_string); + search_ids, search_regexp_strings); } } closedir(dir); @@ -264,7 +297,7 @@ void recursive_search_in_db(const char *entry_name, exit(EXIT_FAILURE); } - search_in_db(nb_search_patterns, search_name, search_regexp_string, + search_in_db(nb_search_patterns, search_ids, search_regexp_strings, db_file); fclose(db_file); @@ -281,7 +314,7 @@ void index_one_mbox_line(int nb_fields_to_parse, struct parsable_field *fields_t for(f = 0; f < nb_fields_to_parse; f++) { if(regexec(&fields_to_parse[f].regexp, raw_mbox_line, 1, &matches, 0) == 0) { fprintf(db_file, "%s %s\n", - fields_to_parse[f].name, + field_names[fields_to_parse[f].id], raw_mbox_line + matches.rm_eo); } } @@ -344,13 +377,15 @@ void index_mbox(const char *mbox_filename, } else { - /* if(!((raw_mbox_line[0] >= 'a' && raw_mbox_line[0] <= 'z') || */ - /* (raw_mbox_line[0] >= 'A' && raw_mbox_line[0] <= 'Z'))) { */ - /* fprintf(stderr, */ - /* "Header line syntax error %s:%lu.\n", */ - /* mbox_filename, position_in_file); */ - /* fprintf(stderr, "%s", raw_mbox_line); */ - /* } */ + /* + if(!((raw_mbox_line[0] >= 'a' && raw_mbox_line[0] <= 'z') || + (raw_mbox_line[0] >= 'A' && raw_mbox_line[0] <= 'Z'))) { + fprintf(stderr, + "Header line syntax error %s:%lu.\n", + mbox_filename, position_in_file); + fprintf(stderr, "%s", raw_mbox_line); + } + */ if(full_line[0]) { index_one_mbox_line(nb_fields_to_parse, fields_to_parse, full_line, db_file); @@ -423,19 +458,19 @@ static struct option long_options[] = { static struct parsable_field fields_to_parse[] = { { - "from", + ID_FROM, "^\\([Ff][Rr][Oo][Mm]:\\|From\\) *", { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0 } }, { - "dest", + ID_DEST, "^\\([Tt][Oo]\\|[Cc][Cc]\\|[Bb][Cc][Cc]\\): *", { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { - "subj", + ID_SUBJECT, "^[Ss][Uu][Bb][Jj][Ee][Cc][Tt]: *", { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, @@ -559,7 +594,7 @@ int main(int argc, char **argv) { fprintf(stderr, "mymail: Syntax error in regexp \"%s\" for field \"%s\".\n", fields_to_parse[f].regexp_string, - fields_to_parse[f].name); + field_names[fields_to_parse[f].id]); exit(EXIT_FAILURE); } } @@ -583,23 +618,28 @@ int main(int argc, char **argv) { else { if(nb_search_patterns > 0) { - char *search_name[MAX_NB_SEARCH_PATTERNS]; - char *search_regexp_string[MAX_NB_SEARCH_PATTERNS]; - int n; + int search_ids[MAX_NB_SEARCH_PATTERNS]; + char *search_regexp_strings[MAX_NB_SEARCH_PATTERNS]; + int m, n; for(n = 0; n < nb_search_patterns; n++) { - search_name[n] = search_pattern[n]; - search_regexp_string[n] = segment_next_field(search_pattern[n]); + search_regexp_strings[n] = segment_next_field(search_pattern[n]); + search_ids[n] = -1; + for(m = 0; (m < MAX_ID) && search_ids[n] == -1; m++) { + if(strncmp(field_names[m], search_pattern[n], strlen(search_pattern[n])) == 0) { + search_ids[n] = m; + } + } } - if(!*search_regexp_string) { + if(!*search_regexp_strings) { fprintf(stderr, "Syntax error in the search pattern.\n"); exit(EXIT_FAILURE); } recursive_search_in_db(db_root_path, - nb_search_patterns, search_name, search_regexp_string); + nb_search_patterns, search_ids, search_regexp_strings); for(n = 0; n < nb_search_patterns; n++) { free(search_pattern[n]); -- 2.20.1