X-Git-Url: https://fleuret.org/cgi-bin/gitweb/gitweb.cgi?a=blobdiff_plain;f=mymail.c;h=f9b0148dcdd2cd916605589986fe689fb9da360c;hb=3ca38393b46c2bbba39f7b0bd4ca3ea918384b1d;hp=54febff35e85eb3d8b1fb07995f49ba2881d38e7;hpb=5419b44224e0c8e8ea650d51c52136429aa298d7;p=mymail.git diff --git a/mymail.c b/mymail.c index 54febff..f9b0148 100644 --- a/mymail.c +++ b/mymail.c @@ -25,7 +25,7 @@ directories containing mbox files, and create a db file containing for each mail a list of fields computed from the header, or (2) read such a db file and get all the mails matching regexp-defined - conditions on the fields. + conditions on the fields, to create a resulting mbox file. It is low-tech, simple, light and fast. @@ -46,13 +46,15 @@ #include #define MYMAIL_DB_MAGIC_TOKEN "mymail_index_file" -#define VERSION "0.9.1" +#define VERSION "0.9.2" -#define MAX_NB_SEARCH_CONDITIONS 10 +#define MAX_NB_SEARCH_CONDITIONS 32 #define BUFFER_SIZE 65536 #define TOKEN_BUFFER_SIZE 1024 +#define LEADING_FROM_LINE_REGEXP "^From [^ ]* \\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\) \\(Jan\\|Feb\\|Mar\\|Apr\\|May\\|Jun\\|Jul\\|Aug\\|Sep\\|Oct\\|Nov\\|Dec\\) [ 123][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9]\n$" + regex_t leading_from_line_regexp; /* Global variables! */ @@ -60,6 +62,7 @@ regex_t leading_from_line_regexp; int paranoid; int quiet; char *default_search_field; +int ignore_dot_files; /********************************************************************/ @@ -228,12 +231,12 @@ time_t time_for_past_day(int day) { int ignore_entry(const char *name) { return - /* strcmp(name, ".") == 0 || */ - /* strcmp(name, "..") == 0 || */ - (name[0] == '.' && name[1] != '/'); + strcmp(name, ".") == 0 || + strcmp(name, "..") == 0 || + (ignore_dot_files && name[0] == '.' && name[1] != '/'); } -int is_a_leading_from_line(int last_mbox_line_was_empty, char *mbox_line) { +int is_a_leading_from_line(char *mbox_line) { return /* @@ -246,7 +249,6 @@ int is_a_leading_from_line(int last_mbox_line_was_empty, char *mbox_line) { */ - /* last_mbox_line_was_empty && */ strncmp(mbox_line, "From ", 5) == 0 && regexec(&leading_from_line_regexp, mbox_line, 0, 0, 0) == 0; } @@ -300,7 +302,6 @@ void update_body_hits(char *mail_filename, int position_in_mail, int *hits) { FILE *mail_file; int header, n; - int last_mbox_line_was_empty; char raw_mbox_line[BUFFER_SIZE]; int nb_body_hits; @@ -320,9 +321,10 @@ void update_body_hits(char *mail_filename, int position_in_mail, if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) { while(nb_body_hits < nb_body_conditions) { - last_mbox_line_was_empty = (raw_mbox_line[0] == '\n'); + /* last_mbox_line_was_empty = (raw_mbox_line[0] == '\n'); */ + /* if(last_mbox_line_was_empty) { header = 0; } */ - if(last_mbox_line_was_empty) { header = 0; } + if(raw_mbox_line[0] == '\n') { header = 0; } if(!header) { for(n = 0; n < nb_search_conditions; n++) { @@ -337,7 +339,7 @@ void update_body_hits(char *mail_filename, int position_in_mail, } if(!fgets(raw_mbox_line, BUFFER_SIZE, mail_file) || - (is_a_leading_from_line(last_mbox_line_was_empty, raw_mbox_line))) + (is_a_leading_from_line(raw_mbox_line))) break; } } @@ -345,10 +347,9 @@ void update_body_hits(char *mail_filename, int position_in_mail, fclose(mail_file); } -void write_mail(const char *mail_filename, unsigned long int position_in_mail, +void extract_mail(const char *mail_filename, unsigned long int position_in_mail, FILE *output_file) { char raw_mbox_line[BUFFER_SIZE]; - int last_mbox_line_was_empty; FILE *mail_file; mail_file = fopen(mail_filename, "r"); @@ -363,14 +364,12 @@ void write_mail(const char *mail_filename, unsigned long int position_in_mail, fseek(mail_file, position_in_mail, SEEK_SET); if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) { - last_mbox_line_was_empty = 0; fprintf(output_file, "%s", raw_mbox_line); while(1) { if(!fgets(raw_mbox_line, BUFFER_SIZE, mail_file) || - (is_a_leading_from_line(last_mbox_line_was_empty, raw_mbox_line)) + (is_a_leading_from_line(raw_mbox_line)) ) break; - last_mbox_line_was_empty = (raw_mbox_line[0] == '\n'); fprintf(output_file, "%s", raw_mbox_line); } } @@ -378,10 +377,10 @@ void write_mail(const char *mail_filename, unsigned long int position_in_mail, fclose(mail_file); } -void search_in_db(const char *db_filename, - int nb_search_conditions, - struct search_condition *search_conditions, - FILE *output_file) { +int search_in_db(const char *db_filename, + int nb_search_conditions, + struct search_condition *search_conditions, + FILE *output_file) { int hits[MAX_NB_SEARCH_CONDITIONS]; char raw_db_line[BUFFER_SIZE]; @@ -392,6 +391,9 @@ void search_in_db(const char *db_filename, int already_written, m, n; int nb_body_conditions, nb_fulfilled_body_conditions; FILE *db_file; + int nb_extracted_mails; + + nb_extracted_mails = 0; if(!quiet) { printf("Searching in '%s' ... ", db_filename); @@ -475,7 +477,8 @@ void search_in_db(const char *db_filename, } if(nb_body_conditions == nb_fulfilled_body_conditions) { - write_mail(current_mail_filename, current_position_in_mail, output_file); + nb_extracted_mails++; + extract_mail(current_mail_filename, current_position_in_mail, output_file); } } } @@ -508,16 +511,19 @@ void search_in_db(const char *db_filename, printf("done.\n"); fflush(stdout); } + + return nb_extracted_mails; } -void recursive_search_in_db(const char *entry_name, regex_t *db_filename_regexp, - int nb_search_conditions, - struct search_condition *search_conditions, - FILE *output_file) { +int recursive_search_in_db(const char *entry_name, regex_t *db_filename_regexp, + int nb_search_conditions, + struct search_condition *search_conditions, + FILE *output_file) { DIR *dir; struct dirent *dir_e; struct stat sb; char subname[PATH_MAX + 1]; + int nb_extracted_mails = 0; if(lstat(entry_name, &sb) != 0) { fprintf(stderr, @@ -533,9 +539,9 @@ void recursive_search_in_db(const char *entry_name, regex_t *db_filename_regexp, while((dir_e = readdir(dir))) { if(!ignore_entry(dir_e->d_name)) { snprintf(subname, PATH_MAX, "%s/%s", entry_name, dir_e->d_name); - recursive_search_in_db(subname, db_filename_regexp, - nb_search_conditions, search_conditions, - output_file); + nb_extracted_mails += recursive_search_in_db(subname, db_filename_regexp, + nb_search_conditions, search_conditions, + output_file); } } closedir(dir); @@ -546,9 +552,12 @@ void recursive_search_in_db(const char *entry_name, regex_t *db_filename_regexp, while(*s) { if(*s == '/') { filename = s+1; } s++; } if(regexec(db_filename_regexp, filename, 0, 0, 0) == 0) { - search_in_db(entry_name, nb_search_conditions, search_conditions, output_file); + nb_extracted_mails += + search_in_db(entry_name, nb_search_conditions, search_conditions, output_file); } } + + return nb_extracted_mails; } /*********************************************************************/ @@ -572,7 +581,7 @@ void index_mbox(const char *mbox_filename, char raw_mbox_line[BUFFER_SIZE], full_line[BUFFER_SIZE]; char *end_of_full_line; FILE *file; - int in_header, new_header, last_mbox_line_was_empty; + int in_header, new_header; unsigned long int position_in_file; file = fopen(mbox_filename, "r"); @@ -589,10 +598,9 @@ void index_mbox(const char *mbox_filename, position_in_file = 0; end_of_full_line = 0; full_line[0] = '\0'; - last_mbox_line_was_empty = 1; while(fgets(raw_mbox_line, BUFFER_SIZE, file)) { - if(is_a_leading_from_line(last_mbox_line_was_empty, raw_mbox_line)) { + if(is_a_leading_from_line(raw_mbox_line)) { if(in_header) { fprintf(stderr, "Got a ^\"From \" in the header in %s:%lu.\n", @@ -606,8 +614,6 @@ void index_mbox(const char *mbox_filename, if(in_header) { in_header = 0; } } - last_mbox_line_was_empty = (raw_mbox_line[0] == '\n'); - if(in_header) { if(new_header) { fprintf(db_file, "mail %lu %s\n", position_in_file, mbox_filename); @@ -880,9 +886,7 @@ int main(int argc, char **argv) { FILE *output_file; struct search_condition search_conditions[MAX_NB_SEARCH_CONDITIONS]; - if(regcomp(&leading_from_line_regexp, - "^From [^ ]* \\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\) \\(Jan\\|Feb\\|Mar\\|Apr\\|May\\|Jun\\|Jul\\|Aug\\|Sep\\|Oct\\|Nov\\|Dec\\) [ 123][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9]\n$", - 0)) { + if(regcomp(&leading_from_line_regexp, LEADING_FROM_LINE_REGEXP, 0)) { fprintf(stderr, "mymail: Cannot compile leading \"from\" line regexp. That is strange.\n"); exit(EXIT_FAILURE); @@ -896,6 +900,7 @@ int main(int argc, char **argv) { db_filename_list = 0; quiet = 0; default_search_field = 0; + ignore_dot_files = 1; setlocale(LC_ALL, ""); @@ -1065,6 +1070,7 @@ int main(int argc, char **argv) { } else { + int nb_extracted_mails = 0; if(nb_search_conditions > 0) { @@ -1081,9 +1087,9 @@ int main(int argc, char **argv) { exit(EXIT_FAILURE); } - recursive_search_in_db(db_root_path, &db_filename_regexp, - nb_search_conditions, search_conditions, - output_file); + nb_extracted_mails += recursive_search_in_db(db_root_path, &db_filename_regexp, + nb_search_conditions, search_conditions, + output_file); regfree(&db_filename_regexp); } @@ -1100,7 +1106,8 @@ int main(int argc, char **argv) { s = parse_token(db_filename, PATH_MAX + 1, ';', s); if(db_filename[0]) { - search_in_db(db_filename, nb_search_conditions, search_conditions, output_file); + nb_extracted_mails += + search_in_db(db_filename, nb_search_conditions, search_conditions, output_file); } } } @@ -1108,10 +1115,20 @@ int main(int argc, char **argv) { /* Search in all db files listed in the command arguments */ while(optind < argc) { - search_in_db(argv[optind], nb_search_conditions, search_conditions, output_file); + nb_extracted_mails += + search_in_db(argv[optind], nb_search_conditions, search_conditions, output_file); optind++; } } + + if(!quiet) { + if(nb_extracted_mails > 0) { + printf("Found %d matching mails.\n", nb_extracted_mails); + } else { + printf("No matching mail found.\n"); + } + } + } for(n = 0; n < nb_search_conditions; n++) {