Added a regexp-based check for the "leading From lines". This sucks horribly.
authorFrancois Fleuret <francois@fleuret.org>
Mon, 4 Feb 2013 21:36:30 +0000 (22:36 +0100)
committerFrancois Fleuret <francois@fleuret.org>
Mon, 4 Feb 2013 21:36:30 +0000 (22:36 +0100)
mymail.c

index a8fa8c9..24b2aa0 100644 (file)
--- a/mymail.c
+++ b/mymail.c
@@ -52,6 +52,8 @@
 
 #define BUFFER_SIZE 65536
 
+regex_t leading_from_line_regexp;
+
 char *db_filename;
 char *db_filename_regexp_string;
 char *db_root_path;
@@ -216,6 +218,11 @@ int ignore_entry(const char *name) {
     (name[0] == '.' && name[1] != '/');
 }
 
+/* Non-zero iff s begins with "From " and also matches leading_from_line_regexp. */
+int is_a_leading_from_line(char *s) {
+  return !strncmp(s, "From ", 5) && !regexec(&leading_from_line_regexp, s, 0, 0, 0);
+}
+
 int mbox_line_match_search(struct search_condition *condition,
                            int mbox_id, char *mbox_value) {
 
@@ -345,7 +352,8 @@ void search_in_db(FILE *db_file,
                 }
 
                 if(!fgets(raw_mbox_line, BUFFER_SIZE, mail_file) ||
-                   (last_mbox_line_was_empty && strncmp(raw_mbox_line, "From ", 5) == 0))
+                   (last_mbox_line_was_empty &&
+                    is_a_leading_from_line(raw_mbox_line)))
                   break;
               }
             }
@@ -372,7 +380,9 @@ void search_in_db(FILE *db_file,
               fprintf(output_file, "%s", raw_mbox_line);
               while(1) {
                 if(!fgets(raw_mbox_line, BUFFER_SIZE, mail_file) ||
-                   (last_mbox_line_was_empty && strncmp(raw_mbox_line, "From ", 5) == 0))
+                   (last_mbox_line_was_empty &&
+                    is_a_leading_from_line(raw_mbox_line))
+                   )
                   break;
                 last_mbox_line_was_empty = (raw_mbox_line[0] == '\n');
                 fprintf(output_file, "%s", raw_mbox_line);
@@ -529,7 +539,8 @@ void index_mbox(const char *mbox_filename,
   last_mbox_line_was_empty = 1;
 
   while(fgets(raw_mbox_line, BUFFER_SIZE, file)) {
-    if(last_mbox_line_was_empty && strncmp(raw_mbox_line, "From ", 5) == 0) {
+    if(last_mbox_line_was_empty &&
+       is_a_leading_from_line(raw_mbox_line)) {
       if(in_header) {
         fprintf(stderr,
                 "Got a ^\"From \" in the header in %s:%lu.\n",
@@ -657,6 +668,17 @@ int main(int argc, char **argv) {
   char *search_condition_strings[MAX_NB_SEARCH_CONDITIONS];
   FILE *output_file;
 
+  /* mbox separator "From <addr>  <Www> <Mmm> <dd> <hh:mm:ss> <yyyy>\n"; compiled as an ERE since "\|" is not portable in a BRE. */
+  if(regcomp(&leading_from_line_regexp,
+             "^From [^ ]*@[^ ]*  (Mon|Tue|Wed|Thu|Fri|Sat|Sun) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [ 123][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9]\n$",
+             REG_EXTENDED)) {
+    fprintf(stderr, "mymail: Cannot compile leading from line regexp. That is strange.\n");
+    exit(EXIT_FAILURE);
+  }
+
+  /* printf("%d\n", regexec(&leading_from_line_regexp, "From root@idiap.ch  Mon Apr 18 08:25:06 2011\n", 0, 0, 0)); */
+  /* exit(EXIT_SUCCESS); */
+
   paranoid = 0;
   action_index = 0;
   db_filename = 0;
@@ -974,5 +996,7 @@ int main(int argc, char **argv) {
   free(db_root_path);
   free(db_filename_list);
 
+  regfree(&leading_from_line_regexp);
+
   exit(EXIT_SUCCESS);
 }