Added a regexp-based check for the "leading From lines". This sucks horribly.
[mymail.git] / mymail.c
index 03ecfb8..24b2aa0 100644 (file)
--- a/mymail.c
+++ b/mymail.c
@@ -43,6 +43,7 @@
 #include <limits.h>
 #include <dirent.h>
 #include <regex.h>
+#include <time.h>
 
 #define MYMAIL_DB_MAGIC_TOKEN "mymail_index_file"
 #define VERSION "0.9.1"
 
 #define BUFFER_SIZE 65536
 
+regex_t leading_from_line_regexp;
+
 char *db_filename;
 char *db_filename_regexp_string;
 char *db_root_path;
 char *db_filename_list;
+char output_filename[PATH_MAX + 1];
 
 int paranoid;
 int action_index;
+int quiet;
+
+time_t being_today;
 
 /********************************************************************/
 
 enum {
   ID_MAIL = 0,
+  ID_LEADING_LINE, /* line matched by the "^From " entry of fields_to_parse */
   ID_FROM,
   ID_TO,
   ID_SUBJECT,
   ID_DATE,
   ID_PARTICIPANT,
   ID_BODY,
+  ID_INTERVAL, /* date-interval condition ("today", "yesterday") */
   MAX_ID
 };
 
 static char *field_names[] = {
   "mail",
+  "lead", /* ID_LEADING_LINE */
   "from",
   "to",
   "subject",
   "date",
   "part",
-  "body"
+  "body",
+  "interval" /* ID_INTERVAL */
 };
 
 /********************************************************************/
@@ -88,6 +99,7 @@ struct search_condition {
   int field_id;
   int negation;
   regex_t regexp;
+  time_t interval_start, interval_stop;
 };
 
 /********************************************************************/
@@ -99,9 +111,15 @@ struct parsable_field {
 };
 
 static struct parsable_field fields_to_parse[] = {
+  {
+    ID_LEADING_LINE,
+    "^From ",
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+  },
+
   {
     ID_FROM,
-    "^\\(From \\|[Ff][Rr][Oo][Mm]:\\|[R][r][E][e][P][p][L][l][Y][y]-[T][t][O][o]:\\)",
+    "^\\([Ff][Rr][Oo][Mm]:\\|[Rr][Ee][Pp][Ll][Yy]-[Tt][Oo]:\\|[Ss][Ee][Nn][Dd][Ee][Rr]:\\)",
     { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
   },
 
@@ -173,6 +191,8 @@ void print_usage(FILE *out) {
   fprintf(out, "         show this help\n");
   fprintf(out, " -v, --version\n");
   fprintf(out, "         print the version number\n");
+  fprintf(out, " -q, --quiet\n");
+  fprintf(out, "         do not print information during search\n");
   fprintf(out, " -p <db filename pattern>, --db-pattern <db filename pattern>\n");
   fprintf(out, "         set the db filename pattern for recursive search\n");
   fprintf(out, " -r <db root path>, --db-root <db root path>\n");
@@ -185,6 +205,8 @@ void print_usage(FILE *out) {
   fprintf(out, "         set the db filename for indexing\n");
   fprintf(out, " -i, --index\n");
   fprintf(out, "         index mails\n");
+  fprintf(out, " -o <output filename>, --output <output filename>\n");
+  fprintf(out, "         set the result file, use stdout if unset\n");
 }
 
 /*********************************************************************/
@@ -196,18 +218,57 @@ int ignore_entry(const char *name) {
     (name[0] == '.' && name[1] != '/');
 }
 
+int is_a_leading_from_line(char *s) {
+  if(strncmp(s, "From ", 5) != 0) return 0; /* cheap prefix test before the regexp */
+  return regexec(&leading_from_line_regexp, s, 0, 0, 0) == 0;
+}
+
 int mbox_line_match_search(struct search_condition *condition,
                            int mbox_id, char *mbox_value) {
-  return
-    (condition->field_id == mbox_id ||
-     (condition->field_id == ID_PARTICIPANT && (mbox_id == ID_FROM || mbox_id == ID_TO)))
-    &&
-    regexec(&condition->regexp, mbox_value, 0, 0, 0) == 0;
+
+  if(condition->field_id == ID_INTERVAL) {
+    if(mbox_id == ID_LEADING_LINE) {
+      char *c;
+      time_t t;
+      struct tm tm;
+
+      c = mbox_value;
+      while(*c && *c != ' ') c++; while(*c && *c == ' ') c++;
+      strptime(c, "%a %b %e %k:%M:%S %Y", &tm);
+      t = mktime(&tm);
+
+      return (t >= condition->interval_start &&
+              (condition->interval_stop == 0 ||
+               t <= condition->interval_stop));
+    } else {
+      return 0;
+    }
+  } else {
+    return
+      (
+
+       (condition->field_id == mbox_id)
+
+       ||
+
+       (condition->field_id == ID_PARTICIPANT && (mbox_id == ID_LEADING_LINE ||
+                                                  mbox_id == ID_FROM ||
+                                                  mbox_id == ID_TO))
+       ||
+
+       (condition->field_id == ID_FROM && mbox_id == ID_LEADING_LINE)
+
+       )
+      &&
+      regexec(&condition->regexp, mbox_value, 0, 0, 0) == 0;
+  }
 }
 
-void search_in_db(int nb_search_conditions,
+void search_in_db(FILE *db_file,
+                  int nb_search_conditions,
                   struct search_condition *search_conditions,
-                  FILE *db_file) {
+                  FILE *output_file) {
+
   int hits[MAX_NB_SEARCH_CONDITIONS];
   char raw_db_line[BUFFER_SIZE];
   char raw_mbox_line[BUFFER_SIZE];
@@ -291,7 +352,8 @@ void search_in_db(int nb_search_conditions,
                 }
 
                 if(!fgets(raw_mbox_line, BUFFER_SIZE, mail_file) ||
-                   (last_mbox_line_was_empty && strncmp(raw_mbox_line, "From ", 5) == 0))
+                   (last_mbox_line_was_empty &&
+                    is_a_leading_from_line(raw_mbox_line)))
                   break;
               }
             }
@@ -315,13 +377,15 @@ void search_in_db(int nb_search_conditions,
 
             if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) {
               last_mbox_line_was_empty = 1;
-              printf("%s", raw_mbox_line);
+              fprintf(output_file, "%s", raw_mbox_line);
               while(1) {
                 if(!fgets(raw_mbox_line, BUFFER_SIZE, mail_file) ||
-                   (last_mbox_line_was_empty && strncmp(raw_mbox_line, "From ", 5) == 0))
+                   (last_mbox_line_was_empty &&
+                    is_a_leading_from_line(raw_mbox_line))
+                   )
                   break;
                 last_mbox_line_was_empty = (raw_mbox_line[0] == '\n');
-                printf("%s", raw_mbox_line);
+                fprintf(output_file, "%s", raw_mbox_line);
               }
             }
 
@@ -358,7 +422,8 @@ void search_in_db(int nb_search_conditions,
 
 void recursive_search_in_db(const char *entry_name, regex_t *db_filename_regexp,
                             int nb_search_conditions,
-                            struct search_condition *search_conditions) {
+                            struct search_condition *search_conditions,
+                            FILE *output_file) {
   DIR *dir;
   struct dirent *dir_e;
   struct stat sb;
@@ -380,7 +445,8 @@ void recursive_search_in_db(const char *entry_name, regex_t *db_filename_regexp,
       if(!ignore_entry(dir_e->d_name)) {
         snprintf(subname, PATH_MAX, "%s/%s", entry_name, dir_e->d_name);
         recursive_search_in_db(subname, db_filename_regexp,
-                               nb_search_conditions, search_conditions);
+                               nb_search_conditions, search_conditions,
+                               output_file);
       }
     }
     closedir(dir);
@@ -393,6 +459,11 @@ void recursive_search_in_db(const char *entry_name, regex_t *db_filename_regexp,
     if(regexec(db_filename_regexp, filename, 0, 0, 0) == 0) {
       FILE *db_file = fopen(entry_name, "r");
 
+      if(!quiet) {
+        printf("Searching in '%s' ... ", entry_name);
+        fflush(stdout);
+      }
+
       if(!db_file) {
         fprintf(stderr,
                 "mymail: Cannot open \"%s\" for reading: %s\n",
@@ -415,9 +486,14 @@ void recursive_search_in_db(const char *entry_name, regex_t *db_filename_regexp,
         exit(EXIT_FAILURE);
       }
 
-      search_in_db(nb_search_conditions, search_conditions, db_file);
+      search_in_db(db_file, nb_search_conditions, search_conditions, output_file);
 
       fclose(db_file);
+
+      if(!quiet) {
+        printf("done.\n");
+        fflush(stdout);
+      }
     }
   }
 }
@@ -463,7 +539,8 @@ void index_mbox(const char *mbox_filename,
   last_mbox_line_was_empty = 1;
 
   while(fgets(raw_mbox_line, BUFFER_SIZE, file)) {
-    if(last_mbox_line_was_empty && strncmp(raw_mbox_line, "From ", 5) == 0) {
+    if(last_mbox_line_was_empty &&
+       is_a_leading_from_line(raw_mbox_line)) {
       if(in_header) {
         fprintf(stderr,
                 "Got a ^\"From \" in the header in %s:%lu.\n",
@@ -569,12 +646,14 @@ enum {
 static struct option long_options[] = {
   { "help", no_argument, 0, 'h' },
   { "version", no_argument, 0, 'v' },
+  { "quiet", no_argument, 0, 'q' }, /* sets the quiet flag */
   { "db-file", 1, 0, 'd' },
   { "db-pattern", 1, 0, 'p' },
   { "db-root", 1, 0, 'r' },
   { "db-list", 1, 0, 'l' },
   { "search", 1, 0, 's' },
   { "index", 0, 0, 'i' },
+  { "output", 1, 0, 'o' }, /* argument is copied into output_filename */
   { 0, 0, 0, 0 }
 };
 
@@ -587,22 +666,31 @@ int main(int argc, char **argv) {
   int f;
   int nb_search_conditions;
   char *search_condition_strings[MAX_NB_SEARCH_CONDITIONS];
+  FILE *output_file;
 
-  /* for(f = 0; f < argc; f++) { */
-  /* printf("arg %d \"%s\"\n", f, argv[f]); */
-  /* } */
+  /* Matches the mbox separator line that starts a mail, e.g.
+     "From root@idiap.ch  Mon Apr 18 08:25:06 2011" followed by a newline.
+     Compiled with cflags 0 (basic syntax), hence the escaped group and bar. */
+  if(regcomp(&leading_from_line_regexp,
+             "^From [^ ]*@[^ ]*  \\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\) \\(Jan\\|Feb\\|Mar\\|Apr\\|May\\|Jun\\|Jul\\|Aug\\|Sep\\|Oct\\|Nov\\|Dec\\) [ 123][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9]\n$",
+             0)) {
+    fprintf(stderr,
+            "mymail: Cannot compile leading from line regexp. That is strange.\n");
+    exit(EXIT_FAILURE);
+  }
 
   paranoid = 0;
   action_index = 0;
   db_filename = 0;
   db_root_path = 0;
   db_filename_list = 0;
+  quiet = 0;
 
   setlocale(LC_ALL, "");
 
   nb_search_conditions = 0;
 
-  while ((c = getopt_long(argc, argv, "hvip:s:d:r:l:",
+  while ((c = getopt_long(argc, argv, "hvqip:s:d:r:l:o:",
                           long_options, NULL)) != -1) {
 
     switch(c) {
@@ -615,6 +703,10 @@ int main(int argc, char **argv) {
       print_version(stdout);
       break;
 
+    case 'q':
+      quiet = 1;
+      break;
+
     case 'i':
       action_index = 1;
       break;
@@ -627,6 +719,10 @@ int main(int argc, char **argv) {
       db_filename_regexp_string = strdup(optarg);
       break;
 
+    case 'o':
+      strncpy(output_filename, optarg, PATH_MAX);
+      break;
+
     case 'r':
       db_root_path = strdup(optarg);
       break;
@@ -685,6 +781,21 @@ int main(int argc, char **argv) {
     }
   }
 
+  if(output_filename[0]) {
+    output_file = fopen(output_filename, "w");
+
+    if(!output_file) {
+      fprintf(stderr,
+              "mymail: Cannot open result file \"%s\" for writing: %s\n",
+              output_filename,
+              strerror(errno));
+      exit(EXIT_FAILURE);
+    }
+  } else {
+    output_file = stdout;
+    quiet = 1;
+  }
+
   if(error) {
     print_usage(stderr);
     exit(EXIT_FAILURE);
@@ -729,6 +840,7 @@ int main(int argc, char **argv) {
       optind++;
     }
 
+    fflush(db_file);
     fclose(db_file);
 
     for(f = 0; f < nb_fields_to_parse; f++) {
@@ -745,7 +857,6 @@ int main(int argc, char **argv) {
 
       for(n = 0; n < nb_search_conditions; n++) {
         search_field = search_condition_strings[n];
-        search_regexp_string = segment_next_field(search_condition_strings[n]);
 
         if(search_field[0] == '!') {
           search_field++;
@@ -754,28 +865,45 @@ int main(int argc, char **argv) {
           search_conditions[n].negation = 0;
         }
 
-        search_conditions[n].field_id = -1;
-        for(m = 0; (m < MAX_ID) && search_conditions[n].field_id == -1; m++) {
-          if(strncmp(field_names[m], search_field, strlen(search_field)) == 0) {
-            search_conditions[n].field_id = m;
-          }
+        if(strcmp(search_field, "today") == 0) {
+          search_conditions[n].field_id = ID_INTERVAL;
+          search_conditions[n].interval_start = time(0) - 3600 * 24;
+          search_conditions[n].interval_stop = 0;
         }
 
-        if(search_conditions[n].field_id == -1) {
-          fprintf(stderr,
-                  "mymail: Syntax error in field name \"%s\".\n",
-                  search_field);
-          exit(EXIT_FAILURE);
+        else if(strcmp(search_field, "yesterday") == 0) {
+          search_conditions[n].field_id = ID_INTERVAL;
+          search_conditions[n].interval_start = time(0) - 2 * 3600 * 24;
+          search_conditions[n].interval_stop = time(0) - 3600 * 24;
         }
 
-        if(regcomp(&search_conditions[n].regexp,
-                   search_regexp_string,
-                   REG_ICASE)) {
-          fprintf(stderr,
-                  "mymail: Syntax error in regexp \"%s\" for field \"%s\".\n",
-                  search_regexp_string,
-                  field_names[search_conditions[n].field_id]);
-          exit(EXIT_FAILURE);
+        else {
+          search_regexp_string = segment_next_field(search_condition_strings[n]);
+
+          search_conditions[n].field_id = -1;
+
+          for(m = 0; (m < MAX_ID) && search_conditions[n].field_id == -1; m++) {
+            if(strncmp(field_names[m], search_field, strlen(search_field)) == 0) {
+              search_conditions[n].field_id = m;
+            }
+          }
+
+          if(search_conditions[n].field_id == -1) {
+            fprintf(stderr,
+                    "mymail: Syntax error in field name \"%s\".\n",
+                    search_field);
+            exit(EXIT_FAILURE);
+          }
+
+          if(regcomp(&search_conditions[n].regexp,
+                     search_regexp_string,
+                     REG_ICASE)) {
+            fprintf(stderr,
+                    "mymail: Syntax error in regexp \"%s\" for field \"%s\".\n",
+                    search_regexp_string,
+                    field_names[search_conditions[n].field_id]);
+            exit(EXIT_FAILURE);
+          }
         }
       }
 
@@ -793,7 +921,8 @@ int main(int argc, char **argv) {
         }
 
         recursive_search_in_db(db_root_path, &db_filename_regexp,
-                               nb_search_conditions, search_conditions);
+                               nb_search_conditions, search_conditions,
+                               output_file);
 
         regfree(&db_filename_regexp);
       }
@@ -824,7 +953,7 @@ int main(int argc, char **argv) {
               exit(EXIT_FAILURE);
             }
 
-            search_in_db(nb_search_conditions, search_conditions, db_file);
+            search_in_db(db_file, nb_search_conditions, search_conditions, output_file);
 
             fclose(db_file);
           }
@@ -844,7 +973,7 @@ int main(int argc, char **argv) {
           exit(EXIT_FAILURE);
         }
 
-        search_in_db(nb_search_conditions, search_conditions, db_file);
+        search_in_db(db_file, nb_search_conditions, search_conditions, output_file);
 
         fclose(db_file);
         optind++;
@@ -857,10 +986,17 @@ int main(int argc, char **argv) {
     }
   }
 
+  if(output_file != stdout) {
+    fflush(output_file);
+    fclose(output_file);
+  }
+
   free(db_filename);
   free(db_filename_regexp_string);
   free(db_root_path);
   free(db_filename_list);
 
+  regfree(&leading_from_line_regexp);
+
   exit(EXIT_SUCCESS);
 }