Now handles multi-criteria search.
[mymail.git] / mymail.c
index 001d14d..a241a86 100644 (file)
--- a/mymail.c
+++ b/mymail.c
 #include <dirent.h>
 #include <regex.h>
 
+#define MYMAIL_DB_MAGIC_TOKEN "mymail_index_file"
 #define VERSION "0.1"
 
+#define MAX_NB_SEARCH_PATTERNS 10
+
 #define BUFFER_SIZE 65536
 
 struct parsable_field {
@@ -55,7 +58,9 @@ struct parsable_field {
 };
 
 char *db_filename;
-char *search_pattern;
+char *db_root_path;
+
+int multi_db_files;
 
 int paranoid;
 int action_index;
@@ -80,7 +85,7 @@ void *safe_malloc(size_t n) {
   void *p = malloc(n);
   if(!p && n != 0) {
     fprintf(stderr,
-            "mymail: can not allocate memory: %s\n", strerror(errno));
+            "mymail: cannot allocate memory: %s\n", strerror(errno));
     exit(EXIT_FAILURE);
   }
   return p;
@@ -88,104 +93,212 @@ void *safe_malloc(size_t n) {
 
 /*********************************************************************/
 
-void usage(FILE *out) {
+void print_version(FILE *out) {
   fprintf(out, "mymail version %s (%s)\n", VERSION, UNAME);
+}
+
+void print_usage(FILE *out) {
+  print_version(out);
   fprintf(out, "Written by Francois Fleuret <francois@fleuret.org>.\n");
   fprintf(out, "\n");
   fprintf(out, "Usage: mymail [options] [<filename1> [<filename2> ...]]\n");
   fprintf(out, "\n");
+  fprintf(out, " -h, --help\n");
+  fprintf(out, "         show this help\n");
+  fprintf(out, " -v, --version\n");
+  fprintf(out, "         print the version number\n");
+  fprintf(out, " -i, --index\n");
+  fprintf(out, "         index mails\n");
+  fprintf(out, " -d <db filename>, --db-file <db filename>\n");
+  fprintf(out, "         set the data-base file\n");
+  fprintf(out, " -s <search pattern>, --search <search pattern>\n");
+  fprintf(out, "         search for matching mails in the data-base file\n");
 }
 
 /*********************************************************************/
 
-void search_in_db(const char *search_name, const char *search_regexp_string,
+int ignore_entry(const char *name) {
+  return
+    /* strcmp(name, ".") == 0 || */
+    /* strcmp(name, "..") == 0 || */
+    (name[0] == '.' && name[1] != '/');
+}
+
+void search_in_db(int nb_search_patterns,
+                  char **search_name, char **search_regexp_string,
                   FILE *db_file) {
-  char raw_line[BUFFER_SIZE];
-  char current_mail_filename[BUFFER_SIZE];
+  int hits[MAX_NB_SEARCH_PATTERNS];
+  char raw_db_line[BUFFER_SIZE];
+  char raw_mbox_line[BUFFER_SIZE];
+  char current_mail_filename[PATH_MAX + 1];
   unsigned long int current_position_in_mail;
   char *name, *value;
-  regex_t regexp;
-  int already_written;
+  regex_t regexp[MAX_NB_SEARCH_PATTERNS];
+  int already_written, n;
 
-  if(regcomp(&regexp,
-             search_regexp_string,
-             REG_ICASE)) {
-    fprintf(stderr,
-            "mymail: Syntax error in regexp \"%s\" for field \"%s\".\n",
-            search_regexp_string,
-            search_name);
-    exit(EXIT_FAILURE);
+  for(n = 0; n < nb_search_patterns; n++) {
+    if(regcomp(&regexp[n],
+               search_regexp_string[n],
+               REG_ICASE)) {
+      fprintf(stderr,
+              "mymail: Syntax error in regexp \"%s\" for field \"%s\".\n",
+              search_regexp_string[n],
+              search_name[n]);
+      exit(EXIT_FAILURE);
+    }
   }
 
   current_position_in_mail = 0;
   already_written = 0;
 
-  while(fgets(raw_line, BUFFER_SIZE, db_file)) {
-    name = raw_line;
-    value = segment_next_field(raw_line);
+  for(n = 0; n < nb_search_patterns; n++) { hits[n] = 0; }
+
+  while(fgets(raw_db_line, BUFFER_SIZE, db_file)) {
+    name = raw_db_line;
+    value = segment_next_field(raw_db_line);
 
     if(strcmp("mail", name) == 0) {
-      char *position_in_file_string = value;
-      char *mail_filename = segment_next_field(value);
-      current_position_in_mail = atol(position_in_file_string);
-      strcpy(current_mail_filename, mail_filename);
-      remove_eof(current_mail_filename);
-      already_written = 0;
-    }
+      char *position_in_file_string;
+      char *mail_filename;
+
+      for(n = 0; n < nb_search_patterns && hits[n]; n++);
 
-    else if(!already_written) {
-      if(strcmp(search_name, name) == 0 && regexec(&regexp, value, 0, 0, 0) == 0) {
+      if(n == nb_search_patterns) {
         FILE *mail_file;
         mail_file = fopen(current_mail_filename, "r");
         if(!mail_file) {
-          fprintf(stderr, "mymail: Can not open '%s'.\n", current_mail_filename);
+          fprintf(stderr, "mymail: Cannot open mbox '%s'.\n", current_mail_filename);
           exit(EXIT_FAILURE);
         }
         fseek(mail_file, current_position_in_mail, SEEK_SET);
-        if(fgets(raw_line, BUFFER_SIZE, mail_file)) {
-          printf("%s", raw_line);
-          while(fgets(raw_line, BUFFER_SIZE, mail_file) &&
-                strncmp(raw_line, "From ", 5)) {
-            printf("%s", raw_line);
+        if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) {
+          printf("%s", raw_mbox_line);
+          while(fgets(raw_mbox_line, BUFFER_SIZE, mail_file) &&
+                strncmp(raw_mbox_line, "From ", 5)) {
+            printf("%s", raw_mbox_line);
           }
         }
         fclose(mail_file);
-        already_written = 1;
+      }
+
+      for(n = 0; n < nb_search_patterns; n++) { hits[n] = 0; }
+
+      position_in_file_string = value;
+      mail_filename = segment_next_field(value);
+      current_position_in_mail = atol(position_in_file_string);
+      strcpy(current_mail_filename, mail_filename);
+
+      remove_eof(current_mail_filename);
+      already_written = 0;
+    }
+
+    else {
+      for(n = 0; n < nb_search_patterns; n++) {
+        hits[n] |=
+          (strcmp(search_name[n], name) == 0 && regexec(&regexp[n], value, 0, 0, 0) == 0);
       }
     }
   }
 
-  regfree(&regexp);
+  for(n = 0; n < nb_search_patterns; n++) {
+    regfree(&regexp[n]);
+  }
+}
+
+void recursive_search_in_db(const char *entry_name,
+                            int nb_search_patterns,
+                            char **search_name, char **search_regexp_string) {
+  DIR *dir;
+  struct dirent *dir_e;
+  struct stat sb;
+  char raw_db_line[BUFFER_SIZE];
+  char subname[PATH_MAX + 1];
+
+  if(lstat(entry_name, &sb) != 0) {
+    fprintf(stderr,
+            "mymail: Cannot stat \"%s\": %s\n",
+            entry_name,
+            strerror(errno));
+    exit(EXIT_FAILURE);
+  }
+
+  dir = opendir(entry_name);
+
+  if(dir) {
+    while((dir_e = readdir(dir))) {
+      if(!ignore_entry(dir_e->d_name)) {
+        snprintf(subname, PATH_MAX, "%s/%s", entry_name, dir_e->d_name);
+        recursive_search_in_db(subname,
+                               nb_search_patterns,
+                               search_name, search_regexp_string);
+      }
+    }
+    closedir(dir);
+  } else {
+    const char *s = entry_name, *filename = entry_name;
+    while(*s) { if(*s == '/') { filename = s+1; } s++; }
+
+    if(strcmp(filename, db_filename) == 0) {
+      FILE *db_file = fopen(entry_name, "r");
+
+      if(!db_file) {
+        fprintf(stderr,
+                "mymail: Cannot open \"%s\" for reading: %s\n",
+                db_filename,
+                strerror(errno));
+        exit(EXIT_FAILURE);
+      }
+
+      if(fgets(raw_db_line, BUFFER_SIZE, db_file)) {
+        if(strncmp(raw_db_line, MYMAIL_DB_MAGIC_TOKEN, strlen(MYMAIL_DB_MAGIC_TOKEN))) {
+          fprintf(stderr,
+                  "mymail: Header line in '%s' does not match the mymail db format.\n",
+                  entry_name);
+          exit(EXIT_FAILURE);
+        }
+      } else {
+        fprintf(stderr,
+                "mymail: Cannot read the header line in '%s'.\n",
+                entry_name);
+        exit(EXIT_FAILURE);
+      }
+
+      search_in_db(nb_search_patterns, search_name, search_regexp_string,
+                   db_file);
+
+      fclose(db_file);
+    }
+  }
 }
 
 /*********************************************************************/
 
 void index_one_mbox_line(int nb_fields_to_parse, struct parsable_field *fields_to_parse,
-                         char *raw_line, FILE *db_file) {
+                         char *raw_mbox_line, FILE *db_file) {
   regmatch_t matches;
   int f;
   for(f = 0; f < nb_fields_to_parse; f++) {
-    if(regexec(&fields_to_parse[f].regexp, raw_line, 1, &matches, 0) == 0) {
+    if(regexec(&fields_to_parse[f].regexp, raw_mbox_line, 1, &matches, 0) == 0) {
       fprintf(db_file, "%s %s\n",
               fields_to_parse[f].name,
-              raw_line + matches.rm_eo);
+              raw_mbox_line + matches.rm_eo);
     }
   }
 }
 
-void index_mbox(const char *input_filename,
+void index_mbox(const char *mbox_filename,
                 int nb_fields_to_parse, struct parsable_field *fields_to_parse,
                 FILE *db_file) {
-  char raw_line[BUFFER_SIZE], full_line[BUFFER_SIZE];
+  char raw_mbox_line[BUFFER_SIZE], full_line[BUFFER_SIZE];
   char *end_of_full_line;
   FILE *file;
   int in_header, new_header;
   unsigned long int position_in_file;
 
-  file = fopen(input_filename, "r");
+  file = fopen(mbox_filename, "r");
 
   if(!file) {
-    fprintf(stderr, "mymail: Can not open '%s'.\n", input_filename);
+    fprintf(stderr, "mymail: Cannot open '%s'.\n", mbox_filename);
     if(paranoid) { exit(EXIT_FAILURE); }
     return;
   }
@@ -195,32 +308,31 @@ void index_mbox(const char *input_filename,
 
   position_in_file = 0;
   end_of_full_line = 0;
+  full_line[0] = '\0';
 
-  while(fgets(raw_line, BUFFER_SIZE, file)) {
-    if(strncmp(raw_line, "From ", 5) == 0) {
+  while(fgets(raw_mbox_line, BUFFER_SIZE, file)) {
+    if(strncmp(raw_mbox_line, "From ", 5) == 0) {
       if(in_header) {
         fprintf(stderr,
                 "Got a ^\"From \" in the header in %s:%lu.\n",
-                input_filename, position_in_file);
-        fprintf(stderr, "%s", raw_line);
+                mbox_filename, position_in_file);
+        fprintf(stderr, "%s", raw_mbox_line);
         if(paranoid) { exit(EXIT_FAILURE); }
       }
       in_header = 1;
       new_header = 1;
-    } else if(strncmp(raw_line, "\n", 1) == 0) {
+    } else if(strncmp(raw_mbox_line, "\n", 1) == 0) {
       if(in_header) { in_header = 0; }
     }
 
-    /* printf("PARSE %d %s", in_header, raw_line); */
-
     if(in_header) {
       if(new_header) {
-        fprintf(db_file, "mail %lu %s\n", position_in_file, input_filename);
+        fprintf(db_file, "mail %lu %s\n", position_in_file, mbox_filename);
         new_header = 0;
       }
 
-      if(raw_line[0] == ' ' || raw_line[0] == '\t') {
-        char *start = raw_line;
+      if(raw_mbox_line[0] == ' ' || raw_mbox_line[0] == '\t') {
+        char *start = raw_mbox_line;
         while(*start == ' ' || *start == '\t') start++;
         *(end_of_full_line++) = ' ';
         strcpy(end_of_full_line, start);
@@ -231,12 +343,12 @@ void index_mbox(const char *input_filename,
       }
 
       else {
-        /* if(!((raw_line[0] >= 'a' && raw_line[0] <= 'z') || */
-             /* (raw_line[0] >= 'A' && raw_line[0] <= 'Z'))) { */
+        /* if(!((raw_mbox_line[0] >= 'a' && raw_mbox_line[0] <= 'z') || */
+             /* (raw_mbox_line[0] >= 'A' && raw_mbox_line[0] <= 'Z'))) { */
           /* fprintf(stderr, */
                   /* "Header line syntax error %s:%lu.\n", */
-                  /* input_filename, position_in_file); */
-          /* fprintf(stderr, "%s", raw_line); */
+                  /* mbox_filename, position_in_file); */
+          /* fprintf(stderr, "%s", raw_mbox_line); */
         /* } */
 
         if(full_line[0]) {
@@ -244,7 +356,7 @@ void index_mbox(const char *input_filename,
         }
 
         end_of_full_line = full_line;
-        strcpy(end_of_full_line, raw_line);
+        strcpy(end_of_full_line, raw_mbox_line);
         while(*end_of_full_line && *end_of_full_line != '\n') {
           end_of_full_line++;
         }
@@ -253,48 +365,40 @@ void index_mbox(const char *input_filename,
 
     }
 
-    position_in_file += strlen(raw_line);
+    position_in_file += strlen(raw_mbox_line);
   }
 
   fclose(file);
 }
 
-int ignore_entry(const char *name) {
-  return
-    /* strcmp(name, ".") == 0 || */
-    /* strcmp(name, "..") == 0 || */
-    (name[0] == '.' && name[1] != '/');
-}
-
-void process_entry(const char *dir_name,
-                   int nb_fields_to_parse, struct parsable_field *fields_to_parse,
-                   FILE *db_file) {
+void recursive_index_mbox(FILE *db_file,
+                          const char *entry_name,
+                          int nb_fields_to_parse, struct parsable_field *fields_to_parse) {
   DIR *dir;
   struct dirent *dir_e;
   struct stat sb;
   char subname[PATH_MAX + 1];
 
-  if(lstat(dir_name, &sb) != 0) {
+  if(lstat(entry_name, &sb) != 0) {
     fprintf(stderr,
-            "mymail: Can not stat \"%s\": %s\n",
-            dir_name,
+            "mymail: Cannot stat \"%s\": %s\n",
+            entry_name,
             strerror(errno));
     exit(EXIT_FAILURE);
   }
 
-  dir = opendir(dir_name);
+  dir = opendir(entry_name);
 
   if(dir) {
-    printf("Processing directory '%s'.\n", dir_name);
     while((dir_e = readdir(dir))) {
       if(!ignore_entry(dir_e->d_name)) {
-        snprintf(subname, PATH_MAX, "%s/%s", dir_name, dir_e->d_name);
-        process_entry(subname, nb_fields_to_parse, fields_to_parse, db_file);
+        snprintf(subname, PATH_MAX, "%s/%s", entry_name, dir_e->d_name);
+        recursive_index_mbox(db_file, subname, nb_fields_to_parse, fields_to_parse);
       }
     }
     closedir(dir);
   } else {
-    index_mbox(dir_name, nb_fields_to_parse, fields_to_parse, db_file);
+    index_mbox(entry_name, nb_fields_to_parse, fields_to_parse, db_file);
   }
 }
 
@@ -308,9 +412,12 @@ enum {
 
 static struct option long_options[] = {
   { "help", no_argument, 0, 'h' },
-  { "db-file", 1, 0, 'p' },
+  { "version", no_argument, 0, 'v' },
+  { "db-file", 1, 0, 'd' },
+  { "db-root", 1, 0, 'p' },
   { "search-pattern", 1, 0, 's' },
   { "index", 0, 0, 'i' },
+  { "multi-db-files", 0, 0, 'm' },
   { 0, 0, 0, 0 }
 };
 
@@ -326,21 +433,36 @@ static struct parsable_field fields_to_parse[] = {
     "^\\([Tt][Oo]\\|[Cc][Cc]\\|[Bb][Cc][Cc]\\): *",
     { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
   },
+
+  {
+    "subj",
+    "^[Ss][Uu][Bb][Jj][Ee][Cc][Tt]: *",
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+  },
+
 };
 
+/*********************************************************************/
+
 int main(int argc, char **argv) {
   int error = 0, show_help = 0;
   const int nb_fields_to_parse = sizeof(fields_to_parse) / sizeof(struct parsable_field);
   char c;
   int f;
+  int nb_search_patterns;
+  char *search_pattern[MAX_NB_SEARCH_PATTERNS];
 
   paranoid = 0;
   action_index = 0;
-  search_pattern = 0;
+  db_filename = 0;
+  db_root_path = 0;
+  multi_db_files = 0;
 
   setlocale(LC_ALL, "");
 
-  while ((c = getopt_long(argc, argv, "hip:s:",
+  nb_search_patterns = 0;
+
+  while ((c = getopt_long(argc, argv, "hvimp:s:d:p:",
                           long_options, NULL)) != -1) {
 
     switch(c) {
@@ -349,21 +471,32 @@ int main(int argc, char **argv) {
       show_help = 1;
       break;
 
+    case 'v':
+      print_version(stdout);
+      break;
+
     case 'i':
       action_index = 1;
       break;
 
-    case 'p':
+    case 'm':
+      multi_db_files = 1;
+      break;
+
+    case 'd':
       db_filename = strdup(optarg);
-      printf("db_filename=\"%s\"\n", db_filename);
+      break;
+
+    case 'p':
+      db_root_path = strdup(optarg);
       break;
 
     case 's':
-      if(search_pattern) {
-        fprintf(stderr, "mymail: Search pattern already defined.\n");
+      if(nb_search_patterns == MAX_NB_SEARCH_PATTERNS) {
+        fprintf(stderr, "mymail: Too many search patterns.\n");
         exit(EXIT_FAILURE);
       }
-      search_pattern = strdup(optarg);
+      search_pattern[nb_search_patterns++] = strdup(optarg);
       break;
 
     default:
@@ -374,25 +507,50 @@ int main(int argc, char **argv) {
 
   if(!db_filename) {
     char *default_db_filename = getenv("MYMAIL_DB_FILE");
-    if(!default_db_filename) { default_db_filename = "/tmp/mymail.db"; }
+
+    if(!default_db_filename) {
+      if(multi_db_files) {
+        default_db_filename = "mymail.db";
+      } else {
+        default_db_filename = "/tmp/mymail.db";
+      }
+    }
+
     db_filename = strdup(default_db_filename);
   }
 
+  if(!db_root_path) {
+    char *default_db_root_path = getenv("MYMAIL_DB_ROOT");
+
+    if(!default_db_root_path) {
+      if(multi_db_files) {
+        default_db_root_path = "mymail.db";
+      } else {
+        default_db_root_path = "/tmp/mymail.db";
+      }
+    }
+
+    db_root_path = strdup(default_db_root_path);
+  }
+
   if(error) {
-    usage(stderr);
+    print_usage(stderr);
     exit(EXIT_FAILURE);
   }
 
   if(show_help) {
-    usage(stdout);
+    print_usage(stdout);
     exit(EXIT_SUCCESS);
   }
 
   if(action_index) {
-    FILE *db_file = fopen(db_filename, "w");
+    FILE *db_file;
+
+    db_file = fopen(db_filename, "w");
+
     if(!db_file) {
       fprintf(stderr,
-              "mymail: Can not open \"%s\" for writing: %s\n",
+              "mymail: Cannot open \"%s\" for writing: %s\n",
               db_filename,
               strerror(errno));
       exit(EXIT_FAILURE);
@@ -410,9 +568,12 @@ int main(int argc, char **argv) {
       }
     }
 
+    fprintf(db_file, "%s version_%s raw version\n", MYMAIL_DB_MAGIC_TOKEN, VERSION);
+
     while(optind < argc) {
-      process_entry(argv[optind],
-                    nb_fields_to_parse, fields_to_parse, db_file);
+      recursive_index_mbox(db_file,
+                           argv[optind],
+                           nb_fields_to_parse, fields_to_parse);
       optind++;
     }
 
@@ -424,41 +585,34 @@ int main(int argc, char **argv) {
   }
 
   else {
-    if(search_pattern) {
-      FILE *db_file;
-      char *search_name;
-      char *search_regexp_string;
-      search_name = search_pattern;
-      search_regexp_string = segment_next_field(search_pattern);
-      if(!*search_regexp_string) {
-        fprintf(stderr,
-                "Syntax error in the search pattern.\n");
-        exit(EXIT_FAILURE);
-      }
 
-      /* printf("Starting search in %s for field \"%s\" matching \"%s\".\n", */
-      /* db_filename, */
-      /* search_name, */
-      /* search_regexp_string); */
+    if(nb_search_patterns > 0) {
+      char *search_name[MAX_NB_SEARCH_PATTERNS];
+      char *search_regexp_string[MAX_NB_SEARCH_PATTERNS];
+      int n;
 
-      db_file = fopen(db_filename, "r");
+      for(n = 0; n < nb_search_patterns; n++) {
+        search_name[n] = search_pattern[n];
+        search_regexp_string[n] = segment_next_field(search_pattern[n]);
+      }
 
-      if(!db_file) {
+      if(!*search_regexp_string) {
         fprintf(stderr,
-                "mymail: Can not open \"%s\" for reading: %s\n",
-                db_filename,
-                strerror(errno));
+                "Syntax error in the search pattern.\n");
         exit(EXIT_FAILURE);
       }
 
-      search_in_db(search_name, search_regexp_string, db_file);
+      recursive_search_in_db(db_root_path,
+                             nb_search_patterns, search_name, search_regexp_string);
 
-      fclose(db_file);
-      free(search_pattern);
+      for(n = 0; n < nb_search_patterns; n++) {
+        free(search_pattern[n]);
+      }
     }
   }
 
   free(db_filename);
+  free(db_root_path);
 
   exit(EXIT_SUCCESS);
 }