Now deals properly with multi-line header lines. Does not deal with multipart mails.
author Francois Fleuret <francois@fleuret.org>
Mon, 28 Jan 2013 14:04:51 +0000 (15:04 +0100)
committer Francois Fleuret <francois@fleuret.org>
Mon, 28 Jan 2013 14:04:51 +0000 (15:04 +0100)
mymail.c

index c21be1c..125366a 100644 (file)
--- a/mymail.c
+++ b/mymail.c
@@ -160,10 +160,24 @@ void search_in_db(const char *search_name, const char *search_regexp_string,
 
 /*********************************************************************/
 
+void index_one_mbox_line(int nb_fields_to_parse, struct parsable_field *fields_to_parse, /* Writes one db entry per field whose regexp matches raw_line. */
+                         char *raw_line, FILE *db_file) { /* raw_line: text to test against the fields; db_file: stream the entries are written to. */
+  regmatch_t matches; /* Receives the span of the overall match; only one slot is requested from regexec. */
+  int f;
+  for(f = 0; f < nb_fields_to_parse; f++) { /* No early exit: every field is tested, so several may match the same line. */
+    if(regexec(&fields_to_parse[f].regexp, raw_line, 1, &matches, 0) == 0) { /* regexec returns 0 when the regexp matched. */
+      fprintf(db_file, "%s %s\n", /* Output: field name, a space, the text following the match, then a newline. */
+              fields_to_parse[f].name,
+              raw_line + matches.rm_eo); /* rm_eo is the offset just past the matched text, so only the remainder is written. */
+    }
+  }
+}
+
 void index_mbox(const char *input_filename,
                 int nb_fields_to_parse, struct parsable_field *fields_to_parse,
                 FILE *db_file) {
-  char raw_line[BUFFER_SIZE];
+  char raw_line[BUFFER_SIZE], full_line[BUFFER_SIZE];
+  char *end_of_full_line;
   FILE *file;
   int in_header, new_header;
   unsigned long int position_in_file;
@@ -196,20 +210,46 @@ void index_mbox(const char *input_filename,
       if(in_header) { in_header = 0; }
     }
 
+    /* printf("PARSE %d %s", in_header, raw_line); */
+
     if(in_header) {
-      int f;
-      regmatch_t matches;
       if(new_header) {
         fprintf(db_file, "mail %lu %s\n", position_in_file, input_filename);
         new_header = 0;
       }
-      for(f = 0; f < nb_fields_to_parse; f++) {
-        if(regexec(&fields_to_parse[f].regexp, raw_line, 1, &matches, 0) == 0) {
-          fprintf(db_file, "%s %s",
-                  fields_to_parse[f].name,
-                  raw_line + matches.rm_eo);
+
+      if(raw_line[0] == ' ' || raw_line[0] == '\t') {
+        char *start = raw_line;
+        while(*start == ' ' || *start == '\t') start++;
+        *(end_of_full_line++) = ' ';
+        strcpy(end_of_full_line, start);
+        while(*end_of_full_line && *end_of_full_line != '\n') {
+          end_of_full_line++;
         }
+        *end_of_full_line = '\0';
       }
+
+      else {
+        /* if(!((raw_line[0] >= 'a' && raw_line[0] <= 'z') || */
+             /* (raw_line[0] >= 'A' && raw_line[0] <= 'Z'))) { */
+          /* fprintf(stderr, */
+                  /* "Header line syntax error %s:%lu.\n", */
+                  /* input_filename, position_in_file); */
+          /* fprintf(stderr, "%s", raw_line); */
+        /* } */
+
+        if(full_line[0]) {
+          index_one_mbox_line(nb_fields_to_parse, fields_to_parse, full_line, db_file);
+        }
+
+        end_of_full_line = full_line;
+        strcpy(end_of_full_line, raw_line);
+        while(*end_of_full_line && *end_of_full_line != '\n') {
+          end_of_full_line++;
+        }
+        *end_of_full_line = '\0';
+      }
+
     }
 
     position_in_file += strlen(raw_line);