/*********************************************************************/
+void index_one_mbox_line(int nb_fields_to_parse, struct parsable_field *fields_to_parse,
+ char *raw_line, FILE *db_file) { /*
+ * Tests raw_line against the regexp of each of the nb_fields_to_parse
+ * entries of fields_to_parse. For every entry that matches, writes one
+ * record to db_file: the entry's name, a space, the part of raw_line
+ * located after the end of the match, and a newline.
+ * The loop does not stop at the first hit, so a line matching several
+ * entries produces several records. Returns nothing; the result of
+ * fprintf is not checked. */
+ regmatch_t matches; /* regexec is asked for a single slot, i.e. the bounds of the whole match */
+ int f;
+ for(f = 0; f < nb_fields_to_parse; f++) {
+ if(regexec(&fields_to_parse[f].regexp, raw_line, 1, &matches, 0) == 0) { /* 0 means the pattern matched */
+ fprintf(db_file, "%s %s\n",
+ fields_to_parse[f].name,
+ raw_line + matches.rm_eo); /* rm_eo is the offset just past the match, so this is the remainder of the line */
+ }
+ }
+}
+
void index_mbox(const char *input_filename,
int nb_fields_to_parse, struct parsable_field *fields_to_parse,
FILE *db_file) {
- char raw_line[BUFFER_SIZE];
+ char raw_line[BUFFER_SIZE], full_line[BUFFER_SIZE];
+ char *end_of_full_line;
FILE *file;
int in_header, new_header;
unsigned long int position_in_file;
if(in_header) { in_header = 0; }
}
+ /* printf("PARSE %d %s", in_header, raw_line); */
+
if(in_header) {
- int f;
- regmatch_t matches;
if(new_header) {
fprintf(db_file, "mail %lu %s\n", position_in_file, input_filename);
new_header = 0;
}
- for(f = 0; f < nb_fields_to_parse; f++) {
- if(regexec(&fields_to_parse[f].regexp, raw_line, 1, &matches, 0) == 0) {
- fprintf(db_file, "%s %s",
- fields_to_parse[f].name,
- raw_line + matches.rm_eo);
+
+ if(raw_line[0] == ' ' || raw_line[0] == '\t') {
+ char *start = raw_line;
+ while(*start == ' ' || *start == '\t') start++;
+ *(end_of_full_line++) = ' ';
+ strcpy(end_of_full_line, start);
+ while(*end_of_full_line && *end_of_full_line != '\n') {
+ end_of_full_line++;
}
+ *end_of_full_line = '\0';
}
+
+ else {
+ /* if(!((raw_line[0] >= 'a' && raw_line[0] <= 'z') || */
+ /* (raw_line[0] >= 'A' && raw_line[0] <= 'Z'))) { */
+ /* fprintf(stderr, */
+ /* "Header line syntax error %s:%lu.\n", */
+ /* input_filename, position_in_file); */
+ /* fprintf(stderr, "%s", raw_line); */
+ /* } */
+
+ if(full_line[0]) {
+ index_one_mbox_line(nb_fields_to_parse, fields_to_parse, full_line, db_file);
+ }
+
+ end_of_full_line = full_line;
+ strcpy(end_of_full_line, raw_line);
+ while(*end_of_full_line && *end_of_full_line != '\n') {
+ end_of_full_line++;
+ }
+ *end_of_full_line = '\0';
+ }
+
}
position_in_file += strlen(raw_line);