*
*/
+/*
+
+ This command is a dumb mail indexer. It can either (1) scan
+ directories containing mbox files, and create a db file containing
+ for each mail a list of fields computed from the header, or (2)
+ read such a db file and get all the mails matching regexp-defined
+ conditions on the fields.
+
+ It is low-tech, simple, light and fast.
+
+*/
+
#define _GNU_SOURCE
#include <stdio.h>
#include <dirent.h>
#include <regex.h>
-#define VERSION "0.1"
+/* Leading token of the first line of a db file; it is written when
+   indexing and checked before a db file is searched. */
+#define MYMAIL_DB_MAGIC_TOKEN "mymail_index_file"
+#define VERSION "0.9"
+
+/* Capacity of the search pattern arrays; main rejects any -s option
+   beyond this count. */
+#define MAX_NB_SEARCH_PATTERNS 10
+
+/* Size of the line buffers passed to fgets for db and mbox lines. */
+#define BUFFER_SIZE 65536
-#define BUFFER_SIZE 16384
+/* Name of the db file: -d option, else MYMAIL_DB_FILE, else "mymail.db". */
+char *db_filename;
+/* Directory searched recursively for db files: -r option, else
+   MYMAIL_DB_ROOT; stays null when neither is given. */
+char *db_root_path;
+
+/* When non-zero, index_mbox exits on an error instead of carrying on.
+   main sets it to 0. */
+int paranoid;
+/* Set to 1 by -i: index the mbox arguments instead of searching. */
+int action_index;
+
+/********************************************************************/
+
+/* Field identifiers. Each value is also the index of the field's name
+   in field_names[]. */
+enum {
+ ID_MAIL = 0,
+ ID_FROM,
+ ID_DEST,
+ ID_SUBJECT,
+ ID_PARTICIPANT, /* search only: matches a from line or a dest line */
+ MAX_ID /* number of identifiers above, not a field itself */
+};
+
+/* Field names as they appear at the start of db lines and in search
+   patterns, indexed by the ID_* constants; the order must follow the
+   enum. */
+static char *field_names[] = {
+ "mail",
+ "from",
+ "dest",
+ "subject",
+ "part"
+};
+
+/********************************************************************/
+
+/* One search condition built from a -s option. */
+struct search_request {
+ int field_id; /* ID_* of the field to test, -1 if the name was not recognized */
+ int negation; /* non-zero when the pattern started with '!': the mail must NOT match */
+ regex_t regexp; /* compiled regexp applied to the field value */
+};
+
+/********************************************************************/
+/* A header field extracted at indexing time: the ID_* of the field,
+   the source of the regexp recognizing the start of the header line,
+   and the compiled form of that regexp (filled in by regcomp in
+   main). */
struct parsable_field {
- char *name;
+ int id;
char *regexp_string;
regex_t regexp;
- FILE *db_file;
};
-char *db_filename_prefix;
+/* Header fields recognized by the indexer. Each regexp matches the
+   start of a header line; the text after the match is stored as the
+   value of the field. The patterns are POSIX basic regular expressions
+   using the GNU "\|" alternation.
+
+   Every letter of a header name is written as a two-letter bracket
+   expression ([Rr], not [R][r]): "[R][r]" would require the two
+   characters "Rr" in sequence and never match "Reply-To:". */
+static struct parsable_field fields_to_parse[] = {
+  {
+    ID_FROM,
+    "^\\(From \\|[Ff][Rr][Oo][Mm]:\\|[Rr][Ee][Pp][Ll][Yy]-[Tt][Oo]:\\)",
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+  },
+
+  {
+    ID_DEST,
+    "^\\([Tt][Oo]\\|[Cc][Cc]\\|[Bb][Cc][Cc]\\): ",
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+  },
+
+  {
+    ID_SUBJECT,
+    "^[Ss][Uu][Bb][Jj][Ee][Cc][Tt]: ",
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+  },
+
+};
+
+/********************************************************************/
+
+/* Logical exclusive or: returns 1 when exactly one of a and b is
+   non-zero, 0 otherwise. */
+int xor(int a, int b) {
+  return !a != !b;
+}
+
+/* Cuts the string at the end of its first space-delimited field and
+   returns a pointer to the beginning of the next field.
+
+   The first space after the field is overwritten with '\0' and the
+   following spaces are skipped. When the string contains no space it
+   is left untouched and the returned pointer designates its
+   terminating '\0' (an empty next field), never the byte after it. */
+char *segment_next_field(char *current) {
+  while(*current && *current != ' ') current++;
+  if(*current) {
+    *current = '\0'; current++;
+    while(*current == ' ') current++;
+  }
+  return current;
+}
+
+/* Truncates the string in place at its first '\n' or '\r'; a string
+   containing neither is left as it is. */
+void remove_eof(char *c) {
+  c[strcspn(c, "\n\r")] = '\0';
+}
/********************************************************************/
void *p = malloc(n);
if(!p && n != 0) {
fprintf(stderr,
- "mymail: can not allocate memory: %s\n", strerror(errno));
+ "mymail: cannot allocate memory: %s\n", strerror(errno));
exit(EXIT_FAILURE);
}
return p;
/*********************************************************************/
-void usage(FILE *out) {
+/* Writes a one-line banner to out, made of VERSION and UNAME (UNAME
+   is defined outside this excerpt). */
+void print_version(FILE *out) {
fprintf(out, "mymail version %s (%s)\n", VERSION, UNAME);
+}
+
+/* Writes the version banner, the author line and the list of command
+   line options to out. */
+void print_usage(FILE *out) {
+ print_version(out);
fprintf(out, "Written by Francois Fleuret <francois@fleuret.org>.\n");
fprintf(out, "\n");
- fprintf(out, "Usage: mymail [options] [<filename1> [<filename2> ...]]\n");
+ fprintf(out, "Usage: mymail [options] [<mbox dir1> [<mbox dir2> ...]]\n");
fprintf(out, "\n");
+ fprintf(out, " -h, --help\n");
+ fprintf(out, " show this help\n");
+ fprintf(out, " -v, --version\n");
+ fprintf(out, " print the version number\n");
+ fprintf(out, " -i, --index\n");
+ fprintf(out, " index mails\n");
+ fprintf(out, " -s <search pattern>, --search <search pattern>\n");
+ fprintf(out, " search for matching mails in the data-base file\n");
+ fprintf(out, " -d <db filename>, --db-file <db filename>\n");
+ fprintf(out, " set the data-base file\n");
+ fprintf(out, " -r <db root path>, --db-root <db root path>\n");
+ fprintf(out, " set the data-base root path for recursive search\n");
+}
+
+/*********************************************************************/
+
+/* Tells whether a directory entry has to be skipped during a
+   recursive walk: returns 1 for a name starting with '.' whose second
+   character is not '/' (this covers ".", ".." and hidden entries), 0
+   for any other name. */
+int ignore_entry(const char *name) {
+  if(name[0] != '.') { return 0; }
+  return name[1] != '/';
+}
+
+/* Tells whether one db line satisfies one search request.
+
+   request    the request: field to test and compiled regexp
+   mbox_id    ID_* of the field the db line belongs to
+   mbox_value value part of the db line
+
+   The line is a candidate when its field is the requested one, or
+   when the request is on ID_PARTICIPANT and the line is a from or a
+   dest line. Returns 1 when a candidate line matches the regexp, 0
+   otherwise; the regexp is not run on non-candidate lines. */
+int mbox_line_match_search(struct search_request *request,
+                           int mbox_id, char *mbox_value) {
+  int field_is_candidate = (request->field_id == mbox_id);
+
+  if(!field_is_candidate && request->field_id == ID_PARTICIPANT) {
+    field_is_candidate = (mbox_id == ID_FROM || mbox_id == ID_DEST);
+  }
+
+  if(!field_is_candidate) { return 0; }
+
+  return regexec(&request->regexp, mbox_value, 0, 0, 0) == 0;
+}
+
+/* Returns the id of the field whose name is exactly name, or -1 when
+   no field has that name. */
+static int field_id_from_name(const char *name) {
+  int m;
+  for(m = 0; m < MAX_ID; m++) {
+    if(strcmp(field_names[m], name) == 0) { return m; }
+  }
+  return -1;
+}
+
+/* Tells whether all the requests are satisfied: a plain request needs
+   a hit, a negated one needs no hit. */
+static int all_requests_satisfied(int nb_search_patterns,
+                                  struct search_request *search_requests,
+                                  const int *hits) {
+  int n;
+  for(n = 0; n < nb_search_patterns; n++) {
+    if(!xor(hits[n], search_requests[n].negation)) { return 0; }
+  }
+  return 1;
+}
+
+/* Prints on stdout the mail starting at byte offset position in the
+   mbox mail_filename, up to (and excluding) the next "From " line
+   that follows an empty line, or up to the end of the file. Exits if
+   the mbox cannot be opened or positioned. */
+static void print_mail_at(const char *mail_filename,
+                          unsigned long int position) {
+  char raw_mbox_line[BUFFER_SIZE];
+  int last_mbox_line_was_empty;
+  FILE *mail_file;
+
+  mail_file = fopen(mail_filename, "r");
+
+  if(!mail_file) {
+    fprintf(stderr, "mymail: Cannot open mbox '%s'.\n", mail_filename);
+    exit(EXIT_FAILURE);
+  }
+
+  if(fseek(mail_file, (long) position, SEEK_SET) != 0) {
+    fprintf(stderr, "mymail: Cannot seek to %lu in mbox '%s'.\n",
+            position, mail_filename);
+    exit(EXIT_FAILURE);
+  }
+
+  if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) {
+    /* The first line is the separator of the mail itself, it is
+       printed unconditionally. */
+    last_mbox_line_was_empty = (raw_mbox_line[0] == '\n');
+    printf("%s", raw_mbox_line);
+    while(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) {
+      if(last_mbox_line_was_empty &&
+         strncmp(raw_mbox_line, "From ", 5) == 0) break;
+      last_mbox_line_was_empty = (raw_mbox_line[0] == '\n');
+      printf("%s", raw_mbox_line);
+    }
+  }
+
+  fclose(mail_file);
+}
+
+/* Reads the records of an open db file (positioned after its header
+   line) and prints every mail satisfying all the search requests.
+
+   nb_search_patterns number of requests, at most MAX_NB_SEARCH_PATTERNS
+   search_requests    the requests
+   db_file            the db file, read until its end
+
+   A "mail <offset> <mbox filename>" record opens a new mail, the
+   following records are its fields. A mail is evaluated when the next
+   "mail" record or the end of the file is reached, so the last mail
+   of the db is evaluated too, and nothing is evaluated before the
+   first "mail" record. Field names are compared exactly; records with
+   an unknown name are ignored. */
+void search_in_db(int nb_search_patterns,
+                  struct search_request *search_requests,
+                  FILE *db_file) {
+  int hits[MAX_NB_SEARCH_PATTERNS];
+  char raw_db_line[BUFFER_SIZE];
+  char current_mail_filename[PATH_MAX + 1];
+  unsigned long int current_position_in_mail;
+  char *mbox_name, *mbox_value;
+  int mbox_id, n;
+  int have_current_mail;
+
+  current_position_in_mail = 0;
+  current_mail_filename[0] = '\0';
+  have_current_mail = 0;
+
+  for(n = 0; n < nb_search_patterns; n++) { hits[n] = 0; }
+
+  while(fgets(raw_db_line, BUFFER_SIZE, db_file)) {
+    mbox_name = raw_db_line;
+    mbox_value = segment_next_field(raw_db_line);
+
+    if(strcmp("mail", mbox_name) == 0) {
+      char *position_in_file_string;
+      char *mail_filename;
+
+      /* The fields of the previous mail are complete, evaluate it. */
+      if(have_current_mail &&
+         all_requests_satisfied(nb_search_patterns, search_requests, hits)) {
+        print_mail_at(current_mail_filename, current_position_in_mail);
+      }
+
+      for(n = 0; n < nb_search_patterns; n++) { hits[n] = 0; }
+
+      position_in_file_string = mbox_value;
+      mail_filename = segment_next_field(mbox_value);
+      current_position_in_mail = atol(position_in_file_string);
+      remove_eof(mail_filename);
+
+      /* A db line can be far longer than a path buffer. */
+      if(strlen(mail_filename) > PATH_MAX) {
+        fprintf(stderr, "mymail: mbox filename too long in the db file.\n");
+        exit(EXIT_FAILURE);
+      }
+
+      strcpy(current_mail_filename, mail_filename);
+      have_current_mail = 1;
+    }
+
+    else {
+      mbox_id = field_id_from_name(mbox_name);
+      if(mbox_id != -1) {
+        for(n = 0; n < nb_search_patterns; n++) {
+          hits[n] |= mbox_line_match_search(&search_requests[n],
+                                            mbox_id, mbox_value);
+        }
+      }
+    }
+  }
+
+  /* No "mail" record follows the last mail, evaluate it here. */
+  if(have_current_mail &&
+     all_requests_satisfied(nb_search_patterns, search_requests, hits)) {
+    print_mail_at(current_mail_filename, current_position_in_mail);
+  }
}
-void read_file(const char *input_filename,
- int nb_fields_to_parse, struct parsable_field *fields_to_parse) {
- char raw_line[BUFFER_SIZE];
+/* Opens the db file at path, checks that its first line starts with
+   MYMAIL_DB_MAGIC_TOKEN, and runs the search on the rest of the file.
+   Exits if the file cannot be opened or if its header line is missing
+   or wrong. Kept apart from the recursive walk so that the line
+   buffer is not part of every recursion frame. */
+static void search_in_db_file(const char *path,
+                              int nb_search_patterns,
+                              struct search_request *search_requests) {
+  char raw_db_line[BUFFER_SIZE];
+  FILE *db_file = fopen(path, "r");
+
+  if(!db_file) {
+    /* Report the path that failed, not the bare db file name. */
+    fprintf(stderr,
+            "mymail: Cannot open \"%s\" for reading: %s\n",
+            path,
+            strerror(errno));
+    exit(EXIT_FAILURE);
+  }
+
+  if(!fgets(raw_db_line, BUFFER_SIZE, db_file)) {
+    fprintf(stderr,
+            "mymail: Cannot read the header line in '%s'.\n",
+            path);
+    exit(EXIT_FAILURE);
+  }
+
+  if(strncmp(raw_db_line, MYMAIL_DB_MAGIC_TOKEN, strlen(MYMAIL_DB_MAGIC_TOKEN))) {
+    fprintf(stderr,
+            "mymail: Header line in '%s' does not match the mymail db format.\n",
+            path);
+    exit(EXIT_FAILURE);
+  }
+
+  search_in_db(nb_search_patterns, search_requests, db_file);
+
+  fclose(db_file);
+}
+
+/* Walks entry_name recursively and runs the search on every file
+   whose name is db_filename.
+
+   entry_name         a directory or a file
+   nb_search_patterns number of requests
+   search_requests    the requests
+
+   Entries rejected by ignore_entry are skipped, and so are entries
+   whose full path does not fit in PATH_MAX characters (with a message
+   on stderr). Exits if entry_name cannot be stat'ed or if a db file
+   is unreadable or malformed. */
+void recursive_search_in_db(const char *entry_name,
+                            int nb_search_patterns,
+                            struct search_request *search_requests) {
+  DIR *dir;
+  struct dirent *dir_e;
+  struct stat sb;
+  char subname[PATH_MAX + 1];
+
+  if(lstat(entry_name, &sb) != 0) {
+    fprintf(stderr,
+            "mymail: Cannot stat \"%s\": %s\n",
+            entry_name,
+            strerror(errno));
+    exit(EXIT_FAILURE);
+  }
+
+  dir = opendir(entry_name);
+
+  if(dir) {
+    while((dir_e = readdir(dir))) {
+      int length;
+
+      if(ignore_entry(dir_e->d_name)) { continue; }
+
+      length = snprintf(subname, sizeof(subname), "%s/%s",
+                        entry_name, dir_e->d_name);
+
+      /* A truncated path would designate another file. */
+      if(length < 0 || (size_t) length >= sizeof(subname)) {
+        fprintf(stderr,
+                "mymail: Path too long, skipping \"%s/%s\".\n",
+                entry_name, dir_e->d_name);
+        continue;
+      }
+
+      recursive_search_in_db(subname,
+                             nb_search_patterns,
+                             search_requests);
+    }
+    closedir(dir);
+  } else {
+    /* Not a directory: compare the last path component with the db
+       file name. */
+    const char *s = entry_name, *filename = entry_name;
+    while(*s) { if(*s == '/') { filename = s+1; } s++; }
+
+    if(strcmp(filename, db_filename) == 0) {
+      search_in_db_file(entry_name, nb_search_patterns, search_requests);
+    }
+  }
+}
+
+/*********************************************************************/
+
+/* Writes to db_file one record for each parsable field whose regexp
+   matches the header line.
+
+   nb_fields_to_parse number of entries in fields_to_parse
+   fields_to_parse    the fields, with their regexps already compiled
+   raw_mbox_line      the header line to test
+   db_file            the db file being written
+
+   A record is "<field name> <value>" followed by a newline, the value
+   being the part of the line located after the end of the match. */
+void index_one_mbox_line(int nb_fields_to_parse, struct parsable_field *fields_to_parse,
+                         char *raw_mbox_line, FILE *db_file) {
+  regmatch_t match;
+  int k;
+
+  for(k = 0; k < nb_fields_to_parse; k++) {
+    struct parsable_field *field = &fields_to_parse[k];
+
+    if(regexec(&field->regexp, raw_mbox_line, 1, &match, 0) != 0) { continue; }
+
+    fprintf(db_file, "%s %s\n",
+            field_names[field->id],
+            raw_mbox_line + match.rm_eo);
+  }
+}
+
+/* Indexes one mbox file into db_file.
+
+   mbox_filename      the mbox file to read
+   nb_fields_to_parse number of entries in fields_to_parse
+   fields_to_parse    the header fields to extract, regexps compiled
+   db_file            the db file being written
+
+   For each mail, a "mail <offset> <mbox filename>" record is written,
+   the offset being the byte position of its "From " line, followed by
+   the records produced by index_one_mbox_line for each header line.
+   Continuation lines (starting with a space or a tab) are joined to
+   the previous header line before it is indexed, so a header line is
+   held in full_line until the next one starts; the held line is
+   flushed before the next "mail" record and at the end of the file,
+   so that it stays with the mail it belongs to.
+
+   If the file cannot be opened, a message is printed and the function
+   returns, or exits in paranoid mode. */
+void index_mbox(const char *mbox_filename,
+ int nb_fields_to_parse, struct parsable_field *fields_to_parse,
+ FILE *db_file) {
+ char raw_mbox_line[BUFFER_SIZE], full_line[BUFFER_SIZE];
+ char *end_of_full_line;
FILE *file;
- int in_header;
- unsigned int position_in_file;
+ int in_header, new_header, last_mbox_line_was_empty;
+ unsigned long int position_in_file;
- file = fopen(input_filename, "r");
+ file = fopen(mbox_filename, "r");
if(!file) {
- fprintf(stderr, "mymail: Can not open `%s'.\n", input_filename);
- exit(EXIT_FAILURE);
+ fprintf(stderr, "mymail: Cannot open '%s'.\n", mbox_filename);
+ if(paranoid) { exit(EXIT_FAILURE); }
+ return;
}
in_header = 0;
+ new_header = 0;
position_in_file = 0;
+ end_of_full_line = 0;
+ full_line[0] = '\0';
+ last_mbox_line_was_empty = 1;
- while(fgets(raw_line, BUFFER_SIZE, file)) {
- if(strncmp(raw_line, "From ", 5) == 0) {
+ while(fgets(raw_mbox_line, BUFFER_SIZE, file)) {
+ /* A "From " line starts a mail only at the beginning of the file
+ or after an empty line. */
+ if(last_mbox_line_was_empty && strncmp(raw_mbox_line, "From ", 5) == 0) {
if(in_header) {
fprintf(stderr,
- "Got a 'From ' in the header in %s:%u.\n",
- input_filename, position_in_file);
- fprintf(stderr, "%s", raw_line);
- exit(EXIT_FAILURE);
+ "Got a ^\"From \" in the header in %s:%lu.\n",
+ mbox_filename, position_in_file);
+ fprintf(stderr, "%s", raw_mbox_line);
+ if(paranoid) { exit(EXIT_FAILURE); }
}
in_header = 1;
- } else if(strncmp(raw_line, "\n", 1) == 0) {
+ new_header = 1;
+ } else if(raw_mbox_line[0] == '\n') {
if(in_header) { in_header = 0; }
}
- /* if(in_header) { */
- /* printf("LINE.H %s", raw_line); */
- /* } else { */
- /* printf("LINE.B %s", raw_line); */
- /* } */
+ last_mbox_line_was_empty = (raw_mbox_line[0] == '\n');
if(in_header) {
- int f;
- regmatch_t matches;
- for(f = 0; f < nb_fields_to_parse; f++) {
- if(regexec(&fields_to_parse[f].regexp, raw_line, 1, &matches, 0) == 0) {
- fprintf(fields_to_parse[f].db_file, "%s:%d %s",
- input_filename, position_in_file,
- raw_line + matches.rm_eo);
+ if(new_header) {
+ /* The last header line of the previous mail is still held in
+ full_line: write it before the record opening the new mail. */
+ if(full_line[0]) {
+ index_one_mbox_line(nb_fields_to_parse, fields_to_parse, full_line, db_file);
+ full_line[0] = '\0';
+ }
+ fprintf(db_file, "mail %lu %s\n", position_in_file, mbox_filename);
+ new_header = 0;
+ }
+
+ if(raw_mbox_line[0] == ' ' || raw_mbox_line[0] == '\t') {
+ char *start = raw_mbox_line;
+ while(*start == ' ' || *start == '\t') start++;
+ /* Join the continuation only if the separating space, the text
+ and the final '\0' fit in full_line. */
+ if(end_of_full_line &&
+ (size_t) (end_of_full_line - full_line) + strlen(start) + 2 <= BUFFER_SIZE) {
+ *(end_of_full_line++) = ' ';
+ strcpy(end_of_full_line, start);
+ while(*end_of_full_line && *end_of_full_line != '\n') {
+ end_of_full_line++;
}
+ *end_of_full_line = '\0';
+ } else {
+ fprintf(stderr,
+ "Header line too long in %s:%lu, continuation dropped.\n",
+ mbox_filename, position_in_file);
+ }
}
+
+ else {
+ /*
+ if(!((raw_mbox_line[0] >= 'a' && raw_mbox_line[0] <= 'z') ||
+ (raw_mbox_line[0] >= 'A' && raw_mbox_line[0] <= 'Z'))) {
+ fprintf(stderr,
+ "Header line syntax error %s:%lu.\n",
+ mbox_filename, position_in_file);
+ fprintf(stderr, "%s", raw_mbox_line);
+ }
+ */
+
+ /* A new header line starts: the held one is complete. */
+ if(full_line[0]) {
+ index_one_mbox_line(nb_fields_to_parse, fields_to_parse, full_line, db_file);
+ }
+
+ end_of_full_line = full_line;
+ strcpy(end_of_full_line, raw_mbox_line);
+ while(*end_of_full_line && *end_of_full_line != '\n') {
+ end_of_full_line++;
+ }
+ *end_of_full_line = '\0';
+ }
+
}
- position_in_file += strlen(raw_line);
+ position_in_file += strlen(raw_mbox_line);
}
+ /* No other header line follows the last one of the last mail. */
+ if(full_line[0]) {
+ index_one_mbox_line(nb_fields_to_parse, fields_to_parse, full_line, db_file);
+ }
fclose(file);
}
-int ignore_entry(const char *name) {
- return
- strcmp(name, ".") == 0 ||
- strcmp(name, "..") == 0 ||
- (name[0] == '.' && name[1] != '/');
-}
-
-void process_entry(const char *dir_name,
- int nb_fields_to_parse, struct parsable_field *fields_to_parse) {
+/* Walks entry_name recursively and indexes into db_file every entry
+   that cannot be opened as a directory.
+
+   db_file            the db file being written
+   entry_name         a directory or an mbox file
+   nb_fields_to_parse number of entries in fields_to_parse
+   fields_to_parse    the header fields to extract
+
+   Entries rejected by ignore_entry are skipped. Exits if entry_name
+   cannot be stat'ed.
+
+   NOTE(review): with the S_ISLNK and S_ISREG tests removed, sb is only
+   used to check that lstat succeeds; symbolic links are followed by
+   opendir, and any entry opendir cannot open is handed to index_mbox
+   whatever its type -- confirm this is intended.
+   NOTE(review): the result of snprintf is not checked, so a path
+   longer than PATH_MAX is silently truncated. */
+void recursive_index_mbox(FILE *db_file,
+ const char *entry_name,
+ int nb_fields_to_parse, struct parsable_field *fields_to_parse) {
DIR *dir;
struct dirent *dir_e;
struct stat sb;
char subname[PATH_MAX + 1];
- if(lstat(dir_name, &sb) != 0) {
+ if(lstat(entry_name, &sb) != 0) {
fprintf(stderr,
- "mymail: Can not stat \"%s\": %s\n",
- dir_name,
+ "mymail: Cannot stat \"%s\": %s\n",
+ entry_name,
strerror(errno));
exit(EXIT_FAILURE);
- } else {
}
- if(S_ISLNK(sb.st_mode)) {
- return;
- }
-
- dir = opendir(dir_name);
+ dir = opendir(entry_name);
if(dir) {
- printf("Processing directory '%s'.\n", dir_name);
while((dir_e = readdir(dir))) {
if(!ignore_entry(dir_e->d_name)) {
- snprintf(subname, PATH_MAX, "%s/%s", dir_name, dir_e->d_name);
- process_entry(subname, nb_fields_to_parse, fields_to_parse);
+ snprintf(subname, PATH_MAX, "%s/%s", entry_name, dir_e->d_name);
+ recursive_index_mbox(db_file, subname, nb_fields_to_parse, fields_to_parse);
}
}
closedir(dir);
} else {
- if(S_ISREG(sb.st_mode)) {
- /* printf("Processing regular file '%s'.\n", dir_name); */
- read_file(dir_name, nb_fields_to_parse, fields_to_parse);
- }
+ /* opendir failed: the entry is treated as an mbox file. */
+ index_mbox(entry_name, nb_fields_to_parse, fields_to_parse, db_file);
}
}
/* For long options that have no equivalent short option, use a
non-character as a pseudo short option, starting with CHAR_MAX + 1. */
+/* NOTE(review): no entry of the long_options table visible in this
+   patch uses OPT_BASH_MODE -- check whether it is still needed. */
-enum
-{
+enum {
OPT_BASH_MODE = CHAR_MAX + 1
};
+/* Long options handed to getopt_long. The last field of an entry is
+   the short option character returned for it; a second field of 1
+   marks an option taking an argument. The table ends with an all-zero
+   entry. */
static struct option long_options[] = {
{ "help", no_argument, 0, 'h' },
- { "db-prefix", 1, 0, 'p' },
+ { "version", no_argument, 0, 'v' },
+ { "db-file", 1, 0, 'd' },
+ { "db-root", 1, 0, 'r' },
+ { "search", 1, 0, 's' },
+ { "index", 0, 0, 'i' },
{ 0, 0, 0, 0 }
};
-static struct parsable_field fields_to_parse[] = {
- {
- "from",
- "^[Ff][Rr][Oo][Mm]: *",
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 0
- },
-
- {
- "dest",
- "^\\([Tt][Oo]\\|[Cc][Cc]\\|[Bb][Cc][Cc]\\): *",
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 0
- },
-};
+/*********************************************************************/
int main(int argc, char **argv) {
int error = 0, show_help = 0;
const int nb_fields_to_parse = sizeof(fields_to_parse) / sizeof(struct parsable_field);
char c;
int f;
+ int nb_search_patterns;
+ char *search_pattern[MAX_NB_SEARCH_PATTERNS];
+
+ /* for(f = 0; f < argc; f++) { */
+ /* printf("arg %d \"%s\"\n", f, argv[f]); */
+ /* } */
+
+ paranoid = 0;
+ action_index = 0;
+ db_filename = 0;
+ db_root_path = 0;
setlocale(LC_ALL, "");
- while ((c = getopt_long(argc, argv, "hp:",
+ nb_search_patterns = 0;
+
+ while ((c = getopt_long(argc, argv, "hvip:s:d:r:",
long_options, NULL)) != -1) {
switch(c) {
show_help = 1;
break;
- case 'p':
- db_filename_prefix = strdup(optarg);
+ case 'v':
+ print_version(stdout);
+ break;
+
+ case 'i':
+ action_index = 1;
+ break;
+
+ case 'd':
+ db_filename = strdup(optarg);
+ break;
+
+ case 'r':
+ db_root_path = strdup(optarg);
+ break;
+
+ case 's':
+ if(nb_search_patterns == MAX_NB_SEARCH_PATTERNS) {
+ fprintf(stderr, "mymail: Too many search patterns.\n");
+ exit(EXIT_FAILURE);
+ }
+ search_pattern[nb_search_patterns++] = strdup(optarg);
break;
default:
}
}
- if(!db_filename_prefix) {
- db_filename_prefix = strdup("/tmp/mymail_");
+ if(!db_filename) {
+ char *default_db_filename = getenv("MYMAIL_DB_FILE");
+
+ if(!default_db_filename) {
+ default_db_filename = "mymail.db";
+ }
+
+ db_filename = strdup(default_db_filename);
+ }
+
+ if(!db_root_path) {
+ char *default_db_root_path = getenv("MYMAIL_DB_ROOT");
+
+ if(default_db_root_path) {
+ db_root_path = strdup(default_db_root_path);
+ }
}
if(error) {
- usage(stderr);
+ print_usage(stderr);
exit(EXIT_FAILURE);
}
if(show_help) {
- usage(stdout);
+ print_usage(stdout);
exit(EXIT_SUCCESS);
}
- for(f = 0; f < nb_fields_to_parse; f++) {
- char db_filename[BUFFER_SIZE];
- sprintf(db_filename, "%s%s", db_filename_prefix, fields_to_parse[f].name);
- fields_to_parse[f].db_file = fopen(db_filename, "w");
- if(!fields_to_parse[f].db_file) {
+ if(action_index) {
+ FILE *db_file;
+
+ db_file = fopen(db_filename, "w");
+
+ if(!db_file) {
fprintf(stderr,
- "mymail: Can not open \"%s\" for writing: %s\n",
+ "mymail: Cannot open \"%s\" for writing: %s\n",
db_filename,
strerror(errno));
exit(EXIT_FAILURE);
}
- printf("Initialized %s.\n", db_filename);
+ for(f = 0; f < nb_fields_to_parse; f++) {
+ if(regcomp(&fields_to_parse[f].regexp,
+ fields_to_parse[f].regexp_string,
+ REG_ICASE)) {
+ fprintf(stderr,
+ "mymail: Syntax error in regexp \"%s\" for field \"%s\".\n",
+ fields_to_parse[f].regexp_string,
+ field_names[fields_to_parse[f].id]);
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ fprintf(db_file, "%s version_%s raw\n", MYMAIL_DB_MAGIC_TOKEN, VERSION);
+
+ while(optind < argc) {
+ recursive_index_mbox(db_file,
+ argv[optind],
+ nb_fields_to_parse, fields_to_parse);
+ optind++;
+ }
+
+ fclose(db_file);
+
+ for(f = 0; f < nb_fields_to_parse; f++) {
+ regfree(&fields_to_parse[f].regexp);
+ }
+ }
+
+ else {
- if(regcomp(&fields_to_parse[f].regexp,
- fields_to_parse[f].regexp_string,
- REG_ICASE)) {
+ if(!db_root_path) {
fprintf(stderr,
- "mymail: Syntax error in regexp \"%s\" for field \"%s\".\n",
- fields_to_parse[f].regexp_string,
- fields_to_parse[f].name);
+ "mymail: db root path is not set\n");
exit(EXIT_FAILURE);
}
- }
- while(optind < argc) {
- process_entry(argv[optind],
- nb_fields_to_parse, fields_to_parse);
- optind++;
- }
+ if(nb_search_patterns > 0) {
+ struct search_request search_requests[MAX_NB_SEARCH_PATTERNS];
+ char *search_regexp_string;
+ int m, n;
- for(f = 0; f < nb_fields_to_parse; f++) {
- fclose(fields_to_parse[f].db_file);
- regfree(&fields_to_parse[f].regexp);
+ for(n = 0; n < nb_search_patterns; n++) {
+ search_regexp_string = segment_next_field(search_pattern[n]);
+
+ if(search_pattern[n][0] == '!') {
+ search_pattern[n]++;
+ search_requests[n].negation = 1;
+ } else {
+ search_requests[n].negation = 0;
+ }
+
+ search_requests[n].field_id = -1;
+ for(m = 0; (m < MAX_ID) && search_requests[n].field_id == -1; m++) {
+ if(strncmp(field_names[m], search_pattern[n], strlen(search_pattern[n])) == 0) {
+ search_requests[n].field_id = m;
+ }
+ }
+
+ if(regcomp(&search_requests[n].regexp,
+ search_regexp_string,
+ REG_ICASE)) {
+ fprintf(stderr,
+ "mymail: Syntax error in regexp \"%s\" for field \"%s\".\n",
+ search_regexp_string,
+ field_names[search_requests[n].field_id]);
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ recursive_search_in_db(db_root_path,
+ nb_search_patterns, search_requests);
+
+ for(n = 0; n < nb_search_patterns; n++) {
+ free(search_pattern[n]);
+ }
+ }
}
+ free(db_filename);
+ free(db_root_path);
+
exit(EXIT_SUCCESS);
}