#include <dirent.h>
#include <regex.h>
+#define MYMAIL_DB_MAGIC_TOKEN "mymail_index_file"
#define VERSION "0.1"
+#define MAX_NB_SEARCH_PATTERNS 10
+
#define BUFFER_SIZE 65536
+enum { /* Identifiers of the db / search fields; each value is an index into field_names[] */
+ ID_MAIL,
+ ID_FROM,
+ ID_DEST,
+ ID_SUBJECT,
+ ID_PARTICIPANT, /* search-only pseudo field: tested against both ID_FROM and ID_DEST lines */
+ MAX_ID /* number of identifiers above; used as the loop bound over field_names[] */
+};
+
+static char *field_names[] = { /* Textual field names, in the order of the ID_* enumeration */
+ "mail",
+ "from",
+ "dest",
+ "subject",
+ "part" /* name of ID_PARTICIPANT in search patterns */
+};
+
+struct search_request { /* One compiled search criterion given on the command line */
+ int field_id; /* ID_* value of the field to test; main initialises it to -1 before looking the name up */
+ int negation; /* non-zero when the pattern started with '!': satisfied by mails with NO matching line */
+ regex_t regexp; /* compiled with REG_ICASE in main, run by mbox_line_match_search() */
+};
+
struct parsable_field { /* One kind of mail header line extracted by the indexer */
- char *name;
+ int id; /* ID_* value; field_names[id] is the name written to the db */
char *regexp_string; /* regexp source text, quoted in the syntax-error message of main */
regex_t regexp; /* compiled form run by index_one_mbox_line(); presumably built from regexp_string in main -- the regcomp call is not visible here */
};
char *db_filename;
-char *search_pattern;
+char *db_root_path;
int paranoid;
int action_index;
void *p = malloc(n);
if(!p && n != 0) {
fprintf(stderr,
- "mymail: can not allocate memory: %s\n", strerror(errno));
+ "mymail: cannot allocate memory: %s\n", strerror(errno));
exit(EXIT_FAILURE);
}
return p;
print_version(out);
fprintf(out, "Written by Francois Fleuret <francois@fleuret.org>.\n");
fprintf(out, "\n");
- fprintf(out, "Usage: mymail [options] [<filename1> [<filename2> ...]]\n");
+ fprintf(out, "Usage: mymail [options] [<mbox dir1> [<mbox dir2> ...]]\n");
fprintf(out, "\n");
fprintf(out, " -h, --help\n");
fprintf(out, " show this help\n");
fprintf(out, " print the version number\n");
fprintf(out, " -i, --index\n");
fprintf(out, " index mails\n");
- fprintf(out, " -d <db filename>, --db-file <db filename>\n");
- fprintf(out, " set the data-base file\n");
fprintf(out, " -s <search pattern>, --search <search pattern>\n");
fprintf(out, " search for matching mails in the data-base file\n");
+ fprintf(out, " -d <db filename>, --db-file <db filename>\n");
+ fprintf(out, " set the data-base file\n");
+ fprintf(out, " -r <db root path>, --db-root <db root path>\n");
+ fprintf(out, " set the data-base root path for recursive search\n");
}
/*********************************************************************/
-void search_in_db(const char *search_name, const char *search_regexp_string,
+/* Tells whether a directory entry must be skipped during the
+   recursive walks: returns 1 for any name starting with '.' (this
+   covers ".", ".." and hidden files) unless the second character is
+   '/', and 0 for every other name. */
+int ignore_entry(const char *name) {
+  if(name[0] != '.') {
+    return 0;
+  }
+
+  /* name[0] is '.', so name[1] exists (it is at worst the '\0') */
+  return name[1] != '/';
+}
+
+
+/* Tests one db field line against one search request.
+
+   request    : the compiled criterion
+   mbox_id    : ID_* value of the field the line belongs to
+   mbox_value : text of the field
+
+   Returns 1 if the line is of the requested field (a request on
+   ID_PARTICIPANT accepts both ID_FROM and ID_DEST lines) and its
+   value matches the request's regexp, 0 otherwise. The negation flag
+   of the request is not taken into account here. */
+int mbox_line_match_search(struct search_request *request,
+                           int mbox_id, char *mbox_value) {
+  int same_field;
+
+  if(request->field_id == mbox_id) {
+    same_field = 1;
+  } else if(request->field_id == ID_PARTICIPANT) {
+    same_field = (mbox_id == ID_FROM || mbox_id == ID_DEST);
+  } else {
+    same_field = 0;
+  }
+
+  /* The regexp is only run on lines of the right field */
+  if(!same_field) {
+    return 0;
+  }
+
+  return regexec(&request->regexp, mbox_value, 0, 0, 0) == 0;
+}
+
+
+/* Reads a mymail db stream from its current position and prints on
+   stdout every mail that satisfies ALL the search requests.
+
+   nb_search_patterns : number of valid entries in search_requests
+   search_requests    : the compiled criteria
+   db_file            : db stream, made of "mail <position> <mbox
+                        filename>" lines, each followed by the
+                        "<field name> <value>" lines of that mail
+
+   A request is satisfied when at least one field line of the mail
+   matched it, or, for a negated request, when none did. A selected
+   mail is copied from its mbox, starting at the recorded position,
+   up to the next line starting with "From " or the end of file.
+
+   A mail can only be judged once all its field lines have been seen,
+   that is when the next "mail" line or the end of the db is reached.
+   Both events therefore go through the same branch, so that the last
+   mail of the db is not lost, and nothing is printed before a first
+   "mail" line has been read.
+
+   Exits with EXIT_FAILURE if an mbox cannot be opened.
+
+   NOTE(review): segment_next_field() is defined elsewhere; it is
+   assumed to terminate the current field in place and to return the
+   start of the next one. */
+void search_in_db(int nb_search_patterns,
+                  struct search_request *search_requests,
                  FILE *db_file) {
-  char raw_line[BUFFER_SIZE];
-  char current_mail_filename[BUFFER_SIZE];
+  int hits[MAX_NB_SEARCH_PATTERNS];
+  char raw_db_line[BUFFER_SIZE];
+  char raw_mbox_line[BUFFER_SIZE];
+  char current_mail_filename[PATH_MAX + 1];
  unsigned long int current_position_in_mail;
-  char *name, *value;
-  regex_t regexp;
-  int already_written;
-
-  if(regcomp(&regexp,
-             search_regexp_string,
-             REG_ICASE)) {
-    fprintf(stderr,
-            "mymail: Syntax error in regexp \"%s\" for field \"%s\".\n",
-            search_regexp_string,
-            search_name);
-    exit(EXIT_FAILURE);
-  }
+  char *mbox_name = NULL, *mbox_value = NULL;
+  int mbox_id;
+  int m, n;
+  int mail_pending, at_end;
  current_position_in_mail = 0;
-  already_written = 0;
-  while(fgets(raw_line, BUFFER_SIZE, db_file)) {
-    name = raw_line;
-    value = segment_next_field(raw_line);
+  mail_pending = 0; /* becomes 1 once a "mail" line has been read */
+  for(n = 0; n < nb_search_patterns; n++) { hits[n] = 0; }
-    if(strcmp("mail", name) == 0) {
-      char *position_in_file_string = value;
-      char *mail_filename = segment_next_field(value);
-      current_position_in_mail = atol(position_in_file_string);
-      strcpy(current_mail_filename, mail_filename);
-      remove_eof(current_mail_filename);
-      already_written = 0;
-    }
+  for(;;) {
+    at_end = !fgets(raw_db_line, BUFFER_SIZE, db_file);
+
+    if(!at_end) {
+      mbox_name = raw_db_line;
+      mbox_value = segment_next_field(raw_db_line);
+    }
+
+    /* End of the current mail record: judge it, then start the next */
+    if(at_end || strcmp("mail", mbox_name) == 0) {
+      char *position_in_file_string;
+      char *mail_filename;
-    else if(!already_written) {
-      if(strcmp(search_name, name) == 0 && regexec(&regexp, value, 0, 0, 0) == 0) {
+      /* Stop at the first unsatisfied request, i.e. one which was hit
+         although negated, or not hit although not negated */
+      for(n = 0; n < nb_search_patterns; n++) {
+        if(!hits[n] == !search_requests[n].negation) { break; }
+      }
+
+      if(mail_pending && n == nb_search_patterns) {
        FILE *mail_file;
+
        mail_file = fopen(current_mail_filename, "r");
+
        if(!mail_file) {
-          fprintf(stderr, "mymail: Can not open '%s'.\n", current_mail_filename);
+          fprintf(stderr, "mymail: Cannot open mbox '%s'.\n", current_mail_filename);
          exit(EXIT_FAILURE);
        }
+
        fseek(mail_file, current_position_in_mail, SEEK_SET);
-        if(fgets(raw_line, BUFFER_SIZE, mail_file)) {
-          printf("%s", raw_line);
-          while(fgets(raw_line, BUFFER_SIZE, mail_file) &&
-                strncmp(raw_line, "From ", 5)) {
-            printf("%s", raw_line);
+
+        /* The first line is the "From " separator of the mail itself,
+           the next one marks the beginning of the following mail */
+        if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) {
+          printf("%s", raw_mbox_line);
+          while(fgets(raw_mbox_line, BUFFER_SIZE, mail_file) &&
+                strncmp(raw_mbox_line, "From ", 5)) {
+            printf("%s", raw_mbox_line);
          }
        }
+
        fclose(mail_file);
-        already_written = 1;
+      }
+
+      if(at_end) { break; }
+
+      for(n = 0; n < nb_search_patterns; n++) { hits[n] = 0; }
+
+      position_in_file_string = mbox_value;
+      mail_filename = segment_next_field(mbox_value);
+      current_position_in_mail = atol(position_in_file_string);
+
+      /* A db line can be far longer than PATH_MAX: bounded copy */
+      snprintf(current_mail_filename, sizeof(current_mail_filename),
+               "%s", mail_filename);
+
+      remove_eof(current_mail_filename);
+      mail_pending = 1;
+    }
+
+    else {
+      /* The indexer writes full field names, hence the exact
+         comparison: an empty or truncated name maps to no field */
+      mbox_id = -1;
+      for(m = 0; (m < MAX_ID) && mbox_id == -1; m++) {
+        if(strcmp(field_names[m], mbox_name) == 0) {
+          mbox_id = m;
+        }
+      }
+      for(n = 0; n < nb_search_patterns; n++) {
+        hits[n] |= mbox_line_match_search(&search_requests[n],
+                                          mbox_id, mbox_value);
      }
    }
  }
+}
+
+
+/* Walks a directory tree and runs search_in_db() on every file whose
+   base name is equal to the global db_filename.
+
+   entry_name         : file or directory to visit
+   nb_search_patterns : number of valid entries in search_requests
+   search_requests    : the compiled criteria
+
+   Entries rejected by ignore_entry() are skipped. Anything that
+   opendir() accepts is recursed into, everything else is treated as a
+   file. A db file must start with a line beginning with
+   MYMAIL_DB_MAGIC_TOKEN.
+
+   Exits with EXIT_FAILURE if the entry cannot be stat'ed, or if a db
+   file cannot be opened or has an invalid header line. */
+void recursive_search_in_db(const char *entry_name,
+                            int nb_search_patterns,
+                            struct search_request *search_requests) {
+  DIR *dir;
+  struct dirent *dir_e;
+  struct stat sb;
+  char raw_db_line[BUFFER_SIZE];
+  char subname[PATH_MAX + 1];
+
+  /* Only used to report entries which do not exist or cannot be
+     accessed, sb itself is not read */
+  if(lstat(entry_name, &sb) != 0) {
+    fprintf(stderr,
+            "mymail: Cannot stat \"%s\": %s\n",
+            entry_name,
+            strerror(errno));
+    exit(EXIT_FAILURE);
+  }
+
+  dir = opendir(entry_name);
+
+  if(dir) {
+    while((dir_e = readdir(dir))) {
+      if(!ignore_entry(dir_e->d_name)) {
+        int length = snprintf(subname, PATH_MAX, "%s/%s",
+                              entry_name, dir_e->d_name);
+
+        /* A truncated path would designate another entry */
+        if(length < 0 || length >= PATH_MAX) {
+          fprintf(stderr,
+                  "mymail: Path too long \"%s/%s\"\n",
+                  entry_name, dir_e->d_name);
+          exit(EXIT_FAILURE);
+        }
+
+        recursive_search_in_db(subname,
+                               nb_search_patterns,
+                               search_requests);
+      }
+    }
+    closedir(dir);
+  } else {
+    /* The base name is what follows the last '/', if any */
+    const char *last_slash = strrchr(entry_name, '/');
+    const char *filename = last_slash ? last_slash + 1 : entry_name;
-  regfree(&regexp);
+    if(strcmp(filename, db_filename) == 0) {
+      FILE *db_file = fopen(entry_name, "r");
+
+      if(!db_file) {
+        /* Report the path which failed, not the bare db file name */
+        fprintf(stderr,
+                "mymail: Cannot open \"%s\" for reading: %s\n",
+                entry_name,
+                strerror(errno));
+        exit(EXIT_FAILURE);
+      }
+
+      if(fgets(raw_db_line, BUFFER_SIZE, db_file)) {
+        if(strncmp(raw_db_line, MYMAIL_DB_MAGIC_TOKEN, strlen(MYMAIL_DB_MAGIC_TOKEN))) {
+          fprintf(stderr,
+                  "mymail: Header line in '%s' does not match the mymail db format.\n",
+                  entry_name);
+          exit(EXIT_FAILURE);
+        }
+      } else {
+        fprintf(stderr,
+                "mymail: Cannot read the header line in '%s'.\n",
+                entry_name);
+        exit(EXIT_FAILURE);
+      }
+
+      search_in_db(nb_search_patterns, search_requests, db_file);
+
+      fclose(db_file);
+    }
+  }
}
/*********************************************************************/
void index_one_mbox_line(int nb_fields_to_parse, struct parsable_field *fields_to_parse, /* Writes to db_file one "<field name> <value>" line for every parsable field whose regexp matches the given line */
- char *raw_line, FILE *db_file) {
+ char *raw_mbox_line, FILE *db_file) {
regmatch_t matches;
int f;
for(f = 0; f < nb_fields_to_parse; f++) {
- if(regexec(&fields_to_parse[f].regexp, raw_line, 1, &matches, 0) == 0) {
+ if(regexec(&fields_to_parse[f].regexp, raw_mbox_line, 1, &matches, 0) == 0) {
fprintf(db_file, "%s %s\n",
- fields_to_parse[f].name,
- raw_line + matches.rm_eo);
+ field_names[fields_to_parse[f].id],
+ raw_mbox_line + matches.rm_eo); /* the value is whatever follows the end of the match */
}
}
}
-void index_mbox(const char *input_filename,
+void index_mbox(const char *mbox_filename,
int nb_fields_to_parse, struct parsable_field *fields_to_parse,
FILE *db_file) {
- char raw_line[BUFFER_SIZE], full_line[BUFFER_SIZE];
+ char raw_mbox_line[BUFFER_SIZE], full_line[BUFFER_SIZE];
char *end_of_full_line;
FILE *file;
int in_header, new_header;
unsigned long int position_in_file;
- file = fopen(input_filename, "r");
+ file = fopen(mbox_filename, "r");
if(!file) {
- fprintf(stderr, "mymail: Can not open '%s'.\n", input_filename);
+ fprintf(stderr, "mymail: Cannot open '%s'.\n", mbox_filename);
if(paranoid) { exit(EXIT_FAILURE); }
return;
}
end_of_full_line = 0;
full_line[0] = '\0';
- while(fgets(raw_line, BUFFER_SIZE, file)) {
- if(strncmp(raw_line, "From ", 5) == 0) {
+ while(fgets(raw_mbox_line, BUFFER_SIZE, file)) {
+ if(strncmp(raw_mbox_line, "From ", 5) == 0) {
if(in_header) {
fprintf(stderr,
"Got a ^\"From \" in the header in %s:%lu.\n",
- input_filename, position_in_file);
- fprintf(stderr, "%s", raw_line);
+ mbox_filename, position_in_file);
+ fprintf(stderr, "%s", raw_mbox_line);
if(paranoid) { exit(EXIT_FAILURE); }
}
in_header = 1;
new_header = 1;
- } else if(strncmp(raw_line, "\n", 1) == 0) {
+ } else if(strncmp(raw_mbox_line, "\n", 1) == 0) {
if(in_header) { in_header = 0; }
}
- /* printf("PARSE %d %s", in_header, raw_line); */
-
if(in_header) {
if(new_header) {
- fprintf(db_file, "mail %lu %s\n", position_in_file, input_filename);
+ fprintf(db_file, "mail %lu %s\n", position_in_file, mbox_filename);
new_header = 0;
}
- if(raw_line[0] == ' ' || raw_line[0] == '\t') {
- char *start = raw_line;
+ if(raw_mbox_line[0] == ' ' || raw_mbox_line[0] == '\t') {
+ char *start = raw_mbox_line;
while(*start == ' ' || *start == '\t') start++;
*(end_of_full_line++) = ' ';
strcpy(end_of_full_line, start);
}
else {
- /* if(!((raw_line[0] >= 'a' && raw_line[0] <= 'z') || */
- /* (raw_line[0] >= 'A' && raw_line[0] <= 'Z'))) { */
- /* fprintf(stderr, */
- /* "Header line syntax error %s:%lu.\n", */
- /* input_filename, position_in_file); */
- /* fprintf(stderr, "%s", raw_line); */
- /* } */
+ /*
+ if(!((raw_mbox_line[0] >= 'a' && raw_mbox_line[0] <= 'z') ||
+ (raw_mbox_line[0] >= 'A' && raw_mbox_line[0] <= 'Z'))) {
+ fprintf(stderr,
+ "Header line syntax error %s:%lu.\n",
+ mbox_filename, position_in_file);
+ fprintf(stderr, "%s", raw_mbox_line);
+ }
+ */
if(full_line[0]) {
index_one_mbox_line(nb_fields_to_parse, fields_to_parse, full_line, db_file);
}
end_of_full_line = full_line;
- strcpy(end_of_full_line, raw_line);
+ strcpy(end_of_full_line, raw_mbox_line);
while(*end_of_full_line && *end_of_full_line != '\n') {
end_of_full_line++;
}
}
- position_in_file += strlen(raw_line);
+ position_in_file += strlen(raw_mbox_line);
}
fclose(file);
}
-int ignore_entry(const char *name) {
- return
- /* strcmp(name, ".") == 0 || */
- /* strcmp(name, "..") == 0 || */
- (name[0] == '.' && name[1] != '/');
-}
-
-void process_entry(const char *dir_name,
- int nb_fields_to_parse, struct parsable_field *fields_to_parse,
- FILE *db_file) {
+void recursive_index_mbox(FILE *db_file, /* Indexes into db_file the entry entry_name: directories are walked recursively, anything else is handed to index_mbox() */
+ const char *entry_name,
+ int nb_fields_to_parse, struct parsable_field *fields_to_parse) {
DIR *dir;
struct dirent *dir_e;
struct stat sb;
char subname[PATH_MAX + 1];
- if(lstat(dir_name, &sb) != 0) {
+ if(lstat(entry_name, &sb) != 0) {
fprintf(stderr,
- "mymail: Can not stat \"%s\": %s\n",
- dir_name,
+ "mymail: Cannot stat \"%s\": %s\n",
+ entry_name,
strerror(errno));
exit(EXIT_FAILURE); /* an entry which cannot be stat'ed aborts the whole run */
}
- dir = opendir(dir_name);
+ dir = opendir(entry_name);
if(dir) {
- printf("Processing directory '%s'.\n", dir_name);
while((dir_e = readdir(dir))) {
if(!ignore_entry(dir_e->d_name)) { /* skips the names starting with '.', see ignore_entry() */
- snprintf(subname, PATH_MAX, "%s/%s", dir_name, dir_e->d_name);
- process_entry(subname, nb_fields_to_parse, fields_to_parse, db_file);
+ snprintf(subname, PATH_MAX, "%s/%s", entry_name, dir_e->d_name);
+ recursive_index_mbox(db_file, subname, nb_fields_to_parse, fields_to_parse);
}
}
closedir(dir);
} else { /* opendir() failed: the entry is treated as an mbox file */
- index_mbox(dir_name, nb_fields_to_parse, fields_to_parse, db_file);
+ index_mbox(entry_name, nb_fields_to_parse, fields_to_parse, db_file);
}
}
{ "help", no_argument, 0, 'h' },
{ "version", no_argument, 0, 'v' },
{ "db-file", 1, 0, 'd' },
- { "search-pattern", 1, 0, 's' },
+ { "db-root", 1, 0, 'r' },
+ { "search", 1, 0, 's' },
{ "index", 0, 0, 'i' },
{ 0, 0, 0, 0 }
};
static struct parsable_field fields_to_parse[] = { /* Header lines extracted by the indexer: field identifier, regexp matching the header prefix, zeroed regex_t */
{
- "from",
+ ID_FROM,
"^\\([Ff][Rr][Oo][Mm]:\\|From\\) *", /* "From:" in any case, or the "From" of the mbox separator line */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
},
{
- "dest",
+ ID_DEST,
"^\\([Tt][Oo]\\|[Cc][Cc]\\|[Bb][Cc][Cc]\\): *", /* "To:", "Cc:" or "Bcc:" in any case */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
},
+
+ {
+ ID_SUBJECT,
+ "^[Ss][Uu][Bb][Jj][Ee][Cc][Tt]: *", /* "Subject:" in any case */
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+ },
+
};
+/*********************************************************************/
+
+/* Entry point. Parses the command line, then either indexes the
+   mboxes given as arguments into the db file (-i), or searches the
+   db files found under the db root path for the mails satisfying all
+   the -s patterns.
+
+   A search pattern is "[!]<field name> <regexp>". The field name may
+   be abbreviated to a non-empty prefix of one of field_names[], the
+   first matching name wins, and a leading '!' negates the request.
+
+   Exits with EXIT_FAILURE on any error, among which an unknown field
+   name or an invalid regexp in a search pattern. */
int main(int argc, char **argv) {
  int error = 0, show_help = 0;
  const int nb_fields_to_parse = sizeof(fields_to_parse) / sizeof(struct parsable_field);
-  char c;
+  /* getopt_long() returns an int: stored in a char, its -1 would
+     never compare equal to -1 where char is unsigned */
+  int c;
  int f;
+  int nb_search_patterns;
+  char *search_pattern[MAX_NB_SEARCH_PATTERNS];
+
  paranoid = 0;
  action_index = 0;
-  search_pattern = 0;
+  db_filename = 0;
+  db_root_path = 0;
  setlocale(LC_ALL, "");
-  while ((c = getopt_long(argc, argv, "hvip:s:",
+  nb_search_patterns = 0;
+
+  while ((c = getopt_long(argc, argv, "hvip:s:d:r:",
                          long_options, NULL)) != -1) {
    switch(c) {
      action_index = 1;
      break;
-    case 'p':
+    case 'd':
      db_filename = strdup(optarg);
-      /* printf("db_filename=\"%s\"\n", db_filename); */
+      break;
+
+    case 'r':
+      db_root_path = strdup(optarg);
      break;
    case 's':
-      if(search_pattern) {
-        fprintf(stderr, "mymail: Search pattern already defined.\n");
+      if(nb_search_patterns == MAX_NB_SEARCH_PATTERNS) {
+        fprintf(stderr, "mymail: Too many search patterns.\n");
        exit(EXIT_FAILURE);
      }
-      search_pattern = strdup(optarg);
+      search_pattern[nb_search_patterns++] = strdup(optarg);
      break;
    default:
  if(!db_filename) {
    char *default_db_filename = getenv("MYMAIL_DB_FILE");
-    if(!default_db_filename) { default_db_filename = "/tmp/mymail.db"; }
+
+    if(!default_db_filename) {
+      default_db_filename = "mymail.db";
+    }
+
    db_filename = strdup(default_db_filename);
  }
+  if(!db_root_path) {
+    char *default_db_root_path = getenv("MYMAIL_DB_ROOT");
+
+    if(default_db_root_path) {
+      db_root_path = strdup(default_db_root_path);
+    }
+  }
+
+  if(!db_root_path) {
+    fprintf(stderr,
+            "mymail: db root path is not set\n");
+    exit(EXIT_FAILURE);
+  }
+
+
  if(error) {
    print_usage(stderr);
    exit(EXIT_FAILURE);
  }
  if(action_index) {
-    FILE *db_file = fopen(db_filename, "w");
+    FILE *db_file;
+
+    db_file = fopen(db_filename, "w");
+
    if(!db_file) {
      fprintf(stderr,
-              "mymail: Can not open \"%s\" for writing: %s\n",
+              "mymail: Cannot open \"%s\" for writing: %s\n",
              db_filename,
              strerror(errno));
      exit(EXIT_FAILURE);
        fprintf(stderr,
                "mymail: Syntax error in regexp \"%s\" for field \"%s\".\n",
                fields_to_parse[f].regexp_string,
-                fields_to_parse[f].name);
+                field_names[fields_to_parse[f].id]);
        exit(EXIT_FAILURE);
      }
    }
+    /* Header line checked by recursive_search_in_db() */
+    fprintf(db_file, "%s version_%s raw version\n", MYMAIL_DB_MAGIC_TOKEN, VERSION);
+
    while(optind < argc) {
-      process_entry(argv[optind],
-                    nb_fields_to_parse, fields_to_parse, db_file);
+      recursive_index_mbox(db_file,
+                           argv[optind],
+                           nb_fields_to_parse, fields_to_parse);
      optind++;
    }
  }
  else {
-    if(search_pattern) {
-      FILE *db_file;
-      char *search_name;
+
+    if(nb_search_patterns > 0) {
+      struct search_request search_requests[MAX_NB_SEARCH_PATTERNS];
      char *search_regexp_string;
-      search_name = search_pattern;
-      search_regexp_string = segment_next_field(search_pattern);
-      if(!*search_regexp_string) {
-        fprintf(stderr,
-                "Syntax error in the search pattern.\n");
-        exit(EXIT_FAILURE);
-      }
+      const char *field_name;
+      int m, n;
-      db_file = fopen(db_filename, "r");
+      for(n = 0; n < nb_search_patterns; n++) {
+        search_regexp_string = segment_next_field(search_pattern[n]);
-      if(!db_file) {
-        fprintf(stderr,
-                "mymail: Can not open \"%s\" for reading: %s\n",
-                db_filename,
-                strerror(errno));
-        exit(EXIT_FAILURE);
+        /* search_pattern[n] is the pointer returned by strdup() and
+           is passed to free() below: the '!' is skipped through a
+           separate pointer */
+        field_name = search_pattern[n];
+
+        if(field_name[0] == '!') {
+          field_name++;
+          search_requests[n].negation = 1;
+        } else {
+          search_requests[n].negation = 0;
+        }
+
+        /* An empty name is a prefix of every field name: reject it */
+        search_requests[n].field_id = -1;
+        if(field_name[0]) {
+          for(m = 0; (m < MAX_ID) && search_requests[n].field_id == -1; m++) {
+            if(strncmp(field_names[m], field_name, strlen(field_name)) == 0) {
+              search_requests[n].field_id = m;
+            }
+          }
+        }
+
+        if(search_requests[n].field_id == -1) {
+          fprintf(stderr,
+                  "mymail: Syntax error in field name \"%s\".\n",
+                  field_name);
+          exit(EXIT_FAILURE);
+        }
+
+        if(regcomp(&search_requests[n].regexp,
+                   search_regexp_string,
+                   REG_ICASE)) {
+          fprintf(stderr,
+                  "mymail: Syntax error in regexp \"%s\" for field \"%s\".\n",
+                  search_regexp_string,
+                  field_names[search_requests[n].field_id]);
+          exit(EXIT_FAILURE);
+        }
      }
-      search_in_db(search_name, search_regexp_string, db_file);
+      recursive_search_in_db(db_root_path,
+                             nb_search_patterns, search_requests);
-      fclose(db_file);
-      free(search_pattern);
+      for(n = 0; n < nb_search_patterns; n++) {
+        regfree(&search_requests[n].regexp);
+        free(search_pattern[n]);
+      }
    }
  }
  free(db_filename);
+  free(db_root_path);
  exit(EXIT_SUCCESS);
}