directories containing mbox files, and create a db file containing
for each mail a list of fields computed from the header, or (2)
read such a db file and get all the mails matching regexp-defined
- conditions on the fields.
+ conditions on the fields, to create a resulting mbox file.
It is low-tech, simple, light and fast.
#include <time.h>
#define MYMAIL_DB_MAGIC_TOKEN "mymail_index_file"
-#define VERSION "0.9.1"
+#define VERSION "0.9.2"
/* Capacity of the fixed-size per-condition arrays (hits[] and
   search_conditions[]); this patch raises it from 10 to 32. */
-#define MAX_NB_SEARCH_CONDITIONS 10
+#define MAX_NB_SEARCH_CONDITIONS 32
/* Size of the char buffers that fgets() fills one line at a time. */
#define BUFFER_SIZE 65536
#define TOKEN_BUFFER_SIZE 1024
/* Pattern for the line that separates two mails in an mbox file:
   "From <sender> <weekday> <month> <day> hh:mm:ss yyyy" followed by a
   newline. It is passed to regcomp() with flags 0 and compiled into
   leading_from_line_regexp.
   NOTE(review): with flags 0 this is a basic regexp, where the
   backslash-bar alternation is an extension -- confirm it is supported
   on every target platform. */
+#define LEADING_FROM_LINE_REGEXP "^From [^ ]* \\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\) \\(Jan\\|Feb\\|Mar\\|Apr\\|May\\|Jun\\|Jul\\|Aug\\|Sep\\|Oct\\|Nov\\|Dec\\) [ 123][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9]\n$"
+
regex_t leading_from_line_regexp;
/* Global variables! */
int paranoid;
int quiet;
char *default_search_field;
/* When non-zero, ignore_entry() rejects names starting with '.';
   initialized to 1 where the other option defaults are set. */
+int ignore_dot_files;
/********************************************************************/
/* Patch hunk: ignore_entry() returns non-zero for a directory entry
   name that the recursive directory walk must skip.
   Removed ("-") lines: every name starting with '.' and not followed
   by '/' was skipped, which also covered "." and "..".
   Added ("+") lines: "." and ".." are always skipped, and the other
   dot names are skipped only when the global ignore_dot_files is
   non-zero. */
int ignore_entry(const char *name) {
return
- /* strcmp(name, ".") == 0 || */
- /* strcmp(name, "..") == 0 || */
- (name[0] == '.' && name[1] != '/');
+ strcmp(name, ".") == 0 ||
+ strcmp(name, "..") == 0 ||
+ (ignore_dot_files && name[0] == '.' && name[1] != '/');
}
/* Patch hunk: is_a_leading_from_line() returns non-zero when mbox_line
   begins with "From " and also matches the compiled global
   leading_from_line_regexp. The strncmp() prefix test comes first, so
   regexec() only runs on lines that already start with "From ".
   The patch removes the last_mbox_line_was_empty parameter, whose only
   use in the body was already commented out. */
-int is_a_leading_from_line(int last_mbox_line_was_empty, char *mbox_line) {
+int is_a_leading_from_line(char *mbox_line) {
return
/*
*/
- /* last_mbox_line_was_empty && */
strncmp(mbox_line, "From ", 5) == 0 &&
regexec(&leading_from_line_regexp, mbox_line, 0, 0, 0) == 0;
}
int *hits) {
FILE *mail_file;
int header, n;
- int last_mbox_line_was_empty;
char raw_mbox_line[BUFFER_SIZE];
int nb_body_hits;
if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) {
while(nb_body_hits < nb_body_conditions) {
- last_mbox_line_was_empty = (raw_mbox_line[0] == '\n');
+ /* last_mbox_line_was_empty = (raw_mbox_line[0] == '\n'); */
+ /* if(last_mbox_line_was_empty) { header = 0; } */
- if(last_mbox_line_was_empty) { header = 0; }
+ if(raw_mbox_line[0] == '\n') { header = 0; }
if(!header) {
for(n = 0; n < nb_search_conditions; n++) {
}
if(!fgets(raw_mbox_line, BUFFER_SIZE, mail_file) ||
- (is_a_leading_from_line(last_mbox_line_was_empty, raw_mbox_line)))
+ (is_a_leading_from_line(raw_mbox_line)))
break;
}
}
fclose(mail_file);
}
/* Patch hunk: write_mail() is renamed extract_mail() and loses its
   last_mbox_line_was_empty bookkeeping.
   The function opens mail_filename, seeks to byte offset
   position_in_mail, and copies lines to output_file. The first line
   read is always copied; copying then stops at end of file or at the
   next line accepted by is_a_leading_from_line(), which is not
   copied. */
-void write_mail(const char *mail_filename, unsigned long int position_in_mail,
+void extract_mail(const char *mail_filename, unsigned long int position_in_mail,
FILE *output_file) {
char raw_mbox_line[BUFFER_SIZE];
- int last_mbox_line_was_empty;
FILE *mail_file;
/* NOTE(review): the fopen() result is used without a NULL check, and
   fseek() takes a long while position_in_mail is an unsigned long. */
mail_file = fopen(mail_filename, "r");
fseek(mail_file, position_in_mail, SEEK_SET);
if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) {
- last_mbox_line_was_empty = 0;
/* The line at the requested offset is written without being tested. */
fprintf(output_file, "%s", raw_mbox_line);
while(1) {
/* Stop on a failed read or on the leading line of the next mail. */
if(!fgets(raw_mbox_line, BUFFER_SIZE, mail_file) ||
- (is_a_leading_from_line(last_mbox_line_was_empty, raw_mbox_line))
+ (is_a_leading_from_line(raw_mbox_line))
)
break;
- last_mbox_line_was_empty = (raw_mbox_line[0] == '\n');
fprintf(output_file, "%s", raw_mbox_line);
}
}
fclose(mail_file);
}
/* Patch hunk: search_in_db() changes from void to int. It now counts
   in nb_extracted_mails each mail it hands to extract_mail() and
   returns that count to the caller. Part of the function body is not
   shown in this hunk. */
-void search_in_db(const char *db_filename,
- int nb_search_conditions,
- struct search_condition *search_conditions,
- FILE *output_file) {
+int search_in_db(const char *db_filename,
+ int nb_search_conditions,
+ struct search_condition *search_conditions,
+ FILE *output_file) {
int hits[MAX_NB_SEARCH_CONDITIONS];
char raw_db_line[BUFFER_SIZE];
int already_written, m, n;
int nb_body_conditions, nb_fulfilled_body_conditions;
FILE *db_file;
+ int nb_extracted_mails;
+
+ nb_extracted_mails = 0;
if(!quiet) {
printf("Searching in '%s' ... ", db_filename);
}
/* A mail is counted and extracted only when the number of fulfilled
   body conditions equals the number of body conditions. */
if(nb_body_conditions == nb_fulfilled_body_conditions) {
- write_mail(current_mail_filename, current_position_in_mail, output_file);
+ nb_extracted_mails++;
+ extract_mail(current_mail_filename, current_position_in_mail, output_file);
}
}
}
printf("done.\n");
fflush(stdout);
}
+
+ return nb_extracted_mails;
}
/* Patch hunk: recursive_search_in_db() changes from void to int. The
   returned value is the sum of the counts returned by its recursive
   calls on directory entries and by search_in_db() on an entry whose
   file name matches db_filename_regexp. Part of the function body is
   not shown in this hunk. */
-void recursive_search_in_db(const char *entry_name, regex_t *db_filename_regexp,
- int nb_search_conditions,
- struct search_condition *search_conditions,
- FILE *output_file) {
+int recursive_search_in_db(const char *entry_name, regex_t *db_filename_regexp,
+ int nb_search_conditions,
+ struct search_condition *search_conditions,
+ FILE *output_file) {
DIR *dir;
struct dirent *dir_e;
struct stat sb;
char subname[PATH_MAX + 1];
+ int nb_extracted_mails = 0;
if(lstat(entry_name, &sb) != 0) {
fprintf(stderr,
while((dir_e = readdir(dir))) {
/* Entries rejected by ignore_entry() are not descended into. */
if(!ignore_entry(dir_e->d_name)) {
/* NOTE(review): the snprintf() result is not checked, so an over-long
   path is silently truncated before the recursive call. */
snprintf(subname, PATH_MAX, "%s/%s", entry_name, dir_e->d_name);
- recursive_search_in_db(subname, db_filename_regexp,
- nb_search_conditions, search_conditions,
- output_file);
+ nb_extracted_mails += recursive_search_in_db(subname, db_filename_regexp,
+ nb_search_conditions, search_conditions,
+ output_file);
}
}
closedir(dir);
/* filename ends up pointing just after the last '/' found by the scan. */
while(*s) { if(*s == '/') { filename = s+1; } s++; }
if(regexec(db_filename_regexp, filename, 0, 0, 0) == 0) {
- search_in_db(entry_name, nb_search_conditions, search_conditions, output_file);
+ nb_extracted_mails +=
+ search_in_db(entry_name, nb_search_conditions, search_conditions, output_file);
}
}
+
+ return nb_extracted_mails;
}
/*********************************************************************/
char raw_mbox_line[BUFFER_SIZE], full_line[BUFFER_SIZE];
char *end_of_full_line;
FILE *file;
- int in_header, new_header, last_mbox_line_was_empty;
+ int in_header, new_header;
unsigned long int position_in_file;
file = fopen(mbox_filename, "r");
position_in_file = 0;
end_of_full_line = 0;
full_line[0] = '\0';
- last_mbox_line_was_empty = 1;
while(fgets(raw_mbox_line, BUFFER_SIZE, file)) {
- if(is_a_leading_from_line(last_mbox_line_was_empty, raw_mbox_line)) {
+ if(is_a_leading_from_line(raw_mbox_line)) {
if(in_header) {
fprintf(stderr,
"Got a ^\"From \" in the header in %s:%lu.\n",
if(in_header) { in_header = 0; }
}
- last_mbox_line_was_empty = (raw_mbox_line[0] == '\n');
-
if(in_header) {
if(new_header) {
fprintf(db_file, "mail %lu %s\n", position_in_file, mbox_filename);
FILE *output_file;
struct search_condition search_conditions[MAX_NB_SEARCH_CONDITIONS];
- if(regcomp(&leading_from_line_regexp,
- "^From [^ ]* \\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\) \\(Jan\\|Feb\\|Mar\\|Apr\\|May\\|Jun\\|Jul\\|Aug\\|Sep\\|Oct\\|Nov\\|Dec\\) [ 123][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9]\n$",
- 0)) {
+ if(regcomp(&leading_from_line_regexp, LEADING_FROM_LINE_REGEXP, 0)) {
fprintf(stderr,
"mymail: Cannot compile leading \"from\" line regexp. That is strange.\n");
exit(EXIT_FAILURE);
db_filename_list = 0;
quiet = 0;
default_search_field = 0;
+ ignore_dot_files = 1;
setlocale(LC_ALL, "");
}
else {
+ int nb_extracted_mails = 0;
if(nb_search_conditions > 0) {
exit(EXIT_FAILURE);
}
- recursive_search_in_db(db_root_path, &db_filename_regexp,
- nb_search_conditions, search_conditions,
- output_file);
+ nb_extracted_mails += recursive_search_in_db(db_root_path, &db_filename_regexp,
+ nb_search_conditions, search_conditions,
+ output_file);
regfree(&db_filename_regexp);
}
s = parse_token(db_filename, PATH_MAX + 1, ';', s);
if(db_filename[0]) {
- search_in_db(db_filename, nb_search_conditions, search_conditions, output_file);
+ nb_extracted_mails +=
+ search_in_db(db_filename, nb_search_conditions, search_conditions, output_file);
}
}
}
/* Search in all db files listed in the command arguments */
while(optind < argc) {
- search_in_db(argv[optind], nb_search_conditions, search_conditions, output_file);
+ nb_extracted_mails +=
+ search_in_db(argv[optind], nb_search_conditions, search_conditions, output_file);
optind++;
}
}
+
+ if(!quiet) {
+ if(nb_extracted_mails > 0) {
+ printf("Found %d matching mails.\n", nb_extracted_mails);
+ } else {
+ printf("No matching mail found.\n");
+ }
+ }
+
}
for(n = 0; n < nb_search_conditions; n++) {