/*
- This command is a dumb mail indexer. It can either (1) scan
- directories containing mbox files, and create a db file containing
- for each mail a list of fields computed from the header, or (2)
- read such a db file and get all the mails matching regexp-defined
- conditions on the fields, to create a resulting mbox file.
+ mymail is a simple mail indexer. It can:
+
+ (1) scan mbox files, and create a db file containing for each mail a
+ list of fields computed from its header.
+
+ (2) read such a db file, get all the mails matching regexp-defined
+ conditions on the fields, and generate a resulting mbox file.
It is low-tech, simple, light and fast.
#include <stdio.h>
#include <stdlib.h>
+#include <sys/stat.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <time.h>
#define MYMAIL_DB_MAGIC_TOKEN "mymail_index_file"
-#define MYMAIL_VERSION "0.9.9"
+#define MYMAIL_VERSION "0.9.10"
#define MYMAIL_DB_FORMAT_VERSION 1
int global_quiet;
int global_use_leading_time;
int global_nb_mails_max;
+int global_discard_mail_from_the_future;
regex_t global_leading_from_line_regexp;
struct alias_node *global_alias_list;
+time_t global_current_time;
/********************************************************************/
ID_PARTICIPANT,
ID_BODY,
ID_TIME_INTERVAL,
+ ID_MAIL_ID,
+ ID_REFERENCE_ID,
+ ID_THREAD_ID,
MAX_ID
};
"date",
"part",
"body",
- "interval"
+ "interval",
+ "mailid",
+ "reference",
+ "thread"
};
/********************************************************************/
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
},
+ {
+ ID_MAIL_ID,
+ REG_ICASE,
+ "^message-id: ",
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+ },
+
+ {
+ ID_REFERENCE_ID,
+ REG_ICASE,
+ "^\\(in-reply-to\\|references\\): ",
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+ },
+
};
/********************************************************************/
fprintf(out, " -t, --use-leading-time\n");
fprintf(out, " use the time stamp from the leading line of each mail and not the Date:\n");
fprintf(out, " field\n");
+ fprintf(out, " -f, --do-not-discard-mails-from-the-future\n");
+ fprintf(out, " do not ignore mails with a date more than 24h in the future\n");
fprintf(out, " -p <db filename pattern>, --db-pattern <db filename pattern>\n");
fprintf(out, " set the db filename pattern for recursive search\n");
fprintf(out, " -r <db root path>, --db-root <db root path>\n");
fprintf(out, " set the maximum number of mails to extract\n");
fprintf(out, " -a <search field>, --default-search <search field>\n");
fprintf(out, " set the default search field\n");
+
}
/*********************************************************************/
(condition->db_key == ID_FROM && db_key == ID_LEADING_LINE)
+ ||
+
+ (condition->db_key == ID_THREAD_ID && (db_key == ID_MAIL_ID ||
+ db_key == ID_REFERENCE_ID))
)
&&
/* printf("Extract\n"); */
mail_file = safe_fopen(mail_filename, "r", "mbox for mail extraction");
+ /* fchmod(fileno(mail_file), 0x660); */
fseek(mail_file, position_in_mail, SEEK_SET);
if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) {
if(db_key == ID_LEADING_LINE) {
c = db_value;
- while(*c && *c != ' ') c++; while(*c && *c == ' ') c++;
+ while(*c && *c != ' ') c++;
+ while(*c && *c == ' ') c++;
/* printf("From %s", db_value); */
strptime(c, "%a %b %e %k:%M:%S %Y", &tm);
*t = mktime(&tm);
for(n = 0; n < nb_search_conditions; n++) { hits[n] = 0; }
nb_body_conditions = 0;
- need_time = 0;
+ need_time = global_discard_mail_from_the_future;
mail_time = 0;
for(n = 0; n < nb_search_conditions; n++) {
while(nb_extracted_mails < global_nb_mails_max &&
fgets(raw_db_line, BUFFER_SIZE, db_file)) {
+
+ /* Strip the trailing newline (the line is cut at the first '\n') */
+ char *s = raw_db_line;
+ while(*s && *s != '\n') { s++; }
+ *s = '\0';
+
db_value = parse_token(db_key_string, TOKEN_BUFFER_SIZE, ' ', raw_db_line);
if(strcmp("mail", db_key_string) == 0) {
- if(current_mail_filename[0]) {
- if(check_full_mail_match(current_mail_filename,
- mail_time,
- nb_search_conditions, search_conditions,
- nb_body_conditions, hits, current_position_in_mail)) {
- extract_mail(current_mail_filename, current_position_in_mail, output_file);
- nb_extracted_mails++;
- }
+ if(current_mail_filename[0] &&
+ (!global_discard_mail_from_the_future || mail_time < global_current_time + 3600 * 24) &&
+ check_full_mail_match(current_mail_filename,
+ mail_time,
+ nb_search_conditions, search_conditions,
+ nb_body_conditions, hits, current_position_in_mail)) {
+ extract_mail(current_mail_filename, current_position_in_mail, output_file);
+ nb_extracted_mails++;
}
for(n = 0; n < nb_search_conditions; n++) { hits[n] = 0; }
db_value = parse_token(position_in_file_string, TOKEN_BUFFER_SIZE, ' ', db_value);
- db_value = parse_token(current_mail_filename, PATH_MAX+1, '\n', db_value);
+ strncpy(current_mail_filename, db_value, PATH_MAX + 1);
current_position_in_mail = atol(position_in_file_string);
}
}
}
+ /* printf("global_discard_mail_from_the_future = %d\n",
+ global_discard_mail_from_the_future); */
+
if(nb_extracted_mails < global_nb_mails_max &&
current_mail_filename[0] &&
+ (!global_discard_mail_from_the_future || mail_time < global_current_time + 3600 * 24) &&
check_full_mail_match(current_mail_filename,
mail_time,
nb_search_conditions, search_conditions,
{ "version", no_argument, 0, 'v' },
{ "quiet", no_argument, 0, 'q' },
{ "use-leading-time", no_argument, 0, 't' },
+ { "do-not-discard-mails-from-the-future", no_argument, 0, 'f' },
{ "db-file-output", 1, 0, 'd' },
{ "db-pattern", 1, 0, 'p' },
{ "db-root", 1, 0, 'r' },
static struct time_criterion time_criteria[] = {
+ { "1h", 0, 1, -1, -1 },
+ { "2h", 0, 2, -1, -1 },
+ { "4h", 0, 4, -1, -1 },
{ "8h", 0, 8, -1, -1 },
{ "24h", 0, 24, -1, -1 },
{ "48h", 0, 48, -1, -1 },
{ "week", 0, 24 * 7, -1, -1 },
+ { "2weeks", 0, 24 * 14, -1, -1 },
{ "month", 0, 24 * 31, -1, -1 },
+ { "semester", 0, 24 * 185, -1, -1 },
{ "trimester", 0, 24 * 92, -1, -1 },
{ "year", 0, 24 * 365, -1, -1 },
/*********************************************************************/
time_t time_for_past_day(int day) {
- time_t t;
struct tm *tm;
int delta_day;
- t = time(0);
- tm = localtime(&t);
+ tm = localtime(&global_current_time);
if(day > 0) {
delta_day = (7 + tm->tm_wday - day) % 7;
} else {
delta_day = - day;
}
- return t - (delta_day * 3600 * 24 + tm->tm_sec + 60 * tm->tm_min + 3600 * tm->tm_hour);
+ return global_current_time - (delta_day * 3600 * 24 + tm->tm_sec + 60 * tm->tm_min + 3600 * tm->tm_hour);
}
void init_condition(struct search_condition *condition, const char *full_string,
condition->time_start = time_for_past_day(time_criteria[k].past_week_day);
condition->time_stop = condition->time_start + 3600 * 24;
} else {
- condition->time_start = time(0) - 3600 * time_criteria[k].start_hour;
+ condition->time_start = global_current_time - 3600 * time_criteria[k].start_hour;
if(time_criteria[k].end_hour >= 0) {
- condition->time_stop = time(0) - 3600 * time_criteria[k].end_hour;
+ condition->time_stop = global_current_time - 3600 * time_criteria[k].end_hour;
} else {
condition->time_stop = 0;
}
struct search_condition search_conditions[MAX_NB_SEARCH_CONDITIONS];
struct alias_node *a, *b;
+ /* Group and others have no access */
+ umask(S_IRWXG | S_IRWXO);
+
if(regcomp(&global_leading_from_line_regexp, LEADING_FROM_LINE_REGEXP_STRING, 0)) {
fprintf(stderr,
"mymail: Cannot compile leading \"from\" line regexp. That is strange.\n");
global_quiet = 0;
global_use_leading_time = 0;
global_nb_mails_max = 250;
+ global_discard_mail_from_the_future = 1;
+ global_current_time = time(0);
default_search_field = 0;
strncpy(output_filename, "", PATH_MAX);
nb_search_conditions = 0;
- while ((c = getopt_long(argc, argv, "hvqip:s:d:r:l:o:a:m:",
+ while ((c = getopt_long(argc, argv, "hvqtfip:s:d:r:l:o:a:m:",
long_options, NULL)) != -1) {
switch(c) {
global_use_leading_time = 1;
break;
+ case 'f':
+ global_discard_mail_from_the_future = 0;
+ break;
+
case 'i':
action_index = 1;
break;
}
}
+ if(error) {
+ print_usage(stderr);
+ exit(EXIT_FAILURE);
+ }
+
+ if(show_help) {
+ print_usage(stdout);
+ exit(EXIT_SUCCESS);
+ }
+
/* Set all the values that may be defined in the arguments, through
environment variables, or hard-coded */
"MYMAIL_MBOX_PATTERN",
0);
- /* Start the processing */
-
- if(error) {
- print_usage(stderr);
- exit(EXIT_FAILURE);
- }
-
- if(show_help) {
- print_usage(stdout);
- exit(EXIT_SUCCESS);
- }
-
/* mbox indexing */
if(action_index) {