projects
/
mymail.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
a9a15a3
)
Starting to convert the ID from strings to integers to speed things up.
author
Francois Fleuret
<francois@fleuret.org>
Thu, 31 Jan 2013 21:47:32 +0000
(22:47 +0100)
committer
Francois Fleuret
<francois@fleuret.org>
Thu, 31 Jan 2013 21:47:32 +0000
(22:47 +0100)
mymail.c
patch
|
blob
|
history
diff --git
a/mymail.c
b/mymail.c
index
313b8d5
..
dcb44bf
100644
(file)
--- a/
mymail.c
+++ b/
mymail.c
@@
-51,8
+51,25
@@
#define BUFFER_SIZE 65536
#define BUFFER_SIZE 65536
+enum {
+ ID_MAIL,
+ ID_FROM,
+ ID_DEST,
+ ID_SUBJECT,
+ ID_FROMDEST,
+ MAX_ID
+};
+
+static char *field_names[] = {
+ "mail",
+ "from",
+ "dest",
+ "subj",
+ "fromdest"
+};
+
struct parsable_field {
struct parsable_field {
-
char *name
;
+
int id
;
char *regexp_string;
regex_t regexp;
};
char *regexp_string;
regex_t regexp;
};
@@
-124,26
+141,32
@@
int ignore_entry(const char *name) {
(name[0] == '.' && name[1] != '/');
}
(name[0] == '.' && name[1] != '/');
}
+int mbox_line_match_search(int search_id, regex_t *search_regexp,
+ int mbox_id, char *mbox_value) {
+ return search_id == mbox_id && regexec(search_regexp, mbox_value, 0, 0, 0) == 0;
+}
+
void search_in_db(int nb_search_patterns,
void search_in_db(int nb_search_patterns,
-
char **search_name, char **search_regexp_string
,
+
int *search_ids, char **search_regexp_strings
,
FILE *db_file) {
int hits[MAX_NB_SEARCH_PATTERNS];
char raw_db_line[BUFFER_SIZE];
char raw_mbox_line[BUFFER_SIZE];
char current_mail_filename[PATH_MAX + 1];
unsigned long int current_position_in_mail;
FILE *db_file) {
int hits[MAX_NB_SEARCH_PATTERNS];
char raw_db_line[BUFFER_SIZE];
char raw_mbox_line[BUFFER_SIZE];
char current_mail_filename[PATH_MAX + 1];
unsigned long int current_position_in_mail;
- char *name, *value;
- regex_t regexp[MAX_NB_SEARCH_PATTERNS];
- int already_written, n;
+ char *mbox_name, *mbox_value;
+ int mbox_id;
+ regex_t search_regexps[MAX_NB_SEARCH_PATTERNS];
+ int already_written, m, n;
for(n = 0; n < nb_search_patterns; n++) {
for(n = 0; n < nb_search_patterns; n++) {
- if(regcomp(&
regexp
[n],
- search_regexp_string[n],
+ if(regcomp(&
search_regexps
[n],
+ search_regexp_string
s
[n],
REG_ICASE)) {
fprintf(stderr,
"mymail: Syntax error in regexp \"%s\" for field \"%s\".\n",
REG_ICASE)) {
fprintf(stderr,
"mymail: Syntax error in regexp \"%s\" for field \"%s\".\n",
- search_regexp_string[n],
-
search_name[n
]);
+ search_regexp_string
s
[n],
+
field_names[search_ids[n]
]);
exit(EXIT_FAILURE);
}
}
exit(EXIT_FAILURE);
}
}
@@
-154,10
+177,10
@@
void search_in_db(int nb_search_patterns,
for(n = 0; n < nb_search_patterns; n++) { hits[n] = 0; }
while(fgets(raw_db_line, BUFFER_SIZE, db_file)) {
for(n = 0; n < nb_search_patterns; n++) { hits[n] = 0; }
while(fgets(raw_db_line, BUFFER_SIZE, db_file)) {
- name = raw_db_line;
- value = segment_next_field(raw_db_line);
+
mbox_
name = raw_db_line;
+
mbox_
value = segment_next_field(raw_db_line);
- if(strcmp("mail", name) == 0) {
+ if(strcmp("mail",
mbox_
name) == 0) {
char *position_in_file_string;
char *mail_filename;
char *position_in_file_string;
char *mail_filename;
@@
-165,12
+188,16
@@
void search_in_db(int nb_search_patterns,
if(n == nb_search_patterns) {
FILE *mail_file;
if(n == nb_search_patterns) {
FILE *mail_file;
+
mail_file = fopen(current_mail_filename, "r");
mail_file = fopen(current_mail_filename, "r");
+
if(!mail_file) {
fprintf(stderr, "mymail: Cannot open mbox '%s'.\n", current_mail_filename);
exit(EXIT_FAILURE);
}
if(!mail_file) {
fprintf(stderr, "mymail: Cannot open mbox '%s'.\n", current_mail_filename);
exit(EXIT_FAILURE);
}
+
fseek(mail_file, current_position_in_mail, SEEK_SET);
fseek(mail_file, current_position_in_mail, SEEK_SET);
+
if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) {
printf("%s", raw_mbox_line);
while(fgets(raw_mbox_line, BUFFER_SIZE, mail_file) &&
if(fgets(raw_mbox_line, BUFFER_SIZE, mail_file)) {
printf("%s", raw_mbox_line);
while(fgets(raw_mbox_line, BUFFER_SIZE, mail_file) &&
@@
-178,13
+205,14
@@
void search_in_db(int nb_search_patterns,
printf("%s", raw_mbox_line);
}
}
printf("%s", raw_mbox_line);
}
}
+
fclose(mail_file);
}
for(n = 0; n < nb_search_patterns; n++) { hits[n] = 0; }
fclose(mail_file);
}
for(n = 0; n < nb_search_patterns; n++) { hits[n] = 0; }
- position_in_file_string = value;
- mail_filename = segment_next_field(value);
+ position_in_file_string =
mbox_
value;
+ mail_filename = segment_next_field(
mbox_
value);
current_position_in_mail = atol(position_in_file_string);
strcpy(current_mail_filename, mail_filename);
current_position_in_mail = atol(position_in_file_string);
strcpy(current_mail_filename, mail_filename);
@@
-193,22
+221,27
@@
void search_in_db(int nb_search_patterns,
}
else {
}
else {
+ mbox_id = -1;
+ for(m = 0; (m < MAX_ID) && mbox_id == -1; m++) {
+ if(strncmp(field_names[m], mbox_name, strlen(mbox_name)) == 0) {
+ mbox_id = m;
+ }
+ }
for(n = 0; n < nb_search_patterns; n++) {
for(n = 0; n < nb_search_patterns; n++) {
- hits[n] |=
- (strncmp(search_name[n], name, strlen(search_name[n])) == 0 &&
- regexec(&regexp[n], value, 0, 0, 0) == 0);
+ hits[n] |= mbox_line_match_search(search_ids[n], &search_regexps[n],
+ mbox_id, mbox_value);
}
}
}
for(n = 0; n < nb_search_patterns; n++) {
}
}
}
for(n = 0; n < nb_search_patterns; n++) {
- regfree(&
regexp
[n]);
+ regfree(&
search_regexps
[n]);
}
}
void recursive_search_in_db(const char *entry_name,
int nb_search_patterns,
}
}
void recursive_search_in_db(const char *entry_name,
int nb_search_patterns,
-
char **search_name, char **search_regexp_string
) {
+
int *search_ids, char **search_regexp_strings
) {
DIR *dir;
struct dirent *dir_e;
struct stat sb;
DIR *dir;
struct dirent *dir_e;
struct stat sb;
@@
-231,7
+264,7
@@
void recursive_search_in_db(const char *entry_name,
snprintf(subname, PATH_MAX, "%s/%s", entry_name, dir_e->d_name);
recursive_search_in_db(subname,
nb_search_patterns,
snprintf(subname, PATH_MAX, "%s/%s", entry_name, dir_e->d_name);
recursive_search_in_db(subname,
nb_search_patterns,
- search_
name, search_regexp_string
);
+ search_
ids, search_regexp_strings
);
}
}
closedir(dir);
}
}
closedir(dir);
@@
-264,7
+297,7
@@
void recursive_search_in_db(const char *entry_name,
exit(EXIT_FAILURE);
}
exit(EXIT_FAILURE);
}
- search_in_db(nb_search_patterns, search_
name, search_regexp_string
,
+ search_in_db(nb_search_patterns, search_
ids, search_regexp_strings
,
db_file);
fclose(db_file);
db_file);
fclose(db_file);
@@
-281,7
+314,7
@@
void index_one_mbox_line(int nb_fields_to_parse, struct parsable_field *fields_t
for(f = 0; f < nb_fields_to_parse; f++) {
if(regexec(&fields_to_parse[f].regexp, raw_mbox_line, 1, &matches, 0) == 0) {
fprintf(db_file, "%s %s\n",
for(f = 0; f < nb_fields_to_parse; f++) {
if(regexec(&fields_to_parse[f].regexp, raw_mbox_line, 1, &matches, 0) == 0) {
fprintf(db_file, "%s %s\n",
- field
s_to_parse[f].name
,
+ field
_names[fields_to_parse[f].id]
,
raw_mbox_line + matches.rm_eo);
}
}
raw_mbox_line + matches.rm_eo);
}
}
@@
-344,13
+377,15
@@
void index_mbox(const char *mbox_filename,
}
else {
}
else {
- /* if(!((raw_mbox_line[0] >= 'a' && raw_mbox_line[0] <= 'z') || */
- /* (raw_mbox_line[0] >= 'A' && raw_mbox_line[0] <= 'Z'))) { */
- /* fprintf(stderr, */
- /* "Header line syntax error %s:%lu.\n", */
- /* mbox_filename, position_in_file); */
- /* fprintf(stderr, "%s", raw_mbox_line); */
- /* } */
+ /*
+ if(!((raw_mbox_line[0] >= 'a' && raw_mbox_line[0] <= 'z') ||
+ (raw_mbox_line[0] >= 'A' && raw_mbox_line[0] <= 'Z'))) {
+ fprintf(stderr,
+ "Header line syntax error %s:%lu.\n",
+ mbox_filename, position_in_file);
+ fprintf(stderr, "%s", raw_mbox_line);
+ }
+ */
if(full_line[0]) {
index_one_mbox_line(nb_fields_to_parse, fields_to_parse, full_line, db_file);
if(full_line[0]) {
index_one_mbox_line(nb_fields_to_parse, fields_to_parse, full_line, db_file);
@@
-423,19
+458,19
@@
static struct option long_options[] = {
static struct parsable_field fields_to_parse[] = {
{
static struct parsable_field fields_to_parse[] = {
{
-
"from"
,
+
ID_FROM
,
"^\\([Ff][Rr][Oo][Mm]:\\|From\\) *",
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
},
{
"^\\([Ff][Rr][Oo][Mm]:\\|From\\) *",
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
},
{
-
"dest"
,
+
ID_DEST
,
"^\\([Tt][Oo]\\|[Cc][Cc]\\|[Bb][Cc][Cc]\\): *",
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
},
{
"^\\([Tt][Oo]\\|[Cc][Cc]\\|[Bb][Cc][Cc]\\): *",
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
},
{
-
"subj"
,
+
ID_SUBJECT
,
"^[Ss][Uu][Bb][Jj][Ee][Cc][Tt]: *",
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
},
"^[Ss][Uu][Bb][Jj][Ee][Cc][Tt]: *",
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
},
@@
-559,7
+594,7
@@
int main(int argc, char **argv) {
fprintf(stderr,
"mymail: Syntax error in regexp \"%s\" for field \"%s\".\n",
fields_to_parse[f].regexp_string,
fprintf(stderr,
"mymail: Syntax error in regexp \"%s\" for field \"%s\".\n",
fields_to_parse[f].regexp_string,
- field
s_to_parse[f].name
);
+ field
_names[fields_to_parse[f].id]
);
exit(EXIT_FAILURE);
}
}
exit(EXIT_FAILURE);
}
}
@@
-583,23
+618,28
@@
int main(int argc, char **argv) {
else {
if(nb_search_patterns > 0) {
else {
if(nb_search_patterns > 0) {
-
char *search_name
[MAX_NB_SEARCH_PATTERNS];
- char *search_regexp_string[MAX_NB_SEARCH_PATTERNS];
- int n;
+
int search_ids
[MAX_NB_SEARCH_PATTERNS];
+ char *search_regexp_string
s
[MAX_NB_SEARCH_PATTERNS];
+ int
m,
n;
for(n = 0; n < nb_search_patterns; n++) {
for(n = 0; n < nb_search_patterns; n++) {
- search_name[n] = search_pattern[n];
- search_regexp_string[n] = segment_next_field(search_pattern[n]);
+ search_regexp_strings[n] = segment_next_field(search_pattern[n]);
+ search_ids[n] = -1;
+ for(m = 0; (m < MAX_ID) && search_ids[n] == -1; m++) {
+ if(strncmp(field_names[m], search_pattern[n], strlen(search_pattern[n])) == 0) {
+ search_ids[n] = m;
+ }
+ }
}
}
- if(!*search_regexp_string) {
+ if(!*search_regexp_string
s
) {
fprintf(stderr,
"Syntax error in the search pattern.\n");
exit(EXIT_FAILURE);
}
recursive_search_in_db(db_root_path,
fprintf(stderr,
"Syntax error in the search pattern.\n");
exit(EXIT_FAILURE);
}
recursive_search_in_db(db_root_path,
- nb_search_patterns, search_
name, search_regexp_string
);
+ nb_search_patterns, search_
ids, search_regexp_strings
);
for(n = 0; n < nb_search_patterns; n++) {
free(search_pattern[n]);
for(n = 0; n < nb_search_patterns; n++) {
free(search_pattern[n]);