+ qsort(nodes, nb, sizeof(struct file_node *), compare_nodes);
+
+ if(command_to_exec) {
+ exec_command(nb, nodes);
+ } else if(result_file_prefix) {
+ write_groups_in_files(nb, nodes);
+ } else {
+ for(n = 0; n < nb; n++) {
+ first_of_group = (n == 0);
+ if(n > 0 && nodes[n]->group_id != nodes[n-1]->group_id) {
+ if(!show_groups) {
+ printf("\n");
+ }
+ first_of_group = 1;
+ }
+ if(!trim_first || !first_of_group) {
+ write_one_entry_to_file(stdout, nodes[n]);
+ }
+ }
+ }
+
+ free(nodes);
+}
+
+ /* State of the progress bar drawn on stderr by print_progress(). */
+ struct progress_state {
+   /* Width of the bar in characters. A value <= 0 forces
+      print_progress() to query the terminal size again */
+   int bar_width;
+   /* Total number of steps */
+   int nb_values;
+   /* Index of the step currently processed */
+   int value;
+   /* Position of the bar at the last redraw, used to skip redraws
+      which would not change anything */
+   int last_position;
+ };
+
+ /* Redraws the progress bar on stderr if show_progress is set and the
+    position of the bar has changed since the last redraw. Nothing is
+    printed if the terminal size cannot be obtained. Updates
+    state->bar_width and state->last_position as a side effect. */
+ void print_progress(struct progress_state *state) {
+   int position, filled, k, normalizer;
+   struct winsize win;
+   char buffer[PROGRESS_BUFFER_SIZE];
+   char *s;
+
+   if(!show_progress) {
+     return;
+   }
+
+   /* Guard against a division by zero when there is at most one value */
+   normalizer = (state->nb_values > 1 ? state->nb_values - 1 : 1);
+
+   /* We use the previous bar_width to compute the position, so that
+      we avoid doing too many ioctls. The product is computed in long
+      long because bar_width * value overflows an int when the number
+      of values is large */
+   position = (int) (((long long) state->bar_width * state->value) / normalizer);
+
+   if(state->bar_width > 0 && position == state->last_position) {
+     return;
+   }
+
+   if(ioctl(STDERR_FILENO, TIOCGWINSZ, &win)) {
+     /* The terminal size is not available, we draw nothing */
+     return;
+   }
+
+   /* An implausibly wide terminal: cap the bar so that it fits in the
+      buffer together with the percentage that follows it */
+   if(win.ws_col >= PROGRESS_BUFFER_SIZE - 3) {
+     state->bar_width = PROGRESS_BUFFER_SIZE - 10;
+   } else {
+     state->bar_width = win.ws_col - 7;
+   }
+
+   position = (int) (((long long) state->bar_width * state->value) / normalizer);
+   state->last_position = position;
+
+   /* Never draw more than bar_width characters, whatever value is,
+      so that we stay inside the buffer */
+   filled = position;
+   if(filled > state->bar_width) {
+     filled = state->bar_width;
+   }
+
+   s = buffer;
+   for(k = 0; k < filled; k++) {
+     *(s++) = '+';
+   }
+   for(; k < state->bar_width; k++) {
+     *(s++) = '-';
+   }
+
+   /* The %% produces a single percent sign. The buffer is then
+      printed through "%s", so it needs no additional escaping. The
+      write is bounded by what remains of the buffer */
+   snprintf(s, sizeof(buffer) - (size_t) (s - buffer), " [%3d%%]\r",
+            (int) ((100LL * state->value) / normalizer));
+
+   fprintf(stderr, "%s", buffer);
+ }
+
+ /* Scans dirname1, and dirname2 if it is not null, compares the files
+    with same_files() and prints the result.
+
+    Without dirname2, the files of dirname1 are compared with each
+    other. If dirname2 starts with "not:", the files of dirname1 for
+    which no identical file is found in the second directory are
+    printed on stdout. Otherwise (optional "and:" prefix), identical
+    files get a common group_id, and the groups are handed to
+    print_result(). */
+ void start(const char *dirname1, const char *dirname2) {
+   struct file_node *list1, *list2;
+   struct file_node *node1, *node2;
+   struct progress_state progress_state;
+   int not_in, found;
+   int nb_groups, nb_nodes;
+   int list1_length;
+   char *resolved;
+
+   char *buffer1 = safe_malloc(sizeof(char) * READ_BUFFER_SIZE);
+   char *buffer2 = safe_malloc(sizeof(char) * READ_BUFFER_SIZE);
+
+   not_in = 0;
+
+   list1 = scan_directory(0, dirname1);
+   list1_length = file_list_length(list1);
+
+   if(dirname2) {
+     if(strncmp(dirname2, "not:", 4) == 0) {
+       not_in = 1;
+       /* groups are not computed in the not: mode */
+       show_groups = 0;
+       dirname2 += 4;
+     } else if(strncmp(dirname2, "and:", 4) == 0) {
+       dirname2 += 4;
+     }
+     list2 = scan_directory(0, dirname2);
+   } else {
+     /* A single directory is compared with itself */
+     list2 = list1;
+   }
+
+   if(show_progress) {
+     fprintf(stderr,
+             "Now looking for identical files (this may take a while).\n");
+   }
+
+   nb_groups = 0;
+   nb_nodes = 0;
+
+   /* Negative values force the first call to print_progress() to
+      get the terminal size and draw the bar */
+   progress_state.bar_width = -1;
+   progress_state.last_position = -1;
+   progress_state.nb_values = list1_length;
+
+   if(not_in) {
+     for(node1 = list1; node1; node1 = node1->next) {
+       progress_state.value = nb_nodes;
+       print_progress(&progress_state);
+       nb_nodes++;
+
+       found = 0;
+
+       for(node2 = list2; !found && node2; node2 = node2->next) {
+         if(same_files(node1, node2, buffer1, buffer2)) {
+           found = 1;
+         }
+       }
+
+       if(!found) {
+         /* realpath() allocates its result, which we have to free,
+            and returns a null pointer on failure, in which case we
+            fall back to the name as it was scanned */
+         resolved = show_realpaths ? realpath(node1->name, NULL) : 0;
+         printf("%s\n", resolved ? resolved : node1->name);
+         free(resolved);
+       }
+     }
+
+   } else {
+     for(node1 = list1; node1; node1 = node1->next) {
+       progress_state.value = nb_nodes;
+       print_progress(&progress_state);
+       nb_nodes++;
+
+       for(node2 = list2; node2; node2 = node2->next) {
+         /* A negative group_id means that the node is not in a group
+            yet. If both nodes already have one, there is nothing to
+            do and we skip the comparison */
+         if(node1->group_id < 0 || node2->group_id < 0) {
+           if(same_files(node1, node2, buffer1, buffer2)) {
+             if(node1->group_id < 0) {
+               if(node2->group_id >= 0) {
+                 /* node1 joins the group of node2 */
+                 node1->group_id = node2->group_id;
+               } else {
+                 /* Neither has a group, we create a new one */
+                 node1->group_id = nb_groups;
+                 node1->dir_id = 1;
+                 nb_groups++;
+               }
+             }
+             if(node2->group_id < 0) {
+               node2->group_id = node1->group_id;
+               node2->dir_id = 2;
+             }
+           }
+         }
+       }
+     }
+   }
+
+   if(show_progress) {
+     /* Move past the line of the progress bar */
+     fprintf(stderr, "\n");
+   }
+
+   if(dirname2) {
+     print_result(list1, list2);
+     file_list_delete(list1);
+     file_list_delete(list2);
+   } else {
+     /* list2 is the same list as list1 here, we delete it only once */
+     print_result(list1, 0);
+     file_list_delete(list1);
+   }
+
+   free(buffer1);
+   free(buffer2);
+ }
+
+void usage(FILE *out) {
+ fprintf(out, "Usage: finddup [OPTION]... [DIR1 [[and:|not:]DIR2]]\n");
+ fprintf(out, "Version %s (%s)\n", VERSION_NUMBER, UNAME);
+ fprintf(out, "Without DIR2, lists duplicated files found in DIR1, or the current directory if DIR1 is not provided. With DIR2, lists files common to both directories. With the not: prefix, lists files found in DIR1 which do not exist in DIR2. The and: prefix is the default and should be used only if you have a directory starting with 'not:'\n");
+ fprintf(out, "\n");
+ /* 01234567890123456789012345678901234567890123456789012345678901234567890123456789*/
+ fprintf(out, " -v, --version prints the version number and exit\n");
+ fprintf(out, " -h, --help show this help\n");
+ fprintf(out, " -d, --ignore-dots ignore dot files and directories\n");
+ fprintf(out, " -0, --ignore-empty ignore empty files\n");
+ fprintf(out, " -c, --hide-matchings do not show which files in DIR2 corresponds to\n");
+ fprintf(out, " those in DIR1\n");
+ fprintf(out, " -g, --no-group-ids do not show the file groups\n");
+ fprintf(out, " -t, --time-sort sort according to modification time in each group\n");
+ fprintf(out, " -q, --trim-first do not show the first file in each group\n");
+ fprintf(out, " -p, --show-progress show progress\n");
+ fprintf(out, " -r, --real-paths show the real file paths\n");
+ fprintf(out, " -i, --same-inodes-are-different\n");
+ fprintf(out, " consider files with same inode as different\n");
+ fprintf(out, " -e <command>, --exec <command>\n");
+ fprintf(out, " execute the provided command for each group of\n");
+ fprintf(out, " identical files, with their names as arguments\n");
+ fprintf(out, " -f <string>, --result-prefix <string>\n");
+ fprintf(out, " for each group of identical files, write one\n");
+ fprintf(out, " result file whose name is the given prefix string\n");
+ fprintf(out, " followed by the group number, and containing\n");
+ fprintf(out, " one filename per line\n");
+ fprintf(out, "\n");
+ fprintf(out, "Report bugs and comments to <francois@fleuret.org>.\n");