+ qsort(nodes, nb, sizeof(struct file_node *), compare_nodes);
+
+ for(n = 0; n < nb; n++) {
+ if(!show_groups && n > 0 && nodes[n]->group_id != nodes[n-1]->group_id) {
+ printf("\n");
+ }
+ print_file(nodes[n]);
+ }
+
+ free(nodes);
+}
+
+struct progress_state { /* State of the text progress bar drawn by print_progress() */
+ int bar_width; /* Width of the bar in characters; a value <= 0 forces print_progress() to recompute it */
+ int nb_values, value; /* Total number of steps, and current step; start() counts value from 0 */
+ int last_position; /* Number of '+' characters at the last redraw; start() initializes it to -1 */
+};
+
+/* Redraws the progress bar on stderr when its visible state has changed.
+
+   Does nothing unless show_progress is set, or if the terminal size
+   cannot be obtained. The bar is a run of '+' (done) and '-' (remaining)
+   characters followed by a percentage, and it ends with '\r' so that the
+   next redraw overwrites it.
+
+   state->value is expected to go from 0 to state->nb_values - 1, the
+   last value being displayed as 100%. When there is at most one value,
+   the bar is shown as complete instead of dividing by zero.
+
+   state->bar_width and state->last_position are updated on each
+   redraw. */
+
+void print_progress(struct progress_state *state) {
+  /* Room required after the bar: the percentage, '\r' and the final '\0' */
+  const int suffix_size = (int) sizeof(" [100%]\r");
+  int denominator, position, percent, max_width, k;
+  struct winsize win;
+  char buffer[PROGRESS_BUFFER_SIZE];
+  char *s;
+
+  if(!show_progress) {
+    return;
+  }
+
+  denominator = state->nb_values - 1;
+
+  /* We use the previous bar_width to compute the position, so that
+     we avoid doing too many ioctls */
+  if(denominator > 0) {
+    position = (state->bar_width * state->value) / denominator;
+  } else {
+    position = state->bar_width;
+  }
+
+  if(state->bar_width > 0 && position == state->last_position) {
+    return;
+  }
+
+  if(ioctl(STDERR_FILENO, TIOCGWINSZ, &win)) {
+    return;
+  }
+
+  /* We keep 7 columns for " [100%]", and the bar together with its
+     suffix must always fit in the buffer */
+  max_width = (int) sizeof(buffer) - suffix_size;
+  state->bar_width = win.ws_col - 7;
+  if(state->bar_width > max_width) {
+    state->bar_width = max_width;
+  }
+
+  if(denominator > 0) {
+    position = (state->bar_width * state->value) / denominator;
+    percent = (100 * state->value) / denominator;
+  } else {
+    position = state->bar_width;
+    percent = 100;
+  }
+  state->last_position = position;
+
+  s = buffer;
+  /* The second test protects the buffer if value is out of range */
+  for(k = 0; k < position && k < state->bar_width; k++) {
+    *(s++) = '+';
+  }
+  for(; k < state->bar_width; k++) {
+    *(s++) = '-';
+  }
+  snprintf(s, sizeof(buffer) - (size_t) (s - buffer), " [%3d%%]\r", percent);
+
+  /* The buffer contains a '%', hence it must not be used as a format */
+  fputs(buffer, stderr);
+}
+
+/* Scans dirname1 (and dirname2 if it is given), then compares the files
+   with same_files() and prints the result.
+
+   dirname2 may be NULL, in which case the files of dirname1 are
+   compared with each other. It may start with "not:", to list the files
+   of dirname1 for which no match is found in dirname2, or with "and:",
+   which is removed and otherwise ignored.
+
+   The "not:" mode clears the global show_groups flag. Progress messages
+   go to stderr and only if show_progress is set. */
+
+void start(const char *dirname1, const char *dirname2) {
+  struct file_node *list1, *list2;
+  struct file_node *node1, *node2;
+  struct progress_state progress_state;
+  int not_in, found;
+  int nb_groups, nb_nodes;
+  int list1_length, list2_length;
+  char *resolved;
+
+  char *buffer1 = safe_malloc(sizeof(char) * READ_BUFFER_SIZE);
+  char *buffer2 = safe_malloc(sizeof(char) * READ_BUFFER_SIZE);
+
+  not_in = 0;
+
+  if(show_progress) {
+    fprintf(stderr, "Scanning %s ... ", dirname1);
+  }
+
+  list1 = scan_directory(0, dirname1);
+
+  list1_length = file_list_length(list1);
+
+  if(dirname2) {
+    if(strncmp(dirname2, "not:", 4) == 0) {
+      not_in = 1;
+      /* groups are not computed in the not: mode */
+      show_groups = 0;
+      dirname2 += 4;
+    } else if(strncmp(dirname2, "and:", 4) == 0) {
+      dirname2 += 4;
+    }
+    if(show_progress) {
+      fprintf(stderr, "%s ... ", dirname2);
+    }
+    list2 = scan_directory(0, dirname2);
+  } else {
+    /* With a single directory, its files are compared with each other */
+    list2 = list1;
+  }
+
+  if(show_progress) {
+    fprintf(stderr, "done.\n");
+    fprintf(stderr,
+            "%s: %d file%s.\n",
+            dirname1, list1_length, (list1_length > 1 ? "s" : ""));
+    if(dirname2) {
+      list2_length = file_list_length(list2);
+      fprintf(stderr,
+              "%s: %d file%s.\n",
+              dirname2, list2_length, (list2_length > 1 ? "s" : ""));
+    }
+    fprintf(stderr, "Now looking for identical files.\n");
+  }
+
+  nb_groups = 0;
+  nb_nodes = 0;
+
+  /* The negative values force the first call to print_progress to
+     compute the bar width and to draw the bar */
+  progress_state.bar_width = -1;
+  progress_state.last_position = -1;
+  progress_state.nb_values = list1_length;
+
+  if(not_in) {
+    for(node1 = list1; node1; node1 = node1->next) {
+      progress_state.value = nb_nodes;
+      print_progress(&progress_state);
+      nb_nodes++;
+
+      found = 0;
+
+      for(node2 = list2; !found && node2; node2 = node2->next) {
+        if(same_files(node1, node2, buffer1, buffer2)) {
+          found = 1;
+        }
+      }
+
+      if(!found) {
+        /* realpath() returns a malloc'ed string that we have to free,
+           or NULL on failure, in which case we fall back on the name
+           as it was scanned */
+        resolved = show_realpaths ? realpath(node1->name, NULL) : NULL;
+        printf("%s\n", resolved ? resolved : node1->name);
+        free(resolved);
+      }
+    }
+
+  } else {
+    for(node1 = list1; node1; node1 = node1->next) {
+      progress_state.value = nb_nodes;
+      print_progress(&progress_state);
+      nb_nodes++;
+
+      for(node2 = list2; node2; node2 = node2->next) {
+        /* A negative group_id marks a node which is not in a group
+           yet. If both nodes already have one, there is nothing to
+           learn from comparing them */
+        if(node1->group_id < 0 || node2->group_id < 0) {
+          if(same_files(node1, node2, buffer1, buffer2)) {
+            if(node1->group_id < 0) {
+              if(node2->group_id >= 0) {
+                node1->group_id = node2->group_id;
+              } else {
+                /* Neither node has a group, so we create a new one */
+                node1->group_id = nb_groups;
+                node1->dir_id = 1;
+                nb_groups++;
+              }
+            }
+            if(node2->group_id < 0) {
+              node2->group_id = node1->group_id;
+              node2->dir_id = 2;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if(show_progress) {
+    fprintf(stderr, "\n");
+  }
+
+  if(dirname2) {
+    print_result(list1, list2);
+    file_list_delete(list1);
+    file_list_delete(list2);
+  } else {
+    /* list2 is the same list as list1 here, so it is freed only once */
+    print_result(list1, 0);
+    file_list_delete(list1);
+  }
+
+  free(buffer1);
+  free(buffer2);
+}
+
+void usage(FILE *out) {
+ fprintf(out, "Usage: finddup [OPTION]... DIR1 [[and:|not:]DIR2]\n");
+ fprintf(out, "Version %s (%s)\n", VERSION_NUMBER, UNAME);
+ fprintf(out, "Without DIR2, lists duplicated files found in DIR1. With DIR2, lists files common to both directories. With the not: prefix, lists files found in DIR1 which do not exist in DIR2. The and: prefix is the default and should be used only if you have a directory starting with 'not:'\n");
+ fprintf(out, "\n");
+ /* 01234567890123456789012345678901234567890123456789012345678901234567890123456789*/
+ fprintf(out, " -h, --help show this help\n");
+ fprintf(out, " -d, --ignore-dots ignore dot files and directories\n");
+ fprintf(out, " -0, --ignore-empty ignore empty files\n");
+ fprintf(out, " -c, --hide-matchings do not show which files in DIR2 corresponds to\n");
+ fprintf(out, " those in DIR1\n");
+ fprintf(out, " -g, --no-group-ids do not show the file groups\n");
+ fprintf(out, " -p, --show-progress show progress\n");
+ fprintf(out, " -r, --real-paths show the real file paths\n");
+ fprintf(out, " -i, --same-inodes-are-different\n");
+ fprintf(out, " consider files with same inode as different\n");
+#ifdef WITH_MD5
+ fprintf(out, " -m, --md5 use MD5 hashing\n");
+#endif
+ fprintf(out, "\n");
+ fprintf(out, "Report bugs and comments to <francois@fleuret.org>.\n");