From 7b78746dc81a4654942ae17a4604d7443fbde929 Mon Sep 17 00:00:00 2001 From: Francois Fleuret Date: Sat, 14 Mar 2009 22:59:40 +0100 Subject: [PATCH] Added a routine, now commented out, to remove all duplicates (not only successive ones). It is too slow as-is. --- selector.cc | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/selector.cc b/selector.cc index 2a6a4f3..e119fc6 100644 --- a/selector.cc +++ b/selector.cc @@ -459,6 +459,28 @@ int main(int argc, char **argv) { while(*s == ' ' || (*s >= '0' && *s <= '9')) s++; } + /* + + // This is supposed to remove any duplicates, not only + // successive ones. However, it is O(N^2); we should use + // hash codes instead. + + int keep = 1; + + if(remove_duplicates) { + for(int k = 0; keep && k < nb_lines; k++) { + keep &= strcmp(lines[k], s); + } + } + + if(keep) { + lines[nb_lines] = new char[strlen(s) + 1]; + strcpy(lines[nb_lines], s); + nb_lines++; + } + + */ + if(!remove_duplicates || nb_lines == 0 || strcmp(lines[nb_lines - 1], s)) { lines[nb_lines] = new char[strlen(s) + 1]; strcpy(lines[nb_lines], s); -- 2.39.5