From a28a1c658978082b6e3f08d46e8396979fb3fad4 Mon Sep 17 00:00:00 2001 From: Thomas Oster Date: Tue, 7 Jan 2020 00:15:52 +0100 Subject: [PATCH 1/2] Added a -progress option to display the progress during elimination --- Rdutil.cc | 18 ++++++++++++++++-- Rdutil.hh | 5 ++++- rdfind.1 | 17 ++++++++++------- rdfind.cc | 6 +++++- 4 files changed, 35 insertions(+), 11 deletions(-) diff --git a/Rdutil.cc b/Rdutil.cc index f098d2c..891b320 100644 --- a/Rdutil.cc +++ b/Rdutil.cc @@ -538,18 +538,32 @@ Rdutil::saveablespace(std::ostream& out) const int Rdutil::fillwithbytes(enum Fileinfo::readtobuffermode type, enum Fileinfo::readtobuffermode lasttype, - const long nsecsleep) + const long nsecsleep, + std::ostream* out) { // first sort on inode (to read efficiently from the hard drive) sortOnDeviceAndInode(); const auto duration = std::chrono::nanoseconds{ nsecsleep }; - + const auto size = m_list.size(); + std::size_t count = 0; + std::size_t percent = 0; + if (out) { + (*out) << std::endl << "0 %\r"; + } for (auto& elem : m_list) { elem.fillwithbytes(type, lasttype); + if (out && (100*(++count))/size != percent) { + percent = (100*(count))/size; + (*out) << percent << " %\r"; + out->flush(); + } if (nsecsleep > 0) { std::this_thread::sleep_for(duration); } } + if (out) { + (*out) << std::endl; + } return 0; } diff --git a/Rdutil.hh b/Rdutil.hh index b39e2e9..fb94928 100644 --- a/Rdutil.hh +++ b/Rdutil.hh @@ -85,10 +85,13 @@ public: // and file is read anyway. // if there is trouble with too much disk reading, sleeping for nsecsleep // nanoseconds can be made between each file. + // if out is provided, the progress in % will be written to it (overwriting + // each line with \r ) int fillwithbytes(enum Fileinfo::readtobuffermode type, enum Fileinfo::readtobuffermode lasttype = Fileinfo::readtobuffermode::NOT_DEFINED, - long nsecsleep = 0); + long nsecsleep = 0, + std::ostream* out = nullptr); /// make symlinks of duplicates. 
std::size_t makesymlinks(bool dryrun) const; diff --git a/rdfind.1 b/rdfind.1 index d390370..47b1da8 100644 --- a/rdfind.1 +++ b/rdfind.1 @@ -7,7 +7,7 @@ .SH NAME rdfind \- finds duplicate files .SH SYNOPSIS -.B rdfind [ options ] +.B rdfind [ options ] .I directory1 | file1 .B [ .I directory2 | file2 @@ -16,7 +16,7 @@ rdfind \- finds duplicate files .B rdfind finds duplicate files across and/or within several directories. It calculates checksum only if necessary. -rdfind runs in O(Nlog(N)) time with N being the number of files. +rdfind runs in O(Nlog(N)) time with N being the number of files. If two (or more) equal files are found, the program decides which of them is the original and the rest are considered duplicates. This @@ -24,14 +24,14 @@ is done by ranking the files to each other and deciding which has the highest rank. See section RANKING for details. By default, no action is taken besides creating a file with the -detected files and showing the possible amount of saved space. +detected files and showing the possible amount of saved space. If you need better control over the ranking than given, you can use some preprocessor which sorts the file names in desired order and then run the program using xargs. See examples below for how to use find and xargs in conjunction with rdfind. -To include files or directories that have names starting with -, use +To include files or directories that have names starting with -, use rdfind ./- to not confuse them with options. .SH RANKING @@ -39,7 +39,7 @@ Given two or more equal files, the one with the highest rank is selected to be the original and the rest are duplicates. The rules of ranking are given below, where the rules are executed from start until an original has been found. Given two files A and B which have equal -size and content, the ranking is as follows: +size and content, the ranking is as follows: If A was found while scanning an input argument earlier than than B, A is higher ranked. 
@@ -109,12 +109,15 @@ General options: .BR \-sleep " " \fIX\fRms Sleeps X milliseconds between reading each file, to reduce load. Default is 0 (no sleep). Note that only a few values are -supported at present: 0,1-5,10,25,50,100 milliseconds. +supported at present: 0,1-5,10,25,50,100 milliseconds. .TP .BR \-n ", " \-dryrun " " \fItrue\fR|\fIfalse\fR Displays what should have been done, don't actually delete or link anything. Default is false. .TP +.BR \-progress " " \fItrue\fR|\fIfalse\fR +Show progress during elimination. Defaults to false. +.TP .BR \-h ", " \-help ", " \-\-help Displays a brief help message. .TP @@ -145,7 +148,7 @@ DUPTYPE_WITHIN_SAME_TREE files in the same tree (found when processing the directory in the same input argument as the original) DUPTYPE_OUTSIDE_TREE the file is found during processing another input -argument than the original. +argument than the original. .SH ENVIRONMENT .SH DIAGNOSTICS .SH EXIT VALUES diff --git a/rdfind.cc b/rdfind.cc index facdda7..fa1fdff 100644 --- a/rdfind.cc +++ b/rdfind.cc @@ -71,6 +71,7 @@ usage() << " -makeresultsfile (true)| false makes a results file\n" << " -outputname name sets the results file name to \"name\" " "(default results.txt)\n" + << " -progress true |(false) show progress\n" << " -deleteduplicates true |(false) delete duplicate files\n" << " -sleep Xms sleep for X milliseconds between " "file reads.\n" @@ -108,6 +109,7 @@ struct Options bool deterministic = true; // be independent of filesystem order long nsecsleep = 0; // number of nanoseconds to sleep between each file read. std::string resultsfile = "results.txt"; // results file name. 
+ bool show_progress = false; //show progress }; Options @@ -134,6 +136,8 @@ parseOptions(Parser& parser) o.makehardlinks = parser.get_parsed_bool(); } else if (parser.try_parse_bool("-makeresultsfile")) { o.makeresultsfile = parser.get_parsed_bool(); + } else if (parser.try_parse_bool("-progress")) { + o.show_progress = parser.get_parsed_bool(); } else if (parser.try_parse_string("-outputname")) { o.resultsfile = parser.get_parsed_string(); } else if (parser.try_parse_bool("-ignoreempty")) { @@ -373,7 +377,7 @@ main(int narg, const char* argv[]) << it->second << ": " << std::flush; // read bytes (destroys the sorting, for disk reading efficiency) - gswd.fillwithbytes(it[0].first, it[-1].first, o.nsecsleep); + gswd.fillwithbytes(it[0].first, it[-1].first, o.nsecsleep, o.show_progress ? &std::cout : nullptr); // remove non-duplicates std::cout << "removed " << gswd.removeUniqSizeAndBuffer() From 92d3b0d3851a49c61ccda67be3b11cc965f30bf9 Mon Sep 17 00:00:00 2001 From: Thomas Oster Date: Tue, 21 Jan 2020 23:32:42 +0100 Subject: [PATCH 2/2] Enhancement: Output progress in percent with two decimal places and flush after outputting 0% --- Rdutil.cc | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Rdutil.cc b/Rdutil.cc index 891b320..d8baedb 100644 --- a/Rdutil.cc +++ b/Rdutil.cc @@ -14,6 +14,7 @@ #include #include <fstream> //for file writing #include <iostream> //for std::cerr +#include <iomanip> //for number output #include <ostream> //for output #include <string> //for easier passing of string arguments #include <thread> //sleep @@ -547,17 +548,20 @@ Rdutil::fillwithbytes(enum Fileinfo::readtobuffermode type, const auto duration = std::chrono::nanoseconds{ nsecsleep }; const auto size = m_list.size(); std::size_t count = 0; - std::size_t percent = 0; + std::size_t per_ten_thousand = 0; if (out) { (*out) << std::endl << "0 %\r"; + //set mode for converting numbers to two decimal places + (*out) << std::fixed << std::setprecision(2); + out->flush(); } for (auto& elem : m_list) { - 
elem.fillwithbytes(type, lasttype); - if (out && (100*(++count))/size != percent) { - percent = (100*(count))/size; - (*out) << percent << " %\r"; + if (out && (10000*(++count))/size != per_ten_thousand) { + per_ten_thousand = (10000*(count))/size; + (*out) << (static_cast<double> (per_ten_thousand)/100.0) << " %\r"; out->flush(); } + elem.fillwithbytes(type, lasttype); if (nsecsleep > 0) { std::this_thread::sleep_for(duration); }