robfrawley · robfrawley · Jan 6, 2020 · Jan 21, 2020
diff --git a/Rdutil.cc b/Rdutil.cc
@@ -14,6 +14,7 @@
 #include <cstring>
 #include <fstream>  //for file writing
 #include <iostream> //for std::cerr
+#include <iomanip> //for number output
 #include <ostream>  //for output
 #include <string>   //for easier passing of string arguments
 #include <thread>   //sleep
@@ -538,18 +539,57 @@ Rdutil::saveablespace(std::ostream& out) const
 int
 Rdutil::fillwithbytes(enum Fileinfo::readtobuffermode type,
                       enum Fileinfo::readtobuffermode lasttype,
-                      const long nsecsleep)
+                      const long nsecsleep,
+                      std::ostream* out)
 {
+  // Calls fillwithbytes(type, lasttype) on every element of m_list, sleeping
+  // nsecsleep nanoseconds after each one when nsecsleep > 0. If out is
+  // non-null, progress through the list is written to it as a percentage,
+  // each update overwriting the previous one via \r, and the stream's
+  // formatting flags and precision are restored before returning.
+  // Always returns 0.
   // first sort on inode (to read efficiently from the hard drive)
   sortOnDeviceAndInode();
 
   const auto duration = std::chrono::nanoseconds{ nsecsleep };
-
+  // progress is counted in units of 0.01 % so the line is only rewritten when
+  // the displayed two-decimal figure changes. unsigned long long (at least 64
+  // bits) keeps 10000*count from wrapping where std::size_t is 32 bits wide.
+  const unsigned long long size = m_list.size();
+  unsigned long long count = 0;
+  unsigned long long per_ten_thousand = 0;
+  // formatting state of *out, remembered so it can be put back before returning
+  std::ios_base::fmtflags saved_flags{};
+  std::streamsize saved_precision = 0;
+  if (out) {
+    saved_flags = out->flags();
+    saved_precision = out->precision();
+    (*out) << std::endl << "0 %\r";
+    // set mode for converting numbers to two decimal places
+    (*out) << std::fixed << std::setprecision(2);
+    out->flush();
+  }
   for (auto& elem : m_list) {
+    if (out) {
+      ++count;
+      // size is non-zero here: the loop body only runs for a non-empty list
+      const auto current = (10000ULL * count) / size;
+      if (current != per_ten_thousand) {
+        per_ten_thousand = current;
+        (*out) << (static_cast<double>(per_ten_thousand) / 100.0) << " %\r";
+        out->flush();
+      }
+    }
     elem.fillwithbytes(type, lasttype);
     if (nsecsleep > 0) {
       std::this_thread::sleep_for(duration);
     }
   }
+  if (out) {
+    (*out) << std::endl;
+    // hand the stream back with the formatting it arrived with
+    out->flags(saved_flags);
+    out->precision(saved_precision);
+  }
   return 0;
 }
diff --git a/Rdutil.hh b/Rdutil.hh
@@ -85,10 +85,13 @@ public:
   // and file is read anyway.
   // if there is trouble with too much disk reading, sleeping for nsecsleep
   // nanoseconds can be made between each file.
+  // if out is non-null, progress in percent is written to it, each update
+  // overwriting the previous one via \r. nullptr (the default) disables it.
   int fillwithbytes(enum Fileinfo::readtobuffermode type,
                     enum Fileinfo::readtobuffermode lasttype =
                       Fileinfo::readtobuffermode::NOT_DEFINED,
-                    long nsecsleep = 0);
+                    long nsecsleep = 0,
+                    std::ostream* out = nullptr);
 
   /// make symlinks of duplicates.
   std::size_t makesymlinks(bool dryrun) const;

diff --git a/rdfind.1 b/rdfind.1
@@ -7,7 +7,7 @@
 .SH NAME
 rdfind \- finds duplicate files
 .SH SYNOPSIS
-.B rdfind [ options ] 
+.B rdfind [ options ]
 .I directory1 | file1
 .B [
 .I directory2 | file2
@@ -16,30 +16,30 @@ rdfind \- finds duplicate files
 .B rdfind
 finds duplicate files across and/or within several directories. It calculates
 checksum only if necessary.
-rdfind runs in O(Nlog(N)) time with N being the number of files. 
+rdfind runs in O(Nlog(N)) time with N being the number of files.
 
 If two (or more) equal files are found, the program decides which of
 them is the original and the rest are considered duplicates. This
 is done by ranking the files to each other and deciding which has the
 highest rank. See section RANKING for details.
 
 By default, no action is taken besides creating a file with the
-detected files and showing the possible amount of saved space. 
+detected files and showing the possible amount of saved space.
 
 If you need better control over the ranking than given, you can use
 some preprocessor which sorts the file names in desired order and then
 run the program using xargs. See examples below for how to use find
 and xargs in conjunction with rdfind.
 
-To include files or directories that have names starting with -, use 
+To include files or directories that have names starting with -, use
 rdfind ./- to not confuse them with options.
 
 .SH RANKING
 Given two or more equal files, the one with the highest rank is
 selected to be the original and the rest are duplicates. The rules of
 ranking are given below, where the rules are executed from start until
 an original has been found. Given two files A and B which have equal
-size and content, the ranking is as follows: 
+size and content, the ranking is as follows:
 
-If A was found while scanning an input argument earlier than than B, A
+If A was found while scanning an input argument earlier than B, A
 is higher ranked.
@@ -109,12 +109,15 @@ General options:
 .BR \-sleep " " \fIX\fRms
 Sleeps X milliseconds between reading each file, to reduce
 load. Default is 0 (no sleep). Note that only a few values are
-supported at present: 0,1-5,10,25,50,100 milliseconds. 
+supported at present: 0,1-5,10,25,50,100 milliseconds.
 .TP
 .BR \-n ", " \-dryrun " " \fItrue\fR|\fIfalse\fR
 Displays what should have been done, don't actually delete or link
 anything. Default is false.
 .TP
+.BR \-progress " " \fItrue\fR|\fIfalse\fR
+Shows progress in percent during each elimination step. Default is false.
+.TP
 .BR \-h ", " \-help ", " \-\-help
 Displays a brief help message.
 .TP
@@ -145,7 +148,7 @@ DUPTYPE_WITHIN_SAME_TREE files in the same tree (found when processing
 the directory in the same input argument as the original)
 
 DUPTYPE_OUTSIDE_TREE the file is found during processing another input
-argument than the original. 
+argument than the original.
 .SH ENVIRONMENT
 .SH DIAGNOSTICS
 .SH EXIT VALUES

diff --git a/rdfind.cc b/rdfind.cc
@@ -71,6 +71,7 @@ usage()
     << " -makeresultsfile  (true)| false  makes a results file\n"
     << " -outputname  name  sets the results file name to \"name\" "
        "(default results.txt)\n"
+    << " -progress          true |(false) show progress\n"
     << " -deleteduplicates  true |(false) delete duplicate files\n"
     << " -sleep              Xms          sleep for X milliseconds between "
        "file reads.\n"
@@ -108,6 +109,7 @@ struct Options
   bool deterministic = true; // be independent of filesystem order
   long nsecsleep = 0; // number of nanoseconds to sleep between each file read.
   std::string resultsfile = "results.txt"; // results file name.
+  bool show_progress = false;  //show progress
 };
 
 Options
@@ -134,6 +136,8 @@ parseOptions(Parser& parser)
       o.makehardlinks = parser.get_parsed_bool();
     } else if (parser.try_parse_bool("-makeresultsfile")) {
       o.makeresultsfile = parser.get_parsed_bool();
+    } else if (parser.try_parse_bool("-progress")) {
+      o.show_progress = parser.get_parsed_bool();
     } else if (parser.try_parse_string("-outputname")) {
       o.resultsfile = parser.get_parsed_string();
     } else if (parser.try_parse_bool("-ignoreempty")) {
@@ -373,7 +377,7 @@ main(int narg, const char* argv[])
               << it->second << ": " << std::flush;
 
     // read bytes (destroys the sorting, for disk reading efficiency)
-    gswd.fillwithbytes(it[0].first, it[-1].first, o.nsecsleep);
+    gswd.fillwithbytes(it[0].first, it[-1].first, o.nsecsleep, o.show_progress ? &std::cout : nullptr);
 
     // remove non-duplicates
     std::cout << "removed " << gswd.removeUniqSizeAndBuffer()