From a28a1c658978082b6e3f08d46e8396979fb3fad4 Mon Sep 17 00:00:00 2001
From: Thomas Oster <thomas.oster@upstart-it.de>
Date: Tue, 7 Jan 2020 00:15:52 +0100
Subject: [PATCH 1/2] Added a -progress option to display the progress during
 elimination

---
 Rdutil.cc | 18 ++++++++++++++++--
 Rdutil.hh |  5 ++++-
 rdfind.1  | 17 ++++++++++-------
 rdfind.cc |  6 +++++-
 4 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/Rdutil.cc b/Rdutil.cc
index f098d2c..891b320 100644
--- a/Rdutil.cc
+++ b/Rdutil.cc
@@ -538,18 +538,32 @@ Rdutil::saveablespace(std::ostream& out) const
 int
 Rdutil::fillwithbytes(enum Fileinfo::readtobuffermode type,
                       enum Fileinfo::readtobuffermode lasttype,
-                      const long nsecsleep)
+                      const long nsecsleep,
+                      std::ostream* out)
 {
   // first sort on inode (to read efficiently from the hard drive)
   sortOnDeviceAndInode();
 
   const auto duration = std::chrono::nanoseconds{ nsecsleep };
-
+  const auto size = m_list.size();
+  std::size_t count = 0;
+  std::size_t percent = 0;
+  if (out) {
+    (*out) << std::endl << "0 %\r";
+  }
   for (auto& elem : m_list) {
     elem.fillwithbytes(type, lasttype);
+    if (out && (100*(++count))/size != percent) {
+      percent = (100*(count))/size;
+      (*out) << percent << " %\r";
+      out->flush();
+    }
     if (nsecsleep > 0) {
       std::this_thread::sleep_for(duration);
     }
   }
+  if (out) {
+    (*out) << std::endl;
+  }
   return 0;
 }
diff --git a/Rdutil.hh b/Rdutil.hh
index b39e2e9..fb94928 100644
--- a/Rdutil.hh
+++ b/Rdutil.hh
@@ -85,10 +85,13 @@ public:
   // and file is read anyway.
   // if there is trouble with too much disk reading, sleeping for nsecsleep
   // nanoseconds can be made between each file.
+  // if out is non-null, the progress in percent is written to it; each
+  // update ends with \r so that it overwrites the previous one in place.
   int fillwithbytes(enum Fileinfo::readtobuffermode type,
                     enum Fileinfo::readtobuffermode lasttype =
                       Fileinfo::readtobuffermode::NOT_DEFINED,
-                    long nsecsleep = 0);
+                    long nsecsleep = 0,
+                    std::ostream* out = nullptr);
 
   /// make symlinks of duplicates.
   std::size_t makesymlinks(bool dryrun) const;
diff --git a/rdfind.1 b/rdfind.1
index d390370..47b1da8 100644
--- a/rdfind.1
+++ b/rdfind.1
@@ -7,7 +7,7 @@
 .SH NAME
 rdfind \- finds duplicate files
 .SH SYNOPSIS
-.B rdfind [ options ] 
+.B rdfind [ options ]
 .I directory1 | file1
 .B [
 .I directory2 | file2
@@ -16,7 +16,7 @@ rdfind \- finds duplicate files
 .B rdfind
 finds duplicate files across and/or within several directories. It calculates
 checksum only if necessary.
-rdfind runs in O(Nlog(N)) time with N being the number of files. 
+rdfind runs in O(Nlog(N)) time with N being the number of files.
 
 If two (or more) equal files are found, the program decides which of
 them is the original and the rest are considered duplicates. This
@@ -24,14 +24,14 @@ is done by ranking the files to each other and deciding which has the
 highest rank. See section RANKING for details.
 
 By default, no action is taken besides creating a file with the
-detected files and showing the possible amount of saved space. 
+detected files and showing the possible amount of saved space.
 
 If you need better control over the ranking than given, you can use
 some preprocessor which sorts the file names in desired order and then
 run the program using xargs. See examples below for how to use find
 and xargs in conjunction with rdfind.
 
-To include files or directories that have names starting with -, use 
+To include files or directories that have names starting with -, use
 rdfind ./- to not confuse them with options.
 
 .SH RANKING
@@ -39,7 +39,7 @@ Given two or more equal files, the one with the highest rank is
 selected to be the original and the rest are duplicates. The rules of
 ranking are given below, where the rules are executed from start until
 an original has been found. Given two files A and B which have equal
-size and content, the ranking is as follows: 
+size and content, the ranking is as follows:
 
 If A was found while scanning an input argument earlier than than B, A
 is higher ranked.
@@ -109,12 +109,15 @@ General options:
 .BR \-sleep " " \fIX\fRms
 Sleeps X milliseconds between reading each file, to reduce
 load. Default is 0 (no sleep). Note that only a few values are
-supported at present: 0,1-5,10,25,50,100 milliseconds. 
+supported at present: 0,1-5,10,25,50,100 milliseconds.
 .TP
 .BR \-n ", " \-dryrun " " \fItrue\fR|\fIfalse\fR
 Displays what should have been done, don't actually delete or link
 anything. Default is false.
 .TP
+.BR \-progress " " \fItrue\fR|\fIfalse\fR
+Shows the progress in percent while files are read during elimination. Default is false.
+.TP
 .BR \-h ", " \-help ", " \-\-help
 Displays a brief help message.
 .TP
@@ -145,7 +148,7 @@ DUPTYPE_WITHIN_SAME_TREE files in the same tree (found when processing
 the directory in the same input argument as the original)
 
 DUPTYPE_OUTSIDE_TREE the file is found during processing another input
-argument than the original. 
+argument than the original.
 .SH ENVIRONMENT
 .SH DIAGNOSTICS
 .SH EXIT VALUES
diff --git a/rdfind.cc b/rdfind.cc
index facdda7..fa1fdff 100644
--- a/rdfind.cc
+++ b/rdfind.cc
@@ -71,6 +71,7 @@ usage()
     << " -makeresultsfile  (true)| false  makes a results file\n"
     << " -outputname  name  sets the results file name to \"name\" "
        "(default results.txt)\n"
+    << " -progress          true |(false) show progress\n"
     << " -deleteduplicates  true |(false) delete duplicate files\n"
     << " -sleep              Xms          sleep for X milliseconds between "
        "file reads.\n"
@@ -108,6 +109,7 @@ struct Options
   bool deterministic = true; // be independent of filesystem order
   long nsecsleep = 0; // number of nanoseconds to sleep between each file read.
   std::string resultsfile = "results.txt"; // results file name.
+  bool show_progress = false;  //show progress
 };
 
 Options
@@ -134,6 +136,8 @@ parseOptions(Parser& parser)
       o.makehardlinks = parser.get_parsed_bool();
     } else if (parser.try_parse_bool("-makeresultsfile")) {
       o.makeresultsfile = parser.get_parsed_bool();
+    } else if (parser.try_parse_bool("-progress")) {
+      o.show_progress = parser.get_parsed_bool();
     } else if (parser.try_parse_string("-outputname")) {
       o.resultsfile = parser.get_parsed_string();
     } else if (parser.try_parse_bool("-ignoreempty")) {
@@ -373,7 +377,7 @@ main(int narg, const char* argv[])
               << it->second << ": " << std::flush;
 
     // read bytes (destroys the sorting, for disk reading efficiency)
-    gswd.fillwithbytes(it[0].first, it[-1].first, o.nsecsleep);
+    gswd.fillwithbytes(it[0].first, it[-1].first, o.nsecsleep, o.show_progress ? &std::cout : nullptr);
 
     // remove non-duplicates
     std::cout << "removed " << gswd.removeUniqSizeAndBuffer()

From 92d3b0d3851a49c61ccda67be3b11cc965f30bf9 Mon Sep 17 00:00:00 2001
From: Thomas Oster <thomas.oster@upstart-it.de>
Date: Tue, 21 Jan 2020 23:32:42 +0100
Subject: [PATCH 2/2] Enhancement: Output progress in percent with two decimal
 places and flush after outputting 0%

---
 Rdutil.cc | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/Rdutil.cc b/Rdutil.cc
index 891b320..d8baedb 100644
--- a/Rdutil.cc
+++ b/Rdutil.cc
@@ -14,6 +14,7 @@
 #include <cstring>
 #include <fstream>  //for file writing
 #include <iostream> //for std::cerr
+#include <iomanip> //for number output
 #include <ostream>  //for output
 #include <string>   //for easier passing of string arguments
 #include <thread>   //sleep
@@ -547,17 +548,20 @@ Rdutil::fillwithbytes(enum Fileinfo::readtobuffermode type,
   const auto duration = std::chrono::nanoseconds{ nsecsleep };
   const auto size = m_list.size();
   std::size_t count = 0;
-  std::size_t percent = 0;
+  std::size_t per_ten_thousand = 0;
   if (out) {
     (*out) << std::endl << "0 %\r";
+    //set mode for converting numbers to two decimal places
+    (*out) << std::fixed << std::setprecision(2);
+    out->flush();
   }
   for (auto& elem : m_list) {
-    elem.fillwithbytes(type, lasttype);
-    if (out && (100*(++count))/size != percent) {
-      percent = (100*(count))/size;
-      (*out) << percent << " %\r";
+    if (out && (10000*(++count))/size != per_ten_thousand) {
+      per_ten_thousand = (10000*(count))/size;
+      (*out) << (static_cast<double> (per_ten_thousand)/100.0) << " %\r";
       out->flush();
     }
+    elem.fillwithbytes(type, lasttype);
     if (nsecsleep > 0) {
       std::this_thread::sleep_for(duration);
     }