From f57b5f26394b4967c5da9e25d0196e121400b7b5 Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Sun, 9 Feb 2025 05:32:53 +0300
Subject: [PATCH 01/12] Move pretty_progress() of sampling steps

... from sample() to sample_k_diffusion()
---
 denoiser.hpp | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/denoiser.hpp b/denoiser.hpp
index 975699d22..ef61c883e 100644
--- a/denoiser.hpp
+++ b/denoiser.hpp
@@ -468,6 +468,13 @@ struct FluxFlowDenoiser : public Denoiser {
 
 typedef std::function<ggml_tensor*(ggml_tensor*, float, int)> denoise_cb_t;
 
+static inline void show_step(int i0, int im, int64_t* t0) {
+    int64_t t1 = ggml_time_us();
+    pretty_progress(i0 + 1, im, (t1 - (*t0)) / 1000000.f);
+//    LOG_INFO("step %d sampling completed taking %.2fs", i0, (t1 - t0) * 1.0f / 1000000);
+    *t0 = t1;
+}
+
 // k diffusion reverse ODE: dx = (x - D(x;\sigma)) / \sigma dt; \sigma(t) = t
 static void sample_k_diffusion(sample_method_t method,
                                denoise_cb_t model,
@@ -476,6 +483,8 @@ static void sample_k_diffusion(sample_method_t method,
                                std::vector<float> sigmas,
                                std::shared_ptr<RNG> rng) {
     size_t steps = sigmas.size() - 1;
+    int64_t t0 = ggml_time_us();
+
     // sample_euler_ancestral
     switch (method) {
         case EULER_A: {
@@ -529,6 +538,7 @@ static void sample_k_diffusion(sample_method_t method,
                         }
                     }
                 }
+                show_step(i, steps, &t0);
             }
         } break;
         case EULER:  // Implemented without any sigma churn
@@ -562,6 +572,7 @@ static void sample_k_diffusion(sample_method_t method,
                         vec_x[j] = vec_x[j] + vec_d[j] * dt;
                     }
                 }
+                show_step(i, steps, &t0);
             }
         } break;
         case HEUN: {
@@ -612,6 +623,7 @@ static void sample_k_diffusion(sample_method_t method,
                         vec_x[j] = vec_x[j] + vec_d[j] * dt;
                     }
                 }
+                show_step(i, steps, &t0);
             }
         } break;
         case DPM2: {
@@ -663,6 +675,7 @@ static void sample_k_diffusion(sample_method_t method,
                         vec_x[j] = vec_x[j] + d2 * dt_2;
                     }
                 }
+                show_step(i, steps, &t0);
             }
 
         } break;
@@ -737,6 +750,7 @@ static void sample_k_diffusion(sample_method_t method,
                         }
                     }
                 }
+                show_step(i, steps, &t0);
             }
         } break;
         case DPMPP2M:  // DPM++ (2M) from Karras et al (2022)
@@ -776,6 +790,7 @@ static void sample_k_diffusion(sample_method_t method,
                 for (int j = 0; j < ggml_nelements(x); j++) {
                     vec_old_denoised[j] = vec_denoised[j];
                 }
+                show_step(i, steps, &t0);
             }
         } break;
         case DPMPP2Mv2:  // Modified DPM++ (2M) from https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/8457
@@ -819,6 +834,7 @@ static void sample_k_diffusion(sample_method_t method,
                 for (int j = 0; j < ggml_nelements(x); j++) {
                     vec_old_denoised[j] = vec_denoised[j];
                 }
+                show_step(i, steps, &t0);
             }
         } break;
         case IPNDM:  // iPNDM sampler from https://github.com/zju-pi/diff-sampler/tree/main/diff-solvers-main
@@ -894,6 +910,7 @@ static void sample_k_diffusion(sample_method_t method,
                 } else {
                     buffer_model.push_back(d_cur);
                 }
+                show_step(i, steps, &t0);
             }
         } break;
         case IPNDM_V:  // iPNDM_v sampler from https://github.com/zju-pi/diff-sampler/tree/main/diff-solvers-main
@@ -968,6 +985,7 @@ static void sample_k_diffusion(sample_method_t method,
 
                 // Prepare the next d tensor
                 d_cur = ggml_dup_tensor(work_ctx, x_next);
+                show_step(i, steps, &t0);
             }
         } break;
         case LCM:  // Latent Consistency Models

From fc3b4ee72d9e4d303a3757ccad34b779f0a4d8bd Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Sun, 9 Feb 2025 05:41:15 +0300
Subject: [PATCH 02/12] Forgot LCM sampler

---
 denoiser.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/denoiser.hpp b/denoiser.hpp
index ef61c883e..850785c76 100644
--- a/denoiser.hpp
+++ b/denoiser.hpp
@@ -1021,6 +1021,7 @@ static void sample_k_diffusion(sample_method_t method,
                         }
                     }
                 }
+                show_step(i, steps, &t0);
             }
         } break;
 

From a0cc648e35aad7f4d3665b98b954c28253442775 Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Sun, 9 Feb 2025 05:47:04 +0300
Subject: [PATCH 03/12] Remove pretty_progress() from sample()

---
 stable-diffusion.cpp | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index cea12e6f2..5fc3ffa6a 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -847,7 +847,6 @@ class StableDiffusionGGML {
             if (step == 1) {
                 pretty_progress(0, (int)steps, 0);
             }
-            int64_t t0 = ggml_time_us();
 
             std::vector<float> scaling = denoiser->get_scalings(sigma);
             GGML_ASSERT(scaling.size() == 3);
@@ -966,11 +965,6 @@ class StableDiffusionGGML {
                 // denoised = (v * c_out + input * c_skip) or (input + eps * c_out)
                 vec_denoised[i] = latent_result * c_out + vec_input[i] * c_skip;
             }
-            int64_t t1 = ggml_time_us();
-            if (step > 0) {
-                pretty_progress(step, (int)steps, (t1 - t0) / 1000000.f);
-                // LOG_INFO("step %d sampling completed taking %.2fs", step, (t1 - t0) * 1.0f / 1000000);
-            }
             if (noise_mask != nullptr) {
                 for (int64_t x = 0; x < denoised->ne[0]; x++) {
                     for (int64_t y = 0; y < denoised->ne[1]; y++) {

From 41ae63f9a33117478a19adad9a1dc1e8e88a8de2 Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Sun, 9 Feb 2025 11:37:41 +0300
Subject: [PATCH 04/12] Dereference t0 in the commented-out LOG_INFO call too

---
 denoiser.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/denoiser.hpp b/denoiser.hpp
index 850785c76..02f64fbc7 100644
--- a/denoiser.hpp
+++ b/denoiser.hpp
@@ -470,8 +470,8 @@ typedef std::function<ggml_tensor*(ggml_tensor*, float, int)> denoise_cb_t;
 
 static inline void show_step(int i0, int im, int64_t* t0) {
     int64_t t1 = ggml_time_us();
-    pretty_progress(i0 + 1, im, (t1 - (*t0)) / 1000000.f);
-//    LOG_INFO("step %d sampling completed taking %.2fs", i0, (t1 - t0) * 1.0f / 1000000);
+    pretty_progress(i0 + 1, im, (t1 - *t0) / 1000000.f);
+//    LOG_INFO("step %d sampling completed taking %.2fs", i0 + 1, (t1 - *t0) * 1.0f / 1000000);
     *t0 = t1;
 }
 

From 50f355ddddb726de280cb4ebde363a8f5e8ca6b7 Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Wed, 12 Feb 2025 17:22:03 +0300
Subject: [PATCH 05/12] Add pretty_progress() overload that shows remaining time

---
 util.cpp | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/util.cpp b/util.cpp
index 01c01200e..2bc00decd 100644
--- a/util.cpp
+++ b/util.cpp
@@ -357,6 +357,39 @@ void pretty_progress(int step, int steps, float time) {
     }
 }
 
+#ifdef SD_SHOW_REMAINING_TIME
+void pretty_progress(int step, int steps, float time, float left) {
+    if (sd_progress_cb) {
+        sd_progress_cb(step, steps, time, sd_progress_cb_data);
+        return;
+    }
+    if (step == 0) {
+        return;
+    }
+    std::string progress = "  |";
+    int max_progress     = 50;
+    int32_t current      = (int32_t)(step * 1.f * max_progress / steps);
+    for (int i = 0; i < 50; i++) {
+        if (i > current) {
+            progress += " ";
+        } else if (i == current && i != max_progress - 1) {
+            progress += ">";
+        } else {
+            progress += "=";
+        }
+    }
+    progress += "|";
+    printf(time > 1.0f ? "\r%s %i/%i - %.2fs/it" : "\r%s %i/%i - %.2fit/s\033[K",
+           progress.c_str(), step, steps,
+           time > 1.0f || time == 0 ? time : (1.0f / time));
+    printf(", %.0fm %.2fs left  ", left / 60, fmod(left, 60));
+    fflush(stdout);  // for linux
+    if (step == steps) {
+        printf("\n");
+    }
+}
+#endif  // SD_SHOW_REMAINING_TIME
+
 std::string ltrim(const std::string& s) {
     auto it = std::find_if(s.begin(), s.end(), [](int ch) {
         return !std::isspace(ch);
@@ -686,4 +719,4 @@ std::vector<std::pair<std::string, float>> parse_prompt_attention(const std::str
     }
 
     return res;
-}
\ No newline at end of file
+}

From 6d4c5caf39930a1d169ecd69dacf66171ec2e24b Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Wed, 12 Feb 2025 17:24:15 +0300
Subject: [PATCH 06/12] Add pretty_progress(time) header

---
 util.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/util.h b/util.h
index 14fa812e5..344159e0b 100644
--- a/util.h
+++ b/util.h
@@ -47,6 +47,9 @@ sd_image_f32_t clip_preprocess(sd_image_f32_t image, int size);
 std::string path_join(const std::string& p1, const std::string& p2);
 std::vector<std::string> splitString(const std::string& str, char delimiter);
 void pretty_progress(int step, int steps, float time);
+#ifdef SD_SHOW_REMAINING_TIME
+void pretty_progress(int step, int steps, float time, float left);
+#endif  // SD_SHOW_REMAINING_TIME
 
 void log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...);
 

From 7a9a6fab19a9667437ef39ad1ef0536b3176a1ea Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Wed, 12 Feb 2025 17:27:12 +0300
Subject: [PATCH 07/12] Use pretty_progress(time_remainder)

---
 denoiser.hpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/denoiser.hpp b/denoiser.hpp
index 02f64fbc7..9bdd46111 100644
--- a/denoiser.hpp
+++ b/denoiser.hpp
@@ -469,10 +469,17 @@ struct FluxFlowDenoiser : public Denoiser {
 typedef std::function<ggml_tensor*(ggml_tensor*, float, int)> denoise_cb_t;
 
 static inline void show_step(int i0, int im, int64_t* t0) {
+#ifdef SD_SHOW_REMAINING_TIME
+    int i = i0 + 1;
+    float t1 = (ggml_time_us() - *t0) / 1000000.f / i;
+    pretty_progress(i, im, t1, t1 * (im - i));
+//    LOG_INFO("step %d sampling completed taking %.2fs", i, (t1 - *t0) * 1.0f / 1000000 / i);
+#else  // SD_SHOW_REMAINING_TIME
     int64_t t1 = ggml_time_us();
     pretty_progress(i0 + 1, im, (t1 - *t0) / 1000000.f);
 //    LOG_INFO("step %d sampling completed taking %.2fs", i0 + 1, (t1 - *t0) * 1.0f / 1000000);
     *t0 = t1;
+#endif  // SD_SHOW_REMAINING_TIME
 }
 
 // k diffusion reverse ODE: dx = (x - D(x;\sigma)) / \sigma dt; \sigma(t) = t

From 2f3ef394e8fc35a0e2c09a1a55c429b0240c6e1d Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Wed, 12 Feb 2025 17:30:12 +0300
Subject: [PATCH 08/12] Add CMake option to show remaining time

---
 CMakeLists.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7b7cc6c47..b9580c6aa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -32,6 +32,7 @@ option(SD_SYCL                       "sd: sycl backend" OFF)
 option(SD_MUSA                       "sd: musa backend" OFF)
 option(SD_FAST_SOFTMAX               "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
 option(SD_BUILD_SHARED_LIBS          "sd: build shared libs" OFF)
+option(SD_SHOW_REMAINING_TIME        "sd: show remaining and average sampling time" OFF)
 #option(SD_BUILD_SERVER               "sd: build server example"                           ON)
 
 if(SD_CUDA)
@@ -93,6 +94,11 @@ else()
     add_library(${SD_LIB} STATIC ${SD_LIB_SOURCES})
 endif()
 
+if (SD_SHOW_REMAINING_TIME)
+    message("-- Display remaining and average sampling time")
+    add_definitions(-DSD_SHOW_REMAINING_TIME)
+endif ()
+
 if(SD_SYCL)
     message("-- Use SYCL as backend stable-diffusion")
     set(GGML_SYCL ON)

From afab8fec8348e949fe96c475057bc6fda7a13ffc Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Fri, 14 Feb 2025 14:57:50 +0300
Subject: [PATCH 09/12] "%.0f" rounds to nearest, which is wrong for minutes

The minutes value must be truncated with floor() instead.
Also clear the remaining-time text once sampling has finished.
---
 util.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/util.cpp b/util.cpp
index 2bc00decd..17e70206e 100644
--- a/util.cpp
+++ b/util.cpp
@@ -382,7 +382,13 @@ void pretty_progress(int step, int steps, float time, float left) {
     printf(time > 1.0f ? "\r%s %i/%i - %.2fs/it" : "\r%s %i/%i - %.2fit/s\033[K",
            progress.c_str(), step, steps,
            time > 1.0f || time == 0 ? time : (1.0f / time));
-    printf(", %.0fm %.2fs left  ", left / 60, fmod(left, 60));
+    if (left >= 60.0f) {
+        printf(", %.0fm %.2fs left         \b\b\b\b\b\b\b\b\b", floor(left / 60), fmod(left, 60));
+    } else if (left > 0) {
+        printf(", %.2fs left               \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", left);
+    } else {
+        printf("                           \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", left);
+    }
     fflush(stdout);  // for linux
     if (step == steps) {
         printf("\n");

From 509beaeae1bd0bc340754b927e02e6b90cb436de Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Fri, 14 Feb 2025 15:02:56 +0300
Subject: [PATCH 10/12] Remove needless printf argument

---
 util.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util.cpp b/util.cpp
index 17e70206e..6d3f20a6a 100644
--- a/util.cpp
+++ b/util.cpp
@@ -387,7 +387,7 @@ void pretty_progress(int step, int steps, float time, float left) {
     } else if (left > 0) {
         printf(", %.2fs left               \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", left);
     } else {
-        printf("                           \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", left);
+        printf("                           \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b");
     }
     fflush(stdout);  // for linux
     if (step == steps) {

From 55752634edb899096b2399446aa2dc04c0f4ccf3 Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Tue, 18 Feb 2025 13:57:04 +0300
Subject: [PATCH 11/12] "1m 60.00s left" is also wrong

The real value is e.g. 1m 59.997s, which "%.2f" rounds up to 60.00,
so clamp the displayed seconds to 59.99.
---
 util.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/util.cpp b/util.cpp
index 6d3f20a6a..8f421c063 100644
--- a/util.cpp
+++ b/util.cpp
@@ -383,7 +383,11 @@ void pretty_progress(int step, int steps, float time, float left) {
            progress.c_str(), step, steps,
            time > 1.0f || time == 0 ? time : (1.0f / time));
     if (left >= 60.0f) {
-        printf(", %.0fm %.2fs left         \b\b\b\b\b\b\b\b\b", floor(left / 60), fmod(left, 60));
+        /* the padding spaces are followed by the same number of backspaces */
+        printf(", %.0fm %.2fs left         \b\b\b\b\b\b\b\b\b",
+               /* cap the seconds at 59.99 so "%.2f" cannot round e.g. 59.997 up to "60.00";
+                  fmodf always returns float, so both std::min() arguments have the same type */
+               floor(left / 60.0f), std::min(59.99f, fmodf(left, 60.0f)));
     } else if (left > 0) {
         printf(", %.2fs left               \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", left);
     } else {

From deaeaf95cc9375ec11f61a40273e4986569db742 Mon Sep 17 00:00:00 2001
From: vmobilis <75476228+vmobilis@users.noreply.github.com>
Date: Wed, 26 Feb 2025 00:33:55 +0300
Subject: [PATCH 12/12] Sync with @yslai commit

---
 denoiser.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/denoiser.hpp b/denoiser.hpp
index 64b83df44..263b574e9 100644
--- a/denoiser.hpp
+++ b/denoiser.hpp
@@ -1224,6 +1224,7 @@ static void sample_k_diffusion(sample_method_t method,
                 // needs to be prescaled again, since k-diffusion's
                 // model() differes from the bare U-net F_theta by the
                 // factor c_in.
+                show_step(i, steps, &t0);
             }
         } break;
         case TCD:  // Strategic Stochastic Sampling (Algorithm 4) in
@@ -1398,6 +1399,7 @@ static void sample_k_diffusion(sample_method_t method,
                             vec_noise[j];
                     }
                 }
+                show_step(i, steps, &t0);
             }
         } break;