From f57b5f26394b4967c5da9e25d0196e121400b7b5 Mon Sep 17 00:00:00 2001 From: vmobilis <75476228+vmobilis@users.noreply.github.com> Date: Sun, 9 Feb 2025 05:32:53 +0300 Subject: [PATCH 01/12] Move pretty_progress() of sampling steps ... from sample() to sample_k_diffusion() --- denoiser.hpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/denoiser.hpp b/denoiser.hpp index 975699d22..ef61c883e 100644 --- a/denoiser.hpp +++ b/denoiser.hpp @@ -468,6 +468,13 @@ struct FluxFlowDenoiser : public Denoiser { typedef std::function<ggml_tensor*(ggml_tensor*, float, int)> denoise_cb_t; +static inline void show_step(int i0, int im, int64_t* t0) { + int64_t t1 = ggml_time_us(); + pretty_progress(i0 + 1, im, (t1 - (*t0)) / 1000000.f); +// LOG_INFO("step %d sampling completed taking %.2fs", i0, (t1 - t0) * 1.0f / 1000000); + *t0 = t1; +} + // k diffusion reverse ODE: dx = (x - D(x;\sigma)) / \sigma dt; \sigma(t) = t static void sample_k_diffusion(sample_method_t method, denoise_cb_t model, @@ -476,6 +483,8 @@ static void sample_k_diffusion(sample_method_t method, std::vector<float> sigmas, std::shared_ptr<RNG> rng) { size_t steps = sigmas.size() - 1; + int64_t t0 = ggml_time_us(); + // sample_euler_ancestral switch (method) { case EULER_A: { @@ -529,6 +538,7 @@ static void sample_k_diffusion(sample_method_t method, } } } + show_step(i, steps, &t0); } } break; case EULER: // Implemented without any sigma churn @@ -562,6 +572,7 @@ static void sample_k_diffusion(sample_method_t method, vec_x[j] = vec_x[j] + vec_d[j] * dt; } } + show_step(i, steps, &t0); } } break; case HEUN: { @@ -612,6 +623,7 @@ static void sample_k_diffusion(sample_method_t method, vec_x[j] = vec_x[j] + vec_d[j] * dt; } } + show_step(i, steps, &t0); } } break; case DPM2: { @@ -663,6 +675,7 @@ static void sample_k_diffusion(sample_method_t method, vec_x[j] = vec_x[j] + d2 * dt_2; } } + show_step(i, steps, &t0); } } break; @@ -737,6 +750,7 @@ static void sample_k_diffusion(sample_method_t method, } } } + show_step(i, steps, &t0); } } break; case DPMPP2M: // DPM++ (2M) from Karras et al (2022) @@ -776,6 +790,7 @@ static void sample_k_diffusion(sample_method_t method, for (int j = 0; j < ggml_nelements(x); j++) { vec_old_denoised[j] = vec_denoised[j]; } + show_step(i, steps, &t0); } } break; case DPMPP2Mv2: // Modified DPM++ (2M) from https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/8457 @@ -819,6 +834,7 @@ static void sample_k_diffusion(sample_method_t method, for (int j = 0; j < ggml_nelements(x); j++) { vec_old_denoised[j] = vec_denoised[j]; } + show_step(i, steps, &t0); } } break; case IPNDM: // iPNDM sampler from https://github.com/zju-pi/diff-sampler/tree/main/diff-solvers-main @@ -894,6 +910,7 @@ static void sample_k_diffusion(sample_method_t method, } else { buffer_model.push_back(d_cur); } + show_step(i, steps, &t0); } } break; case IPNDM_V: // iPNDM_v sampler from https://github.com/zju-pi/diff-sampler/tree/main/diff-solvers-main @@ -968,6 +985,7 @@ static void sample_k_diffusion(sample_method_t method, // Prepare the next d tensor d_cur = ggml_dup_tensor(work_ctx, x_next); + show_step(i, steps, &t0); } } break; case LCM: // Latent Consistency Models From fc3b4ee72d9e4d303a3757ccad34b779f0a4d8bd Mon Sep 17 00:00:00 2001 From: vmobilis <75476228+vmobilis@users.noreply.github.com> Date: Sun, 9 Feb 2025 05:41:15 +0300 Subject: [PATCH 02/12] Forgot LCM sampler --- denoiser.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/denoiser.hpp b/denoiser.hpp index ef61c883e..850785c76 100644 --- a/denoiser.hpp +++ b/denoiser.hpp @@ -1021,6 +1021,7 @@ static void sample_k_diffusion(sample_method_t method, } } } + show_step(i, steps, &t0); } } break; From a0cc648e35aad7f4d3665b98b954c28253442775 Mon Sep 17 00:00:00 2001 From: vmobilis <75476228+vmobilis@users.noreply.github.com> Date: Sun, 9 Feb 2025 05:47:04 +0300 Subject: [PATCH 03/12] Remove pretty_progress() from sample() --- stable-diffusion.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index cea12e6f2..5fc3ffa6a 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -847,7 +847,6 @@ class StableDiffusionGGML { if (step == 1) { pretty_progress(0, (int)steps, 0); } - int64_t t0 = ggml_time_us(); std::vector<float> scaling = denoiser->get_scalings(sigma); GGML_ASSERT(scaling.size() == 3); @@ -966,11 +965,6 @@ class StableDiffusionGGML { // denoised = (v * c_out + input * c_skip) or (input + eps * c_out) vec_denoised[i] = latent_result * c_out + vec_input[i] * c_skip; } - int64_t t1 = ggml_time_us(); - if (step > 0) { - pretty_progress(step, (int)steps, (t1 - t0) / 1000000.f); - // LOG_INFO("step %d sampling completed taking %.2fs", step, (t1 - t0) * 1.0f / 1000000); - } if (noise_mask != nullptr) { for (int64_t x = 0; x < denoised->ne[0]; x++) { for (int64_t y = 0; y < denoised->ne[1]; y++) { From 41ae63f9a33117478a19adad9a1dc1e8e88a8de2 Mon Sep 17 00:00:00 2001 From: vmobilis <75476228+vmobilis@users.noreply.github.com> Date: Sun, 9 Feb 2025 11:37:41 +0300 Subject: [PATCH 04/12] Dereference t0 also for logger --- denoiser.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/denoiser.hpp b/denoiser.hpp index 850785c76..02f64fbc7 100644 --- a/denoiser.hpp +++ b/denoiser.hpp @@ -470,8 +470,8 @@ typedef std::function<ggml_tensor*(ggml_tensor*, float, int)> denoise_cb_t; static inline void show_step(int i0, int im, int64_t* t0) { int64_t t1 = ggml_time_us(); - pretty_progress(i0 + 1, im, (t1 - (*t0)) / 1000000.f); -// LOG_INFO("step %d sampling completed taking %.2fs", i0, (t1 - t0) * 1.0f / 1000000); + pretty_progress(i0 + 1, im, (t1 - *t0) / 1000000.f); +// LOG_INFO("step %d sampling completed taking %.2fs", i0 + 1, (t1 - *t0) * 1.0f / 1000000); *t0 = t1; } From 50f355ddddb726de280cb4ebde363a8f5e8ca6b7 Mon Sep 17 00:00:00 2001 From: vmobilis <75476228+vmobilis@users.noreply.github.com> Date: Wed, 12 Feb 2025 17:22:03 +0300 Subject: [PATCH 05/12] Add pretty_progress() with time remainder --- util.cpp | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/util.cpp b/util.cpp index 01c01200e..2bc00decd 100644 --- a/util.cpp +++ b/util.cpp @@ -357,6 +357,39 @@ void pretty_progress(int step, int steps, float time) { } } +#ifdef SD_SHOW_REMAINING_TIME +void pretty_progress(int step, int steps, float time, float left) { + if (sd_progress_cb) { + sd_progress_cb(step, steps, time, sd_progress_cb_data); + return; + } + if (step == 0) { + return; + } + std::string progress = " |"; + int max_progress = 50; + int32_t current = (int32_t)(step * 1.f * max_progress / steps); + for (int i = 0; i < 50; i++) { + if (i > current) { + progress += " "; + } else if (i == current && i != max_progress - 1) { + progress += ">"; + } else { + progress += "="; + } + } + progress += "|"; + printf(time > 1.0f ? "\r%s %i/%i - %.2fs/it" : "\r%s %i/%i - %.2fit/s\033[K", + progress.c_str(), step, steps, + time > 1.0f || time == 0 ? time : (1.0f / time)); + printf(", %.0fm %.2fs left ", left / 60, fmod(left, 60)); + fflush(stdout); // for linux + if (step == steps) { + printf("\n"); + } +} +#endif // SD_SHOW_REMAINING_TIME + std::string ltrim(const std::string& s) { auto it = std::find_if(s.begin(), s.end(), [](int ch) { return !std::isspace(ch); @@ -686,4 +719,4 @@ std::vector<std::pair<std::string, float>> parse_prompt_attention(const std::str } return res; -} \ No newline at end of file +} From 6d4c5caf39930a1d169ecd69dacf66171ec2e24b Mon Sep 17 00:00:00 2001 From: vmobilis <75476228+vmobilis@users.noreply.github.com> Date: Wed, 12 Feb 2025 17:24:15 +0300 Subject: [PATCH 06/12] Add pretty_progress(time) header --- util.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/util.h b/util.h index 14fa812e5..344159e0b 100644 --- a/util.h +++ b/util.h @@ -47,6 +47,9 @@ sd_image_f32_t clip_preprocess(sd_image_f32_t image, int size); std::string path_join(const std::string& p1, const std::string& p2); std::vector<std::string> splitString(const std::string& str, char delimiter); void pretty_progress(int step, int steps, float time); +#ifdef SD_SHOW_REMAINING_TIME +void pretty_progress(int step, int steps, float time, float left); +#endif // SD_SHOW_REMAINING_TIME void log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...); From 7a9a6fab19a9667437ef39ad1ef0536b3176a1ea Mon Sep 17 00:00:00 2001 From: vmobilis <75476228+vmobilis@users.noreply.github.com> Date: Wed, 12 Feb 2025 17:27:12 +0300 Subject: [PATCH 07/12] Use pretty_progress(time_remainder) --- denoiser.hpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/denoiser.hpp b/denoiser.hpp index 02f64fbc7..9bdd46111 100644 --- a/denoiser.hpp +++ b/denoiser.hpp @@ -469,10 +469,17 @@ struct FluxFlowDenoiser : public Denoiser { typedef std::function<ggml_tensor*(ggml_tensor*, float, int)> denoise_cb_t; static inline void show_step(int i0, int im, int64_t* t0) { +#ifdef SD_SHOW_REMAINING_TIME + int i = i0 + 1; + float t1 = (ggml_time_us() - *t0) / 1000000.f / i; + pretty_progress(i, im, t1, t1 * (im - i)); +// LOG_INFO("step %d sampling completed taking %.2fs", i, (t1 - *t0) * 1.0f / 1000000 / i); +#else // SD_SHOW_REMAINING_TIME int64_t t1 = ggml_time_us(); pretty_progress(i0 + 1, im, (t1 - *t0) / 1000000.f); // LOG_INFO("step %d sampling completed taking %.2fs", i0 + 1, (t1 - *t0) * 1.0f / 1000000); *t0 = t1; +#endif // SD_SHOW_REMAINING_TIME } // k diffusion reverse ODE: dx = (x - D(x;\sigma)) / \sigma dt; \sigma(t) = t From 2f3ef394e8fc35a0e2c09a1a55c429b0240c6e1d Mon Sep 17 00:00:00 2001 From: vmobilis <75476228+vmobilis@users.noreply.github.com> Date: Wed, 12 Feb 2025 17:30:12 +0300 Subject: [PATCH 08/12] Config option for time remainder --- CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b7cc6c47..b9580c6aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,6 +32,7 @@ option(SD_SYCL "sd: sycl backend" OFF) option(SD_MUSA "sd: musa backend" OFF) option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF) option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF) +option(SD_SHOW_REMAINING_TIME "sd: show remaining and average sampling time" OFF) #option(SD_BUILD_SERVER "sd: build server example" ON) if(SD_CUDA) @@ -93,6 +94,11 @@ else() add_library(${SD_LIB} STATIC ${SD_LIB_SOURCES}) endif() +if (SD_SHOW_REMAINING_TIME) + message("-- Display remaining and average sampling time") + add_definitions(-DSD_SHOW_REMAINING_TIME) +endif () + if(SD_SYCL) message("-- Use SYCL as backend stable-diffusion") set(GGML_SYCL ON) From afab8fec8348e949fe96c475057bc6fda7a13ffc Mon Sep 17 00:00:00 2001 From: vmobilis <75476228+vmobilis@users.noreply.github.com> Date: Fri, 14 Feb 2025 14:57:50 +0300 Subject: [PATCH 09/12] "%.0f" is rounded to nearest, this is wrong A floor() is needed. Also, clean the time after finish of sampling. --- util.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/util.cpp b/util.cpp index 2bc00decd..17e70206e 100644 --- a/util.cpp +++ b/util.cpp @@ -382,7 +382,13 @@ void pretty_progress(int step, int steps, float time, float left) { printf(time > 1.0f ? "\r%s %i/%i - %.2fs/it" : "\r%s %i/%i - %.2fit/s\033[K", progress.c_str(), step, steps, time > 1.0f || time == 0 ? time : (1.0f / time)); - printf(", %.0fm %.2fs left ", left / 60, fmod(left, 60)); + if (left >= 60.0f) { + printf(", %.0fm %.2fs left \b\b\b\b\b\b\b\b\b", floor(left / 60), fmod(left, 60)); + } else if (left > 0) { + printf(", %.2fs left \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", left); + } else { + printf(" \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", left); + } fflush(stdout); // for linux if (step == steps) { printf("\n"); From 509beaeae1bd0bc340754b927e02e6b90cb436de Mon Sep 17 00:00:00 2001 From: vmobilis <75476228+vmobilis@users.noreply.github.com> Date: Fri, 14 Feb 2025 15:02:56 +0300 Subject: [PATCH 10/12] Needless argument --- util.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util.cpp b/util.cpp index 17e70206e..6d3f20a6a 100644 --- a/util.cpp +++ b/util.cpp @@ -387,7 +387,7 @@ void pretty_progress(int step, int steps, float time, float left) { } else if (left > 0) { printf(", %.2fs left \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", left); } else { - printf(" \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", left); + printf(" \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"); } fflush(stdout); // for linux if (step == steps) { From 55752634edb899096b2399446aa2dc04c0f4ccf3 Mon Sep 17 00:00:00 2001 From: vmobilis <75476228+vmobilis@users.noreply.github.com> Date: Tue, 18 Feb 2025 13:57:04 +0300 Subject: [PATCH 11/12] "1m 60s left" is also wrong As it actually is 1m 59.997s. --- util.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/util.cpp b/util.cpp index 6d3f20a6a..8f421c063 100644 --- a/util.cpp +++ b/util.cpp @@ -383,7 +383,11 @@ void pretty_progress(int step, int steps, float time, float left) { progress.c_str(), step, steps, time > 1.0f || time == 0 ? time : (1.0f / time)); if (left >= 60.0f) { - printf(", %.0fm %.2fs left \b\b\b\b\b\b\b\b\b", floor(left / 60), fmod(left, 60)); + /* same number of spaces and backspaces */ + printf(", %.0fm %.2fs left \b\b\b\b\b\b\b\b\b", + /* min appears faster than mul+div for me, 19.31s vs 19.34s average */ + floor(left / 60.0f), std::min(59.99f, fmod(left, 60.0f))); + //floor(left / 60.0f), floor(fmod(left, 60.0f) * 100.0f) / 100.0f); } else if (left > 0) { printf(", %.2fs left \b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", left); } else { From deaeaf95cc9375ec11f61a40273e4986569db742 Mon Sep 17 00:00:00 2001 From: vmobilis <75476228+vmobilis@users.noreply.github.com> Date: Wed, 26 Feb 2025 00:33:55 +0300 Subject: [PATCH 12/12] Sync with @yslai commit --- denoiser.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/denoiser.hpp b/denoiser.hpp index 64b83df44..263b574e9 100644 --- a/denoiser.hpp +++ b/denoiser.hpp @@ -1224,6 +1224,7 @@ static void sample_k_diffusion(sample_method_t method, // needs to be prescaled again, since k-diffusion's // model() differes from the bare U-net F_theta by the // factor c_in. + show_step(i, steps, &t0); } } break; case TCD: // Strategic Stochastic Sampling (Algorithm 4) in @@ -1398,6 +1399,7 @@ static void sample_k_diffusion(sample_method_t method, vec_noise[j]; } } + show_step(i, steps, &t0); } } break;