Skip to content

Fixes #1312 Optimize ReadFullAsync to minimize memory allocations #1314

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from

Conversation

pbolduc
Copy link
Contributor

@pbolduc pbolduc commented Jun 12, 2025

This PR fixes #1312.

Benchmark Source

using BenchmarkDotNet.Attributes;

namespace MinioBechmarks
{
    /// <summary>
    /// Compares two ways of filling a buffer of <see cref="PartSize"/> bytes from a
    /// stream that returns at most <see cref="ReadSize"/> bytes per read call:
    /// the baseline implementation and a variant that reads straight into the
    /// destination buffer.
    /// </summary>
    [MemoryDiagnoser]
    public class ReadFullAsyncBenchmarks
    {
        private Stream _testStream;

        /// <summary>Number of bytes each benchmark invocation tries to read.</summary>
        [Params(64 * 1024, 1024 * 1024, 16 * 1024 * 1024)] // 64 KB, 1 MB, 16 MB
        public int PartSize;

        /// <summary>Upper bound on the bytes the test stream returns per read call.</summary>
        [Params(1024 * 4, 1024 * 8, 1024 * 16)] // 4 KB, 8 KB, 16 KB
        public int ReadSize;

        /// <summary>Creates the source stream for the current parameter combination.</summary>
        [GlobalSetup]
        public void Setup()
        {
            _testStream = new VirtualStream(PartSize, ReadSize);
        }

        /// <summary>Releases the source stream created by <see cref="Setup"/>.</summary>
        [GlobalCleanup]
        public void Cleanup()
        {
            _testStream?.Dispose();
        }

        /// <summary>Rewinds the source stream so every iteration reads the full part.</summary>
        [IterationSetup]
        public void IterationSetup()
        {
            // Reset stream position before each iteration
            _testStream.Position = 0;
        }

        /// <summary>Measures the baseline implementation.</summary>
        [Benchmark(Baseline = true)]
        public async Task<ReadOnlyMemory<byte>> ReadFull_Baseline()
        {
            return await ReadFullAsync(_testStream, PartSize);
        }

        /// <summary>Measures the implementation that reads directly into the result buffer.</summary>
        [Benchmark]
        public async Task<ReadOnlyMemory<byte>> ReadFull_Optimzed()
        {
            return await ReadFullAsync_Optimzed(_testStream, PartSize);
        }

        /// <summary>
        /// Reads up to <paramref name="currentPartSize"/> bytes from <paramref name="data"/>
        /// into a single buffer, looping until the buffer is full or the stream reports
        /// end of data.
        /// </summary>
        /// <param name="data">Stream to read from.</param>
        /// <param name="currentPartSize">Maximum number of bytes to read.</param>
        /// <returns>
        /// The bytes that were read, as a slice of the one allocated buffer; empty when
        /// nothing could be read.
        /// </returns>
        private async Task<ReadOnlyMemory<byte>> ReadFullAsync_Optimzed(Stream data, int currentPartSize)
        {
            Memory<byte> result = new byte[currentPartSize];
            var totalRead = 0;
            while (totalRead < currentPartSize)
            {
                // Hand the stream the unfilled tail of the buffer so no copy is needed.
                var curRead = await data.ReadAsync(result[totalRead..]).ConfigureAwait(false);
                if (curRead == 0)
                {
                    break;
                }

                totalRead += curRead;
            }

            if (totalRead == 0)
            {
                return ReadOnlyMemory<byte>.Empty;
            }

            // Return only the valid portion without allocating a new buffer
            return result[..totalRead];
        }

        /// <summary>
        /// Baseline implementation used as the benchmark reference. Its statements are
        /// intentionally left untouched: it allocates a fresh temporary buffer on every
        /// loop pass and copies the received bytes one at a time.
        /// </summary>
        /// <param name="data">Stream to read from.</param>
        /// <param name="currentPartSize">Maximum number of bytes to read.</param>
        /// <returns>
        /// The bytes that were read; empty when nothing could be read. A short read is
        /// copied into a new buffer of exactly the number of bytes received.
        /// </returns>
        private async Task<ReadOnlyMemory<byte>> ReadFullAsync(Stream data, int currentPartSize)
        {
            Memory<byte> result = new byte[currentPartSize];
            var totalRead = 0;
            while (totalRead < currentPartSize)
            {
                // New temporary buffer sized to the remaining byte count on every pass.
                Memory<byte> curData = new byte[currentPartSize - totalRead];
                var curRead = await data.ReadAsync(curData[..(currentPartSize - totalRead)]).ConfigureAwait(false);
                if (curRead == 0) break;
                // Byte-by-byte copy from the temporary buffer into the result.
                for (var i = 0; i < curRead; i++)
                    curData.Slice(i, 1).CopyTo(result[(totalRead + i)..]);
                totalRead += curRead;
            }

            // null converts implicitly to an empty ReadOnlyMemory<byte>.
            if (totalRead == 0) return null;

            if (totalRead == currentPartSize) return result;

            // Short read: copy the valid prefix into a right-sized buffer.
            Memory<byte> truncatedResult = new byte[totalRead];
            for (var i = 0; i < totalRead; i++)
                result.Slice(i, 1).CopyTo(truncatedResult[i..]);
            return truncatedResult;
        }
    }
}

/// <summary>
/// Read-only, non-seekable stream of a fixed logical length that produces random
/// bytes and returns at most a configured number of bytes per <see cref="Read"/> call.
/// The position can still be assigned directly (for example to rewind to 0).
/// </summary>
internal class VirtualStream : Stream
{
    private long position;
    private readonly long length;
    private readonly int readSize;

    /// <summary>Creates a stream that yields <paramref name="length"/> bytes in total.</summary>
    /// <param name="length">Total number of bytes the stream produces before reporting end of stream.</param>
    /// <param name="readSize">Maximum number of bytes returned by a single read call.</param>
    public VirtualStream(long length, int readSize = 4096)
    {
        position = 0;
        this.length = length;
        this.readSize = readSize;
    }

    public override bool CanRead => true;
    public override bool CanSeek => false;
    public override bool CanWrite => false;
    public override long Length => length;

    /// <summary>Current read offset within the stream.</summary>
    /// <exception cref="ArgumentOutOfRangeException">The assigned value is negative.</exception>
    public override long Position
    {
        get => position;
        set
        {
            if (value < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(value), value, "Position must be non-negative.");
            }

            // Store the full 64-bit value; narrowing to int would corrupt positions above int.MaxValue.
            position = value;
        }
    }

    /// <summary>No-op: the stream has nothing to flush.</summary>
    public override void Flush() { }

    /// <summary>
    /// Fills part of <paramref name="buffer"/> with random bytes, limited by
    /// <paramref name="count"/>, the per-call read size and the bytes remaining.
    /// </summary>
    /// <returns>The number of bytes written into the buffer, or 0 at end of stream.</returns>
    public override int Read(byte[] buffer, int offset, int count)
    {
        // Calculate how many bytes are left to read
        var remaining = length - position;
        if (remaining <= 0)
        {
            return 0; // End of stream
        }

        // Limit the read to a maximum of readSize bytes per call
        if (readSize < count)
        {
            count = readSize;
        }

        if (remaining < count)
        {
            count = (int)remaining; // safe: remaining is smaller than an int here
        }

        Random.Shared.NextBytes(buffer.AsSpan(offset, count));
        position += count;

        return count;
    }

    /// <summary>Not supported.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public override void SetLength(long value)
    {
        throw new NotSupportedException();
    }

    /// <summary>Not supported: the stream is read-only.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public override void Write(byte[] buffer, int offset, int count)
    {
        throw new NotSupportedException();
    }

    /// <summary>Not supported: <see cref="CanSeek"/> is false.</summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public override long Seek(long offset, SeekOrigin origin)
    {
        throw new NotSupportedException();
    }
}

Results

// * Summary *

BenchmarkDotNet v0.15.0, Windows 11 (10.0.22631.5472/23H2/2023Update/SunValley3)
AMD Ryzen Threadripper PRO 3955WX 16-Cores 3.90GHz, 1 CPU, 32 logical and 16 physical cores
.NET SDK 9.0.300
[Host] : .NET 8.0.17 (8.0.1725.26602), X64 RyuJIT AVX2
Job-MFMDJV : .NET 8.0.17 (8.0.1725.26602), X64 RyuJIT AVX2

InvocationCount=1 UnrollFactor=1

Method PartSize ReadSize Mean Error StdDev Median Ratio RatioSD Gen0 Gen1 Gen2 Allocated Alloc Ratio
ReadFull_Baseline 1048576 4096 20,906.09 us 724.300 us 2,112.817 us 20,381.80 us 1.01 0.14 28000.0000 28000.0000 28000.0000 132832.15 KB 1.000
ReadFull_Optimzed 1048576 4096 642.47 us 14.962 us 42.929 us 631.70 us 0.03 0.00 - - - 1053.01 KB 0.008
ReadFull_Baseline 1048576 8192 15,781.60 us 355.193 us 1,041.719 us 15,812.70 us 1.00 0.09 16000.0000 16000.0000 16000.0000 67178.41 KB 1.00
ReadFull_Optimzed 1048576 8192 581.66 us 17.203 us 49.909 us 581.00 us 0.04 0.00 - - - 1039.01 KB 0.02
ReadFull_Baseline 1048576 16384 11,704.86 us 233.576 us 685.037 us 11,625.20 us 1.00 0.08 8000.0000 8000.0000 8000.0000 34350.24 KB 1.00
ReadFull_Optimzed 1048576 16384 517.80 us 12.564 us 36.251 us 515.25 us 0.04 0.00 - - - 1032.01 KB 0.03
ReadFull_Baseline 16777216 4096 3,055,224.83 us 59,758.072 us 106,219.921 us 3,049,715.20 us 1.001 0.05 448000.0000 448000.0000 448000.0000 33580838.48 KB 1.000
ReadFull_Optimzed 16777216 4096 9,775.28 us 158.556 us 148.313 us 9,743.30 us 0.003 0.00 - - - 16833.01 KB 0.001
ReadFull_Baseline 16777216 8192 1,590,136.69 us 25,951.411 us 24,274.966 us 1,592,640.80 us 1.000 0.02 231000.0000 231000.0000 231000.0000 16802755.82 KB 1.000
ReadFull_Optimzed 16777216 8192 8,602.60 us 265.460 us 748.736 us 8,334.95 us 0.005 0.00 - - - 16609.01 KB 0.001
ReadFull_Baseline 16777216 16384 856,254.32 us 15,729.213 us 30,304.855 us 848,952.20 us 1.001 0.05 115000.0000 115000.0000 115000.0000 8413650.39 KB 1.000
ReadFull_Optimzed 16777216 16384 8,472.09 us 155.529 us 129.874 us 8,469.00 us 0.010 0.00 - - - 16497.01 KB 0.002
ReadFull_Baseline 65536 4096 678.58 us 12.069 us 15.693 us 677.45 us 1.00 0.03 - - - 612.41 KB 1.00
ReadFull_Optimzed 65536 4096 87.07 us 8.190 us 23.890 us 87.75 us 0.13 0.04 - - - 66.76 KB 0.11
ReadFull_Baseline 65536 8192 531.94 us 5.175 us 4.322 us 529.80 us 1.00 0.01 - - - 354.09 KB 1.00
ReadFull_Optimzed 65536 8192 77.69 us 6.214 us 17.627 us 81.00 us 0.15 0.03 - - - 65.88 KB 0.19
ReadFull_Baseline 65536 16384 462.96 us 7.640 us 6.773 us 460.75 us 1.00 0.02 - - - 225.55 KB 1.00
ReadFull_Optimzed 65536 16384 65.32 us 4.181 us 11.654 us 68.20 us 0.14 0.03 - - - 65.45 KB 0.29

// * Warnings *

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

internal method ReadFullAsync(Stream data, int currentPartSize) has terrible performance
2 participants