Skip to content

Commit 749194e

Browse files
authored
Convert managed implementation of Marvin hashing to ReadOnlySpan (dotnet#25735)
* Add Marvin perf tests * Move Marvin hashing to Span * Add comment about perf regression when using modulo + subtraction instead of clearing bits
1 parent 80c903d commit 749194e

File tree

6 files changed

+108
-45
lines changed

6 files changed

+108
-45
lines changed

src/Common/src/System/Marvin.cs

+19-40
Original file line numberDiff line numberDiff line change
@@ -14,74 +14,53 @@ internal static class Marvin
1414
/// Convenience method to compute a Marvin hash and collapse it into a 32-bit hash.
1515
/// </summary>
1616
[MethodImpl(MethodImplOptions.AggressiveInlining)]
17-
public static int ComputeHash32(ref byte data, int count, ulong seed)
17+
public static int ComputeHash32(ReadOnlySpan<byte> data, ulong seed)
1818
{
19-
long hash64 = ComputeHash(ref data, count, seed);
19+
long hash64 = ComputeHash(data, seed);
2020
return ((int)(hash64 >> 32)) ^ (int)hash64;
2121
}
2222

2323
/// <summary>
2424
/// Computes a 64-bit hash using the Marvin algorithm.
2525
/// </summary>
26-
public static long ComputeHash(ref byte data, int count, ulong seed)
26+
public static long ComputeHash(ReadOnlySpan<byte> data, ulong seed)
2727
{
28-
uint ucount = (uint)count;
2928
uint p0 = (uint)seed;
3029
uint p1 = (uint)(seed >> 32);
3130

32-
int byteOffset = 0; // declared as signed int so we don't have to cast everywhere (it's passed to Unsafe.Add() and used for nothing else.)
33-
34-
while (ucount >= 8)
31+
if (data.Length >= sizeof(uint))
3532
{
36-
p0 += Unsafe.As<byte, uint>(ref Unsafe.Add(ref data, byteOffset));
37-
Block(ref p0, ref p1);
33+
ReadOnlySpan<uint> uData = data.NonPortableCast<byte, uint>();
3834

39-
p0 += Unsafe.As<byte, uint>(ref Unsafe.Add(ref data, byteOffset + 4));
40-
Block(ref p0, ref p1);
35+
for (int i = 0; i < uData.Length; i++)
36+
{
37+
p0 += uData[i];
38+
Block(ref p0, ref p1);
39+
}
4140

42-
byteOffset += 8;
43-
ucount -= 8;
41+
// byteOffset = data.Length - data.Length % 4
42+
// is equivalent to clearing the last 2 bits of the length.
43+
// Using the modulo + subtraction form directly gives a perf hit for short strings, making it at least 5% slower.
44+
int byteOffset = data.Length & (~3);
45+
data = data.Slice(byteOffset);
4446
}
4547

46-
switch (ucount)
48+
switch (data.Length)
4749
{
48-
case 4:
49-
p0 += Unsafe.As<byte, uint>(ref Unsafe.Add(ref data, byteOffset));
50-
Block(ref p0, ref p1);
51-
goto case 0;
52-
5350
case 0:
5451
p0 += 0x80u;
5552
break;
5653

57-
case 5:
58-
p0 += Unsafe.As<byte, uint>(ref Unsafe.Add(ref data, byteOffset));
59-
byteOffset += 4;
60-
Block(ref p0, ref p1);
61-
goto case 1;
62-
6354
case 1:
64-
p0 += 0x8000u | Unsafe.Add(ref data, byteOffset);
55+
p0 += 0x8000u | data[0];
6556
break;
6657

67-
case 6:
68-
p0 += Unsafe.As<byte, uint>(ref Unsafe.Add(ref data, byteOffset));
69-
byteOffset += 4;
70-
Block(ref p0, ref p1);
71-
goto case 2;
72-
7358
case 2:
74-
p0 += 0x800000u | Unsafe.As<byte, ushort>(ref Unsafe.Add(ref data, byteOffset));
59+
p0 += 0x800000u | data.NonPortableCast<byte, ushort>()[0];
7560
break;
7661

77-
case 7:
78-
p0 += Unsafe.As<byte, uint>(ref Unsafe.Add(ref data, byteOffset));
79-
byteOffset += 4;
80-
Block(ref p0, ref p1);
81-
goto case 3;
82-
8362
case 3:
84-
p0 += 0x80000000u | (((uint)(Unsafe.Add(ref data, byteOffset + 2))) << 16)| (uint)(Unsafe.As<byte, ushort>(ref Unsafe.Add(ref data, byteOffset)));
63+
p0 += 0x80000000u | (((uint)data[2]) << 16) | (uint)(data.NonPortableCast<byte, ushort>()[0]);
8564
break;
8665

8766
default:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3+
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
4+
<PropertyGroup>
5+
<IncludePerformanceTests>true</IncludePerformanceTests>
6+
<ProjectGuid>{B96198F5-9BF7-42DE-83E8-3EE39DA25F43}</ProjectGuid>
7+
<DisableTests Condition="'$(TargetGroup)' == 'uap' AND ('$(ArchGroup)' == 'arm' OR '$(ArchGroup)' == 'arm64')">true</DisableTests>
8+
</PropertyGroup>
9+
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'netcoreapp-Debug|AnyCPU'" />
10+
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'netcoreapp-Release|AnyCPU'" />
11+
<ItemGroup Condition="'$(DisableTests)' != 'true'">
12+
<Compile Include="$(CommonPath)\System\Marvin.cs">
13+
<Link>Common\System\Marvin.cs</Link>
14+
</Compile>
15+
<Compile Include="Perf.Marvin.cs" />
16+
<Compile Include="$(CommonTestPath)\System\PerfUtils.cs">
17+
<Link>Common\System\PerfUtils.cs</Link>
18+
</Compile>
19+
</ItemGroup>
20+
<ItemGroup Condition="'$(DisableTests)' != 'true'">
21+
<ProjectReference Include="$(CommonPath)\..\perf\PerfRunner\PerfRunner.csproj">
22+
<Project>{69e46a6f-9966-45a5-8945-2559fe337827}</Project>
23+
<Name>PerfRunner</Name>
24+
</ProjectReference>
25+
</ItemGroup>
26+
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
27+
</Project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3+
<PropertyGroup>
4+
<BuildConfigurations>
5+
netcoreapp;
6+
</BuildConfigurations>
7+
</PropertyGroup>
8+
</Project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System.Collections.Generic;
6+
using System.Linq;
7+
using Microsoft.Xunit.Performance;
8+
using Xunit;
9+
10+
namespace System
11+
{
12+
public class Perf_Marvin
13+
{
14+
private static IEnumerable<object[]> EnumerateRandomByteArrayTestCases()
15+
{
16+
var r = new Random(123);
17+
foreach (int size in
18+
Enumerable.Range(0, 25)
19+
.Union(new int[] { 50, 100, 200, 2000, 20000, 200000, 2000000 }))
20+
{
21+
byte[] array = new byte[size];
22+
r.NextBytes(array);
23+
24+
int iterations = 2000000 / Math.Max(1, size);
25+
yield return new object[] { iterations, array };
26+
}
27+
}
28+
29+
[Benchmark]
30+
[MemberData(nameof(EnumerateRandomByteArrayTestCases))]
31+
public void Add(int iterations, byte[] data)
32+
{
33+
Span<byte> otherData = new byte[] { 1, 2, 3 };
34+
var bytes = new Span<byte>(data);
35+
foreach (var iteration in Benchmark.Iterations)
36+
{
37+
using (iteration.StartMeasurement())
38+
{
39+
for (int i = 0; i < iterations; i++)
40+
{
41+
Marvin.ComputeHash(bytes, 123123123123UL);
42+
Marvin.ComputeHash(otherData, 555888555888UL);
43+
}
44+
}
45+
}
46+
}
47+
}
48+
}

src/Common/tests/Tests/System/MarvinTests.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ public class MarvinTests
2929
public void ComputeHash_Success(ulong seed, string testDataString, ulong expectedHash)
3030
{
3131
var testDataSpan = new Span<byte>(testDataString.HexToByteArray());
32-
long hash = Marvin.ComputeHash(ref testDataSpan.DangerousGetPinnableReference(), testDataSpan.Length, seed);
32+
long hash = Marvin.ComputeHash(testDataSpan, seed);
3333
Assert.Equal((long)expectedHash, hash);
3434
}
3535

src/System.Net.Primitives/src/System/Net/IPAddress.cs

+5-4
Original file line numberDiff line numberDiff line change
@@ -634,16 +634,17 @@ public override int GetHashCode()
634634
Debug.Assert(scopeWritten);
635635

636636
hashCode = Marvin.ComputeHash32(
637-
ref addressAndScopeIdSpan[0],
638-
addressAndScopeIdLength,
637+
addressAndScopeIdSpan,
639638
Marvin.DefaultSeed);
640639
}
641640
else
642641
{
642+
Span<uint> addressOrScopeIdSpan = stackalloc uint[1];
643+
addressOrScopeIdSpan[0] = _addressOrScopeId;
644+
643645
// For IPv4 addresses, we use Marvin on the integer representation of the Address.
644646
hashCode = Marvin.ComputeHash32(
645-
ref Unsafe.As<uint, byte>(ref _addressOrScopeId),
646-
sizeof(uint),
647+
addressOrScopeIdSpan.AsBytes(),
647648
Marvin.DefaultSeed);
648649
}
649650

0 commit comments

Comments
 (0)