Skip to content

Commit 8d66aac

Browse files
authored
compiler-rt: implement __udivei4 and __umodei4 (#14023)
Allows dividing and formatting arbitrary-large unsigned integers.
1 parent 71668fc commit 8d66aac

File tree

4 files changed

+147
-0
lines changed

4 files changed

+147
-0
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,7 @@ set(ZIG_STAGE2_SOURCES
472472
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/truncxfhf2.zig"
473473
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/truncxfsf2.zig"
474474
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/udivmod.zig"
475+
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/udivmodei4.zig"
475476
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/udivmodti4.zig"
476477
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/udivti3.zig"
477478
"${CMAKE_SOURCE_DIR}/lib/compiler_rt/umodti3.zig"

lib/compiler_rt.zig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ comptime {
112112
_ = @import("compiler_rt/modti3.zig");
113113
_ = @import("compiler_rt/multi3.zig");
114114
_ = @import("compiler_rt/udivti3.zig");
115+
_ = @import("compiler_rt/udivmodei4.zig");
115116
_ = @import("compiler_rt/udivmodti4.zig");
116117
_ = @import("compiler_rt/umodti3.zig");
117118

lib/compiler_rt/udivmodei4.zig

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
const std = @import("std");
2+
const builtin = @import("builtin");
3+
const common = @import("common.zig");
4+
const shr = std.math.shr;
5+
const shl = std.math.shl;
6+
7+
const max_limbs = std.math.divCeil(usize, 65535, 32) catch unreachable; // max supported type is u65535
8+
9+
comptime {
10+
@export(__udivei4, .{ .name = "__udivei4", .linkage = common.linkage });
11+
@export(__umodei4, .{ .name = "__umodei4", .linkage = common.linkage });
12+
}
13+
14+
const endian = builtin.cpu.arch.endian();
15+
16+
/// Get the value of a limb.
17+
inline fn limb(x: []const u32, i: usize) u32 {
18+
return if (endian == .Little) x[i] else x[x.len - 1 - i];
19+
}
20+
21+
/// Change the value of a limb.
22+
inline fn limb_set(x: []u32, i: usize, v: u32) void {
23+
if (endian == .Little) {
24+
x[i] = v;
25+
} else {
26+
x[x.len - 1 - i] = v;
27+
}
28+
}
29+
30+
// Uses Knuth's Algorithm D, 4.3.1, p. 272.
31+
fn divmod(q: ?[]u32, r: ?[]u32, u: []const u32, v: []const u32) !void {
32+
if (q) |q_| std.mem.set(u32, q_[0..], 0);
33+
if (r) |r_| std.mem.set(u32, r_[0..], 0);
34+
35+
if (u.len == 0 or v.len == 0) return error.DivisionByZero;
36+
37+
var m = u.len - 1;
38+
var n = v.len - 1;
39+
while (limb(u, m) == 0) : (m -= 1) {
40+
if (m == 0) return;
41+
}
42+
while (limb(v, n) == 0) : (n -= 1) {
43+
if (n == 0) return error.DivisionByZero;
44+
}
45+
46+
if (n > m) {
47+
if (r) |r_| std.mem.copy(u32, r_[0..], u[0..]);
48+
return;
49+
}
50+
51+
const s = @clz(limb(v, n));
52+
53+
var vn: [max_limbs]u32 = undefined;
54+
var i = n;
55+
while (i > 0) : (i -= 1) {
56+
limb_set(&vn, i, shl(u32, limb(v, i), s) | shr(u32, limb(v, i - 1), 32 - s));
57+
}
58+
limb_set(&vn, 0, shl(u32, limb(v, 0), s));
59+
60+
var un: [max_limbs + 1]u32 = undefined;
61+
limb_set(&un, m + 1, shr(u32, limb(u, m), 32 - s));
62+
i = m;
63+
while (i > 0) : (i -= 1) {
64+
limb_set(&un, i, shl(u32, limb(u, i), s) | shr(u32, limb(u, i - 1), 32 - s));
65+
}
66+
limb_set(&un, 0, shl(u32, limb(u, 0), s));
67+
68+
var j = m - n;
69+
while (true) : (j -= 1) {
70+
const uu = (@as(u64, limb(&un, j + n + 1)) << 32) + limb(&un, j + n);
71+
var qhat = uu / limb(&vn, n);
72+
var rhat = uu % limb(&vn, n);
73+
74+
while (true) {
75+
if (qhat >= (1 << 32) or (n > 0 and qhat * limb(&vn, n - 1) > (rhat << 32) + limb(&un, j + n - 1))) {
76+
qhat -= 1;
77+
rhat += limb(&vn, n);
78+
if (rhat < (1 << 32)) continue;
79+
}
80+
break;
81+
}
82+
var carry: u64 = 0;
83+
i = 0;
84+
while (i <= n) : (i += 1) {
85+
const p = qhat * limb(&vn, i);
86+
const t = limb(&un, i + j) - carry - @truncate(u32, p);
87+
limb_set(&un, i + j, @truncate(u32, t));
88+
carry = @intCast(u64, p >> 32) - @intCast(u64, t >> 32);
89+
}
90+
const t = limb(&un, j + n + 1) - carry;
91+
limb_set(&un, j + n + 1, @truncate(u32, t));
92+
if (q) |q_| limb_set(q_, j, @truncate(u32, qhat));
93+
if (t < 0) {
94+
if (q) |q_| limb_set(q_, j, limb(q_, j) - 1);
95+
var carry2: u64 = 0;
96+
i = 0;
97+
while (i <= n) : (i += 1) {
98+
const t2 = @as(u64, limb(&un, i + j)) + @as(u64, limb(&vn, i)) + carry2;
99+
limb_set(&un, i + j, @truncate(u32, t2));
100+
carry2 = t2 >> 32;
101+
}
102+
limb_set(un, j + n + 1, @truncate(u32, limb(&un, j + n + 1) + carry2));
103+
}
104+
if (j == 0) break;
105+
}
106+
if (r) |r_| {
107+
i = 0;
108+
while (i <= n) : (i += 1) {
109+
limb_set(r_, i, shr(u32, limb(&un, i), s) | shl(u32, limb(&un, i + 1), 32 - s));
110+
}
111+
limb_set(r_, n, shr(u32, limb(&un, n), s));
112+
}
113+
}
114+
115+
pub fn __udivei4(r_q: [*c]u32, u_p: [*c]const u32, v_p: [*c]const u32, bits: usize) callconv(.C) void {
116+
@setRuntimeSafety(builtin.is_test);
117+
const u = u_p[0 .. bits / 32];
118+
const v = v_p[0 .. bits / 32];
119+
var q = r_q[0 .. bits / 32];
120+
@call(.always_inline, divmod, .{ q, null, u, v }) catch unreachable;
121+
}
122+
123+
pub fn __umodei4(r_p: [*c]u32, u_p: [*c]const u32, v_p: [*c]const u32, bits: usize) callconv(.C) void {
124+
@setRuntimeSafety(builtin.is_test);
125+
const u = u_p[0 .. bits / 32];
126+
const v = v_p[0 .. bits / 32];
127+
var r = r_p[0 .. bits / 32];
128+
@call(.always_inline, divmod, .{ null, r, u, v }) catch unreachable;
129+
}
130+
131+
test "__udivei4/__umodei4" {
132+
const RndGen = std.rand.DefaultPrng;
133+
var rnd = RndGen.init(42);
134+
var i: usize = 10000;
135+
while (i > 0) : (i -= 1) {
136+
const u = rnd.random().int(u1000);
137+
const v = 1 + rnd.random().int(u1200);
138+
const q = u / v;
139+
const r = u % v;
140+
const z = q * v + r;
141+
try std.testing.expect(z == u);
142+
}
143+
}

tools/gen_stubs.zig

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -865,12 +865,14 @@ const blacklisted_symbols = [_][]const u8{
865865
"__ucmpsi2",
866866
"__ucmpti2",
867867
"__udivdi3",
868+
"__udivei4",
868869
"__udivmoddi4",
869870
"__udivmodsi4",
870871
"__udivmodti4",
871872
"__udivsi3",
872873
"__udivti3",
873874
"__umoddi3",
875+
"__umodei4",
874876
"__umodsi3",
875877
"__umodti3",
876878
"__unorddf2",

0 commit comments

Comments
 (0)