|
| 1 | +const std = @import("std"); |
| 2 | +const builtin = @import("builtin"); |
| 3 | +const common = @import("common.zig"); |
| 4 | +const shr = std.math.shr; |
| 5 | +const shl = std.math.shl; |
| 6 | + |
| 7 | +const max_limbs = std.math.divCeil(usize, 65535, 32) catch unreachable; // max supported type is u65535 |
| 8 | + |
| 9 | +comptime { |
| 10 | + @export(__udivei4, .{ .name = "__udivei4", .linkage = common.linkage }); |
| 11 | + @export(__umodei4, .{ .name = "__umodei4", .linkage = common.linkage }); |
| 12 | +} |
| 13 | + |
| 14 | +const endian = builtin.cpu.arch.endian(); |
| 15 | + |
| 16 | +/// Get the value of a limb. |
| 17 | +inline fn limb(x: []const u32, i: usize) u32 { |
| 18 | + return if (endian == .Little) x[i] else x[x.len - 1 - i]; |
| 19 | +} |
| 20 | + |
| 21 | +/// Change the value of a limb. |
| 22 | +inline fn limb_set(x: []u32, i: usize, v: u32) void { |
| 23 | + if (endian == .Little) { |
| 24 | + x[i] = v; |
| 25 | + } else { |
| 26 | + x[x.len - 1 - i] = v; |
| 27 | + } |
| 28 | +} |
| 29 | + |
| 30 | +// Uses Knuth's Algorithm D, 4.3.1, p. 272. |
| 31 | +fn divmod(q: ?[]u32, r: ?[]u32, u: []const u32, v: []const u32) !void { |
| 32 | + if (q) |q_| std.mem.set(u32, q_[0..], 0); |
| 33 | + if (r) |r_| std.mem.set(u32, r_[0..], 0); |
| 34 | + |
| 35 | + if (u.len == 0 or v.len == 0) return error.DivisionByZero; |
| 36 | + |
| 37 | + var m = u.len - 1; |
| 38 | + var n = v.len - 1; |
| 39 | + while (limb(u, m) == 0) : (m -= 1) { |
| 40 | + if (m == 0) return; |
| 41 | + } |
| 42 | + while (limb(v, n) == 0) : (n -= 1) { |
| 43 | + if (n == 0) return error.DivisionByZero; |
| 44 | + } |
| 45 | + |
| 46 | + if (n > m) { |
| 47 | + if (r) |r_| std.mem.copy(u32, r_[0..], u[0..]); |
| 48 | + return; |
| 49 | + } |
| 50 | + |
| 51 | + const s = @clz(limb(v, n)); |
| 52 | + |
| 53 | + var vn: [max_limbs]u32 = undefined; |
| 54 | + var i = n; |
| 55 | + while (i > 0) : (i -= 1) { |
| 56 | + limb_set(&vn, i, shl(u32, limb(v, i), s) | shr(u32, limb(v, i - 1), 32 - s)); |
| 57 | + } |
| 58 | + limb_set(&vn, 0, shl(u32, limb(v, 0), s)); |
| 59 | + |
| 60 | + var un: [max_limbs + 1]u32 = undefined; |
| 61 | + limb_set(&un, m + 1, shr(u32, limb(u, m), 32 - s)); |
| 62 | + i = m; |
| 63 | + while (i > 0) : (i -= 1) { |
| 64 | + limb_set(&un, i, shl(u32, limb(u, i), s) | shr(u32, limb(u, i - 1), 32 - s)); |
| 65 | + } |
| 66 | + limb_set(&un, 0, shl(u32, limb(u, 0), s)); |
| 67 | + |
| 68 | + var j = m - n; |
| 69 | + while (true) : (j -= 1) { |
| 70 | + const uu = (@as(u64, limb(&un, j + n + 1)) << 32) + limb(&un, j + n); |
| 71 | + var qhat = uu / limb(&vn, n); |
| 72 | + var rhat = uu % limb(&vn, n); |
| 73 | + |
| 74 | + while (true) { |
| 75 | + if (qhat >= (1 << 32) or (n > 0 and qhat * limb(&vn, n - 1) > (rhat << 32) + limb(&un, j + n - 1))) { |
| 76 | + qhat -= 1; |
| 77 | + rhat += limb(&vn, n); |
| 78 | + if (rhat < (1 << 32)) continue; |
| 79 | + } |
| 80 | + break; |
| 81 | + } |
| 82 | + var carry: u64 = 0; |
| 83 | + i = 0; |
| 84 | + while (i <= n) : (i += 1) { |
| 85 | + const p = qhat * limb(&vn, i); |
| 86 | + const t = limb(&un, i + j) - carry - @truncate(u32, p); |
| 87 | + limb_set(&un, i + j, @truncate(u32, t)); |
| 88 | + carry = @intCast(u64, p >> 32) - @intCast(u64, t >> 32); |
| 89 | + } |
| 90 | + const t = limb(&un, j + n + 1) - carry; |
| 91 | + limb_set(&un, j + n + 1, @truncate(u32, t)); |
| 92 | + if (q) |q_| limb_set(q_, j, @truncate(u32, qhat)); |
| 93 | + if (t < 0) { |
| 94 | + if (q) |q_| limb_set(q_, j, limb(q_, j) - 1); |
| 95 | + var carry2: u64 = 0; |
| 96 | + i = 0; |
| 97 | + while (i <= n) : (i += 1) { |
| 98 | + const t2 = @as(u64, limb(&un, i + j)) + @as(u64, limb(&vn, i)) + carry2; |
| 99 | + limb_set(&un, i + j, @truncate(u32, t2)); |
| 100 | + carry2 = t2 >> 32; |
| 101 | + } |
| 102 | + limb_set(un, j + n + 1, @truncate(u32, limb(&un, j + n + 1) + carry2)); |
| 103 | + } |
| 104 | + if (j == 0) break; |
| 105 | + } |
| 106 | + if (r) |r_| { |
| 107 | + i = 0; |
| 108 | + while (i <= n) : (i += 1) { |
| 109 | + limb_set(r_, i, shr(u32, limb(&un, i), s) | shl(u32, limb(&un, i + 1), 32 - s)); |
| 110 | + } |
| 111 | + limb_set(r_, n, shr(u32, limb(&un, n), s)); |
| 112 | + } |
| 113 | +} |
| 114 | + |
| 115 | +pub fn __udivei4(r_q: [*c]u32, u_p: [*c]const u32, v_p: [*c]const u32, bits: usize) callconv(.C) void { |
| 116 | + @setRuntimeSafety(builtin.is_test); |
| 117 | + const u = u_p[0 .. bits / 32]; |
| 118 | + const v = v_p[0 .. bits / 32]; |
| 119 | + var q = r_q[0 .. bits / 32]; |
| 120 | + @call(.always_inline, divmod, .{ q, null, u, v }) catch unreachable; |
| 121 | +} |
| 122 | + |
| 123 | +pub fn __umodei4(r_p: [*c]u32, u_p: [*c]const u32, v_p: [*c]const u32, bits: usize) callconv(.C) void { |
| 124 | + @setRuntimeSafety(builtin.is_test); |
| 125 | + const u = u_p[0 .. bits / 32]; |
| 126 | + const v = v_p[0 .. bits / 32]; |
| 127 | + var r = r_p[0 .. bits / 32]; |
| 128 | + @call(.always_inline, divmod, .{ null, r, u, v }) catch unreachable; |
| 129 | +} |
| 130 | + |
| 131 | +test "__udivei4/__umodei4" { |
| 132 | + const RndGen = std.rand.DefaultPrng; |
| 133 | + var rnd = RndGen.init(42); |
| 134 | + var i: usize = 10000; |
| 135 | + while (i > 0) : (i -= 1) { |
| 136 | + const u = rnd.random().int(u1000); |
| 137 | + const v = 1 + rnd.random().int(u1200); |
| 138 | + const q = u / v; |
| 139 | + const r = u % v; |
| 140 | + const z = q * v + r; |
| 141 | + try std.testing.expect(z == u); |
| 142 | + } |
| 143 | +} |
0 commit comments