|
@@ -71,6 +71,7 @@
 ;; ---- [INT] Reciprocal approximation
 ;; ---- [INT<-FP] Base-2 logarithm
 ;; ---- [INT] Polynomial multiplication
+;; ---- [INT] Misc optab implementations
 ;;
 ;; == Permutation
 ;; ---- [INT,FP] General permutes
@@ -2312,6 +2313,46 @@
   "<sve_int_op>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
 )
 
+;; -------------------------------------------------------------------------
+;; ---- [INT] Misc optab implementations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - aarch64_bitmask_udiv
+;; -------------------------------------------------------------------------
+
+;; Div optimizations using narrowings:
+;; We can implement the division of e.g. shorts by 255 faster by
+;; calculating it as (x + ((x + 257) >> 8)) >> 8, assuming the
+;; operation is done in double the precision of x.
+;;
+;; See aarch64-simd.md for a more detailed explanation.
+(define_expand "@aarch64_bitmask_udiv<mode>3"
+  [(match_operand:SVE_FULL_HSDI 0 "register_operand")
+   (match_operand:SVE_FULL_HSDI 1 "register_operand")
+   (match_operand:SVE_FULL_HSDI 2 "immediate_operand")]
+  "TARGET_SVE2"
+{
+  unsigned HOST_WIDE_INT size
+    = (1ULL << GET_MODE_UNIT_BITSIZE (<VNARROW>mode)) - 1;
+  rtx elt = unwrap_const_vec_duplicate (operands[2]);
+  if (!CONST_INT_P (elt) || UINTVAL (elt) != size)
+    FAIL;
+
+  rtx addend = gen_reg_rtx (<MODE>mode);
+  rtx tmp1 = gen_reg_rtx (<VNARROW>mode);
+  rtx tmp2 = gen_reg_rtx (<VNARROW>mode);
+  rtx val = aarch64_simd_gen_const_vector_dup (<VNARROW>mode, 1);
+  emit_move_insn (addend, lowpart_subreg (<MODE>mode, val, <VNARROW>mode));
+  emit_insn (gen_aarch64_sve (UNSPEC_ADDHNB, <MODE>mode, tmp1, operands[1],
+                              addend));
+  emit_insn (gen_aarch64_sve (UNSPEC_ADDHNB, <MODE>mode, tmp2, operands[1],
+                              lowpart_subreg (<MODE>mode, tmp1,
+                                              <VNARROW>mode)));
+  emit_move_insn (operands[0],
+                  lowpart_subreg (<MODE>mode, tmp2, <VNARROW>mode));
+  DONE;
+})
+
 ;; =========================================================================
 ;; == Permutation
 ;; =========================================================================
|