1 | /* mpn_mul -- Multiply two natural numbers. |
2 | |
3 | Copyright (C) 1991-2021 Free Software Foundation, Inc. |
4 | |
5 | This file is part of the GNU MP Library. |
6 | |
7 | The GNU MP Library is free software; you can redistribute it and/or modify |
8 | it under the terms of the GNU Lesser General Public License as published by |
9 | the Free Software Foundation; either version 2.1 of the License, or (at your |
10 | option) any later version. |
11 | |
12 | The GNU MP Library is distributed in the hope that it will be useful, but |
13 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
14 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
15 | License for more details. |
16 | |
17 | You should have received a copy of the GNU Lesser General Public License |
18 | along with the GNU MP Library; see the file COPYING.LIB. If not, see |
19 | <https://www.gnu.org/licenses/>. */ |
20 | |
21 | #include <gmp.h> |
22 | #include "gmp-impl.h" |
23 | |
24 | /* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) |
25 | and v (pointed to by VP, with VSIZE limbs), and store the result at |
26 | PRODP. USIZE + VSIZE limbs are always stored, but if the input |
27 | operands are normalized. Return the most significant limb of the |
28 | result. |
29 | |
30 | NOTE: The space pointed to by PRODP is overwritten before finished |
31 | with U and V, so overlap is an error. |
32 | |
33 | Argument constraints: |
34 | 1. USIZE >= VSIZE. |
35 | 2. PRODP != UP and PRODP != VP, i.e. the destination |
36 | must be distinct from the multiplier and the multiplicand. */ |
37 | |
38 | /* If KARATSUBA_THRESHOLD is not already defined, define it to a |
39 | value which is good on most machines. */ |
40 | #ifndef KARATSUBA_THRESHOLD |
41 | #define KARATSUBA_THRESHOLD 32 |
42 | #endif |
43 | |
44 | mp_limb_t |
45 | mpn_mul (mp_ptr prodp, |
46 | mp_srcptr up, mp_size_t usize, |
47 | mp_srcptr vp, mp_size_t vsize) |
48 | { |
49 | mp_ptr prod_endp = prodp + usize + vsize - 1; |
50 | mp_limb_t cy; |
51 | mp_ptr tspace; |
52 | TMP_DECL (marker); |
53 | |
54 | if (vsize < KARATSUBA_THRESHOLD) |
55 | { |
56 | /* Handle simple cases with traditional multiplication. |
57 | |
58 | This is the most critical code of the entire function. All |
59 | multiplies rely on this, both small and huge. Small ones arrive |
60 | here immediately. Huge ones arrive here as this is the base case |
61 | for Karatsuba's recursive algorithm below. */ |
62 | mp_size_t i; |
63 | mp_limb_t cy_limb; |
64 | mp_limb_t v_limb; |
65 | |
66 | if (vsize == 0) |
67 | return 0; |
68 | |
69 | /* Multiply by the first limb in V separately, as the result can be |
70 | stored (not added) to PROD. We also avoid a loop for zeroing. */ |
71 | v_limb = vp[0]; |
72 | if (v_limb <= 1) |
73 | { |
74 | if (v_limb == 1) |
75 | MPN_COPY (prodp, up, usize); |
76 | else |
77 | MPN_ZERO (prodp, usize); |
78 | cy_limb = 0; |
79 | } |
80 | else |
81 | cy_limb = mpn_mul_1 (prodp, up, usize, v_limb); |
82 | |
83 | prodp[usize] = cy_limb; |
84 | prodp++; |
85 | |
86 | /* For each iteration in the outer loop, multiply one limb from |
87 | U with one limb from V, and add it to PROD. */ |
88 | for (i = 1; i < vsize; i++) |
89 | { |
90 | v_limb = vp[i]; |
91 | if (v_limb <= 1) |
92 | { |
93 | cy_limb = 0; |
94 | if (v_limb == 1) |
95 | cy_limb = mpn_add_n (prodp, prodp, up, usize); |
96 | } |
97 | else |
98 | cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb); |
99 | |
100 | prodp[usize] = cy_limb; |
101 | prodp++; |
102 | } |
103 | return cy_limb; |
104 | } |
105 | |
106 | TMP_MARK (marker); |
107 | |
108 | tspace = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB); |
109 | MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace); |
110 | |
111 | prodp += vsize; |
112 | up += vsize; |
113 | usize -= vsize; |
114 | if (usize >= vsize) |
115 | { |
116 | mp_ptr tp = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB); |
117 | do |
118 | { |
119 | MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace); |
120 | cy = mpn_add_n (prodp, prodp, tp, vsize); |
121 | mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy); |
122 | prodp += vsize; |
123 | up += vsize; |
124 | usize -= vsize; |
125 | } |
126 | while (usize >= vsize); |
127 | } |
128 | |
129 | /* True: usize < vsize. */ |
130 | |
131 | /* Make life simple: Recurse. */ |
132 | |
133 | if (usize != 0) |
134 | { |
135 | mpn_mul (tspace, vp, vsize, up, usize); |
136 | cy = mpn_add_n (prodp, prodp, tspace, vsize); |
137 | mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy); |
138 | } |
139 | |
140 | TMP_FREE (marker); |
141 | return *prod_endp; |
142 | } |
143 | |