🌐 AI搜索 & 代理 主页
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/release/upcoming_changes/29060.change.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
* Multiplication between a string and integer now raises OverflowError instead
of MemoryError if the result of the multiplication would create a string that
is too large to be represented. This follows Python's behavior.
12 changes: 12 additions & 0 deletions numpy/_core/src/umath/string_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,18 @@ struct Buffer {
return num_codepoints;
}

/*
 * Return the width of this buffer, i.e. how many elements of the
 * encoding's storage unit fit between `buf` and `after`.
 *
 * For ASCII and UTF8 the pointer difference `after - buf` is returned
 * unchanged; for UTF32 it is divided by sizeof(npy_ucs4).
 * NOTE(review): this assumes `buf` and `after` are byte pointers; their
 * declarations are outside this view -- confirm.
 */
inline size_t
buffer_width()
{
    switch (enc) {
        case ENCODING::ASCII:
        case ENCODING::UTF8:
            return after - buf;
        case ENCODING::UTF32:
            return (after - buf) / sizeof(npy_ucs4);
    }
    // Every enumerator handled above returns, but flowing off the end of a
    // non-void function is undefined behavior. Keep an explicit fallback
    // (instead of a `default:` label, so missing-enumerator warnings on the
    // switch still fire). A width of 0 makes callers write nothing.
    return 0;
}

inline Buffer<enc>&
operator+=(npy_int64 rhs)
{
Expand Down
48 changes: 36 additions & 12 deletions numpy/_core/src/umath/string_ufuncs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "dtypemeta.h"
#include "convert_datatype.h"
#include "gil_utils.h"
#include "templ_common.h" /* for npy_mul_size_with_overflow_size_t */

#include "string_ufuncs.h"
#include "string_fastsearch.h"
Expand Down Expand Up @@ -166,26 +167,44 @@ string_add(Buffer<enc> buf1, Buffer<enc> buf2, Buffer<enc> out)


template <ENCODING enc>
static inline void
static inline int
string_multiply(Buffer<enc> buf1, npy_int64 reps, Buffer<enc> out)
{
size_t len1 = buf1.num_codepoints();
if (reps < 1 || len1 == 0) {
out.buffer_fill_with_zeros_after_index(0);
return;
return 0;
}

if (len1 == 1) {
out.buffer_memset(*buf1, reps);
out.buffer_fill_with_zeros_after_index(reps);
return 0;
}
else {
for (npy_int64 i = 0; i < reps; i++) {
buf1.buffer_memcpy(out, len1);
out += len1;
}
out.buffer_fill_with_zeros_after_index(0);

size_t newlen;
if (NPY_UNLIKELY(npy_mul_with_overflow_size_t(&newlen, reps, len1) != 0) || newlen > PY_SSIZE_T_MAX) {
return -1;
}

size_t pad = 0;
size_t width = out.buffer_width();
if (width < newlen) {
reps = width / len1;
pad = width % len1;
}

for (npy_int64 i = 0; i < reps; i++) {
buf1.buffer_memcpy(out, len1);
out += len1;
}

buf1.buffer_memcpy(out, pad);
out += pad;

out.buffer_fill_with_zeros_after_index(0);

return 0;
}


Expand Down Expand Up @@ -238,7 +257,9 @@ string_multiply_strint_loop(PyArrayMethod_Context *context,
while (N--) {
Buffer<enc> buf(in1, elsize);
Buffer<enc> outbuf(out, outsize);
string_multiply<enc>(buf, *(npy_int64 *)in2, outbuf);
if (NPY_UNLIKELY(string_multiply<enc>(buf, *(npy_int64 *)in2, outbuf) < 0)) {
npy_gil_error(PyExc_OverflowError, "Overflow detected in string multiply");
}

in1 += strides[0];
in2 += strides[1];
Expand Down Expand Up @@ -267,7 +288,9 @@ string_multiply_intstr_loop(PyArrayMethod_Context *context,
while (N--) {
Buffer<enc> buf(in2, elsize);
Buffer<enc> outbuf(out, outsize);
string_multiply<enc>(buf, *(npy_int64 *)in1, outbuf);
if (NPY_UNLIKELY(string_multiply<enc>(buf, *(npy_int64 *)in1, outbuf) < 0)) {
npy_gil_error(PyExc_OverflowError, "Overflow detected in string multiply");
}

in1 += strides[0];
in2 += strides[1];
Expand Down Expand Up @@ -752,10 +775,11 @@ string_multiply_resolve_descriptors(
if (given_descrs[2] == NULL) {
PyErr_SetString(
PyExc_TypeError,
"The 'out' kwarg is necessary. Use numpy.strings.multiply without it.");
"The 'out' kwarg is necessary when using the string multiply ufunc "
"directly. Use numpy.strings.multiply to multiply strings without "
"specifying 'out'.");
return _NPY_ERROR_OCCURRED_IN_CAST;
}

loop_descrs[0] = NPY_DT_CALL_ensure_canonical(given_descrs[0]);
if (loop_descrs[0] == NULL) {
return _NPY_ERROR_OCCURRED_IN_CAST;
Expand Down
12 changes: 6 additions & 6 deletions numpy/_core/src/umath/stringdtype_ufuncs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,9 @@ static int multiply_loop_core(
size_t newsize;
int overflowed = npy_mul_with_overflow_size_t(
&newsize, cursize, factor);
if (overflowed) {
npy_gil_error(PyExc_MemoryError,
"Failed to allocate string in string multiply");
if (overflowed || newsize > PY_SSIZE_T_MAX) {
npy_gil_error(PyExc_OverflowError,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of curiosity, how are these things handled in free threaded Python?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think free-threading changes anything in this code path

Copy link
Member Author

@ngoldbaum ngoldbaum May 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To put it another way: we could probably rename npy_gil_error to e.g. npy_error_py_attached to make it clearer that you still need to explicitly attach and detach from the runtime in all builds.

"Overflow encountered in string multiply");
goto fail;
}

Expand Down Expand Up @@ -1748,9 +1748,9 @@ center_ljust_rjust_strided_loop(PyArrayMethod_Context *context,
width - num_codepoints);
newsize += s1.size;

if (overflowed) {
npy_gil_error(PyExc_MemoryError,
"Failed to allocate string in %s", ufunc_name);
if (overflowed || newsize > PY_SSIZE_T_MAX) {
npy_gil_error(PyExc_OverflowError,
"Overflow encountered in %s", ufunc_name);
goto fail;
}

Expand Down
2 changes: 1 addition & 1 deletion numpy/_core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def multiply(a, i):

# Ensure we can do a_len * i without overflow.
if np.any(a_len > sys.maxsize / np.maximum(i, 1)):
raise MemoryError("repeated string is too long")
raise OverflowError("Overflow encountered in string multiply")

buffersizes = a_len * i
out_dtype = f"{a.dtype.char}{buffersizes.max()}"
Expand Down
4 changes: 2 additions & 2 deletions numpy/_core/tests/test_stringdtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ def test_null_roundtripping():

def test_string_too_large_error():
arr = np.array(["a", "b", "c"], dtype=StringDType())
with pytest.raises(MemoryError):
arr * (2**63 - 2)
with pytest.raises(OverflowError):
arr * (sys.maxsize + 1)


@pytest.mark.parametrize(
Expand Down
13 changes: 12 additions & 1 deletion numpy/_core/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,20 @@ def test_multiply_raises(self, dt):
with pytest.raises(TypeError, match="unsupported type"):
np.strings.multiply(np.array("abc", dtype=dt), 3.14)

with pytest.raises(MemoryError):
with pytest.raises(OverflowError):
np.strings.multiply(np.array("abc", dtype=dt), sys.maxsize)

# Checks in-place string repetition (``arr *= n``), where the result is
# stored back into the existing array rather than into a new one.
def test_inplace_multiply(self, dt):
arr = np.array(['foo ', 'bar'], dtype=dt)
arr *= 2
if dt != "T":
# Expected values are the repeated strings cut to four characters
# ('foo foo ' -> 'foo ', 'barbar' -> 'barb'); presumably four is the
# array's item width, set by the longest input -- confirm.
assert_array_equal(arr, np.array(['foo ', 'barb'], dtype=dt))
else:
# For dt == "T" the full repeated strings are expected.
assert_array_equal(arr, ['foo foo ', 'barbar'])

# A repeat count of sys.maxsize is expected to raise OverflowError.
with pytest.raises(OverflowError):
arr *= sys.maxsize

@pytest.mark.parametrize("i_dt", [np.int8, np.int16, np.int32,
np.int64, np.int_])
def test_multiply_integer_dtypes(self, i_dt, dt):
Expand Down
3 changes: 2 additions & 1 deletion numpy/typing/tests/data/pass/ma.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
MAR_M_dt64: MaskedArray[np.datetime64] = np.ma.MaskedArray([np.datetime64(1, "D")])
MAR_S: MaskedArray[np.bytes_] = np.ma.MaskedArray([b'foo'], dtype=np.bytes_)
MAR_U: MaskedArray[np.str_] = np.ma.MaskedArray(['foo'], dtype=np.str_)
MAR_T = cast(np.ma.MaskedArray[Any, np.dtypes.StringDType], np.ma.MaskedArray(["a"], "T"))
MAR_T = cast(np.ma.MaskedArray[Any, np.dtypes.StringDType],
np.ma.MaskedArray(["a"], dtype="T"))

AR_b: npt.NDArray[np.bool] = np.array([True, False, True])

Expand Down
Loading