🌐 AI搜索 & 代理 主页
Skip to content

Commit 25d26e5

Browse files
authored
Merge pull request #28619 from r-devulap/xss-openmp
ENH: Use openmp on x86-simd-sort to speed up np.sort and np.argsort
2 parents 0bf61e1 + 6eff29e commit 25d26e5

File tree

7 files changed

+53
-5
lines changed

7 files changed

+53
-5
lines changed

.github/workflows/linux_simd.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ jobs:
212212
python -m pip install pytest pytest-xdist hypothesis typing_extensions
213213
214214
- name: Build
215-
run: CC=gcc-13 CXX=g++-13 spin build -- -Dallow-noblas=true -Dcpu-baseline=avx512_skx -Dtest-simd='BASELINE,AVX512_KNL,AVX512_KNM,AVX512_SKX,AVX512_CLX,AVX512_CNL,AVX512_ICL,AVX512_SPR'
215+
run: CC=gcc-13 CXX=g++-13 spin build -- -Denable-openmp=true -Dallow-noblas=true -Dcpu-baseline=avx512_skx -Dtest-simd='BASELINE,AVX512_KNL,AVX512_KNM,AVX512_SKX,AVX512_CLX,AVX512_CNL,AVX512_ICL,AVX512_SPR'
216216

217217
- name: Meson Log
218218
if: always()
@@ -263,7 +263,7 @@ jobs:
263263
python -m pip install pytest pytest-xdist hypothesis typing_extensions
264264
265265
- name: Build
266-
run: CC=gcc-13 CXX=g++-13 spin build -- -Dallow-noblas=true -Dcpu-baseline=avx512_spr
266+
run: CC=gcc-13 CXX=g++-13 spin build -- -Denable-openmp=true -Dallow-noblas=true -Dcpu-baseline=avx512_spr
267267

268268
- name: Meson Log
269269
if: always()
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Building NumPy with OpenMP Parallelization
2+
-------------------------------------------
3+
NumPy now supports OpenMP parallel processing capabilities when built with the
4+
``-Denable-openmp=true`` Meson build flag. This feature is disabled by default.
5+
When enabled, ``np.sort`` and ``np.argsort`` functions can utilize OpenMP for
6+
parallel thread execution, improving performance for these operations.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Performance improvements to ``np.sort`` and ``np.argsort``
2+
----------------------------------------------------------
3+
``np.sort`` and ``np.argsort`` functions can now leverage OpenMP for parallel
4+
thread execution, resulting in up to 3.5x speedups on x86 architectures with
5+
AVX2 or AVX-512 instructions. This opt-in feature requires NumPy to be built
6+
with the ``-Denable-openmp=true`` Meson flag. Users can control the number of threads
7+
used by setting the ``OMP_NUM_THREADS`` environment variable.

meson.options

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ option('disable-intel-sort', type: 'boolean', value: false,
2222
description: 'Disables SIMD-optimized operations related to Intel x86-simd-sort')
2323
option('disable-threading', type: 'boolean', value: false,
2424
description: 'Disable threading support (see `NPY_ALLOW_THREADS` docs)')
25+
option('enable-openmp', type: 'boolean', value: false,
26+
description: 'Enable building NumPy with openmp support')
2527
option('disable-optimization', type: 'boolean', value: false,
2628
description: 'Disable CPU optimized code (dispatch,simd,unroll...)')
2729
option('cpu-baseline', type: 'string', value: 'min',

numpy/_core/meson.build

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,21 @@ if use_intel_sort and not fs.exists('src/npysort/x86-simd-sort/README.md')
128128
error('Missing the `x86-simd-sort` git submodule! Run `git submodule update --init` to fix this.')
129129
endif
130130

131+
# openMP related settings:
132+
if get_option('disable-threading') and get_option('enable-openmp')
133+
error('Build options `disable-threading` and `enable-openmp` are conflicting. Please set at most one to true.')
134+
endif
135+
136+
use_openmp = get_option('enable-openmp') and not get_option('disable-threading')
137+
138+
# Setup openmp flags for x86-simd-sort:
139+
omp = []
140+
omp_dep = []
141+
if use_intel_sort and use_openmp
142+
omp = dependency('openmp', required : true)
143+
omp_dep = declare_dependency(dependencies: omp, compile_args: ['-DXSS_USE_OPENMP'])
144+
endif
145+
131146
if not fs.exists('src/common/pythoncapi-compat')
132147
error('Missing the `pythoncapi-compat` git submodule! ' +
133148
'Run `git submodule update --init` to fix this.')
@@ -867,12 +882,15 @@ foreach gen_mtargets : [
867882
] : []
868883
],
869884
]
885+
886+
887+
870888
mtargets = mod_features.multi_targets(
871889
gen_mtargets[0], multiarray_gen_headers + gen_mtargets[1],
872890
dispatch: gen_mtargets[2],
873891
# baseline: CPU_BASELINE, it doesn't provide baseline fallback
874892
prefix: 'NPY_',
875-
dependencies: [py_dep, np_core_dep],
893+
dependencies: [py_dep, np_core_dep, omp_dep],
876894
c_args: c_args_common + max_opt,
877895
cpp_args: cpp_args_common + max_opt,
878896
include_directories: [
@@ -1286,7 +1304,7 @@ py.extension_module('_multiarray_umath',
12861304
'src/umath',
12871305
'src/highway'
12881306
],
1289-
dependencies: [blas_dep],
1307+
dependencies: [blas_dep, omp],
12901308
link_with: [
12911309
npymath_lib,
12921310
unique_hash_so,

numpy/_core/tests/test_multiarray.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10329,6 +10329,21 @@ def test_argsort_int(N, dtype):
1032910329
arr[N - 1] = maxv
1033010330
assert_arg_sorted(arr, np.argsort(arr, kind='quick'))
1033110331

10332+
# Test large arrays that leverage openMP implementations from x86-simd-sort:
10333+
@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
10334+
def test_sort_largearrays(dtype):
10335+
N = 1000000
10336+
rnd = np.random.RandomState(1100710816)
10337+
arr = -0.5 + rnd.random(N).astype(dtype)
10338+
assert_equal(np.sort(arr, kind='quick'), np.sort(arr, kind='heap'))
10339+
10340+
# Test large arrays that leverage openMP implementations from x86-simd-sort:
10341+
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
10342+
def test_argsort_largearrays(dtype):
10343+
N = 1000000
10344+
rnd = np.random.RandomState(1100710816)
10345+
arr = -0.5 + rnd.random(N).astype(dtype)
10346+
assert_arg_sorted(arr, np.argsort(arr, kind='quick'))
1033210347

1033310348
@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts")
1033410349
def test_gh_22683():

0 commit comments

Comments
 (0)