// Implementation of <simd> -*- C++ -*-

// Copyright The GNU Toolchain Authors.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_SIMD_MASK_H
#define _GLIBCXX_SIMD_MASK_H 1

#ifdef _GLIBCXX_SYSHDR
#pragma GCC system_header
#endif

#if __cplusplus >= 202400L

#include "simd_iterator.h"
#include "vec_ops.h"
#if _GLIBCXX_X86
#include "simd_x86.h"
#endif

#include <bit>
#include <bitset>

// psabi warnings are bogus because the ABI of the internal types never leaks into user code
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpsabi"

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
namespace simd
{
  template <unsigned _Np>
    struct _SwapNeighbors
    {
      consteval unsigned
      operator()(unsigned __i, unsigned __size) const
      {
        if (__size % (2 * _Np) != 0)
          __builtin_abort(); // swap_neighbors<N> permutation requires a multiple of 2N elements
        else if (std::has_single_bit(_Np))
          return __i ^ _Np;
        else if (__i % (2 * _Np) >= _Np)
          return __i - _Np;
        else
          return __i + _Np;
      }
    };

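  // Illustrative checks (not part of the upstream header): _SwapNeighbors<_Np>
  // exchanges each block of _Np elements with its neighboring block, e.g. for
  // 8 elements _SwapNeighbors<1> maps 0 1 2 3 ... to 1 0 3 2 ...
  static_assert(_SwapNeighbors<1>()(0, 8) == 1);
  static_assert(_SwapNeighbors<2>()(1, 8) == 3);
  static_assert(_SwapNeighbors<3>()(4, 12) == 1);
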
  template <size_t _Np, size_t _Mp>
    constexpr auto
    __bitset_split(const bitset<_Mp>& __b)
    {
      constexpr auto __bits_per_word = __CHAR_BIT__ * __SIZEOF_LONG__;
      if constexpr (_Np % __bits_per_word == 0)
        {
          struct _Tmp
          {
            bitset<_Np> _M_lo;
            bitset<_Mp - _Np> _M_hi;
          };
          return __builtin_bit_cast(_Tmp, __b);
        }
      else
        {
          constexpr auto __bits_per_ullong = __CHAR_BIT__ * __SIZEOF_LONG_LONG__;
          static_assert(_Mp <= __bits_per_ullong);
          using _Lo = _Bitmask<_Np>;
          using _Hi = _Bitmask<_Mp - _Np>;
          struct _Tmp
          {
            _Lo _M_lo;
            _Hi _M_hi;
          };
          return _Tmp {static_cast<_Lo>(__b.to_ullong()), static_cast<_Hi>(__b.to_ullong() >> _Np)};
        }
    }

  static_assert(__bitset_split<64>(bitset<128>(1))._M_lo == bitset<64>(1));
  static_assert(__bitset_split<64>(bitset<128>(1))._M_hi == bitset<64>(0));

  // [simd.traits]
  // --- rebind ---
  template <typename _Tp, typename _Vp, _ArchTraits _Traits = {}>
    struct rebind
    {};

  /**
   * Computes a member @c type `basic_vec<_Tp, Abi>`, where @c Abi is chosen such that the
   * number of elements is equal to `_Vp::size()` and features of the ABI tag (such as the
   * internal representation of masks, or storage order of complex components) are preserved.
   */
  template <__vectorizable _Tp, __simd_vec_type _Vp, _ArchTraits _Traits>
    //requires requires { typename __deduce_abi_t<_Tp, _Vp::size()>; }
    struct rebind<_Tp, _Vp, _Traits>
    { using type = __similar_vec<_Tp, _Vp::size(), typename _Vp::abi_type>; };

  /**
   * As above, except for @c basic_mask.
   */
  template <__vectorizable _Tp, __simd_mask_type _Mp, _ArchTraits _Traits>
    //requires requires { typename __deduce_abi_t<_Tp, _Mp::size()>; }
    struct rebind<_Tp, _Mp, _Traits>
    { using type = __similar_mask<_Tp, _Mp::size(), typename _Mp::abi_type>; };

  template <typename _Tp, typename _Vp>
    using rebind_t = typename rebind<_Tp, _Vp>::type;

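  // Illustrative (assumes vec<float, 4> is a valid vector type on the target):
  //   static_assert(is_same_v<rebind_t<int, vec<float, 4>>, vec<int, 4>>);
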
  // --- resize ---
  template <__simd_size_type _Np, typename _Vp, _ArchTraits _Traits = {}>
    struct resize
    {};

  template <__simd_size_type _Np, __simd_vec_type _Vp, _ArchTraits _Traits>
    requires (_Np >= 1)
    //requires requires { typename __deduce_abi_t<typename _Vp::value_type, _Np>; }
    struct resize<_Np, _Vp, _Traits>
    { using type = __similar_vec<typename _Vp::value_type, _Np, typename _Vp::abi_type>; };

  template <__simd_size_type _Np, __simd_mask_type _Mp, _ArchTraits _Traits>
    requires (_Np >= 1)
    //requires requires { typename __deduce_abi_t<typename _Mp::value_type, _Np>; }
    struct resize<_Np, _Mp, _Traits>
    {
      using _A1 = decltype(__abi_rebind<__mask_element_size<_Mp>, _Np, typename _Mp::abi_type,
                                        true>());

      static_assert(__abi_tag<_A1>);

      static_assert(_Mp::abi_type::_S_variant == _A1::_S_variant || __scalar_abi_tag<_A1>
                      || __scalar_abi_tag<typename _Mp::abi_type>);

      using type = basic_mask<__mask_element_size<_Mp>, _A1>;
    };

  template <__simd_size_type _Np, typename _Vp>
    using resize_t = typename resize<_Np, _Vp>::type;

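  // Illustrative: resize_t<4, vec<int, 8>> is expected to name vec<int, 4>;
  // for masks the ABI variant (bit-mask vs. vector-mask) is preserved by
  // __abi_rebind, as the static_asserts above check.
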
  // [simd.syn]
  inline constexpr __simd_size_type zero_element = numeric_limits<int>::min();

  inline constexpr __simd_size_type uninit_element = zero_element + 1;

  // [simd.permute.static]
  template<__simd_size_type _Np = 0, __simd_vec_or_mask_type _Vp,
           __index_permutation_function<_Vp> _IdxMap>
    [[__gnu__::__always_inline__]]
    constexpr resize_t<_Np == 0 ? _Vp::size() : _Np, _Vp>
    permute(const _Vp& __v, _IdxMap&& __idxmap)
    { return resize_t<_Np == 0 ? _Vp::size() : _Np, _Vp>::_S_static_permute(__v, __idxmap); }

  // [simd.permute.dynamic]
  template<__simd_vec_or_mask_type _Vp, __simd_integral _Ip>
    [[__gnu__::__always_inline__]]
    constexpr resize_t<_Ip::size(), _Vp>
    permute(const _Vp& __v, const _Ip& __indices)
    { return __v[__indices]; }

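  // Illustrative sketch (assumes vec<int, 4> is valid on the target): reverse
  // the elements with a static permutation, analogous to the _SwapNeighbors
  // index maps above:
  //   vec<int, 4> __x([](int __i) { return __i; });                    // {0, 1, 2, 3}
  //   vec<int, 4> __y = permute(__x, [](int __i) { return 3 - __i; }); // {3, 2, 1, 0}
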
  // [simd.creation] ----------------------------------------------------------
  template<__simd_vec_type _Vp, typename _Ap>
    [[__gnu__::__always_inline__]]
    constexpr auto
    chunk(const basic_vec<typename _Vp::value_type, _Ap>& __x) noexcept
    { return __x.template _M_chunk<_Vp>(); }

  template<__simd_mask_type _Mp, typename _Ap>
    [[__gnu__::__always_inline__]]
    constexpr auto
    chunk(const basic_mask<__mask_element_size<_Mp>, _Ap>& __x) noexcept
    { return __x.template _M_chunk<_Mp>(); }

  template<__simd_size_type _Np, typename _Tp, typename _Ap>
    [[__gnu__::__always_inline__]]
    constexpr auto
    chunk(const basic_vec<_Tp, _Ap>& __x) noexcept
    -> decltype(chunk<resize_t<_Np, basic_vec<_Tp, _Ap>>>(__x))
    { return chunk<resize_t<_Np, basic_vec<_Tp, _Ap>>>(__x); }

  template<__simd_size_type _Np, size_t _Bytes, typename _Ap>
    [[__gnu__::__always_inline__]]
    constexpr auto
    chunk(const basic_mask<_Bytes, _Ap>& __x) noexcept
    -> decltype(chunk<resize_t<_Np, basic_mask<_Bytes, _Ap>>>(__x))
    { return chunk<resize_t<_Np, basic_mask<_Bytes, _Ap>>>(__x); }

  // LWG???? (reported 2025-11-25)
  template<typename _Tp, typename _A0, typename... _Abis>
    constexpr resize_t<(_A0::_S_size + ... + _Abis::_S_size), basic_vec<_Tp, _A0>>
    cat(const basic_vec<_Tp, _A0>& __x0, const basic_vec<_Tp, _Abis>&... __xs) noexcept
    {
      return resize_t<(_A0::_S_size + ... + _Abis::_S_size), basic_vec<_Tp, _A0>>
               ::_S_concat(__x0, __xs...);
    }

  // LWG???? (reported 2025-11-25)
  template<size_t _Bytes, typename _A0, typename... _Abis>
    constexpr resize_t<(_A0::_S_size + ... + _Abis::_S_size), basic_mask<_Bytes, _A0>>
    cat(const basic_mask<_Bytes, _A0>& __x0, const basic_mask<_Bytes, _Abis>&... __xs) noexcept
    {
      return resize_t<(_A0::_S_size + ... + _Abis::_S_size), basic_mask<_Bytes, _A0>>
               ::_S_concat(__x0, __xs...);
    }

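  // Illustrative: cat(vec<int, 4>(), vec<int, 4>()) has the type
  // resize_t<8, vec<int, 4>>, i.e. the element type is preserved and the
  // sizes of all arguments add up.
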
  // implementation helper for chunk and cat
  consteval int
  __packs_to_skip_at_front(int __offset, initializer_list<int> __sizes)
  {
    int __i = 0;
    int __n = 0;
    for (int __s : __sizes)
      {
        __n += __s;
        if (__n > __offset)
          return __i;
        ++__i;
      }
    __builtin_trap(); // called out of contract
  }

  consteval int
  __packs_to_skip_at_back(int __offset, int __max, initializer_list<int> __sizes)
  {
    int __i = 0;
    int __n = -__offset;
    for (int __s : __sizes)
      {
        ++__i;
        __n += __s;
        if (__n >= __max)
          return int(__sizes.size()) - __i;
      }
    return 0;
  }

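  // Illustrative checks (in the style of the __bitset_split checks above):
  // with input packs of sizes {4, 4, 4}, an offset of 4 allows skipping one
  // pack at the front, and a 4-element destination at offset 0 allows
  // skipping two packs at the back.
  static_assert(__packs_to_skip_at_front(4, {4, 4, 4}) == 1);
  static_assert(__packs_to_skip_at_back(0, 4, {4, 4, 4}) == 2);
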
  // in principle, this overload allows conversions to _Dst - and it wouldn't be wrong - but the
  // general overload below is still a better candidate in overload resolution
  template <typename _Dst>
    [[__gnu__::__always_inline__]]
    constexpr _Dst
    __extract_simd_at(auto _Offset, const _Dst& __r, const auto&...)
    requires(_Offset.value == 0)
    { return __r; }

  template <typename _Dst, typename _V0>
    [[__gnu__::__always_inline__]]
    constexpr _Dst
    __extract_simd_at(auto _Offset, const _V0&, const _Dst& __r, const auto&...)
    requires(_Offset.value == _V0::size.value)
    { return __r; }

  template <typename _Dst, typename... _Vs>
    [[__gnu__::__always_inline__]]
    constexpr _Dst
    __extract_simd_at(auto _Offset, const _Vs&... __xs)
    {
      using _Adst = typename _Dst::abi_type;
      if constexpr (_Adst::_S_nreg >= 2)
        {
          using _Dst0 = remove_cvref_t<decltype(declval<_Dst>()._M_get_low())>;
          using _Dst1 = remove_cvref_t<decltype(declval<_Dst>()._M_get_high())>;
          return _Dst::_S_init(__extract_simd_at<_Dst0>(_Offset, __xs...),
                               __extract_simd_at<_Dst1>(_Offset + _Dst0::size, __xs...));
        }
      else
        {
          using _Ret = remove_cvref_t<decltype(declval<_Dst>()._M_get())>;
          constexpr bool __use_bitmask = __simd_mask_type<_Dst> && _Adst::_S_is_bitmask;
          constexpr int __dst_full_size = __bit_ceil(unsigned(_Adst::_S_size));
          constexpr int __nargs = sizeof...(__xs);
          using _Afirst = typename _Vs...[0]::abi_type;
          using _Alast = typename _Vs...[__nargs - 1]::abi_type;
          const auto& __x0 = __xs...[0];
          const auto& __xlast = __xs...[__nargs - 1];
          constexpr int __ninputs = (_Vs::size.value + ...);
          if constexpr (_Offset.value >= _Afirst::_S_size
                          || __ninputs - _Offset.value - _Alast::_S_size >= _Adst::_S_size)
            { // can drop inputs at the front and/or back of the pack
              constexpr int __skip_front = __packs_to_skip_at_front(_Offset.value,
                                                                    {_Vs::size.value...});
              constexpr int __skip_back = __packs_to_skip_at_back(_Offset.value, _Adst::_S_size,
                                                                  {_Vs::size.value...});
              static_assert(__skip_front > 0 || __skip_back > 0);
              constexpr auto [...__skip] = _IotaArray<__skip_front>;
              constexpr auto [...__is] = _IotaArray<__nargs - __skip_front - __skip_back>;
              constexpr int __new_offset = _Offset.value - (0 + ... + _Vs...[__skip]::size.value);
              return __extract_simd_at<_Dst>(cw<__new_offset>, __xs...[__is + __skip_front]...);
            }
          else if constexpr (_Adst::_S_size == 1)
            { // trivial conversion to one value_type
              return _Dst(__x0[_Offset.value]);
            }
          else if constexpr (_Afirst::_S_nreg >= 2 || _Alast::_S_nreg >= 2)
            { // flatten first and/or last multi-register argument
              constexpr bool __flatten_first = _Afirst::_S_nreg >= 2;
              constexpr bool __flatten_last = __nargs > 1 && _Alast::_S_nreg >= 2;
              constexpr auto [...__is] = _IotaArray<__nargs - __flatten_first - __flatten_last>;
              if constexpr (__flatten_first && __flatten_last)
                return __extract_simd_at<_Dst>(
                         _Offset, __x0._M_get_low(), __x0._M_get_high(), __xs...[__is + 1]...,
                         __xlast._M_get_low(), __xlast._M_get_high());
              else if constexpr (__flatten_first)
                return __extract_simd_at<_Dst>(
                         _Offset, __x0._M_get_low(), __x0._M_get_high(), __xs...[__is + 1]...);
              else
                return __extract_simd_at<_Dst>(
                         _Offset, __xs...[__is]..., __xlast._M_get_low(), __xlast._M_get_high());
            }
          else if constexpr (__simd_mask_type<_Dst>
                               && ((_Adst::_S_variant != _Vs::abi_type::_S_variant
                                      && !__scalar_abi_tag<typename _Vs::abi_type>) || ...))
            { // convert ABI tag if incompatible
              return __extract_simd_at<_Dst>(
                       _Offset, static_cast<const resize_t<_Vs::size.value, _Dst>&>(__xs)...);
            }

          // at this point __xs should be as small as possible; there may be some corner cases left

          else if constexpr (__nargs == 1)
            { // simple and optimal
              if constexpr (__use_bitmask)
                return _Dst(_Ret(__x0._M_to_uint() >> _Offset.value));
              else
                return _VecOps<_Ret>::_S_extract(__x0._M_concat_data(false), _Offset);
            }
          else if constexpr (__use_bitmask)
            { // fairly simple and optimal bit shifting solution
              static_assert(_Afirst::_S_nreg == 1);
              static_assert(_Offset.value < _Afirst::_S_size);
              int __offset = -_Offset.value;
              _Ret __r;
              template for (const auto& __x : {__xs...})
                {
                  if (__offset <= 0)
                    __r = _Ret(__x._M_to_uint() >> -__offset);
                  else if (__offset < _Adst::_S_size)
                    __r |= _Ret(_Ret(__x._M_to_uint()) << __offset);
                  __offset += __x.size.value;
                }
              return _Dst(__r);
            }
          else if constexpr (__nargs == 2 && _Offset == 0 && _Adst::_S_nreg == 1
                               && _Afirst::_S_size >= _Alast::_S_size
                               && __has_single_bit(unsigned(_Afirst::_S_size)))
            { // simple __vec_concat
              if constexpr (_Afirst::_S_size == 1)
                // even simpler init from two values
                return _Ret{__x0._M_concat_data()[0], __xlast._M_concat_data()[0]};
              else
                {
                  const auto __v0 = __x0._M_concat_data();
                  const auto __v1 = __vec_zero_pad_to<sizeof(__v0)>(__xlast._M_concat_data());
                  return __vec_concat(__v0, __v1);
                }
            }
          else if constexpr (__nargs == 2 && _Adst::_S_nreg == 1 && _Offset == 0
                               && _Afirst::_S_nreg == 1 && _Alast::_S_size == 1)
            { // optimize insertion of one element at the end
              _Ret __r = __vec_zero_pad_to<sizeof(_Ret)>(__x0._M_get());
              __vec_set(__r, _Afirst::_S_size, __xlast._M_concat_data()[0]);
              return __r;
            }
          else if constexpr (__nargs == 2 && _Adst::_S_nreg == 1 && _Offset == 0
                               && _Afirst::_S_nreg == 1 && _Alast::_S_size == 2)
            { // optimize insertion of two elements at the end
              _Ret __r = __vec_zero_pad_to<sizeof(_Ret)>(__x0._M_concat_data());
              const auto __x1 = __xlast._M_concat_data();
              if constexpr (sizeof(__x1) <= sizeof(double) && (_Afirst::_S_size & 1) == 0)
                { // can use a single insert instruction
                  using _Up = __conditional_t<
                                is_floating_point_v<__vec_value_type<_Ret>>,
                                __conditional_t<sizeof(__x1) == sizeof(double), double, float>,
                                __integer_from<sizeof(__x1)>>;
                  auto __r2 = __vec_bit_cast<_Up>(__r);
                  __vec_set(__r2, _Afirst::_S_size / 2, __vec_bit_cast<_Up>(__x1)[0]);
                  __r = reinterpret_cast<_Ret>(__r2);
                }
              else
                {
                  __vec_set(__r, _Afirst::_S_size, __x1[0]);
                  __vec_set(__r, _Afirst::_S_size + 1, __x1[1]);
                }
              return __r;
            }
          else if constexpr (__nargs == 2 && _Afirst::_S_nreg == 1 && _Alast::_S_nreg == 1)
            { // optimize concat of two input vectors (e.g. using palignr)
              constexpr auto [...__is] = _IotaArray<__dst_full_size>;
              constexpr int __v2_offset = __width_of<decltype(__x0._M_concat_data())>;
              return __builtin_shufflevector(
                       __x0._M_concat_data(), __xlast._M_concat_data(), [](int __i) consteval {
                         if (__i < _Afirst::_S_size)
                           return __i;
                         __i -= _Afirst::_S_size;
                         if (__i < _Alast::_S_size)
                           return __i + __v2_offset;
                         else
                           return -1;
                       }(__is + _Offset.value)...);
            }
          else if (__is_const_known(__xs...) || __ninputs == _Adst::_S_size)
            { // hard to optimize for the compiler, but necessary in constant expressions
              return _VecOps<_Ret>::_S_extract(
                       __vec_concat_sized<__xs.size.value...>(__xs._M_concat_data(false)...),
                       _Offset);
            }
          else
            { // fallback to concatenation in memory => load the result
              alignas(_Ret) __vec_value_type<_Ret>
                __tmp[std::max(__ninputs, _Offset.value + __dst_full_size)] = {};
              int __offset = 0;
              template for (const auto& __x : {__xs...})
                {
                  if constexpr (__simd_mask_type<_Dst>)
                    (-__x)._M_store(__tmp + __offset);
                  else
                    __x._M_store(__tmp + __offset);
                  __offset += __x.size.value;
                }
              _Ret __r;
              __builtin_memcpy(&__r, __tmp + _Offset.value, sizeof(_Ret));
              return __r;
            }
        }
    }

  // [simd.mask] --------------------------------------------------------------
  template <size_t _Bytes, typename _Ap>
    class basic_mask
    {
    public:
      using value_type = bool;

      using abi_type = _Ap;

#define _GLIBCXX_DELETE_SIMD "This specialization is disabled because of an invalid combination " \
                             "of template arguments to basic_mask."

      basic_mask() = delete(_GLIBCXX_DELETE_SIMD);

      ~basic_mask() = delete(_GLIBCXX_DELETE_SIMD);

      basic_mask(const basic_mask&) = delete(_GLIBCXX_DELETE_SIMD);

      basic_mask& operator=(const basic_mask&) = delete(_GLIBCXX_DELETE_SIMD);

#undef _GLIBCXX_DELETE_SIMD
    };

  template <size_t _Bytes, typename _Ap>
    class _MaskBase
    {
      using _Mp = basic_mask<_Bytes, _Ap>;

    protected:
      using _VecType = __simd_vec_from_mask_t<_Bytes, _Ap>;

      static_assert(destructible<_VecType> || _Bytes > sizeof(0ull));

    public:
      using iterator = __iterator<_Mp>;

      using const_iterator = __iterator<const _Mp>;

      constexpr iterator
      begin() noexcept
      { return {static_cast<_Mp&>(*this), 0}; }

      constexpr const_iterator
      begin() const noexcept
      { return cbegin(); }

      constexpr const_iterator
      cbegin() const noexcept
      { return {static_cast<const _Mp&>(*this), 0}; }

      constexpr default_sentinel_t
      end() const noexcept
      { return {}; }

      constexpr default_sentinel_t
      cend() const noexcept
      { return {}; }

      static constexpr auto size = __simd_size_c<_Ap::_S_size>;

      _MaskBase() = default;

      // LWG issue from 2026-03-04 / P4042R0
      template <size_t _UBytes, typename _UAbi>
        requires (_Ap::_S_size != _UAbi::_S_size)
        explicit
        _MaskBase(const basic_mask<_UBytes, _UAbi>&) = delete("size mismatch");

      template <typename _Up, typename _UAbi>
        explicit
        _MaskBase(const basic_vec<_Up, _UAbi>&)
        = delete("use operator! or a comparison to convert a vec into a mask");

      template <typename _Up, typename _UAbi>
        requires (_Ap::_S_size != _UAbi::_S_size)
        operator basic_vec<_Up, _UAbi>() const
        = delete("size mismatch");
    };

  template <size_t _Bytes, __abi_tag _Ap>
    requires (_Ap::_S_nreg == 1)
    class basic_mask<_Bytes, _Ap>
    : public _MaskBase<_Bytes, _Ap>
    {
      using _Base = _MaskBase<_Bytes, _Ap>;

      using _VecType = _Base::_VecType;

      template <size_t, typename>
        friend class basic_mask;

      template <typename, typename>
        friend class basic_vec;

      static constexpr int _S_size = _Ap::_S_size;

      using _DataType = typename _Ap::template _MaskDataType<_Bytes>;

      static constexpr bool _S_has_bool_member = is_same_v<_DataType, bool>;

      static constexpr bool _S_is_scalar = _S_has_bool_member;

      static constexpr bool _S_use_bitmask = _Ap::_S_is_bitmask;

      static constexpr int _S_full_size = [] {
        if constexpr (_S_is_scalar)
          return _S_size;
        else if constexpr (_S_use_bitmask && _S_size < __CHAR_BIT__)
          return __CHAR_BIT__;
        else
          return __bit_ceil(unsigned(_S_size));
      }();

      static constexpr bool _S_is_partial = _S_size != _S_full_size;

      static constexpr _DataType _S_implicit_mask = [] {
        if constexpr (_S_is_scalar)
          return true;
        else if (!_S_is_partial)
          return _DataType(~_DataType());
        else if constexpr (_S_use_bitmask)
          return _DataType((_DataType(1) << _S_size) - 1);
        else
          {
            constexpr auto [...__is] = _IotaArray<_S_full_size>;
            return _DataType{ (__is < _S_size ? -1 : 0)... };
          }
      }();

      // Actual padding bytes, not padding elements.
      // => _S_padding_bytes is 0 even if _S_is_partial is true.
      static constexpr size_t _S_padding_bytes = 0;

      _DataType _M_data;

    public:
      using value_type = bool;

      using abi_type = _Ap;

      using iterator = _Base::iterator;

      using const_iterator = _Base::const_iterator;

      // internal but public API ----------------------------------------------
      [[__gnu__::__always_inline__]]
      static constexpr basic_mask
      _S_init(_DataType __x)
      {
        basic_mask __r;
        __r._M_data = __x;
        return __r;
      }

      [[__gnu__::__always_inline__]]
      static constexpr basic_mask
      _S_init(unsigned_integral auto __bits)
      { return basic_mask(__bits); }

      [[__gnu__::__always_inline__]]
      constexpr const _DataType&
      _M_get() const
      { return _M_data; }

      /** @internal
       * Bit-cast the given object @p __x to basic_mask.
       *
       * This is necessary for _S_nreg > 1 where the last element can be bool or when the sizeof
       * doesn't match because of different alignment requirements of the sub-masks.
       */
      template <size_t _UBytes, typename _UAbi>
        [[__gnu__::__always_inline__]]
        static constexpr basic_mask
        _S_recursive_bit_cast(const basic_mask<_UBytes, _UAbi>& __x)
        { return __builtin_bit_cast(basic_mask, __x._M_concat_data()); }

      [[__gnu__::__always_inline__]]
      constexpr auto
      _M_concat_data(bool __do_sanitize = _S_is_partial) const
      {
        if constexpr (_S_is_scalar)
          return __vec_builtin_type<__integer_from<_Bytes>, 1>{__integer_from<_Bytes>(-_M_data)};
        else
          {
            if constexpr (_S_is_partial)
              if (__do_sanitize)
                return _DataType(_M_data & _S_implicit_mask);
            return _M_data;
          }
      }

      /** @internal
       * Returns a mask where the first @p __n elements are true. All remaining elements are false.
       *
       * @pre @p __n > 0 && @p __n < _S_size
       */
      template <_ArchTraits _Traits = {}>
        [[__gnu__::__always_inline__]]
        static constexpr basic_mask
        _S_partial_mask_of_n(int __n)
        {
          static_assert(!_S_is_scalar);
          if constexpr (!_S_use_bitmask)
            {
              using _Ip = __integer_from<_Bytes>;
              __glibcxx_simd_precondition(__n >= 0 && __n <= numeric_limits<_Ip>::max(),
                                          "_S_partial_mask_of_n without _S_use_bitmask requires "
                                          "positive __n that does not overflow.");
              constexpr _DataType __0123
                = __builtin_bit_cast(_DataType, _IotaArray<_Ip(_S_full_size)>);
              return basic_mask(__0123 < _Ip(__n));
            }
          else
            {
              __glibcxx_simd_precondition(__n >= 0 && __n <= 255,
                                          "The x86 BZHI instruction requires __n to "
                                          "only use bits 0:7");
#if __has_builtin(__builtin_ia32_bzhi_si)
              if constexpr (_S_size <= 32 && _Traits._M_have_bmi2())
                return _S_init(_Bitmask<_S_size>(
                                 __builtin_ia32_bzhi_si(~0u >> (32 - _S_size), unsigned(__n))));
#endif
#if __has_builtin(__builtin_ia32_bzhi_di)
              else if constexpr (_S_size <= 64 && _Traits._M_have_bmi2())
                return _S_init(__builtin_ia32_bzhi_di(~0ull >> (64 - _S_size), unsigned(__n)));
#endif
              if constexpr (_S_size <= 32)
                {
                  __glibcxx_simd_precondition(__n < 32, "invalid shift");
                  return _S_init(_Bitmask<_S_size>((1u << unsigned(__n)) - 1));
                }
              else if constexpr (_S_size <= 64)
                {
                  __glibcxx_simd_precondition(__n < 64, "invalid shift");
                  return _S_init((1ull << unsigned(__n)) - 1);
                }
              else
                static_assert(false);
            }
        }

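      // Illustrative: for an 8-element mask, _S_partial_mask_of_n(3) yields
      // {true, true, true, false, false, false, false, false}.
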
      [[__gnu__::__always_inline__]]
      constexpr basic_mask&
      _M_and_neighbors()
      {
        if constexpr (_S_use_bitmask)
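          // AND each even bit with its odd neighbor above and each odd bit
          // with the even bit below it (0x5555... selects the even positions)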
          _M_data &= ((_M_data >> 1) & 0x5555'5555'5555'5555ull)
                       | ((_M_data << 1) & ~0x5555'5555'5555'5555ull);
        else
          _M_data &= _VecOps<_DataType>::_S_swap_neighbors(_M_data);
        return *this;
      }

      [[__gnu__::__always_inline__]]
      constexpr basic_mask&
      _M_or_neighbors()
      {
        if constexpr (_S_use_bitmask)
          _M_data |= ((_M_data >> 1) & 0x5555'5555'5555'5555ull)
                       | ((_M_data << 1) & ~0x5555'5555'5555'5555ull);
        else
          _M_data |= _VecOps<_DataType>::_S_swap_neighbors(_M_data);
        return *this;
      }

      template <typename _Mp>
        [[__gnu__::__always_inline__]]
        constexpr auto _M_chunk() const noexcept
        {
          constexpr int __n = _S_size / _Mp::_S_size;
          constexpr int __rem = _S_size % _Mp::_S_size;
          constexpr auto [...__is] = _IotaArray<__n>;
          if constexpr (__rem == 0)
            return array<_Mp, __n>{__extract_simd_at<_Mp>(cw<_Mp::_S_size * __is>, *this)...};
          else
            {
              using _Rest = resize_t<__rem, _Mp>;
              return tuple(__extract_simd_at<_Mp>(cw<_Mp::_S_size * __is>, *this)...,
                           __extract_simd_at<_Rest>(cw<_Mp::_S_size * __n>, *this));
            }
        }

      [[__gnu__::__always_inline__]]
      static constexpr const basic_mask&
      _S_concat(const basic_mask& __x0) noexcept
      { return __x0; }

      template <typename... _As>
        requires (sizeof...(_As) > 1)
        [[__gnu__::__always_inline__]]
        static constexpr basic_mask
        _S_concat(const basic_mask<_Bytes, _As>&... __xs) noexcept
        {
          static_assert(_S_size == (_As::_S_size + ...));
          return __extract_simd_at<basic_mask>(cw<0>, __xs...);
        }

      // [simd.mask.overview] default constructor -----------------------------
      basic_mask() = default;

      // [simd.mask.overview] conversion extensions ---------------------------
      [[__gnu__::__always_inline__]]
      constexpr
      basic_mask(_DataType __x) requires(!_S_is_scalar && !_S_use_bitmask)
      : _M_data(__x)
      {}

      [[__gnu__::__always_inline__]]
      constexpr
      operator _DataType() requires(!_S_is_scalar && !_S_use_bitmask)
      { return _M_data; }

      // [simd.mask.ctor] broadcast constructor -------------------------------
      [[__gnu__::__always_inline__]]
      constexpr explicit
      basic_mask(same_as<bool> auto __x) noexcept // LWG 4382.
      : _M_data(__x ? _S_implicit_mask : _DataType())
      {}

      // [simd.mask.ctor] conversion constructor ------------------------------
      template <size_t _UBytes, typename _UAbi>
        requires (_S_size == _UAbi::_S_size)
        [[__gnu__::__always_inline__]]
        constexpr explicit(__is_mask_conversion_explicit<_Ap, _UAbi>(_Bytes, _UBytes))
        basic_mask(const basic_mask<_UBytes, _UAbi>& __x) noexcept
        : _M_data([&] [[__gnu__::__always_inline__]] {
            using _UV = basic_mask<_UBytes, _UAbi>;
            // bool to bool
            if constexpr (_S_is_scalar)
              return __x[0];

            // converting from an "array of bool"
            else if constexpr (_UV::_S_is_scalar)
              {
                constexpr auto [...__is] = _IotaArray<_S_size>;
                if constexpr (_S_use_bitmask)
                  return ((_DataType(__x[__is]) << __is) | ...);
                else
                  return _DataType{__vec_value_type<_DataType>(-__x[__is])...};
              }

            // vec-/bit-mask to bit-mask | bit-mask to vec-mask
            else if constexpr (_S_use_bitmask || _UV::_S_use_bitmask)
              return basic_mask(__x.to_bitset())._M_data;

            // vec-mask to vec-mask
            else if constexpr (_Bytes == _UBytes)
              return _S_recursive_bit_cast(__x)._M_data;

            else
              {
#if _GLIBCXX_X86
                // TODO: turn this into a __vec_mask_cast overload in simd_x86.h
                if constexpr (_Bytes == 1 && _UBytes == 2)
                  if (!__is_const_known(__x))
                    {
                      if constexpr (_UAbi::_S_nreg == 1)
                        return __x86_cvt_vecmask<_DataType>(__x._M_data);
                      else if constexpr (_UAbi::_S_nreg == 2)
                        {
                          auto __lo = __x._M_data0._M_data;
                          auto __hi = __vec_zero_pad_to<sizeof(__lo)>(
                                        __x._M_data1._M_concat_data());
                          return __x86_cvt_vecmask<_DataType>(__lo, __hi);
                        }
                    }
#endif
                return __vec_mask_cast<_DataType>(__x._M_concat_data());
              }
          }())
        {}

      using _Base::_MaskBase;

      // [simd.mask.ctor] generator constructor -------------------------------
      template <__simd_generator_invokable<bool, _S_size> _Fp>
        [[__gnu__::__always_inline__]]
        constexpr explicit
        basic_mask(_Fp&& __gen)
        : _M_data([&] [[__gnu__::__always_inline__]] {
            constexpr auto [...__is] = _IotaArray<_S_size>;
            if constexpr (_S_is_scalar)
              return __gen(__simd_size_c<0>);
            else if constexpr (_S_use_bitmask)
              return _DataType(((_DataType(__gen(__simd_size_c<__is>)) << __is)
                                  | ...));
            else
              return _DataType{__vec_value_type<_DataType>(
                                 __gen(__simd_size_c<__is>) ? -1 : 0)...};
          }())
        {}

      // [simd.mask.ctor] bitset constructor ----------------------------------
      [[__gnu__::__always_inline__]]
      constexpr
      basic_mask(const same_as<bitset<_S_size>> auto& __b) noexcept // LWG 4382.
      : basic_mask(static_cast<_Bitmask<_S_size>>(__b.to_ullong()))
      {
        // more than 64 elements in one register? not yet.
        static_assert(_S_size <= numeric_limits<unsigned long long>::digits);
      }

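      // Illustrative: constructing from bitset<4>{0b0101} yields the elements
      // {true, false, true, false} (bit __i initializes element __i).
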
      // [simd.mask.ctor] uint constructor ------------------------------------
      template <unsigned_integral _Tp>
        requires (!same_as<_Tp, bool>) // LWG 4382.
        [[__gnu__::__always_inline__]]
        constexpr explicit
        basic_mask(_Tp __val) noexcept
        : _M_data([&] [[__gnu__::__always_inline__]] () {
            if constexpr (_S_use_bitmask)
              return __val;
            else if constexpr (_S_is_scalar)
              return bool(__val & 1);
            else if (__is_const_known(__val))
              {
                constexpr auto [...__is] = _IotaArray<_S_size>;
                return _DataType {__vec_value_type<_DataType>((__val & (1ull << __is)) == 0
                                                                ? 0 : -1)...};
              }
            else
              {
                using _Ip = typename _VecType::value_type;
                _VecType __v0 = _Ip(__val);
                constexpr int __bits_per_element = sizeof(_Ip) * __CHAR_BIT__;
                constexpr _VecType __pow2 = _VecType(cw<1>)
                                              << (__iota<_VecType> % cw<__bits_per_element>);
                if constexpr (_S_size < __bits_per_element)
                  return ((__v0 & __pow2) > cw<0>)._M_concat_data();
                else if constexpr (_S_size == __bits_per_element)
                  return ((__v0 & __pow2) != cw<0>)._M_concat_data();
                else
                  {
                    static_assert(_Bytes == 1);
                    static_assert(sizeof(_Ip) == 1);
                    _Bitmask<_S_size> __bits = __val;
                    static_assert(sizeof(_VecType) % sizeof(__bits) == 0);
                    if constexpr (sizeof(_DataType) == 32)
                      {
                        __vec_builtin_type<_UInt<8>, 4> __v1 = {
                          0xffu & (__bits >> (0 * __CHAR_BIT__)),
                          0xffu & (__bits >> (1 * __CHAR_BIT__)),
                          0xffu & (__bits >> (2 * __CHAR_BIT__)),
                          0xffu & (__bits >> (3 * __CHAR_BIT__)),
                        };
                        __v1 *= 0x0101'0101'0101'0101ull;
                        __v0 = __builtin_bit_cast(_VecType, __v1);
                        return ((__v0 & __pow2) != cw<0>)._M_data;
                      }
                    else
                      {
                        using _V1 = vec<_Ip, sizeof(__bits)>;
                        _V1 __v1 = __builtin_bit_cast(_V1, __bits);
                        __v0 = _VecType::_S_static_permute(__v1, [](int __i) {
                                 return __i / __CHAR_BIT__;
                               });
                        return ((__v0 & __pow2) != cw<0>)._M_data;
                      }
                  }
              }
          }())
        {}

      // Effects: Initializes the first M elements to the corresponding bit values in val, where M
      // is the smaller of size() and the number of bits in the value representation
      // ([basic.types.general]) of the type of val. If M is less than size(), the remaining
      // elements are initialized to zero.

      // [simd.mask.subscr] ---------------------------------------------------
      [[__gnu__::__always_inline__]]
      constexpr value_type
      operator[](__simd_size_type __i) const
      {
        __glibcxx_simd_precondition(__i >= 0 && __i < _S_size, "subscript is out of bounds");
        if constexpr (_S_is_scalar)
          return _M_data;
        else if constexpr (_S_use_bitmask)
          return bool((_M_data >> __i) & 1);
        else
          return _M_data[__i] & 1;
      }

      // [simd.mask.unary] ----------------------------------------------------
      [[__gnu__::__always_inline__]]
      constexpr basic_mask
      operator!() const noexcept
      {
        if constexpr (_S_is_scalar)
          return _S_init(!_M_data);
        else
          return _S_init(~_M_data);
      }

      [[__gnu__::__always_inline__]]
      constexpr _VecType
      operator+() const noexcept requires destructible<_VecType>
      { return operator _VecType(); }

      constexpr _VecType
      operator+() const noexcept = delete;

      [[__gnu__::__always_inline__]]
      constexpr _VecType
      operator-() const noexcept requires destructible<_VecType>
      {
        using _Ip = typename _VecType::value_type;
        if constexpr (_S_is_scalar)
          return _Ip(-int(_M_data));
        else if constexpr (_S_use_bitmask)
          return __select_impl(*this, _Ip(-1), _Ip());
        else
          {
            static_assert(sizeof(_VecType) == sizeof(_M_data));
            return __builtin_bit_cast(_VecType, _M_data);
          }
      }

      constexpr _VecType
      operator-() const noexcept = delete;

      [[__gnu__::__always_inline__]]
      constexpr _VecType
      operator~() const noexcept requires destructible<_VecType>
      {
        using _Ip = typename _VecType::value_type;
        if constexpr (_S_is_scalar)
          return _Ip(~int(_M_data));
        else if constexpr (_S_use_bitmask)
          return __select_impl(*this, _Ip(-2), _Ip(-1));
        else
          {
            static_assert(sizeof(_VecType) == sizeof(_M_data));
            return __builtin_bit_cast(_VecType, _M_data) - _Ip(1);
          }
      }

      constexpr _VecType
      operator~() const noexcept = delete;

      // [simd.mask.conv] -----------------------------------------------------
      template <typename _Up, typename _UAbi>
        requires (_UAbi::_S_size == _S_size)
        [[__gnu__::__always_inline__]]
        constexpr explicit(sizeof(_Up) != _Bytes)
        operator basic_vec<_Up, _UAbi>() const noexcept
        {
          if constexpr (_S_is_scalar)
            return _Up(_M_data);
          else
            {
              using _UV = basic_vec<_Up, _UAbi>;
              return __select_impl(static_cast<_UV::mask_type>(*this), _Up(1), _UV());
            }
        }

      using _Base::operator basic_vec;

      // [simd.mask.namedconv] ------------------------------------------------
      [[__gnu__::__always_inline__]]
      constexpr bitset<_S_size>
      to_bitset() const noexcept
      {
        // more than 64 elements in one register? not yet.
        static_assert(_S_size <= numeric_limits<unsigned long long>::digits);
        return to_ullong();
      }

      /** @internal
       * Return the mask as the smallest possible unsigned integer (up to 64 bits).
       *
       * @tparam _Offset Adjust the return type & value to start at bit @p _Offset.
       * @tparam _Use_2_for_1 Store the value of every second element into one bit of the result.
       *   (precondition: each even/odd pair stores the same value)
       */
      template <int _Offset = 0, _ArchTraits _Traits = {}>
        [[__gnu__::__always_inline__]]
        constexpr _Bitmask<_S_size + _Offset>
        _M_to_uint() const
        {
          constexpr int __nbits = _S_size;
          static_assert(__nbits + _Offset <= numeric_limits<unsigned long long>::digits);
          // before shifting
          using _U0 = _Bitmask<__nbits>;
          // potentially wider type needed for shift by _Offset
          using _Ur = _Bitmask<__nbits + _Offset>;
          if constexpr (_S_is_scalar || _S_use_bitmask)
            {
              auto __bits = _M_data;
              if constexpr (_S_is_partial)
                __bits &= _S_implicit_mask;
              return _Ur(__bits) << _Offset;
            }
          else
            {
#if _GLIBCXX_X86
              if (!__is_const_known(*this))
                {
                  _U0 __uint;
                  if constexpr (_Bytes != 2) // movmskb would duplicate each bit
                    __uint = _U0(__x86_movmsk(_M_data));
                  else if constexpr (_Bytes == 2 && _Traits._M_have_bmi2())
                    __uint = __bit_extract_even<__nbits>(__x86_movmsk(_M_data));
                  else if constexpr (_Bytes == 2)
                    return __similar_mask<char, __nbits, _Ap>(*this).template _M_to_uint<_Offset>();
                  else
                    static_assert(false);
                  // TODO: with AVX512 use __builtin_ia32_cvt[bwdq]2mask(128|256|512)
                  // TODO: Ask for compiler builtin to do the best of the above. This should also
                  // combine with a preceding vector-mask compare to produce a bit-mask compare (on
                  // AVX512)
                  if constexpr (_S_is_partial)
                    __uint &= (_U0(1) << _S_size) - 1;
                  return _Ur(__uint) << _Offset;
                }
#endif
              using _IV = _VecType;
              static_assert(destructible<_IV>);
              const typename _IV::mask_type& __k = [&] [[__gnu__::__always_inline__]] () {
                if constexpr (is_same_v<typename _IV::mask_type, basic_mask>)
                  return *this;
                else
                  return typename _IV::mask_type(*this);
              }();
              constexpr int __n = _IV::size();
              if constexpr (_Bytes * __CHAR_BIT__ >= __n) // '1 << __iota' cannot overflow
                { // reduce(select(k, powers_of_2, 0))
                  constexpr _IV __pow2 = _IV(cw<1>) << __iota<_IV>;
                  return _Ur(_U0(__select_impl(__k, __pow2, _IV())
                                   ._M_reduce(bit_or<>()))) << _Offset;
                }
              else if constexpr (__n % __CHAR_BIT__ != 0)
                { // recurse after splitting in two
                  constexpr int __n_lo = __n - __n % __CHAR_BIT__;
                  const auto [__lo, __hi] = chunk<__n_lo>(__k);
                  _Ur __bits = __hi.template _M_to_uint<_Offset + __n_lo>();
                  return __bits | __lo.template _M_to_uint<_Offset>();
                }
              else
                { // limit powers_of_2 to 1, 2, 4, ..., 128
                  constexpr _IV __pow2 = _IV(cw<1>) << (__iota<_IV> % _IV(cw<__CHAR_BIT__>));
                  _IV __x = __select_impl(__k, __pow2, _IV());
                  // partial reductions of 8 neighboring elements
                  __x |= _IV::_S_static_permute(__x, _SwapNeighbors<4>());
                  __x |= _IV::_S_static_permute(__x, _SwapNeighbors<2>());
                  __x |= _IV::_S_static_permute(__x, _SwapNeighbors<1>());
                  // permute partial reduction results to the front
                  __x = _IV::_S_static_permute(__x, [](int __i) {
                          return __i * 8 < __n ? __i * 8 : uninit_element;
                        });
                  // extract front as scalar unsigned
                  _U0 __bits = __builtin_bit_cast(
                                 __similar_vec<_U0, __n * _Bytes / sizeof(_U0), _Ap>, __x)[0];
                  // mask off unused bits
                  if constexpr (!__has_single_bit(unsigned(__nbits)))
                    __bits &= (_U0(1) << __nbits) - 1;
                  return _Ur(__bits) << _Offset;
                }
            }
        }

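      // Illustrative: a 4-element mask with elements {1, 0, 0, 1} yields
      // _M_to_uint() == 0b1001, and _M_to_uint<2>() == 0b100100 (element __i
      // maps to bit __i + _Offset).
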
      [[__gnu__::__always_inline__]]
      constexpr unsigned long long
      to_ullong() const
      { return _M_to_uint(); }

      // [simd.mask.binary] ---------------------------------------------------
      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator&&(const basic_mask& __x, const basic_mask& __y) noexcept
      { return _S_init(__x._M_data & __y._M_data); }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator||(const basic_mask& __x, const basic_mask& __y) noexcept
      { return _S_init(__x._M_data | __y._M_data); }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator&(const basic_mask& __x, const basic_mask& __y) noexcept
      { return _S_init(__x._M_data & __y._M_data); }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator|(const basic_mask& __x, const basic_mask& __y) noexcept
      { return _S_init(__x._M_data | __y._M_data); }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator^(const basic_mask& __x, const basic_mask& __y) noexcept
      { return _S_init(__x._M_data ^ __y._M_data); }

      // [simd.mask.cassign] --------------------------------------------------
      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask&
      operator&=(basic_mask& __x, const basic_mask& __y) noexcept
      {
        __x._M_data &= __y._M_data;
        return __x;
      }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask&
      operator|=(basic_mask& __x, const basic_mask& __y) noexcept
      {
        __x._M_data |= __y._M_data;
        return __x;
      }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask&
      operator^=(basic_mask& __x, const basic_mask& __y) noexcept
      {
        __x._M_data ^= __y._M_data;
        return __x;
      }

      // [simd.mask.comparison] -----------------------------------------------
      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator==(const basic_mask& __x, const basic_mask& __y) noexcept
      { return !(__x ^ __y); }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator!=(const basic_mask& __x, const basic_mask& __y) noexcept
      { return __x ^ __y; }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator>=(const basic_mask& __x, const basic_mask& __y) noexcept
      { return __x || !__y; }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator<=(const basic_mask& __x, const basic_mask& __y) noexcept
      { return !__x || __y; }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator>(const basic_mask& __x, const basic_mask& __y) noexcept
      { return __x && !__y; }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      operator<(const basic_mask& __x, const basic_mask& __y) noexcept
      { return !__x && __y; }

      // [simd.mask.cond] -----------------------------------------------------
      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      __select_impl(const basic_mask& __k, const basic_mask& __t, const basic_mask& __f) noexcept
      {
        if constexpr (!_S_use_bitmask)
          {
#if _GLIBCXX_X86
            // This works around bad code-gen when the compiler can't see that __k is a
            // vector-mask. This pattern is recognized to match the x86 blend instructions, which
            // only consider the sign bit of the mask register. Also, without SSE4, if the compiler
            // knows that __k is a vector-mask, then the '< 0' is elided.
            return __k._M_data < 0 ? __t._M_data : __f._M_data;
#endif
            return __k._M_data ? __t._M_data : __f._M_data;
          }
        else
          return (__k._M_data & __t._M_data) | (~__k._M_data & __f._M_data);
      }

      [[__gnu__::__always_inline__]]
      friend constexpr basic_mask
      __select_impl(const basic_mask& __k, same_as<bool> auto __t, same_as<bool> auto __f) noexcept
      {
        if (__t == __f)
          return basic_mask(__t);
        else
          return __t ? __k : !__k;
      }

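      // Illustrative: __select_impl(__k, true, false) is __k itself,
      // __select_impl(__k, false, true) is !__k, and equal arguments
      // broadcast.
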
      template <__vectorizable _T0, same_as<_T0> _T1>
        requires (sizeof(_T0) == _Bytes)
        [[__gnu__::__always_inline__]]
        friend constexpr vec<_T0, _S_size>
        __select_impl(const basic_mask& __k, const _T0& __t, const _T1& __f) noexcept
        {
          if constexpr (_S_is_scalar)
            return __k._M_data ? __t : __f;
          else
            {
              using _Vp = vec<_T0, _S_size>;
              using _Mp = typename _Vp::mask_type;
              return __select_impl(_Mp(__k), _Vp(__t), _Vp(__f));
            }
        }

      // [simd.mask.reductions] implementation --------------------------------
      [[__gnu__::__always_inline__]]
      constexpr bool
      _M_all_of() const noexcept
      {
        if constexpr (_S_is_scalar)
          return _M_data;
        else if constexpr (_S_use_bitmask)
          {
            if constexpr (_S_is_partial)
              // PR120925 (partial kortest pattern not recognized)
              return (_M_data & _S_implicit_mask) == _S_implicit_mask;
            else
              return _M_data == _S_implicit_mask;
          }
#if _GLIBCXX_X86
        else if (!__is_const_known(_M_data))
          return __x86_vecmask_all<_S_size>(_M_data);
#endif
        else
          return _VecOps<_DataType, _S_size>::_S_all_of(_M_data);
      }

      [[__gnu__::__always_inline__]]
      constexpr bool
      _M_any_of() const noexcept
      {
        if constexpr (_S_is_scalar)
          return _M_data;
        else if constexpr (_S_use_bitmask)
          {
            if constexpr (_S_is_partial)
              // PR120925 (partial kortest pattern not recognized)
              return (_M_data & _S_implicit_mask) != 0;
            else
              return _M_data != 0;
          }
#if _GLIBCXX_X86
        else if (!__is_const_known(_M_data))
          return __x86_vecmask_any<_S_size>(_M_data);
#endif
        else
          return _VecOps<_DataType, _S_size>::_S_any_of(_M_data);
      }

      [[__gnu__::__always_inline__]]
      constexpr bool
      _M_none_of() const noexcept
      {
        if constexpr (_S_is_scalar)
          return !_M_data;
        else if constexpr (_S_use_bitmask)
          {
            if constexpr (_S_is_partial)
              // PR120925 (partial kortest pattern not recognized)
              return (_M_data & _S_implicit_mask) == 0;
            else
              return _M_data == 0;
          }
#if _GLIBCXX_X86
        else if (!__is_const_known(_M_data))
          return __x86_vecmask_none<_S_size>(_M_data);
#endif
        else
          return _VecOps<_DataType, _S_size>::_S_none_of(_M_data);
      }

      [[__gnu__::__always_inline__]]
      constexpr __simd_size_type
      _M_reduce_count() const noexcept
      {
        if constexpr (_S_is_scalar)
          return int(_M_data);
        else if constexpr (_S_size <= numeric_limits<unsigned>::digits)
          return __builtin_popcount(_M_to_uint());
        else
          return __builtin_popcountll(to_ullong());
      }

      [[__gnu__::__always_inline__]]
      constexpr __simd_size_type
      _M_reduce_min_index() const
      {
        const auto __bits = _M_to_uint();
        __glibcxx_simd_precondition(__bits, "An empty mask does not have a min_index.");
        if constexpr (_S_size == 1)
          return 0;
        else
          return __countr_zero(__bits);
      }

      [[__gnu__::__always_inline__]]
      constexpr __simd_size_type
      _M_reduce_max_index() const
      {
        const auto __bits = _M_to_uint();
        __glibcxx_simd_precondition(__bits, "An empty mask does not have a max_index.");
        if constexpr (_S_size == 1)
          return 0;
        else
          return __highest_bit(__bits);
      }

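      // Illustrative: for a 4-element mask with elements {1, 0, 0, 1},
      // _M_reduce_count() == 2, _M_reduce_min_index() == 0, and
      // _M_reduce_max_index() == 3.
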
1340 [[__gnu__::__always_inline__]]
1341 friend constexpr bool
1342 __is_const_known(const basic_mask& __x)
1343 { return __builtin_constant_p(__x._M_data); }
1344 };
1345
1346 template <size_t _Bytes, __abi_tag _Ap>
1347 requires (_Ap::_S_nreg > 1)
1348 class basic_mask<_Bytes, _Ap>
1349 : public _MaskBase<_Bytes, _Ap>
1350 {
1351 using _Base = _MaskBase<_Bytes, _Ap>;
1352
1353 using _VecType = _Base::_VecType;
1354
1355 template <size_t, typename>
1356 friend class basic_mask;
1357
1358 template <typename, typename>
1359 friend class basic_vec;
1360
1361 static constexpr int _S_size = _Ap::_S_size;
1362
1363 static constexpr int _N0 = __bit_ceil(unsigned(_S_size)) / 2;
1364
1365 static constexpr int _N1 = _S_size - _N0;
1366
1367 static constexpr int _Nreg0 = __bit_ceil(unsigned(_Ap::_S_nreg)) / 2;
1368
1369 static constexpr int _Nreg1 = _Ap::_S_nreg - _Nreg0;
1370
1371 // explicitly request _Nreg0 rather than use __abi_rebind. This way _Float16 can use half
1372 // of native registers (since they convert to full float32 registers).
1373 using _Abi0 = decltype(_Ap::template _S_resize<_N0, _Nreg0>());
1374
1375 using _Abi1 = decltype(_Ap::template _S_resize<_N1, _Nreg1>());
1376
1377 using _Mask0 = basic_mask<_Bytes, _Abi0>;
1378
1379 // the implementation (and users) depend on elements being contiguous in memory
1380 static_assert(_Mask0::_S_padding_bytes == 0 && !_Mask0::_S_is_partial);
1381
1382 using _Mask1 = basic_mask<_Bytes, _Abi1>;
1383
1384 static constexpr bool _S_is_partial = _Mask1::_S_is_partial;
1385
1386 // _Ap::_S_nreg determines how deep the recursion goes. E.g. basic_mask<4, _Abi<8, 4>> cannot
1387 // use basic_mask<4, _Abi<4, 1>> as _Mask0/1 types.
1388 static_assert(_Mask0::abi_type::_S_nreg + _Mask1::abi_type::_S_nreg == _Ap::_S_nreg);
1389
1390 static constexpr bool _S_use_bitmask = _Mask0::_S_use_bitmask;
1391
1392 static constexpr bool _S_is_scalar = _Mask0::_S_is_scalar;
1393
1394 _Mask0 _M_data0;
1395
1396 _Mask1 _M_data1;
1397
1398 static constexpr bool _S_has_bool_member = _Mask1::_S_has_bool_member;
1399
1400 // by construction _N0 >= _N1
1401 // => sizeof(_Mask0) >= sizeof(_Mask1)
1402 // and __alignof__(_Mask0) >= __alignof__(_Mask1)
1403 static constexpr size_t _S_padding_bytes
1404 = (__alignof__(_Mask0) == __alignof__(_Mask1)
1405 ? 0 : __alignof__(_Mask0) - (sizeof(_Mask1) % __alignof__(_Mask0)))
1406 + _Mask1::_S_padding_bytes;
1407
1408 public:
1409 using value_type = bool;
1410
1411 using abi_type = _Ap;
1412
1413 using iterator = _Base::iterator;
1414
1415 using const_iterator = _Base::const_iterator;
1416
1417 [[__gnu__::__always_inline__]]
1418 static constexpr basic_mask
1419 _S_init(const _Mask0& __x, const _Mask1& __y)
1420 {
1421 basic_mask __r;
1422 __r._M_data0 = __x;
1423 __r._M_data1 = __y;
1424 return __r;
1425 }
1426
1427 [[__gnu__::__always_inline__]]
1428 static constexpr basic_mask
1429 _S_init(unsigned_integral auto __bits)
1430 { return basic_mask(__bits); }
1431
1432 template <typename _U0, typename _U1>
1433 [[__gnu__::__always_inline__]]
1434 static constexpr basic_mask
1435 _S_init(const __trivial_pair<_U0, _U1>& __bits)
1436 {
1437 if constexpr (is_unsigned_v<_U0>)
1438 {
1439 static_assert(is_unsigned_v<_U1>);
1440 return _S_init(_Mask0(__bits._M_first), _Mask1(__bits._M_second));
1441 }
1442 else if constexpr (is_unsigned_v<_U1>)
1443 return _S_init(_Mask0::_S_init(__bits._M_first), _Mask1(__bits._M_second));
1444 else
1445 return _S_init(_Mask0::_S_init(__bits._M_first), _Mask1::_S_init(__bits._M_second));
1446 }
1447
1448 [[__gnu__::__always_inline__]]
1449 constexpr const _Mask0&
1450 _M_get_low() const
1451 { return _M_data0; }
1452
1453 [[__gnu__::__always_inline__]]
1454 constexpr const _Mask1&
1455 _M_get_high() const
1456 { return _M_data1; }
1457
1458 template <size_t _UBytes, typename _UAbi>
1459 [[__gnu__::__always_inline__]]
1460 static constexpr basic_mask
1461 _S_recursive_bit_cast(const basic_mask<_UBytes, _UAbi>& __x)
1462 {
1463 using _Mp = basic_mask<_UBytes, _UAbi>;
1464 if constexpr (_Mp::_S_has_bool_member || sizeof(basic_mask) > sizeof(__x)
1465 || _Mp::_S_padding_bytes != 0)
1466 return _S_init(__builtin_bit_cast(_Mask0, __x._M_data0),
1467 _Mask1::_S_recursive_bit_cast(__x._M_data1));
1468 else if constexpr (sizeof(basic_mask) == sizeof(__x))
1469 return __builtin_bit_cast(basic_mask, __x);
1470 else
1471 { // e.g. on IvyBridge (different alignment => different sizeof)
1472 struct _Tmp { alignas(_Mp) basic_mask _M_data; };
1473 return __builtin_bit_cast(_Tmp, __x)._M_data;
1474 }
1475 }
1476
1477 [[__gnu__::__always_inline__]]
1478 constexpr auto
1479 _M_concat_data(bool __do_sanitize = _S_is_partial) const
1480 {
1481 if constexpr (_S_use_bitmask)
1482 {
1483 static_assert(_S_size <= numeric_limits<unsigned long long>::digits,
1484 "cannot concat more than 64 bits");
1485 using _Up = _Bitmask<_S_size>;
1486 return _Up(_M_data0._M_concat_data() | (_Up(_M_data1._M_concat_data(__do_sanitize)) << _N0));
1487 }
1488 else
1489 {
1490 auto __lo = _M_data0._M_concat_data();
1491 auto __hi = __vec_zero_pad_to<sizeof(__lo)>(_M_data1._M_concat_data(__do_sanitize));
1492 return __vec_concat(__lo, __hi);
1493 }
1494 }
1495
1496 template <_ArchTraits _Traits = {}>
1497 [[__gnu__::__always_inline__]]
1498 static constexpr basic_mask
1499 _S_partial_mask_of_n(int __n)
1500 {
1501#if __has_builtin(__builtin_ia32_bzhi_di)
1502 if constexpr (_S_use_bitmask && _S_size <= 64 && _Traits._M_have_bmi2())
1503 return basic_mask(__builtin_ia32_bzhi_di(~0ull >> (64 - _S_size), unsigned(__n)));
1504#endif
1505 if constexpr (_N0 == 1)
1506 {
1507 static_assert(_S_size == 2); // => __n == 1
1508 return _S_init(_Mask0(true), _Mask1(false));
1509 }
1510 else if (__n < _N0)
1511 return _S_init(_Mask0::_S_partial_mask_of_n(__n), _Mask1(false));
1512 else if (__n == _N0 || _N1 == 1)
1513 return _S_init(_Mask0(true), _Mask1(false));
1514 else if constexpr (_N1 != 1)
1515 return _S_init(_Mask0(true), _Mask1::_S_partial_mask_of_n(__n - _N0));
1516 }
1517
1518 [[__gnu__::__always_inline__]]
1519 constexpr basic_mask&
1520 _M_and_neighbors()
1521 {
1522 _M_data0._M_and_neighbors();
1523 _M_data1._M_and_neighbors();
1524 return *this;
1525 }
1526
1527 [[__gnu__::__always_inline__]]
1528 constexpr basic_mask&
1529 _M_or_neighbors()
1530 {
1531 _M_data0._M_or_neighbors();
1532 _M_data1._M_or_neighbors();
1533 return *this;
1534 }
1535
1536 template <typename _Mp>
1537 [[__gnu__::__always_inline__]]
1538 constexpr auto
1539 _M_chunk() const noexcept
1540 {
1541 constexpr int __n = _S_size / _Mp::_S_size;
1542 constexpr int __rem = _S_size % _Mp::_S_size;
1543 constexpr auto [...__is] = _IotaArray<__n>;
1544 if constexpr (__rem == 0)
1545 return array<_Mp, __n>{__extract_simd_at<_Mp>(cw<_Mp::_S_size * __is>,
1546 _M_data0, _M_data1)...};
1547 else
1548 {
1549 using _Rest = resize_t<__rem, _Mp>;
1550 return tuple(__extract_simd_at<_Mp>(cw<_Mp::_S_size * __is>, _M_data0, _M_data1)...,
1551 __extract_simd_at<_Rest>(cw<_Mp::_S_size * __n>, _M_data0, _M_data1));
1552 }
1553 }
1554
1555 [[__gnu__::__always_inline__]]
1556 static constexpr basic_mask
1557 _S_concat(const basic_mask& __x0) noexcept
1558 { return __x0; }
1559
1560 template <typename... _As>
1561 requires (sizeof...(_As) >= 2)
1562 [[__gnu__::__always_inline__]]
1563 static constexpr basic_mask
1564 _S_concat(const basic_mask<_Bytes, _As>&... __xs) noexcept
1565 {
1566 static_assert(_S_size == (_As::_S_size + ...));
1567 return _S_init(__extract_simd_at<_Mask0>(cw<0>, __xs...),
1568 __extract_simd_at<_Mask1>(cw<_N0>, __xs...));
1569 }
1570
1571 // [simd.mask.overview] default constructor -----------------------------
1572 basic_mask() = default;
1573
1574 // [simd.mask.overview] conversion extensions ---------------------------
1575 // TODO: any?
1576
1577 // [simd.mask.ctor] broadcast constructor -------------------------------
1578 [[__gnu__::__always_inline__]]
1579 constexpr explicit
1580 basic_mask(same_as<bool> auto __x) noexcept // LWG 4382.
1581 : _M_data0(__x), _M_data1(__x)
1582 {}
1583
1584 // [simd.mask.ctor] conversion constructor ------------------------------
1585 template <size_t _UBytes, typename _UAbi>
1586 requires (_S_size == _UAbi::_S_size)
1587 [[__gnu__::__always_inline__]]
1588 constexpr explicit(__is_mask_conversion_explicit<_Ap, _UAbi>(_Bytes, _UBytes))
1589 basic_mask(const basic_mask<_UBytes, _UAbi>& __x) noexcept
1590 : _M_data0([&] {
1591 if constexpr (_UAbi::_S_nreg > 1)
1592 {
1593 return __x._M_data0;
1594 }
1595 else if constexpr (_N0 == 1)
1596 return _Mask0(__x[0]);
1597 else
1598 return get<0>(chunk<_N0>(__x));
1599 }()),
1600 _M_data1([&] {
1601 if constexpr (_UAbi::_S_nreg > 1)
1602 {
1603 return __x._M_data1;
1604 }
1605 else if constexpr (_N1 == 1)
1606 return _Mask1(__x[_N0]);
1607 else
1608 return get<1>(chunk<_N0>(__x));
1609 }())
1610 {}
1611
1612 using _Base::_MaskBase;
1613
1614 // [simd.mask.ctor] generator constructor -------------------------------
1615 template <__simd_generator_invokable<bool, _S_size> _Fp>
1616 [[__gnu__::__always_inline__]]
1617 constexpr explicit
1618 basic_mask(_Fp&& __gen)
1619 : _M_data0(__gen), _M_data1([&] [[__gnu__::__always_inline__]] (auto __i) {
1620 return __gen(__simd_size_c<__i + _N0>);
1621 })
1622 {}
1623
1624 // [simd.mask.ctor] bitset constructor ----------------------------------
1625 [[__gnu__::__always_inline__]]
1626 constexpr
1627 basic_mask(const same_as<bitset<_S_size>> auto& __b) noexcept // LWG 4382.
1628 : _M_data0(__bitset_split<_N0>(__b)._M_lo), _M_data1(__bitset_split<_N0>(__b)._M_hi)
1629 {}
1630
1631 // [simd.mask.ctor] uint constructor ------------------------------------------
1632 template <unsigned_integral _Tp>
1633 requires (!same_as<_Tp, bool>) // LWG 4382.
1634 [[__gnu__::__always_inline__]]
1635 constexpr explicit
1636 basic_mask(_Tp __val) noexcept
1637 : _M_data0(static_cast<_Bitmask<_N0>>(__val)),
1638 _M_data1(sizeof(_Tp) * __CHAR_BIT__ > _N0
1639 ? static_cast<_Bitmask<_N1>>(__val >> _N0) : _Bitmask<_N1>())
1640 {}
1641
1642 // [simd.mask.subscr] ---------------------------------------------------
1643 [[__gnu__::__always_inline__]]
1644 constexpr value_type
1645 operator[](__simd_size_type __i) const
1646 {
1647 __glibcxx_simd_precondition(__i >= 0 && __i < _S_size, "subscript is out of bounds");
1648 if (__is_const_known(__i))
1649 return __i < _N0 ? _M_data0[__i] : _M_data1[__i - _N0];
1650 else if constexpr (_M_data1._S_has_bool_member)
1651 // in some cases the last element can be 'bool' instead of bit-/vector-mask;
1652 // e.g. mask<short, 17> is {mask<short, 16>, mask<short, 1>}, where the latter uses
1653 // _ScalarAbi<1>, which is stored as 'bool'
1654 return __i < _N0 ? _M_data0[__i] : _M_data1[__i - _N0];
1655 else if constexpr (abi_type::_S_is_bitmask)
1656 {
1657 using _AliasingByte [[__gnu__::__may_alias__]] = unsigned char;
1658 return bool((reinterpret_cast<const _AliasingByte*>(this)
1659 [__i / __CHAR_BIT__] >> (__i % __CHAR_BIT__)) & 1);
1660 }
1661 else
1662 {
1663 using _AliasingInt [[__gnu__::__may_alias__]] = __integer_from<_Bytes>;
1664 return reinterpret_cast<const _AliasingInt*>(this)[__i] != 0;
1665 }
1666 }

    // [simd.mask.unary] ----------------------------------------------------
    [[__gnu__::__always_inline__]]
    constexpr basic_mask
    operator!() const noexcept
    { return _S_init(!_M_data0, !_M_data1); }

    [[__gnu__::__always_inline__]]
    constexpr _VecType
    operator+() const noexcept requires destructible<_VecType>
    { return _VecType::_S_concat(+_M_data0, +_M_data1); }

    // the unconstrained deleted overloads are selected only when there is no
    // valid vec type to return, yielding a clear diagnostic
    constexpr _VecType
    operator+() const noexcept = delete;

    [[__gnu__::__always_inline__]]
    constexpr _VecType
    operator-() const noexcept requires destructible<_VecType>
    { return _VecType::_S_concat(-_M_data0, -_M_data1); }

    constexpr _VecType
    operator-() const noexcept = delete;

    [[__gnu__::__always_inline__]]
    constexpr _VecType
    operator~() const noexcept requires destructible<_VecType>
    { return _VecType::_S_concat(~_M_data0, ~_M_data1); }

    constexpr _VecType
    operator~() const noexcept = delete;
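
    // Illustrative (assumes the vec<T, N> alias from <simd>): unary + and -
    // return the related vec type, mapping true to 1 (and thus -1 under
    // negation) and false to 0.
    //   std::simd::mask<int, 4> __k(true);
    //   std::simd::vec<int, 4> __plus = +__k;  // {1, 1, 1, 1}
    //   std::simd::vec<int, 4> __minus = -__k; // {-1, -1, -1, -1}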

    // [simd.mask.conv] -----------------------------------------------------
    template <typename _Up, typename _UAbi>
      requires (_UAbi::_S_size == _S_size)
      [[__gnu__::__always_inline__]]
      constexpr explicit(sizeof(_Up) != _Bytes)
      operator basic_vec<_Up, _UAbi>() const noexcept
      {
        using _Rp = basic_vec<_Up, _UAbi>;
        return _Rp::_S_init(static_cast<_Rp::_DataType0>(_M_data0),
                            static_cast<_Rp::_DataType1>(_M_data1));
      }

    using _Base::operator basic_vec;
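
    // Illustrative: converting a mask to a vec yields 1 for true elements
    // and 0 for false ones; the conversion is implicit only when the element
    // sizes match.
    //   std::simd::mask<int, 4> __k(0b0011u);
    //   auto __v = static_cast<std::simd::vec<int, 4>>(__k); // {1, 1, 0, 0}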

    // [simd.mask.namedconv] ------------------------------------------------
    [[__gnu__::__always_inline__]]
    constexpr bitset<_S_size>
    to_bitset() const noexcept
    {
      if constexpr (_S_size <= numeric_limits<unsigned long long>::digits)
        return to_ullong();
      else
        {
          static_assert(_N0 % numeric_limits<unsigned long long>::digits == 0);
          struct _Tmp
          {
            bitset<_N0> _M_lo;
            bitset<_N1> _M_hi;
          } __tmp = {_M_data0.to_bitset(), _M_data1.to_bitset()};
          return __builtin_bit_cast(bitset<_S_size>, __tmp);
        }
    }

    template <int _Offset = 0, _ArchTraits _Traits = {}>
      [[__gnu__::__always_inline__]]
      constexpr auto
      _M_to_uint() const
      {
        constexpr int _N0x = _N0;
        if constexpr (_N0x >= numeric_limits<unsigned long long>::digits)
          {
            static_assert(_Offset == 0);
            return __trivial_pair {
              _M_data0.template _M_to_uint<0>(),
              _M_data1.template _M_to_uint<0>()
            };
          }
        else
          {
#if _GLIBCXX_X86
            // without BMI2, combining the per-register bitmasks of 16-bit
            // elements is more expensive than narrowing to a single char
            // mask first
            if constexpr (_Bytes == 2 && !_Traits._M_have_bmi2() && _Ap::_S_nreg == 2
                            && !_S_use_bitmask)
              return __similar_mask<char, _S_size, _Ap>(*this).template _M_to_uint<_Offset>();
#endif
            auto __uint = _M_data1.template _M_to_uint<_N0x + _Offset>();
            __uint |= _M_data0.template _M_to_uint<_Offset>();
            return __uint;
          }
      }

    [[__gnu__::__always_inline__]]
    constexpr unsigned long long
    to_ullong() const
    {
      if constexpr (_S_size <= numeric_limits<unsigned long long>::digits)
        return _M_to_uint();
      else
        {
          __glibcxx_simd_precondition(_M_data1.to_ullong() == 0,
                                      "to_ullong called on mask with 'true' elements at indices "
                                      "higher than representable in a ullong");
          return _M_data0.to_ullong();
        }
    }
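
    // Illustrative round trip between the named conversions:
    //   std::simd::mask<int, 4> __k(std::bitset<4>("0101"));
    //   unsigned long long __u = __k.to_ullong(); // 0b0101
    //   auto __b = __k.to_bitset();               // bitset<4>("0101")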

    // [simd.mask.binary] ---------------------------------------------------
    [[__gnu__::__always_inline__]]
    friend constexpr basic_mask
    operator&&(const basic_mask& __x, const basic_mask& __y) noexcept
    { return _S_init(__x._M_data0 && __y._M_data0, __x._M_data1 && __y._M_data1); }

    [[__gnu__::__always_inline__]]
    friend constexpr basic_mask
    operator||(const basic_mask& __x, const basic_mask& __y) noexcept
    { return _S_init(__x._M_data0 || __y._M_data0, __x._M_data1 || __y._M_data1); }

    [[__gnu__::__always_inline__]]
    friend constexpr basic_mask
    operator&(const basic_mask& __x, const basic_mask& __y) noexcept
    { return _S_init(__x._M_data0 & __y._M_data0, __x._M_data1 & __y._M_data1); }

    [[__gnu__::__always_inline__]]
    friend constexpr basic_mask
    operator|(const basic_mask& __x, const basic_mask& __y) noexcept
    { return _S_init(__x._M_data0 | __y._M_data0, __x._M_data1 | __y._M_data1); }

    [[__gnu__::__always_inline__]]
    friend constexpr basic_mask
    operator^(const basic_mask& __x, const basic_mask& __y) noexcept
    { return _S_init(__x._M_data0 ^ __y._M_data0, __x._M_data1 ^ __y._M_data1); }
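
    // Illustrative: all binary logic operators act elementwise; && and ||
    // do not short-circuit.
    //   std::simd::mask<int, 4> __a(0b0011u), __b(0b0101u);
    //   auto __and = __a && __b; // 0b0001
    //   auto __xor = __a ^ __b;  // 0b0110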

    // [simd.mask.cassign] --------------------------------------------------
    [[__gnu__::__always_inline__]]
    friend constexpr basic_mask&
    operator&=(basic_mask& __x, const basic_mask& __y) noexcept
    {
      __x._M_data0 &= __y._M_data0;
      __x._M_data1 &= __y._M_data1;
      return __x;
    }

    [[__gnu__::__always_inline__]]
    friend constexpr basic_mask&
    operator|=(basic_mask& __x, const basic_mask& __y) noexcept
    {
      __x._M_data0 |= __y._M_data0;
      __x._M_data1 |= __y._M_data1;
      return __x;
    }

    [[__gnu__::__always_inline__]]
    friend constexpr basic_mask&
    operator^=(basic_mask& __x, const basic_mask& __y) noexcept
    {
      __x._M_data0 ^= __y._M_data0;
      __x._M_data1 ^= __y._M_data1;
      return __x;
    }

    // [simd.mask.comparison] -----------------------------------------------
    // with bool elements (false < true) the orderings reduce to Boolean
    // identities, so everything maps onto !, ^, &&, and ||
    [[__gnu__::__always_inline__]]
    friend constexpr basic_mask
    operator==(const basic_mask& __x, const basic_mask& __y) noexcept
    { return !(__x ^ __y); }

    [[__gnu__::__always_inline__]]
    friend constexpr basic_mask
    operator!=(const basic_mask& __x, const basic_mask& __y) noexcept
    { return __x ^ __y; }

    [[__gnu__::__always_inline__]]
    friend constexpr basic_mask
    operator>=(const basic_mask& __x, const basic_mask& __y) noexcept
    { return __x || !__y; }

    [[__gnu__::__always_inline__]]
    friend constexpr basic_mask
    operator<=(const basic_mask& __x, const basic_mask& __y) noexcept
    { return !__x || __y; }

    [[__gnu__::__always_inline__]]
    friend constexpr basic_mask
    operator>(const basic_mask& __x, const basic_mask& __y) noexcept
    { return __x && !__y; }

    [[__gnu__::__always_inline__]]
    friend constexpr basic_mask
    operator<(const basic_mask& __x, const basic_mask& __y) noexcept
    { return !__x && __y; }
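
    // Illustrative: with false < true, __x <= __y is elementwise implication
    // (!__x || __y), and __x < __y holds only where __x is false and __y true.
    //   std::simd::mask<int, 2> __x(0b01u), __y(0b11u);
    //   auto __le = __x <= __y; // all true
    //   auto __lt = __x < __y;  // 0b10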

    // [simd.mask.cond] -----------------------------------------------------
    [[__gnu__::__always_inline__]]
    friend constexpr basic_mask
    __select_impl(const basic_mask& __k, const basic_mask& __t, const basic_mask& __f) noexcept
    {
      return _S_init(__select_impl(__k._M_data0, __t._M_data0, __f._M_data0),
                     __select_impl(__k._M_data1, __t._M_data1, __f._M_data1));
    }

    [[__gnu__::__always_inline__]]
    friend constexpr basic_mask
    __select_impl(const basic_mask& __k, same_as<bool> auto __t, same_as<bool> auto __f) noexcept
    {
      if (__t == __f)
        return basic_mask(__t);
      else
        return __t ? __k : !__k;
    }

    template <__vectorizable _T0, same_as<_T0> _T1>
      requires (sizeof(_T0) == _Bytes)
      [[__gnu__::__always_inline__]]
      friend constexpr vec<_T0, _S_size>
      __select_impl(const basic_mask& __k, const _T0& __t, const _T1& __f) noexcept
      {
        using _Vp = vec<_T0, _S_size>;
        if constexpr (!is_same_v<basic_mask, typename _Vp::mask_type>)
          return __select_impl(static_cast<_Vp::mask_type>(__k), __t, __f);
        else
          return _Vp::_S_init(__select_impl(__k._M_data0, __t, __f),
                              __select_impl(__k._M_data1, __t, __f));
      }
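
    // Illustrative: these overloads are the ADL blend hooks behind the
    // library's selection function; elementwise, the result takes __t where
    // __k is true and __f where it is false. Selecting between two unequal
    // bool broadcasts simply returns __k or !__k.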

    template <_ArchTraits _Traits = {}>
      [[__gnu__::__always_inline__]]
      constexpr bool
      _M_all_of() const
      {
        // when both halves have equal size, combining them first needs only
        // a single horizontal reduction
        if constexpr (_N0 == _N1)
          return (_M_data0 && _M_data1)._M_all_of();
        else
          return _M_data0._M_all_of() && _M_data1._M_all_of();
      }

    template <_ArchTraits _Traits = {}>
      [[__gnu__::__always_inline__]]
      constexpr bool
      _M_any_of() const
      {
        if constexpr (_N0 == _N1)
          return (_M_data0 || _M_data1)._M_any_of();
        else
          return _M_data0._M_any_of() || _M_data1._M_any_of();
      }

    template <_ArchTraits _Traits = {}>
      [[__gnu__::__always_inline__]]
      constexpr bool
      _M_none_of() const
      {
        if constexpr (_N0 == _N1)
          return (_M_data0 || _M_data1)._M_none_of();
        else
          return _M_data0._M_none_of() && _M_data1._M_none_of();
      }

    [[__gnu__::__always_inline__]]
    constexpr __simd_size_type
    _M_reduce_min_index() const
    {
      if constexpr (_S_size <= numeric_limits<unsigned long long>::digits)
        {
          const auto __bits = _M_to_uint();
          __glibcxx_simd_precondition(__bits, "An empty mask does not have a min_index.");
          if constexpr (_S_size == 1)
            return 0;
          else
            return __countr_zero(__bits);
        }
      else if (_M_data0._M_none_of())
        return _M_data1._M_reduce_min_index() + _N0;
      else
        return _M_data0._M_reduce_min_index();
    }

    [[__gnu__::__always_inline__]]
    constexpr __simd_size_type
    _M_reduce_max_index() const
    {
      if constexpr (_S_size <= numeric_limits<unsigned long long>::digits)
        {
          const auto __bits = _M_to_uint();
          __glibcxx_simd_precondition(__bits, "An empty mask does not have a max_index.");
          if constexpr (_S_size == 1)
            return 0;
          else
            return __highest_bit(__bits);
        }
      else if (_M_data1._M_none_of())
        return _M_data0._M_reduce_max_index();
      else
        return _M_data1._M_reduce_max_index() + _N0;
    }
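
    // Illustrative: the library's reduce_min_index/reduce_max_index dispatch
    // to these members; e.g. for a mask with bit pattern 0b0110 the minimum
    // true index is 1 and the maximum is 2.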

    [[__gnu__::__always_inline__]]
    friend constexpr bool
    __is_const_known(const basic_mask& __x)
    { return __is_const_known(__x._M_data0) && __is_const_known(__x._M_data1); }
  };
} // namespace simd
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace std

#pragma GCC diagnostic pop
#endif // C++26
#endif // _GLIBCXX_SIMD_MASK_H