// Copyright 2022 The Manifold Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Simple implementation of selected functions in PSTL.
// Iterators must be RandomAccessIterator.

#pragma once

#include "./iters.h"
#if (MANIFOLD_PAR == 1)
#include <tbb/combinable.h>
#include <tbb/parallel_for.h>
#include <tbb/parallel_invoke.h>
#include <tbb/parallel_reduce.h>
#include <tbb/parallel_scan.h>
#endif
#include <algorithm>
#include <cstddef>
#include <functional>
#include <iterator>
#include <memory>
#include <numeric>
#include <type_traits>
#include <utility>

namespace manifold {

enum class ExecutionPolicy {
  Par,
  Seq,
};

constexpr size_t kSeqThreshold = 1e4;
// ExecutionPolicy:
// - Sequential for small workload,
// - Parallel (CPU) for medium workload,
inline constexpr ExecutionPolicy autoPolicy(size_t size,
                                            size_t threshold = kSeqThreshold) {
  if (size <= threshold) {
    return ExecutionPolicy::Seq;
  }
  return ExecutionPolicy::Par;
}

template <typename Iter,
          typename Dummy = std::enable_if_t<!std::is_integral_v<Iter>>>
inline constexpr ExecutionPolicy autoPolicy(Iter first, Iter last,
                                            size_t threshold = kSeqThreshold) {
  if (static_cast<size_t>(std::distance(first, last)) <= threshold) {
    return ExecutionPolicy::Seq;
  }
  return ExecutionPolicy::Par;
}

template <typename InputIter, typename OutputIter>
void copy(ExecutionPolicy policy, InputIter first, InputIter last,
          OutputIter d_first);
template <typename InputIter, typename OutputIter>
void copy(InputIter first, InputIter last, OutputIter d_first);

#if (MANIFOLD_PAR == 1)
namespace details {
using manifold::kSeqThreshold;
// implementation from
// https://duvanenko.tech.blog/2018/01/14/parallel-merge/
// https://github.com/DragonSpit/ParallelAlgorithms
// note that the ranges are now [p, r) to fit our convention.
template void mergeRec(SrcIter src, DestIter dest, size_t p1, size_t r1, size_t p2, size_t r2, size_t p3, Comp comp) { size_t length1 = r1 - p1; size_t length2 = r2 - p2; if (length1 < length2) { std::swap(p1, p2); std::swap(r1, r2); std::swap(length1, length2); } if (length1 == 0) return; if (length1 + length2 <= kSeqThreshold) { std::merge(src + p1, src + r1, src + p2, src + r2, dest + p3, comp); } else { size_t q1 = p1 + length1 / 2; size_t q2 = std::distance(src, std::lower_bound(src + p2, src + r2, src[q1], comp)); size_t q3 = p3 + (q1 - p1) + (q2 - p2); dest[q3] = src[q1]; tbb::parallel_invoke( [=] { mergeRec(src, dest, p1, q1, p2, q2, p3, comp); }, [=] { mergeRec(src, dest, q1 + 1, r1, q2, r2, q3 + 1, comp); }); } } template void mergeSortRec(SrcIter src, DestIter dest, size_t begin, size_t end, Comp comp) { size_t numElements = end - begin; if (numElements <= kSeqThreshold) { std::copy(src + begin, src + end, dest + begin); std::stable_sort(dest + begin, dest + end, comp); } else { size_t middle = begin + numElements / 2; tbb::parallel_invoke([=] { mergeSortRec(dest, src, begin, middle, comp); }, [=] { mergeSortRec(dest, src, middle, end, comp); }); mergeRec(src, dest, begin, middle, middle, end, begin, comp); } } template struct ScanBody { T sum; T identity; BinOp &f; InputIter input; OutputIter output; ScanBody(T sum, T identity, BinOp &f, InputIter input, OutputIter output) : sum(sum), identity(identity), f(f), input(input), output(output) {} ScanBody(ScanBody &b, tbb::split) : sum(b.identity), identity(b.identity), f(b.f), input(b.input), output(b.output) {} template void operator()(const tbb::blocked_range &r, Tag) { T temp = sum; for (size_t i = r.begin(); i < r.end(); ++i) { T inputTmp = input[i]; if (Tag::is_final_scan()) output[i] = temp; temp = f(temp, inputTmp); } sum = temp; } T get_sum() const { return sum; } void reverse_join(ScanBody &a) { sum = f(a.sum, sum); } void assign(ScanBody &b) { sum = b.sum; } }; template struct CopyIfScanBody { 
size_t sum; P &pred; InputIter input; OutputIter output; CopyIfScanBody(P &pred, InputIter input, OutputIter output) : sum(0), pred(pred), input(input), output(output) {} CopyIfScanBody(CopyIfScanBody &b, tbb::split) : sum(0), pred(b.pred), input(b.input), output(b.output) {} template void operator()(const tbb::blocked_range &r, Tag) { size_t temp = sum; for (size_t i = r.begin(); i < r.end(); ++i) { if (pred(i)) { temp += 1; if (Tag::is_final_scan()) output[temp - 1] = input[i]; } } sum = temp; } size_t get_sum() const { return sum; } void reverse_join(CopyIfScanBody &a) { sum = a.sum + sum; } void assign(CopyIfScanBody &b) { sum = b.sum; } }; template struct Hist { using SizeType = N; static constexpr int k = K; N hist[k][256] = {{0}}; void merge(const Hist &other) { for (int i = 0; i < k; ++i) for (int j = 0; j < 256; ++j) hist[i][j] += other.hist[i][j]; } void prefixSum(N total, bool *canSkip) { for (int i = 0; i < k; ++i) { size_t count = 0; for (int j = 0; j < 256; ++j) { N tmp = hist[i][j]; hist[i][j] = count; count += tmp; if (tmp == total) canSkip[i] = true; } } } }; template void histogram(T *ptr, typename H::SizeType n, H &hist) { auto worker = [](T *ptr, typename H::SizeType n, H &hist) { for (typename H::SizeType i = 0; i < n; ++i) for (int k = 0; k < hist.k; ++k) ++hist.hist[k][(ptr[i] >> (8 * k)) & 0xFF]; }; if (n < kSeqThreshold) { worker(ptr, n, hist); } else { tbb::combinable store; tbb::parallel_for( tbb::blocked_range(0, n, kSeqThreshold), [&worker, &store, ptr](const auto &r) { worker(ptr + r.begin(), r.end() - r.begin(), store.local()); }); store.combine_each([&hist](const H &h) { hist.merge(h); }); } } template void shuffle(T *src, T *target, typename H::SizeType n, H &hist, int k) { for (typename H::SizeType i = 0; i < n; ++i) target[hist.hist[k][(src[i] >> (8 * k)) & 0xFF]++] = src[i]; } template bool LSB_radix_sort(T *input, T *tmp, SizeType n) { Hist hist; if (std::is_sorted(input, input + n)) return false; histogram(input, n, hist); bool 
canSkip[hist.k] = {0}; hist.prefixSum(n, canSkip); T *a = input, *b = tmp; for (int k = 0; k < hist.k; ++k) { if (!canSkip[k]) { shuffle(a, b, n, hist, k); std::swap(a, b); } } return a == tmp; } // LSB radix sort with merge template struct SortedRange { T *input, *tmp; SizeType offset = 0, length = 0; bool inTmp = false; SortedRange(T *input, T *tmp, SizeType offset = 0, SizeType length = 0) : input(input), tmp(tmp), offset(offset), length(length) {} SortedRange(SortedRange &r, tbb::split) : input(r.input), tmp(r.tmp) {} // FIXME: no idea why thread sanitizer reports data race here #if defined(__has_feature) #if __has_feature(thread_sanitizer) __attribute__((no_sanitize("thread"))) #endif #endif void operator()(const tbb::blocked_range &range) { SortedRange rhs(input, tmp, range.begin(), range.end() - range.begin()); rhs.inTmp = LSB_radix_sort(input + rhs.offset, tmp + rhs.offset, rhs.length); if (length == 0) *this = rhs; else join(rhs); } bool swapBuffer() const { T *src = input, *target = tmp; if (inTmp) std::swap(src, target); copy(src + offset, src + offset + length, target + offset); return !inTmp; } void join(const SortedRange &rhs) { if (inTmp != rhs.inTmp) { if (length < rhs.length) inTmp = swapBuffer(); else rhs.swapBuffer(); } T *src = input, *target = tmp; if (inTmp) std::swap(src, target); if (src[offset + length - 1] > src[rhs.offset]) { mergeRec(src, target, offset, offset + length, rhs.offset, rhs.offset + rhs.length, offset, std::less()); inTmp = !inTmp; } length += rhs.length; } }; template void radix_sort(T *input, SizeTy n) { T *aux = new T[n]; SizeTy blockSize = std::max(n / tbb::this_task_arena::max_concurrency() / 4, static_cast(kSeqThreshold / sizeof(T))); SortedRange result(input, aux); tbb::parallel_reduce(tbb::blocked_range(0, n, blockSize), result); if (result.inTmp) copy(aux, aux + n, input); delete[] aux; } template ::value_type, typename Comp = decltype(std::less())> void mergeSort(ExecutionPolicy policy, Iterator first, Iterator 
last, Comp comp) { #if (MANIFOLD_PAR == 1) if (policy == ExecutionPolicy::Par) { // apparently this prioritizes threads inside here? tbb::this_task_arena::isolate([&] { size_t length = std::distance(first, last); T *tmp = new T[length]; copy(policy, first, last, tmp); details::mergeSortRec(tmp, first, 0, length, comp); delete[] tmp; }); return; } #endif std::stable_sort(first, last, comp); } // stable_sort using merge sort. // // For simpler implementation, we do not support types that are not trivially // destructable. template ::value_type, typename Dummy = void> struct SortFunctor { void operator()(ExecutionPolicy policy, Iterator first, Iterator last) { static_assert( std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); static_assert(std::is_trivially_destructible_v, "Our simple implementation does not support types that are " "not trivially destructable."); return mergeSort(policy, first, last, std::less()); } }; // stable_sort specialized with radix sort for integral types. // Typically faster than merge sort. 
template struct SortFunctor< Iterator, T, std::enable_if_t< std::is_integral_v && std::is_pointer_v::pointer>>> { void operator()(ExecutionPolicy policy, Iterator first, Iterator last) { static_assert( std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); static_assert(std::is_trivially_destructible_v, "Our simple implementation does not support types that are " "not trivially destructable."); #if (MANIFOLD_PAR == 1) if (policy == ExecutionPolicy::Par) { radix_sort(&*first, static_cast(std::distance(first, last))); return; } #endif stable_sort(policy, first, last, std::less()); } }; } // namespace details #endif // Applies the function `f` to each element in the range `[first, last)` template void for_each(ExecutionPolicy policy, Iter first, Iter last, F f) { static_assert(std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); #if (MANIFOLD_PAR == 1) if (policy == ExecutionPolicy::Par) { tbb::parallel_for(tbb::blocked_range(first, last), [&f](const tbb::blocked_range &range) { for (Iter i = range.begin(); i != range.end(); i++) f(*i); }); return; } #endif std::for_each(first, last, f); } // Applies the function `f` to each element in the range `[first, last)` template void for_each_n(ExecutionPolicy policy, Iter first, size_t n, F f) { static_assert(std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); for_each(policy, first, first + n, f); } // Reduce the range `[first, last)` using a binary operation `f` with an initial // value `init`. // // The binary operation should be commutative and associative. Otherwise, the // result is non-deterministic. 
template ::value_type> T reduce(ExecutionPolicy policy, InputIter first, InputIter last, T init, BinaryOp f) { static_assert(std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); #if (MANIFOLD_PAR == 1) if (policy == ExecutionPolicy::Par) { // should we use deterministic reduce here? return tbb::parallel_reduce( tbb::blocked_range(first, last, details::kSeqThreshold), init, [&f](const tbb::blocked_range &range, T value) { return std::reduce(range.begin(), range.end(), value, f); }, f); } #endif return std::reduce(first, last, init, f); } // Reduce the range `[first, last)` using a binary operation `f` with an initial // value `init`. // // The binary operation should be commutative and associative. Otherwise, the // result is non-deterministic. template ::value_type> T reduce(InputIter first, InputIter last, T init, BinaryOp f) { return reduce(autoPolicy(first, last, 1e5), first, last, init, f); } // Transform and reduce the range `[first, last)` by first applying a unary // function `g`, and then combining the results using a binary operation `f` // with an initial value `init`. // // The binary operation should be commutative and associative. Otherwise, the // result is non-deterministic. template ::value_type>> T transform_reduce(ExecutionPolicy policy, InputIter first, InputIter last, T init, BinaryOp f, UnaryOp g) { return reduce(policy, TransformIterator(first, g), TransformIterator(last, g), init, f); } // Transform and reduce the range `[first, last)` by first applying a unary // function `g`, and then combining the results using a binary operation `f` // with an initial value `init`. // // The binary operation should be commutative and associative. Otherwise, the // result is non-deterministic. 
template ::value_type>> T transform_reduce(InputIter first, InputIter last, T init, BinaryOp f, UnaryOp g) { return manifold::reduce(TransformIterator(first, g), TransformIterator(last, g), init, f); } // Compute the inclusive prefix sum for the range `[first, last)` // using the summation operator, and store the result in the range // starting from `d_first`. // // The input range `[first, last)` and // the output range `[d_first, d_first + last - first)` // must be equal or non-overlapping. template ::value_type> void inclusive_scan(ExecutionPolicy policy, InputIter first, InputIter last, OutputIter d_first) { static_assert(std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); static_assert( std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); #if (MANIFOLD_PAR == 1) if (policy == ExecutionPolicy::Par) { tbb::parallel_scan( tbb::blocked_range(0, std::distance(first, last)), static_cast(0), [&](const tbb::blocked_range &range, T sum, bool is_final_scan) { T temp = sum; for (size_t i = range.begin(); i < range.end(); ++i) { temp = temp + first[i]; if (is_final_scan) d_first[i] = temp; } return temp; }, std::plus()); return; } #endif std::inclusive_scan(first, last, d_first); } // Compute the inclusive prefix sum for the range `[first, last)` using the // summation operator, and store the result in the range // starting from `d_first`. // // The input range `[first, last)` and // the output range `[d_first, d_first + last - first)` // must be equal or non-overlapping. 
template ::value_type> void inclusive_scan(InputIter first, InputIter last, OutputIter d_first) { return inclusive_scan(autoPolicy(first, last, 1e5), first, last, d_first); } // Compute the inclusive prefix sum for the range `[first, last)` using the // binary operator `f`, with initial value `init` and // identity element `identity`, and store the result in the range // starting from `d_first`. // // This is different from `exclusive_scan` in the sequential algorithm by // requiring an identity element. This is needed so that each block can be // scanned in parallel and combined later. // // The input range `[first, last)` and // the output range `[d_first, d_first + last - first)` // must be equal or non-overlapping. template ::value_type>()), typename T = typename std::iterator_traits::value_type> void exclusive_scan(ExecutionPolicy policy, InputIter first, InputIter last, OutputIter d_first, T init = static_cast(0), BinOp f = std::plus(), T identity = static_cast(0)) { static_assert(std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); static_assert( std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); #if (MANIFOLD_PAR == 1) if (policy == ExecutionPolicy::Par) { details::ScanBody body(init, identity, f, first, d_first); tbb::parallel_scan( tbb::blocked_range(0, std::distance(first, last)), body); return; } #endif std::exclusive_scan(first, last, d_first, init, f); } // Compute the inclusive prefix sum for the range `[first, last)` using the // binary operator `f`, with initial value `init` and // identity element `identity`, and store the result in the range // starting from `d_first`. // // This is different from `exclusive_scan` in the sequential algorithm by // requiring an identity element. 
This is needed so that each block can be // scanned in parallel and combined later. // // The input range `[first, last)` and // the output range `[d_first, d_first + last - first)` // must be equal or non-overlapping. template ::value_type>()), typename T = typename std::iterator_traits::value_type> void exclusive_scan(InputIter first, InputIter last, OutputIter d_first, T init = static_cast(0), BinOp f = std::plus(), T identity = static_cast(0)) { exclusive_scan(autoPolicy(first, last, 1e5), first, last, d_first, init, f, identity); } // Apply function `f` on the input range `[first, last)` and store the result in // the range starting from `d_first`. // // The input range `[first, last)` and // the output range `[d_first, d_first + last - first)` // must be equal or non-overlapping. template void transform(ExecutionPolicy policy, InputIter first, InputIter last, OutputIter d_first, F f) { static_assert(std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); static_assert( std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); #if (MANIFOLD_PAR == 1) if (policy == ExecutionPolicy::Par) { tbb::parallel_for(tbb::blocked_range( 0, static_cast(std::distance(first, last))), [&](const tbb::blocked_range &range) { std::transform(first + range.begin(), first + range.end(), d_first + range.begin(), f); }); return; } #endif std::transform(first, last, d_first, f); } // Apply function `f` on the input range `[first, last)` and store the result in // the range starting from `d_first`. // // The input range `[first, last)` and // the output range `[d_first, d_first + last - first)` // must be equal or non-overlapping. 
template void transform(InputIter first, InputIter last, OutputIter d_first, F f) { transform(autoPolicy(first, last, 1e5), first, last, d_first, f); } // Copy the input range `[first, last)` to the output range // starting from `d_first`. // // The input range `[first, last)` and // the output range `[d_first, d_first + last - first)` // must not overlap. template void copy(ExecutionPolicy policy, InputIter first, InputIter last, OutputIter d_first) { static_assert(std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); static_assert( std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); #if (MANIFOLD_PAR == 1) if (policy == ExecutionPolicy::Par) { tbb::parallel_for(tbb::blocked_range( 0, static_cast(std::distance(first, last)), details::kSeqThreshold), [&](const tbb::blocked_range &range) { std::copy(first + range.begin(), first + range.end(), d_first + range.begin()); }); return; } #endif std::copy(first, last, d_first); } // Copy the input range `[first, last)` to the output range // starting from `d_first`. // // The input range `[first, last)` and // the output range `[d_first, d_first + last - first)` // must not overlap. template void copy(InputIter first, InputIter last, OutputIter d_first) { copy(autoPolicy(first, last, 1e6), first, last, d_first); } // Copy the input range `[first, first + n)` to the output range // starting from `d_first`. // // The input range `[first, first + n)` and // the output range `[d_first, d_first + n)` // must not overlap. template void copy_n(ExecutionPolicy policy, InputIter first, size_t n, OutputIter d_first) { copy(policy, first, first + n, d_first); } // Copy the input range `[first, first + n)` to the output range // starting from `d_first`. 
// // The input range `[first, first + n)` and // the output range `[d_first, d_first + n)` // must not overlap. template void copy_n(InputIter first, size_t n, OutputIter d_first) { copy(autoPolicy(n, 1e6), first, first + n, d_first); } // Fill the range `[first, last)` with `value`. template void fill(ExecutionPolicy policy, OutputIter first, OutputIter last, T value) { static_assert( std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); #if (MANIFOLD_PAR == 1) if (policy == ExecutionPolicy::Par) { tbb::parallel_for(tbb::blocked_range(first, last), [&](const tbb::blocked_range &range) { std::fill(range.begin(), range.end(), value); }); return; } #endif std::fill(first, last, value); } // Fill the range `[first, last)` with `value`. template void fill(OutputIter first, OutputIter last, T value) { fill(autoPolicy(first, last, 5e5), first, last, value); } // Count the number of elements in the input range `[first, last)` satisfying // predicate `pred`, i.e. `pred(x) == true`. template size_t count_if(ExecutionPolicy policy, InputIter first, InputIter last, P pred) { #if (MANIFOLD_PAR == 1) if (policy == ExecutionPolicy::Par) { return reduce(policy, TransformIterator(first, pred), TransformIterator(last, pred), 0, std::plus()); } #endif return std::count_if(first, last, pred); } // Count the number of elements in the input range `[first, last)` satisfying // predicate `pred`, i.e. `pred(x) == true`. template size_t count_if(InputIter first, InputIter last, P pred) { return count_if(autoPolicy(first, last, 1e4), first, last, pred); } // Check if all elements in the input range `[first, last)` satisfy // predicate `pred`, i.e. `pred(x) == true`. 
template bool all_of(ExecutionPolicy policy, InputIter first, InputIter last, P pred) { static_assert(std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); #if (MANIFOLD_PAR == 1) if (policy == ExecutionPolicy::Par) { // should we use deterministic reduce here? return tbb::parallel_reduce( tbb::blocked_range(first, last), true, [&](const tbb::blocked_range &range, bool value) { if (!value) return false; for (InputIter i = range.begin(); i != range.end(); i++) if (!pred(*i)) return false; return true; }, [](bool a, bool b) { return a && b; }); } #endif return std::all_of(first, last, pred); } // Check if all elements in the input range `[first, last)` satisfy // predicate `pred`, i.e. `pred(x) == true`. template bool all_of(InputIter first, InputIter last, P pred) { return all_of(autoPolicy(first, last, 1e5), first, last, pred); } // Copy values in the input range `[first, last)` to the output range // starting from `d_first` that satisfies the predicate `pred`, // i.e. `pred(x) == true`, and returns `d_first + n` where `n` is the number of // times the predicate is evaluated to true. // // This function is stable, meaning that the relative order of elements in the // output range remains unchanged. // // The input range `[first, last)` and // the output range `[d_first, d_first + last - first)` // must not overlap. 
template OutputIter copy_if(ExecutionPolicy policy, InputIter first, InputIter last, OutputIter d_first, P pred) { static_assert(std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); static_assert( std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); #if (MANIFOLD_PAR == 1) if (policy == ExecutionPolicy::Par) { auto pred2 = [&](size_t i) { return pred(first[i]); }; details::CopyIfScanBody body(pred2, first, d_first); tbb::parallel_scan( tbb::blocked_range(0, std::distance(first, last)), body); return d_first + body.get_sum(); } #endif return std::copy_if(first, last, d_first, pred); } // Copy values in the input range `[first, last)` to the output range // starting from `d_first` that satisfies the predicate `pred`, i.e. `pred(x) == // true`, and returns `d_first + n` where `n` is the number of times the // predicate is evaluated to true. // // This function is stable, meaning that the relative order of elements in the // output range remains unchanged. // // The input range `[first, last)` and // the output range `[d_first, d_first + last - first)` // must not overlap. template OutputIter copy_if(InputIter first, InputIter last, OutputIter d_first, P pred) { return copy_if(autoPolicy(first, last, 1e5), first, last, d_first, pred); } // Remove values in the input range `[first, last)` that satisfies // the predicate `pred`, i.e. `pred(x) == true`, and returns `first + n` // where `n` is the number of times the predicate is evaluated to false. // // This function is stable, meaning that the relative order of elements that // remained are unchanged. // // Only trivially destructable types are supported. 
template ::value_type> Iter remove_if(ExecutionPolicy policy, Iter first, Iter last, P pred) { static_assert(std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); static_assert(std::is_trivially_destructible_v, "Our simple implementation does not support types that are " "not trivially destructable."); #if (MANIFOLD_PAR == 1) if (policy == ExecutionPolicy::Par) { T *tmp = new T[std::distance(first, last)]; auto back = copy_if(policy, first, last, tmp, [&](T v) { return !pred(v); }); copy(policy, tmp, back, first); auto d = std::distance(tmp, back); delete[] tmp; return first + d; } #endif return std::remove_if(first, last, pred); } // Remove values in the input range `[first, last)` that satisfies // the predicate `pred`, i.e. `pred(x) == true`, and // returns `first + n` where `n` is the number of times the predicate is // evaluated to false. // // This function is stable, meaning that the relative order of elements that // remained are unchanged. // // Only trivially destructable types are supported. template ::value_type> Iter remove_if(Iter first, Iter last, P pred) { return remove_if(autoPolicy(first, last, 1e4), first, last, pred); } // Remove values in the input range `[first, last)` that are equal to `value`. // Returns `first + n` where `n` is the number of values // that are not equal to `value`. // // This function is stable, meaning that the relative order of elements that // remained are unchanged. // // Only trivially destructable types are supported. 
template ::value_type> Iter remove(ExecutionPolicy policy, Iter first, Iter last, T value) { static_assert(std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); static_assert(std::is_trivially_destructible_v, "Our simple implementation does not support types that are " "not trivially destructable."); #if (MANIFOLD_PAR == 1) if (policy == ExecutionPolicy::Par) { T *tmp = new T[std::distance(first, last)]; auto back = copy_if(policy, first, last, tmp, [&](T v) { return v != value; }); copy(policy, tmp, back, first); auto d = std::distance(tmp, back); delete[] tmp; return first + d; } #endif return std::remove(first, last, value); } // Remove values in the input range `[first, last)` that are equal to `value`. // Returns `first + n` where `n` is the number of values // that are not equal to `value`. // // This function is stable, meaning that the relative order of elements that // remained are unchanged. // // Only trivially destructable types are supported. template ::value_type> Iter remove(Iter first, Iter last, T value) { return remove(autoPolicy(first, last, 1e4), first, last, value); } // For each group of consecutive elements in the range `[first, last)` with the // same value, unique removes all but the first element of the group. The return // value is an iterator `new_last` such that no two consecutive elements in the // range `[first, new_last)` are equal. // // This function is stable, meaning that the relative order of elements that // remained are unchanged. // // Only trivially destructable types are supported. 
template ::value_type> Iter unique(ExecutionPolicy policy, Iter first, Iter last) { static_assert(std::is_convertible_v< typename std::iterator_traits::iterator_category, std::random_access_iterator_tag>, "You can only parallelize RandomAccessIterator."); static_assert(std::is_trivially_destructible_v, "Our simple implementation does not support types that are " "not trivially destructable."); #if (MANIFOLD_PAR == 1) if (policy == ExecutionPolicy::Par && first != last) { Iter newSrcStart = first; // cap the maximum buffer size, proved to be beneficial for unique with huge // array size constexpr size_t MAX_BUFFER_SIZE = 1 << 16; T *tmp = new T[std::min(MAX_BUFFER_SIZE, static_cast(std::distance(first, last)))]; auto pred = [&](size_t i) { return tmp[i] != tmp[i + 1]; }; do { size_t length = std::min(MAX_BUFFER_SIZE, static_cast(std::distance(newSrcStart, last))); copy(policy, newSrcStart, newSrcStart + length, tmp); *first = *newSrcStart; // this is not a typo, the index i is offset by 1, so to compare an // element with its predecessor we need to compare i and i + 1. details::CopyIfScanBody body(pred, tmp + 1, first + 1); tbb::parallel_scan(tbb::blocked_range(0, length - 1), body); first += body.get_sum() + 1; newSrcStart += length; } while (newSrcStart != last); delete[] tmp; return first; } #endif return std::unique(first, last); } // For each group of consecutive elements in the range `[first, last)` with the // same value, unique removes all but the first element of the group. The return // value is an iterator `new_last` such that no two consecutive elements in the // range `[first, new_last)` are equal. // // This function is stable, meaning that the relative order of elements that // remained are unchanged. // // Only trivially destructable types are supported. template ::value_type> Iter unique(Iter first, Iter last) { return unique(autoPolicy(first, last, 1e4), first, last); } // Sort the input range `[first, last)` in ascending order. 
// // This function is stable, meaning that the relative order of elements that are // incomparable remains unchanged. // // Only trivially destructable types are supported. template ::value_type> void stable_sort(ExecutionPolicy policy, Iterator first, Iterator last) { #if (MANIFOLD_PAR == 1) details::SortFunctor()(policy, first, last); #else std::stable_sort(first, last); #endif } // Sort the input range `[first, last)` in ascending order. // // This function is stable, meaning that the relative order of elements that are // incomparable remains unchanged. // // Only trivially destructable types are supported. template ::value_type> void stable_sort(Iterator first, Iterator last) { stable_sort(autoPolicy(first, last, 1e4), first, last); } // Sort the input range `[first, last)` in ascending order using the comparison // function `comp`. // // This function is stable, meaning that the relative order of elements that are // incomparable remains unchanged. // // Only trivially destructable types are supported. template ::value_type, typename Comp = decltype(std::less())> void stable_sort(ExecutionPolicy policy, Iterator first, Iterator last, Comp comp) { #if (MANIFOLD_PAR == 1) details::mergeSort(policy, first, last, comp); #else std::stable_sort(first, last, comp); #endif } // Sort the input range `[first, last)` in ascending order using the comparison // function `comp`. // // This function is stable, meaning that the relative order of elements that are // incomparable remains unchanged. // // Only trivially destructable types are supported. template ::value_type, typename Comp = decltype(std::less())> void stable_sort(Iterator first, Iterator last, Comp comp) { stable_sort(autoPolicy(first, last, 1e4), first, last, comp); } // `scatter` copies elements from a source range into an output array according // to a map. For each iterator `i` in the range `[first, last)`, the value `*i` // is assigned to `outputFirst[mapFirst[i - first]]`. 
If the same index appears // more than once in the range `[mapFirst, mapFirst + (last - first))`, the // result is undefined. // // The map range, input range and the output range must not overlap. template void scatter(ExecutionPolicy policy, InputIterator1 first, InputIterator1 last, InputIterator2 mapFirst, OutputIterator outputFirst) { for_each(policy, countAt(0), countAt(static_cast(std::distance(first, last))), [first, mapFirst, outputFirst](size_t i) { outputFirst[mapFirst[i]] = first[i]; }); } // `scatter` copies elements from a source range into an output array according // to a map. For each iterator `i` in the range `[first, last)`, the value `*i` // is assigned to `outputFirst[mapFirst[i - first]]`. If the same index appears // more than once in the range `[mapFirst, mapFirst + (last - first))`, // the result is undefined. // // The map range, input range and the output range must not overlap. template void scatter(InputIterator1 first, InputIterator1 last, InputIterator2 mapFirst, OutputIterator outputFirst) { scatter(autoPolicy(first, last, 1e5), first, last, mapFirst, outputFirst); } // `gather` copies elements from a source array into a destination range // according to a map. For each input iterator `i` // in the range `[mapFirst, mapLast)`, the value `inputFirst[*i]` // is assigned to `outputFirst[i - map_first]`. // // The map range, input range and the output range must not overlap. template void gather(ExecutionPolicy policy, InputIterator mapFirst, InputIterator mapLast, RandomAccessIterator inputFirst, OutputIterator outputFirst) { for_each(policy, countAt(0), countAt(static_cast(std::distance(mapFirst, mapLast))), [mapFirst, inputFirst, outputFirst](size_t i) { outputFirst[i] = inputFirst[mapFirst[i]]; }); } // `gather` copies elements from a source array into a destination range // according to a map. For each input iterator `i` // in the range `[mapFirst, mapLast)`, the value `inputFirst[*i]` // is assigned to `outputFirst[i - map_first]`. 
// // The map range, input range and the output range must not overlap. template void gather(InputIterator mapFirst, InputIterator mapLast, RandomAccessIterator inputFirst, OutputIterator outputFirst) { gather(autoPolicy(std::distance(mapFirst, mapLast), 1e5), mapFirst, mapLast, inputFirst, outputFirst); } // Write `[0, last - first)` to the range `[first, last)`. template void sequence(ExecutionPolicy policy, Iterator first, Iterator last) { for_each(policy, countAt(0), countAt(static_cast(std::distance(first, last))), [first](size_t i) { first[i] = i; }); } // Write `[0, last - first)` to the range `[first, last)`. template void sequence(Iterator first, Iterator last) { sequence(autoPolicy(first, last, 1e5), first, last); } } // namespace manifold