#ifndef TATAMI_STATS_RANGES_HPP #define TATAMI_STATS_RANGES_HPP #include "../base/Matrix.hpp" #include "apply.hpp" #include #include /** * @file ranges.hpp * * Compute row and column ranges from a `tatami::Matrix`. */ namespace tatami { namespace stats { /** * @cond */ template struct ExtremeFactory { public: ExtremeFactory(O* o, size_t d1, size_t d2) : output(o), dim(d1), otherdim(d2) {} private: O* output; size_t dim, otherdim; public: struct DenseDirect { DenseDirect(O* o, size_t d2) : output(o), otherdim(d2) {} template void compute(size_t i, const V* ptr) { if (otherdim) { if constexpr(compute_max) { output[i] = *std::max_element(ptr, ptr + otherdim); } else { output[i] = *std::min_element(ptr, ptr + otherdim); } } } private: O* output; size_t otherdim; }; DenseDirect dense_direct() { return DenseDirect(output, otherdim); } public: struct SparseDirect { SparseDirect(O* o, size_t d2) : output(o), otherdim(d2) {} template void compute(size_t i, const SparseRange& range) { if (range.number) { if constexpr(compute_max) { output[i] = *std::max_element(range.value, range.value + range.number); } else { output[i] = *std::min_element(range.value, range.value + range.number); } if (range.number != otherdim) { if constexpr(compute_max) { if (output[i] < 0) { output[i] = 0; } } else { if (output[i] > 0) { output[i] = 0; } } } } else if (otherdim) { output[i] = 0; } } private: O* output; size_t otherdim; }; SparseDirect sparse_direct() { return SparseDirect(output, otherdim); } public: struct DenseRunning { DenseRunning(O* o, size_t d1) : output(o), dim(d1) {} template void add(const V* ptr) { if (first) { std::copy(ptr, ptr + dim, output); first = false; } else { for (size_t d = 0; d < dim; ++d) { if constexpr(compute_max) { if (output[d] < ptr[d]) { output[d] = ptr[d]; } } else { if (output[d] > ptr[d]) { output[d] = ptr[d]; } } } } } void finish() {} private: O* output; size_t dim; bool first = true; }; DenseRunning dense_running() { return DenseRunning(output, dim); } DenseRunning dense_running(size_t start, size_t end) { return DenseRunning(output + start, end - start); } public: struct SparseRunning { SparseRunning(O* o, size_t d1, size_t d2, size_t s, size_t e) : output(o), collected(d1), otherdim(d2), start(s), end(e) {} template void add(const SparseRange& range) { if (first) { // Assume output is zero-initialized. for (size_t j = 0; j < range.number; ++j) { ++collected[range.index[j]]; output[range.index[j]] = range.value[j]; } first = false; } else { for (size_t j = 0; j < range.number; ++j) { ++collected[range.index[j]]; auto& existing = output[range.index[j]]; if constexpr(compute_max) { if (existing < range.value[j]) { existing = range.value[j]; } } else { if (existing > range.value[j]) { existing = range.value[j]; } } } } } void finish() { for (size_t i = start; i < end; ++i) { if (collected[i] < otherdim) { if constexpr(compute_max) { if (output[i] < 0) { output[i] = 0; } } else { if (output[i] > 0) { output[i] = 0; } } } } } private: O* output; bool first = true; std::vector collected; size_t otherdim; size_t start, end; }; SparseRunning sparse_running() { return SparseRunning(output, dim, otherdim, 0, dim); } SparseRunning sparse_running(size_t start, size_t end) { return SparseRunning(output, dim, otherdim, start, end); } }; template using MaxFactory = ExtremeFactory; template using MinFactory = ExtremeFactory; /** * @endcond */ } /** * @tparam Output Type of the output value. * @tparam T Type of the matrix value. * @tparam IDX Type of the row/column indices. * * @param p Pointer to a `tatami::Matrix`. * * @return A vector of length equal to the number of columns, containing the maximum value in each column. */ template std::vector column_maxs(const Matrix* p) { std::vector output(p->ncol()); stats::MaxFactory factory(output.data(), p->ncol(), p->nrow()); apply<1>(p, factory); return output; } /** * @tparam Output Type of the output value. * @tparam T Type of the matrix value. * @tparam IDX Type of the row/column indices. * * @param p Pointer to a `tatami::Matrix`. * * @return A vector of length equal to the number of rows, containing the maximum value in each row. */ template std::vector row_maxs(const Matrix* p) { std::vector output(p->nrow()); stats::MaxFactory factory(output.data(), p->nrow(), p->ncol()); apply<0>(p, factory); return output; } /** * @tparam Output Type of the output value. * @tparam T Type of the matrix value. * @tparam IDX Type of the row/column indices. * * @param p Pointer to a `tatami::Matrix`. * * @return A vector of length equal to the number of columns, containing the minimum value in each column. */ template std::vector column_mins(const Matrix* p) { std::vector output(p->ncol()); stats::MinFactory factory(output.data(), p->ncol(), p->nrow()); apply<1>(p, factory); return output; } /** * @tparam Output Type of the output value. * @tparam T Type of the matrix value. * @tparam IDX Type of the row/column indices. * * @param p Pointer to a `tatami::Matrix`. * * @return A vector of length equal to the number of rows, containing the minimum value in each row. */ template std::vector row_mins(const Matrix* p) { std::vector output(p->nrow()); stats::MinFactory factory(output.data(), p->nrow(), p->ncol()); apply<0>(p, factory); return output; } namespace stats { /** * @cond */ template struct RangeFactory { public: RangeFactory(O* min, O* max, size_t d1, size_t d2) : mins(min, d1, d2), maxs(max, d1, d2) {} private: MinFactory mins; MaxFactory maxs; public: struct DenseDirect { DenseDirect(typename MinFactory::DenseDirect mn, typename MaxFactory::DenseDirect mx) : mins(std::move(mn)), maxs(std::move(mx)) {} template void compute(size_t i, const V* ptr) { mins.compute(i, ptr); maxs.compute(i, ptr); return; } private: typename MinFactory::DenseDirect mins; typename MaxFactory::DenseDirect maxs; }; DenseDirect dense_direct() { return DenseDirect(mins.dense_direct(), maxs.dense_direct()); } public: struct SparseDirect { SparseDirect(typename MinFactory::SparseDirect mn, typename MaxFactory::SparseDirect mx) : mins(std::move(mn)), maxs(std::move(mx)) {} template void compute(size_t i, const SparseRange& range) { mins.compute(i, range); maxs.compute(i, range); return; } private: typename MinFactory::SparseDirect mins; typename MaxFactory::SparseDirect maxs; }; SparseDirect sparse_direct() { return SparseDirect(mins.sparse_direct(), maxs.sparse_direct()); } public: struct DenseRunning { DenseRunning(typename MinFactory::DenseRunning mn, typename MaxFactory::DenseRunning mx) : mins(std::move(mn)), maxs(std::move(mx)) {} template void add(const V* ptr) { mins.add(ptr); maxs.add(ptr); return; } void finish() {}; private: typename MinFactory::DenseRunning mins; typename MaxFactory::DenseRunning maxs; }; DenseRunning dense_running() { return DenseRunning(mins.dense_running(), maxs.dense_running()); } DenseRunning dense_running(size_t start, size_t end) { return DenseRunning(mins.dense_running(start, end), maxs.dense_running(start, end)); } public: struct SparseRunning { SparseRunning(typename MinFactory::SparseRunning mn, typename MaxFactory::SparseRunning mx) : mins(std::move(mn)), maxs(std::move(mx)) {} template void add(const SparseRange& range) { mins.add(range); maxs.add(range); return; } void finish() { mins.finish(); maxs.finish(); return; }; private: typename MinFactory::SparseRunning mins; typename MaxFactory::SparseRunning maxs; }; SparseRunning sparse_running() { return SparseRunning(mins.sparse_running(), maxs.sparse_running()); } SparseRunning sparse_running(size_t start, size_t end) { return SparseRunning(mins.sparse_running(start, end), maxs.sparse_running(start, end)); } }; /** * @endcond */ } /** * @tparam Output Type of the output value. * @tparam T Type of the matrix value. * @tparam IDX Type of the row/column indices. * * @param p Pointer to a `tatami::Matrix`. * * @return A pair of vectors, each of length equal to the number of rows. * The first and second vector contains the minimum and maximum value per row, respectively. */ template std::pair, std::vector > column_ranges(const Matrix* p) { std::vector mins(p->ncol()), maxs(p->ncol()); stats::RangeFactory factory(mins.data(), maxs.data(), p->ncol(), p->nrow()); apply<1>(p, factory); return std::make_pair(std::move(mins), std::move(maxs)); } /** * @tparam Output Type of the output value. * @tparam T Type of the matrix value. * @tparam IDX Type of the row/column indices. * * @param p Pointer to a `tatami::Matrix`. * * @return A pair of vectors, each of length equal to the number of rows. * The first and second vector contains the minimum and maximum value per row, respectively. */ template std::pair, std::vector > row_ranges(const Matrix* p) { std::vector mins(p->nrow()), maxs(p->nrow()); stats::RangeFactory factory(mins.data(), maxs.data(), p->nrow(), p->ncol()); apply<0>(p, factory); return std::make_pair(std::move(mins), std::move(maxs)); } } #endif