Skip to content

Commit

Permalink
GH-45190: [C++][Compute] Add rank_quantile function (#45259)
Browse files Browse the repository at this point in the history
### Rationale for this change

Add a "rank_quantile" function following the Wikipedia definition:
https://en.wikipedia.org/wiki/Percentile_rank

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes, an additional compute function.

* GitHub Issue: #45190

Lead-authored-by: Antoine Pitrou <[email protected]>
Co-authored-by: Rossi Sun <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
  • Loading branch information
3 people authored Jan 23, 2025
1 parent dc581f0 commit 17a0ff5
Show file tree
Hide file tree
Showing 6 changed files with 429 additions and 173 deletions.
11 changes: 11 additions & 0 deletions cpp/src/arrow/compute/api_vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ using compute::DictionaryEncodeOptions;
using compute::FilterOptions;
using compute::NullPlacement;
using compute::RankOptions;
using compute::RankQuantileOptions;

template <>
struct EnumTraits<FilterOptions::NullSelectionBehavior>
Expand Down Expand Up @@ -151,6 +152,9 @@ static auto kRankOptionsType = GetFunctionOptionsType<RankOptions>(
DataMember("sort_keys", &RankOptions::sort_keys),
DataMember("null_placement", &RankOptions::null_placement),
DataMember("tiebreaker", &RankOptions::tiebreaker));
static auto kRankQuantileOptionsType = GetFunctionOptionsType<RankQuantileOptions>(
DataMember("sort_keys", &RankQuantileOptions::sort_keys),
DataMember("null_placement", &RankQuantileOptions::null_placement));
static auto kPairwiseOptionsType = GetFunctionOptionsType<PairwiseOptions>(
DataMember("periods", &PairwiseOptions::periods));
static auto kListFlattenOptionsType = GetFunctionOptionsType<ListFlattenOptions>(
Expand Down Expand Up @@ -228,6 +232,13 @@ RankOptions::RankOptions(std::vector<SortKey> sort_keys, NullPlacement null_plac
tiebreaker(tiebreaker) {}
constexpr char RankOptions::kTypeName[];

RankQuantileOptions::RankQuantileOptions(std::vector<SortKey> sort_keys,
NullPlacement null_placement)
: FunctionOptions(internal::kRankQuantileOptionsType),
sort_keys(std::move(sort_keys)),
null_placement(null_placement) {}
constexpr char RankQuantileOptions::kTypeName[];

PairwiseOptions::PairwiseOptions(int64_t periods)
: FunctionOptions(internal::kPairwiseOptionsType), periods(periods) {}
constexpr char PairwiseOptions::kTypeName[];
Expand Down
19 changes: 19 additions & 0 deletions cpp/src/arrow/compute/api_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,25 @@ class ARROW_EXPORT RankOptions : public FunctionOptions {
Tiebreaker tiebreaker;
};

/// \brief Quantile rank options
class ARROW_EXPORT RankQuantileOptions : public FunctionOptions {
public:
explicit RankQuantileOptions(std::vector<SortKey> sort_keys = {},
NullPlacement null_placement = NullPlacement::AtEnd);
/// Convenience constructor for array inputs
explicit RankQuantileOptions(SortOrder order,
NullPlacement null_placement = NullPlacement::AtEnd)
: RankQuantileOptions({SortKey("", order)}, null_placement) {}

static constexpr char const kTypeName[] = "RankQuantileOptions";
static RankQuantileOptions Defaults() { return RankQuantileOptions(); }

/// Column key(s) to order by and how to order by these sort keys.
std::vector<SortKey> sort_keys;
/// Whether nulls and NaNs are placed at the start or at the end
NullPlacement null_placement;
};

/// \brief Partitioning options for NthToIndices
class ARROW_EXPORT PartitionNthOptions : public FunctionOptions {
public:
Expand Down
Loading

0 comments on commit 17a0ff5

Please sign in to comment.