Skip to content

Commit

Permalink
[SPARK-48259][CONNECT][TESTS] Add 3 missing methods in dsl
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
Add 3 missing methods (`sampleBy`, `filter`, and `crossJoin`) to the Spark Connect test DSL.

### Why are the changes needed?
These methods can be used in tests.

### Does this PR introduce _any_ user-facing change?
No, this is a test-only change.

### How was this patch tested?
CI.

### Was this patch authored or co-authored using generative AI tooling?
no

Closes apache#46559 from zhengruifeng/missing_3_func.

Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
  • Loading branch information
zhengruifeng committed May 14, 2024
1 parent d9ff78e commit 28cf3db
Showing 1 changed file with 27 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,25 @@ package object dsl {
freqItems(cols.toArray, support)

/**
 * Convenience overload of `freqItems` that delegates to the two-argument overload with
 * `0.01` as the second argument (presumably the minimum support threshold; confirm against
 * the overload's signature).
 *
 * @param cols names of the columns to pass through to the two-argument overload
 * @return the relation produced by the two-argument overload
 */
def freqItems(cols: Seq[String]): Relation = {
  freqItems(cols, 0.01)
}

/**
 * Builds a relation wrapping a `StatSampleBy` message over `logicalPlan`.
 *
 * @param col name of the column to stratify on; sent as an unresolved attribute expression
 * @param fractions sampling fraction per stratum; each key is converted with `toLiteralProto`
 * @param seed seed stored on the message
 * @return a `Relation` whose `sampleBy` field carries the input plan, column, fractions and seed
 */
def sampleBy(col: String, fractions: Map[Any, Double], seed: Long): Relation = {
  // `col` must be forwarded: without it the message has no stratification column set.
  val stratifyOn = Expression
    .newBuilder()
    .setUnresolvedAttribute(
      Expression.UnresolvedAttribute
        .newBuilder()
        .setUnparsedIdentifier(col)
        .build())
    .build()

  // One `Fraction` entry per (stratum, fraction) pair of the input map.
  val strata = fractions.toSeq.map { case (stratum, fraction) =>
    StatSampleBy.Fraction
      .newBuilder()
      .setStratum(toLiteralProto(stratum))
      .setFraction(fraction)
      .build()
  }

  Relation
    .newBuilder()
    .setSampleBy(
      StatSampleBy
        .newBuilder()
        .setInput(logicalPlan)
        .setCol(stratifyOn)
        .addAllFractions(strata.asJava)
        .setSeed(seed)
        .build())
    .build()
}
}

def select(exprs: Expression*): Relation = {
Expand Down Expand Up @@ -587,6 +606,10 @@ package object dsl {
.build()
}

/**
 * Alias for `where`: forwards `condition` unchanged and returns whatever `where` returns.
 *
 * @param condition the expression handed to `where`
 */
def filter(condition: Expression): Relation = where(condition)

def deduplicate(colNames: Seq[String]): Relation =
Relation
.newBuilder()
Expand Down Expand Up @@ -641,6 +664,10 @@ package object dsl {
join(otherPlan, joinType, usingColumns, None)
}

/**
 * Joins this plan with `otherPlan` using `JoinType.JOIN_TYPE_CROSS`.
 *
 * Delegates to the private `join` helper with an empty sequence as the third argument
 * and `None` as the fourth.
 *
 * @param otherPlan the relation passed to `join` as the other side
 */
def crossJoin(otherPlan: Relation): Relation =
  join(otherPlan, JoinType.JOIN_TYPE_CROSS, Seq.empty, None)

private def join(
otherPlan: Relation,
joinType: JoinType = JoinType.JOIN_TYPE_INNER,
Expand Down

0 comments on commit 28cf3db

Please sign in to comment.