Encode Value class field using the inner type #546

Merged · 9 commits · Sep 6, 2021
21 changes: 11 additions & 10 deletions README.md
@@ -26,16 +26,17 @@ associated channels (e.g. GitHub, Discord) to be a safe and friendly environment
The compatible versions of [Spark](http://spark.apache.org/) and
[cats](https://github.com/typelevel/cats) are as follows:

| Frameless | Spark | Cats | Cats-Effect | Scala |
| --- | --- | --- | --- | --- |
| 0.4.0 | 2.2.0 | 1.0.0-IF | 0.4 | 2.11
| 0.4.1 | 2.2.0 | 1.x | 0.8 | 2.11
| 0.5.2 | 2.2.1 | 1.x | 0.8 | 2.11
| 0.6.1 | 2.3.0 | 1.x | 0.8 | 2.11
| 0.7.0 | 2.3.1 | 1.x | 1.x | 2.11
| 0.8.0 | 2.4.0 | 1.x | 1.x | 2.11/2.12
| 0.9.0 | 3.0.0 | 1.x | 1.x | 2.12
| 0.10.1 | 3.1.0 | 2.x | 2.x | 2.12
| Frameless | Spark | Cats | Cats-Effect | Scala
| --------- | ----- | -------- | ----------- | ---
| 0.4.0 | 2.2.0 | 1.0.0-IF | 0.4 | 2.11
| 0.4.1 | 2.2.0 | 1.x | 0.8 | 2.11
| 0.5.2 | 2.2.1 | 1.x | 0.8 | 2.11
| 0.6.1 | 2.3.0 | 1.x | 0.8 | 2.11
| 0.7.0 | 2.3.1 | 1.x | 1.x | 2.11
| 0.8.0 | 2.4.0 | 1.x | 1.x | 2.11/2.12
| 0.9.0 | 3.0.0 | 1.x | 1.x | 2.12
| 0.10.1 | 3.1.0 | 2.x | 2.x | 2.12
| 0.11.0 | 3.1.0 | 2.x | 2.x | 2.12


Versions 0.5.x and 0.6.x have identical features. The first is compatible with Spark 2.2.1 and the second with 2.3.0.
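
For context (not part of this diff), a minimal sbt sketch of picking the row that matches your Spark release; the coordinates assume the usual `org.typelevel` / `frameless-dataset` artifact naming:

```scala
// build.sbt (sketch): Frameless 0.11.0 targets Spark 3.1.0, Cats 2.x and Scala 2.12
libraryDependencies ++= Seq(
  "org.typelevel"    %% "frameless-dataset" % "0.11.0",
  "org.apache.spark" %% "spark-sql"         % "3.1.0" % Provided
)
```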
32 changes: 25 additions & 7 deletions build.sbt
@@ -50,13 +50,31 @@ lazy val cats = project

lazy val dataset = project
.settings(name := "frameless-dataset")
.settings(framelessSettings: _*)
.settings(framelessTypedDatasetREPL: _*)
.settings(publishSettings: _*)
.settings(libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-core" % sparkVersion % Provided,
"org.apache.spark" %% "spark-sql" % sparkVersion % Provided,
"net.ceedubs" %% "irrec-regex-gen" % irrecVersion % Test
.settings(framelessSettings)
.settings(framelessTypedDatasetREPL)
.settings(publishSettings)
.settings(Seq(
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-core" % sparkVersion % Provided,
"org.apache.spark" %% "spark-sql" % sparkVersion % Provided,
"net.ceedubs" %% "irrec-regex-gen" % irrecVersion % Test
),
mimaBinaryIssueFilters ++= {
import com.typesafe.tools.mima.core._

val imt = ProblemFilters.exclude[IncompatibleMethTypeProblem](_)
val mc = ProblemFilters.exclude[MissingClassProblem](_)
val dmm = ProblemFilters.exclude[DirectMissingMethodProblem](_)

// TODO: Remove after version bump
Seq(
imt("frameless.RecordEncoderFields.deriveRecordCons"),
imt("frameless.RecordEncoderFields.deriveRecordLast"),
mc("frameless.functions.FramelessLit"),
mc(f"frameless.functions.FramelessLit$$"),
dmm("frameless.functions.package.litAggr")
)
}
))
.dependsOn(core % "test->test;compile->compile")

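For context (not part of this diff): the filters above tell sbt-mima-plugin that specific binary incompatibilities are accepted. A minimal sketch of how such an exclusion is declared and checked, assuming the plugin's standard keys and task names:

```scala
// build.sbt sketch, illustrative only
import com.typesafe.tools.mima.core._

// Each exclusion acknowledges one intentional binary break,
// e.g. the changed litAggr in frameless.functions:
mimaBinaryIssueFilters += ProblemFilters.exclude[DirectMissingMethodProblem](
  "frameless.functions.package.litAggr"
)

// The remaining compatibility report is then produced from the sbt shell with:
//   dataset/mimaReportBinaryIssues
```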
17 changes: 17 additions & 0 deletions dataset/src/main/scala/frameless/IsValueClass.scala
@@ -0,0 +1,17 @@
package frameless

import shapeless._
import shapeless.labelled.FieldType

/** Evidence that `T` is a Value class */
@annotation.implicitNotFound(msg = "${T} is not a Value class")
final class IsValueClass[T] private() {}

object IsValueClass {
/** Provides an evidence `A` is a Value class */
implicit def apply[A <: AnyVal, G <: ::[_, HNil], H <: ::[_ <: FieldType[_ <: Symbol, _], HNil]](
Review comment (Member): IsValueClass is required to disambiguate between Value classes and primitives. Being a subtype of AnyVal is not enough, e.g. implicitly[Double <:< AnyVal] compiles (a note to myself).

implicit
i0: LabelledGeneric.Aux[A, G],
i1: DropUnitValues.Aux[G, H]): IsValueClass[A] = new IsValueClass[A]

}
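
For context (not part of this diff), a minimal sketch of how this evidence behaves, following the review comment above; `Id` is a hypothetical value class used only for illustration:

```scala
import frameless.IsValueClass

// A genuine value class: a single non-Unit field, extending AnyVal.
final case class Id(raw: Long) extends AnyVal

// Resolves, because LabelledGeneric sees exactly one non-Unit field.
implicitly[IsValueClass[Id]]

// Would not compile: Double is a primitive, so even though Double <: AnyVal
// holds, there is no LabelledGeneric instance to back the evidence.
// implicitly[IsValueClass[Double]]
```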
103 changes: 93 additions & 10 deletions dataset/src/main/scala/frameless/RecordEncoder.scala
@@ -2,11 +2,15 @@ package frameless

import org.apache.spark.sql.FramelessInternals
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, NewInstance}
import org.apache.spark.sql.catalyst.expressions.objects.{
Invoke, NewInstance, UnwrapOption, WrapOption
}
import org.apache.spark.sql.types._

import shapeless._
import shapeless.labelled.FieldType
import shapeless.ops.hlist.IsHCons
import shapeless.ops.record.Keys

import scala.reflect.ClassTag

@@ -25,24 +29,22 @@ object RecordEncoderFields {
implicit def deriveRecordLast[K <: Symbol, H]
(implicit
key: Witness.Aux[K],
head: TypedEncoder[H]
head: RecordFieldEncoder[H]
): RecordEncoderFields[FieldType[K, H] :: HNil] = new RecordEncoderFields[FieldType[K, H] :: HNil] {
def value: List[RecordEncoderField] = RecordEncoderField(0, key.value.name, head) :: Nil
def value: List[RecordEncoderField] = fieldEncoder[K, H] :: Nil
}

implicit def deriveRecordCons[K <: Symbol, H, T <: HList]
(implicit
key: Witness.Aux[K],
head: TypedEncoder[H],
head: RecordFieldEncoder[H],
tail: RecordEncoderFields[T]
): RecordEncoderFields[FieldType[K, H] :: T] = new RecordEncoderFields[FieldType[K, H] :: T] {
def value: List[RecordEncoderField] = {
val fieldName = key.value.name
val fieldEncoder = RecordEncoderField(0, fieldName, head)
def value: List[RecordEncoderField] =
fieldEncoder[K, H] :: tail.value.map(x => x.copy(ordinal = x.ordinal + 1))
}

fieldEncoder :: tail.value.map(x => x.copy(ordinal = x.ordinal + 1))
}
}
private def fieldEncoder[K <: Symbol, H](implicit key: Witness.Aux[K], e: RecordFieldEncoder[H]): RecordEncoderField = RecordEncoderField(0, key.value.name, e.encoder)
}

/**
@@ -156,6 +158,7 @@ class RecordEncoder[F, G <: HList, H <: HList]

val createExpr = CreateNamedStruct(exprs)
val nullExpr = Literal.create(null, createExpr.dataType)

If(IsNull(path), nullExpr, createExpr)
}

@@ -168,6 +171,86 @@ class RecordEncoder[F, G <: HList, H <: HList]
val newExpr = NewInstance(classTag.runtimeClass, newArgs, jvmRepr, propagateNull = true)

val nullExpr = Literal.create(null, jvmRepr)

If(IsNull(path), nullExpr, newExpr)
}
}

final class RecordFieldEncoder[T](
Review comment (Member): I think the RecordFieldEncoder makes sense to me now 👍
I wanted to avoid introducing new extra wrappers for derivation, but it is pretty elegant.

val encoder: TypedEncoder[T]) extends Serializable

object RecordFieldEncoder extends RecordFieldEncoderLowPriority {

/**
* @tparam F the value class
* @tparam G the single field of the value class
* @tparam H the single field of the value class (with guarantee it's not a `Unit` value)
* @tparam K the key type for the fields
* @tparam V the inner value type
*/
implicit def optionValueClass[F : IsValueClass, G <: ::[_, HNil], H <: ::[_ <: FieldType[_ <: Symbol, _], HNil], K <: Symbol, V, KS <: ::[_ <: Symbol, HNil]]
(implicit
i0: LabelledGeneric.Aux[F, G],
i1: DropUnitValues.Aux[G, H],
i2: IsHCons.Aux[H, _ <: FieldType[K, V], HNil],
i3: Keys.Aux[H, KS],
i4: IsHCons.Aux[KS, K, HNil],
i5: TypedEncoder[V],
i6: ClassTag[F]
): RecordFieldEncoder[Option[F]] = RecordFieldEncoder[Option[F]](new TypedEncoder[Option[F]] {
val nullable = true

val jvmRepr = ObjectType(classOf[Option[F]])

@inline def catalystRepr: DataType = i5.catalystRepr

val innerJvmRepr = ObjectType(i6.runtimeClass)

def fromCatalyst(path: Expression): Expression = {
val javaValue = i5.fromCatalyst(path)
val value = NewInstance(i6.runtimeClass, Seq(javaValue), innerJvmRepr)

WrapOption(value, innerJvmRepr)
}

@inline def toCatalyst(path: Expression): Expression = {
val value = UnwrapOption(innerJvmRepr, path)

val fieldName = i4.head(i3()).name
val javaValue = Invoke(value, fieldName, i5.jvmRepr, Nil)

i5.toCatalyst(javaValue)
}
})

/**
* @tparam F the value class
* @tparam G the single field of the value class
* @tparam H the single field of the value class (with guarantee it's not a `Unit` value)
* @tparam V the inner value type
*/
implicit def valueClass[F : IsValueClass, G <: ::[_, HNil], H <: ::[_, HNil], V]
(implicit
i0: LabelledGeneric.Aux[F, G],
i1: DropUnitValues.Aux[G, H],
i2: IsHCons.Aux[H, _ <: FieldType[_, V], HNil],
i3: TypedEncoder[V],
i4: ClassTag[F]
): RecordFieldEncoder[F] = RecordFieldEncoder[F](new TypedEncoder[F] {
def nullable = i3.nullable

def jvmRepr = i3.jvmRepr

def catalystRepr: DataType = i3.catalystRepr

def fromCatalyst(path: Expression): Expression =
i3.fromCatalyst(path)

@inline def toCatalyst(path: Expression): Expression =
i3.toCatalyst(path)
})
}

private[frameless] sealed trait RecordFieldEncoderLowPriority {
implicit def apply[T](implicit e: TypedEncoder[T]): RecordFieldEncoder[T] = new RecordFieldEncoder[T](e)
}
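
For context (not part of this diff), a minimal sketch of what the `valueClass` instance above buys during derivation; `Name` and `User` are hypothetical types, and the expected schema assumes the usual case-class derivation of `TypedEncoder` is in implicit scope:

```scala
import frameless.TypedEncoder

// Hypothetical types, only to illustrate the derivation.
final case class Name(value: String) extends AnyVal
final case class User(name: Name, age: Int)

val enc = TypedEncoder[User]

// With RecordFieldEncoder.valueClass in scope, the `name` field is encoded
// using its inner type, so the Catalyst schema is expected to stay flat,
// roughly: StructType(StructField("name", StringType, ...),
//                     StructField("age", IntegerType, ...))
println(enc.catalystRepr)
```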
6 changes: 4 additions & 2 deletions dataset/src/main/scala/frameless/TypedColumn.scala
@@ -33,7 +33,7 @@ sealed class TypedColumn[T, U](expr: Expression)(

override def typed[W, U1: TypedEncoder](c: Column): TypedColumn[W, U1] = c.typedColumn

override def lit[U1: TypedEncoder](c: U1): TypedColumn[T,U1] = flit(c)
override def lit[U1: TypedEncoder](c: U1): TypedColumn[T, U1] = flit(c)
}

/** Expression used in `agg`-like constructions.
@@ -49,6 +49,7 @@ sealed class TypedAggregate[T, U](expr: Expression)(
}

override def typed[W, U1: TypedEncoder](c: Column): TypedAggregate[W, U1] = c.typedAggregate

override def lit[U1: TypedEncoder](c: U1): TypedAggregate[T, U1] = litAggr(c)
}

@@ -835,7 +836,8 @@ abstract class AbstractTypedColumn[T, U]
/**
* Returns a nested column matching the field `symbol`.
*
* @param V the type of the nested field
* @param symbol the field symbol
* @tparam V the type of the nested field
*/
def field[V](symbol: Witness.Lt[Symbol])(implicit
i0: TypedColumn.Exists[U, symbol.T, V],
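For context (not part of this diff), a small usage sketch of `field`; the `Address`/`Person` types and the dataset are hypothetical, shown only to illustrate the call shape:

```scala
import frameless.{TypedColumn, TypedDataset}

case class Address(city: String)
case class Person(name: String, address: Address)

// Given a dataset of Person (assumed to exist elsewhere),
// `field` selects the nested column with its precise type.
def cityOf(ds: TypedDataset[Person]): TypedColumn[Person, String] =
  ds('address).field('city)
```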
21 changes: 12 additions & 9 deletions dataset/src/main/scala/frameless/TypedExpressionEncoder.scala
@@ -10,30 +10,33 @@ object TypedExpressionEncoder {

/** In Spark, DataFrame has always schema of StructType
*
* DataFrames of primitive types become records with a single field called "value" set in ExpressionEncoder.
* DataFrames of primitive types become records
* with a single field called "value" set in ExpressionEncoder.
*/
def targetStructType[A](encoder: TypedEncoder[A]): StructType = {
def targetStructType[A](encoder: TypedEncoder[A]): StructType =
encoder.catalystRepr match {
case x: StructType =>
if (encoder.nullable) StructType(x.fields.map(_.copy(nullable = true)))
else x

case dt => new StructType().add("value", dt, nullable = encoder.nullable)
}
}

def apply[T: TypedEncoder]: Encoder[T] = {
val encoder = TypedEncoder[T]
def apply[T](implicit encoder: TypedEncoder[T]): Encoder[T] = {
val in = BoundReference(0, encoder.jvmRepr, encoder.nullable)

val (out, serializer) = encoder.toCatalyst(in) match {
case it @ If(_, _, _: CreateNamedStruct) =>
case it @ If(_, _, _: CreateNamedStruct) => {
val out = GetColumnByOrdinal(0, encoder.catalystRepr)

(out, it)
case other =>
out -> it
}

case other => {
val out = GetColumnByOrdinal(0, encoder.catalystRepr)

(out, other)
out -> other
}
}

new ExpressionEncoder[T](
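For context (not part of this diff), a small sketch of the primitive-wrapping rule described in the doc comment above:

```scala
import frameless.{TypedEncoder, TypedExpressionEncoder}

// A primitive has no struct representation of its own, so targetStructType
// wraps it in a single field named "value":
val intSchema = TypedExpressionEncoder.targetStructType(TypedEncoder[Int])
// expected: StructType(StructField("value", IntegerType, nullable = false))

// A case-class encoder already yields a StructType and is used as-is,
// with its fields forced nullable only when the whole value is nullable.
println(intSchema)
```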
31 changes: 17 additions & 14 deletions dataset/src/main/scala/frameless/functions/Lit.scala
@@ -1,30 +1,34 @@
package frameless.functions

import frameless.TypedEncoder
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, NonSQLExpression}
import org.apache.spark.sql.catalyst.expressions.{Expression, NonSQLExpression}
import org.apache.spark.sql.types.DataType

case class FramelessLit[A](obj: A, encoder: TypedEncoder[A]) extends Expression with NonSQLExpression {
override def nullable: Boolean = encoder.nullable
override def toString: String = s"FramelessLit($obj)"
private[frameless] case class Lit[T <: AnyVal](
dataType: DataType,
nullable: Boolean,
toCatalyst: CodegenContext => ExprCode,
show: () => String)
extends Expression
with NonSQLExpression {
override def toString: String = s"FramelessLit(${show()})"

def eval(input: InternalRow): Any = {
val ctx = new CodegenContext()
val eval = genCode(ctx)

val codeBody = s"""
public scala.Function1<InternalRow, Object> generate(Object[] references) {
return new FramelessLitEvalImpl(references);
return new LiteralEvalImpl(references);
}

class FramelessLitEvalImpl extends scala.runtime.AbstractFunction1<InternalRow, Object> {
class LiteralEvalImpl extends scala.runtime.AbstractFunction1<InternalRow, Object> {
private final Object[] references;
${ctx.declareMutableStates()}
${ctx.declareAddedFunctions()}

public FramelessLitEvalImpl(Object[] references) {
public LiteralEvalImpl(Object[] references) {
this.references = references;
${ctx.initMutableStates()}
}
@@ -38,20 +42,19 @@ case class FramelessLit[A](obj: A, encoder: TypedEncoder[A]) extends Expression
"""

val code = CodeFormatter.stripOverlappingComments(
new CodeAndComment(codeBody, ctx.getPlaceHolderToComments()))
new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())
)

val (clazz, _) = CodeGenerator.compile(code)
val codegen = clazz.generate(ctx.references.toArray).asInstanceOf[InternalRow => AnyRef]
val codegen =
clazz.generate(ctx.references.toArray).asInstanceOf[InternalRow => AnyRef]

codegen(input)
}

def dataType: DataType = encoder.catalystRepr
def children: Seq[Expression] = Nil

override def genCode(ctx: CodegenContext): ExprCode = {
encoder.toCatalyst(new Literal(obj, encoder.jvmRepr)).genCode(ctx)
}
override def genCode(ctx: CodegenContext): ExprCode = toCatalyst(ctx)

protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = ???
}