Skip to content

Commit

Permalink
deduplication, opts
Browse files Browse the repository at this point in the history
  • Loading branch information
pshirshov committed Sep 9, 2024
1 parent 77b11d0 commit 28af3a3
Show file tree
Hide file tree
Showing 10 changed files with 212 additions and 229 deletions.
4 changes: 2 additions & 2 deletions json-sick-scala/src/main/scala/izumi/sick/SICK.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ import izumi.sick.indexes.{IndexRO, IndexRW, SICKSettings}
import izumi.sick.model.Ref

trait SICK {
def pack(json: Json, name: String, settings: SICKSettings = SICKSettings.default): EBA = {
def pack(json: Json, name: String, dedup: Boolean, settings: SICKSettings = SICKSettings.default): EBA = {
import izumi.sick.sickcirce.CirceTraverser.*
val rwIndex = IndexRW()
val rwIndex = IndexRW(dedup = dedup)
val root = rwIndex.append(name, json)
EBA(rwIndex.freeze(settings), root, rwIndex)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ final class IndexRO(
val roots: RefTableRO[Root],
) {
def findRoot(str: String): Option[Root] = {
roots.asSeq.find(r => strings(r.id) == str)
roots.asIterable.find(r => strings(r.id) == str)
}

def summary: String =
Expand All @@ -41,6 +41,7 @@ final class IndexRO(

def packFile(f: Path): Packed = {
val out = new FileOutputStream(f.toFile, false)

try {
val chan = out.getChannel
chan.truncate(0)
Expand Down
51 changes: 26 additions & 25 deletions json-sick-scala/src/main/scala/izumi/sick/indexes/IndexRW.scala
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
package izumi.sick.indexes

import izumi.sick.model
import izumi.sick.model._
import izumi.sick.tables.RefTableRW
import izumi.sick.model.*
import izumi.sick.tables.{DeduplicatingRefTableBuilder, GenericRefTableBuilder}

object IndexRW {
def apply(): IndexRW = {
val strings = RefTableRW[String]("Strings")
def apply(dedup: Boolean): IndexRW = {
val strings = GenericRefTableBuilder[String]("Strings", dedup = true)

val ints = RefTableRW[Int]("Integers")
val longs = RefTableRW[Long]("Longs")
val bigints = RefTableRW[BigInt]("Bigints")
val ints = GenericRefTableBuilder[Int]("Integers", dedup = true)
val longs = GenericRefTableBuilder[Long]("Longs", dedup = true)
val bigints = GenericRefTableBuilder[BigInt]("Bigints", dedup = true)

val floats = RefTableRW[Float]("Floats")
val doubles = RefTableRW[Double]("Doubles")
val bigDecimals = RefTableRW[BigDecimal]("BigDecs")
val floats = GenericRefTableBuilder[Float]("Floats", dedup = true)
val doubles = GenericRefTableBuilder[Double]("Doubles", dedup = true)
val bigDecimals = GenericRefTableBuilder[BigDecimal]("BigDecs", dedup = true)

val arrs = GenericRefTableBuilder[Arr]("Arrays", dedup)
val objs = GenericRefTableBuilder[Obj]("Objects", dedup)
val roots = GenericRefTableBuilder[Root]("Roots", dedup)

val arrs = RefTableRW[Arr]("Arrays")
val objs = RefTableRW[Obj]("Objects")
val roots = RefTableRW[Root]("Roots")
new IndexRW(
strings,
ints,
Expand All @@ -34,16 +35,16 @@ object IndexRW {
}
}
class IndexRW private (
strings: RefTableRW[String],
ints: RefTableRW[Int],
longs: RefTableRW[Long],
bigints: RefTableRW[BigInt],
floats: RefTableRW[Float],
doubles: RefTableRW[Double],
bigDecimals: RefTableRW[BigDecimal],
arrs: RefTableRW[Arr],
objs: RefTableRW[Obj],
roots: RefTableRW[Root],
strings: GenericRefTableBuilder[String],
ints: GenericRefTableBuilder[Int],
longs: GenericRefTableBuilder[Long],
bigints: GenericRefTableBuilder[BigInt],
floats: GenericRefTableBuilder[Float],
doubles: GenericRefTableBuilder[Double],
bigDecimals: GenericRefTableBuilder[BigDecimal],
arrs: GenericRefTableBuilder[Arr],
objs: GenericRefTableBuilder[Obj],
roots: GenericRefTableBuilder[Root],
) {

def freeze(settings: SICKSettings): IndexRO = {
Expand All @@ -67,12 +68,12 @@ class IndexRW private (
}

/*def rebuild(): IndexRW = {
def rebuildSimpleTable[V](table: RefTableRW[V], tpe: RefKind) = {
def rebuildSimpleTable[V](table: GenericRefTableBuilder[V], tpe: RefKind) = {
val data = table.enumerate().toSeq.zipWithIndex.map {
case ((originalRef, (target)), newRef) =>
(originalRef, (newRef, target))
}
val updated = RefTableRW.fromMonotonic(table.name, data.map(_._2))
val updated = GenericRefTableBuilder.fromMonotonic(table.name, data.map(_._2))
(updated, data.map { case (origRef, (newRef, _)) => Ref(tpe, origRef) -> Ref(tpe, newRef) }.toMap)
}
Expand Down
18 changes: 13 additions & 5 deletions json-sick-scala/src/main/scala/izumi/sick/model/ToBytes.scala
Original file line number Diff line number Diff line change
Expand Up @@ -149,14 +149,20 @@ object ToBytes {
}

override def write(stream: FileOutputStream, table: RefTableRO[T]): Long = {
val before = stream.getChannel.position()
stream.write(table.size.bytes.toArray)
// val before = stream.getChannel.position()

val sz = table.size.bytes
var added: Long = sz.length
stream.write(sz.toArray)
table.forEach {
s =>
stream.write(s.bytes.toArray)
val el = s.bytes
stream.write(el.toArray)
added += el.size
}
val after = stream.getChannel.position()
after - before
// val after = stream.getChannel.position()
// assert(after - before == added)
added
}
}

Expand Down Expand Up @@ -187,7 +193,9 @@ object ToBytes {
stream.write(lastOffset.bytes.toArray)

assert(afterHeader == stream.getChannel.position())

stream.getChannel.position(after)

after - before
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package izumi.sick.tables

import izumi.sick.model.Ref.RefVal

import scala.collection.mutable

/** Reference-table builder that stores every distinct value only once.
  *
  * Values are tracked in `reverse` (value -> reference id). Inserting a value that is
  * already present returns the id it received the first time instead of allocating a new one.
  *
  * @param name    table name; used by `toString` and handed to the frozen table
  * @param reverse backing value -> id map; mutated in place by `insert`
  */
class DeduplicatingRefTableBuilder[V](
  val name: String,
  reverse: mutable.HashMap[V, RefVal],
) extends GenericRefTableBuilder[V] {

  // Next id to hand out. It is seeded past the largest id already present in `reverse`, so a
  // builder created over a pre-populated map (which is exactly what `rewrite` does) never
  // re-issues an id that is still in use. Starting from 0 unconditionally would make two
  // different values share an id, and `enumerate`/`freeze` would then silently drop one of them.
  private var count: Int =
    reverse.valuesIterator.foldLeft(0)((next, ref) => math.max(next, ref + 1))

  /** Returns the id of `v`, allocating the next free id when `v` has not been inserted before. */
  def insert(v: V): RefVal = {
    reverse.getOrElseUpdate(
      v, {
        val k = count
        count += 1
        k
      },
    )
  }

  /** Immutable snapshot of the table as id -> value. */
  def enumerate(): Map[RefVal, V] = {
    reverse.iterator.map(_.swap).toMap
  }

  def isEmpty: Boolean = reverse.isEmpty

  /** Number of distinct values currently stored. */
  def size: Int = reverse.size

  /** Builds a read-only table from the current contents. */
  def freeze(): RefTableRO[V] = new RefTableRO[V](name, enumerate())

  /** Returns a new builder in which every stored value is replaced by `mapping(value)` while
    * keeping its id. If `mapping` sends two stored values to the same result, only one of
    * those entries survives, because the results are used as keys of the new map.
    */
  def rewrite(mapping: V => V): DeduplicatingRefTableBuilder[V] = {
    new DeduplicatingRefTableBuilder[V](
      name,
      reverse.view.map { case (k, v) => mapping(k) -> v }.to(mutable.HashMap),
    )
  }

  override def toString: String = {
    s"""$name:
       |${reverse.map { case (v, k) => s"$k --> $v" }.mkString("\n")}""".stripMargin
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package izumi.sick.tables

import izumi.sick.model.Ref.RefVal

import scala.collection.mutable

/** Mutable builder for a table that maps reference ids (`RefVal`) to values of type `V`.
  *
  * The companion object's `apply` selects between the deduplicating and the
  * non-deduplicating implementation.
  */
trait GenericRefTableBuilder[V] {
/** Table name. */
def name: String
/** Adds `v` to the table and returns the reference id under which it is stored. */
def insert(v: V): RefVal
/** Immutable snapshot of the current contents as id -> value. */
def enumerate(): Map[RefVal, V]
/** True when no value has been stored yet. */
def isEmpty: Boolean
/** Number of entries currently stored. */
def size: Int
/** Builds a read-only table from the current contents. */
def freeze(): RefTableRO[V]
/** Returns a new builder whose stored values are the results of applying `mapping` to the current ones. */
def rewrite(mapping: V => V): GenericRefTableBuilder[V]
}

object GenericRefTableBuilder {

  /** Creates an empty table builder.
    *
    * @param name  table name
    * @param dedup when `true` a [[DeduplicatingRefTableBuilder]] is returned,
    *              otherwise a [[QuickRefTableBuilder]]
    */
  def apply[V](name: String, dedup: Boolean): GenericRefTableBuilder[V] =
    if (dedup) new DeduplicatingRefTableBuilder[V](name, mutable.HashMap.empty[V, RefVal])
    else new QuickRefTableBuilder[V](name, mutable.HashMap.empty[RefVal, V])
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package izumi.sick.tables

import izumi.sick.model.Ref.RefVal

import scala.collection.mutable

/** Reference-table builder that performs no deduplication: every `insert` allocates a fresh id,
  * even when an equal value is already stored.
  *
  * @param name    table name; used by `toString` and handed to the frozen table
  * @param content backing id -> value map; mutated in place by `insert`
  */
class QuickRefTableBuilder[V](
  val name: String,
  content: mutable.HashMap[RefVal, V],
) extends GenericRefTableBuilder[V] {

  // Next id to hand out. It is seeded past the largest id already present in `content`, so a
  // builder created over a pre-populated map (which is exactly what `rewrite` does) appends
  // after the existing entries. Starting from 0 unconditionally would make `insert` overwrite
  // the entries stored under ids 0, 1, 2, ...
  private var count: Int =
    content.keysIterator.foldLeft(0)((next, ref) => math.max(next, ref + 1))

  /** Stores `v` under the next free id and returns that id. */
  def insert(v: V): RefVal = {
    val k = count
    content.put(k, v)
    count += 1
    k
  }

  /** Immutable snapshot of the table as id -> value. */
  def enumerate(): Map[RefVal, V] = {
    content.toMap
  }

  def isEmpty: Boolean = content.isEmpty

  /** Number of entries currently stored. */
  def size: Int = content.size

  /** Builds a read-only table from the current contents. */
  def freeze(): RefTableRO[V] = new RefTableRO[V](name, enumerate())

  /** Returns a new builder holding `mapping(value)` under each existing id. */
  def rewrite(mapping: V => V): QuickRefTableBuilder[V] = {
    new QuickRefTableBuilder[V](
      name,
      content.view.map { case (k, v) => k -> mapping(v) }.to(mutable.HashMap),
    )
  }

  override def toString: String = {
    s"""$name:
       |${content.map { case (k, v) => s"$k --> $v" }.mkString("\n")}""".stripMargin
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ class RefTableRO[V](val name: String, val data: Map[RefVal, V]) {

def size: RefVal = data.size

def asSeq: Seq[V] = {
(0 until data.size).map(data)
@inline final def asIterable: Iterable[V] = {
data.view.values
}

def forEach(f: V => Unit): Unit = {
(0 until data.size).foreach(i => f(data(i)))
@inline final def forEach(f: V => Unit): Unit = {
asIterable.foreach(f)
}

override def toString: String = {
Expand Down
110 changes: 0 additions & 110 deletions json-sick-scala/src/main/scala/izumi/sick/tables/RefTableRW.scala

This file was deleted.

Loading

0 comments on commit 28af3a3

Please sign in to comment.