Skip to content

Commit

Permalink
Add Compressor serialization
Browse files Browse the repository at this point in the history
  • Loading branch information
kasperjj committed Aug 16, 2016
1 parent 2b1c9bf commit e5d9714
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 36 deletions.
8 changes: 5 additions & 3 deletions src/main/java/me/doubledutch/lazyjson/LazyNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -240,13 +240,15 @@ protected long getLongValue(char[] source) throws LazyException{
* @throws LazyException if the value could not be parsed
*/
protected double getDoubleValue(char[] source) throws LazyException{
double d=0.0;
String str=getStringValue(source);
try{
double d=Double.parseDouble(str);
return d;
d=Double.parseDouble(str);
}catch(NumberFormatException nfe){
throw new LazyException("'"+str+"' is not a valid double",startIndex);
// This basically can't happen since we already validate the numeric format when parsing
// throw new LazyException("'"+str+"' is not a valid double",startIndex);
}
return d;
}

/**
Expand Down
52 changes: 48 additions & 4 deletions src/main/java/me/doubledutch/lazyjson/compressor/Compressor.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,13 @@ public class Compressor{
private LinkedHashMap<Template,Integer> slidingWindow;
private final int windowSize;
private int minRepetitions;
private boolean useDictionary;
private String prefix;
private boolean dirtyFlag=false;
private DictionaryCache dictionary;

public Compressor(String prefix,int windowSizeArg,int minRepetitions, boolean useDictionary){
public Compressor(String prefix,int windowSizeArg,int minRepetitions) throws IOException{
this.windowSize=windowSizeArg;
this.minRepetitions=minRepetitions;
this.useDictionary=useDictionary;
this.prefix=prefix;
// We are going to use a linked hash map to maintain our sliding window
slidingWindow=new LinkedHashMap<Template,Integer>(windowSize+1, .75F, false){
Expand All @@ -31,6 +29,7 @@ protected boolean removeEldestEntry(Map.Entry<Template,Integer> eldest){
}
};
dictionary=new DictionaryCache(windowSize,minRepetitions);
reloadState();
}

private boolean shouldCompress(Template t){
Expand Down Expand Up @@ -75,7 +74,11 @@ public byte[] compress(String str){
ByteBuffer buf=ByteBuffer.allocate(str.length()-2);
buf.putShort((short)templateSet.get(t));
elm.writeTemplateValues(buf,dictionary);
return buf.array();
int pos=buf.position();
buf.rewind();
byte[] result=new byte[pos];
buf.get(result);
return result;
}catch(BufferOverflowException boe){
// Compressed output larger than raw data
}
Expand Down Expand Up @@ -104,7 +107,48 @@ public String decompress(byte[] data){
return str;
}

/**
 * Restores previously committed state from the files derived from {@code prefix}.
 *
 * If {@code prefix+".templates"} exists, an int count is read into
 * {@code nextTemplate}, followed by that many templates; each template is
 * registered under the id equal to its position in the file. If
 * {@code prefix+".dictionary"} exists, its contents are handed to the
 * dictionary to load. Missing files are skipped.
 *
 * @throws IOException if an existing state file could not be read
 */
private void reloadState() throws IOException{
	File templateFile=new File(prefix+".templates");
	if(templateFile.exists()){
		DataInputStream in=new DataInputStream(new FileInputStream(templateFile));
		try{
			nextTemplate=(short)in.readInt();
			for(int i=0;i<nextTemplate;i++){
				Template t=Template.fromDataInput(in);
				// The id of a template is its position in the file
				templateSet.put(t,(short)i);
				templateIdMap.put((short)i,t);
			}
		}finally{
			// Release the file handle even when the file is truncated or corrupt
			in.close();
		}
	}
	File dictionaryFile=new File(prefix+".dictionary");
	if(dictionaryFile.exists()){
		DataInputStream in=new DataInputStream(new FileInputStream(dictionaryFile));
		try{
			dictionary.fromDataInputStream(in);
		}finally{
			in.close();
		}
	}
}

/**
 * Persists the template set and the dictionary to disk if they have changed.
 *
 * Each part is first written to a {@code -tmp} file and then renamed over the
 * real file ({@code prefix+".templates"} / {@code prefix+".dictionary"}), so a
 * failed write never leaves a half written state file in place. The matching
 * dirty flag is only cleared once the rename has succeeded.
 *
 * @throws IOException if the state could not be written or moved into place
 */
public void commit() throws IOException{
	// Save state of templates if needed
	if(dirtyFlag){
		File tmp=new File(prefix+".templates-tmp");
		DataOutputStream out=new DataOutputStream(new FileOutputStream(tmp));
		try{
			out.writeInt(nextTemplate);
			// NOTE(review): reloadState() assigns ids by position in this file, so this
			// presumably relies on templateSet iterating in id order - confirm its map type.
			for(Template t:templateSet.keySet()){
				t.toDataOutput(out);
			}
			out.flush();
		}finally{
			// Release the file handle even when writing fails
			out.close();
		}
		replaceFile(tmp,new File(prefix+".templates"));
		dirtyFlag=false;
	}
	// Save dictionary if needed
	if(dictionary.isDirty()){
		File tmp=new File(prefix+".dictionary-tmp");
		DataOutputStream out=new DataOutputStream(new FileOutputStream(tmp));
		try{
			dictionary.toDataOutputStream(out);
			out.flush();
		}finally{
			out.close();
		}
		replaceFile(tmp,new File(prefix+".dictionary"));
		// Without this the dictionary would be rewritten on every commit
		dictionary.clearDirtyFlag();
	}
}

/**
 * Moves source over target, reporting failure instead of ignoring it.
 *
 * @param source the freshly written temporary file
 * @param target the file to replace
 * @throws IOException if the file could not be moved into place
 */
private static void replaceFile(File source,File target) throws IOException{
	if(!source.renameTo(target)){
		// renameTo may refuse to overwrite an existing file; remove it and retry once
		target.delete();
		if(!source.renameTo(target)){
			throw new IOException("Unable to rename "+source+" to "+target);
		}
	}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,6 @@ public DictionaryCache(int windowSizeArg,int minRepetitions){
init();
}

/**
* Create a new dictionary with the given window size and given repetition
* requirement before new values are added to the dictionary.
* The dictionary is initialized with the data in the given byte array.
*
* @param dictdata the dictionary contents to initialize with
* @param windowSizeArg the size of the sliding window of values
* @param minRepetitions the number of times a value must be seen within the sliding window before its added to the dictionary
* @throws IOException if the data could not be read
*/
public DictionaryCache(byte[] dictdata, int windowSizeArg,int minRepetitions) throws IOException{
this.windowSize=windowSizeArg;
this.minRepetitions=minRepetitions;
init();
fromByteArray(dictdata);
}

/**
* Initializes the internal sliding window data structure.
*/
Expand Down Expand Up @@ -72,8 +55,7 @@ public void clearDirtyFlag(){
dirty=false;
}

private void fromByteArray(byte[] dictdata) throws IOException{
DataInputStream din=new DataInputStream(new ByteArrayInputStream(dictdata));
protected void fromDataInputStream(DataInputStream din) throws IOException{
next=(short)din.readInt();
for(int i=0;i<next;i++){
int val=0;
Expand All @@ -93,9 +75,7 @@ private void fromByteArray(byte[] dictdata) throws IOException{
}
}

public byte[] toByteArray() throws IOException{
ByteArrayOutputStream out=new ByteArrayOutputStream();
DataOutputStream dout=new DataOutputStream(out);
protected void toDataOutputStream(DataOutputStream dout) throws IOException{
dout.writeInt(next);
for(int i=0;i<next;i++){
String raw=data[i];
Expand All @@ -113,7 +93,6 @@ public byte[] toByteArray() throws IOException{
dout.write(encoded);
}
dout.flush();
return out.toByteArray();
}

/**
Expand Down
6 changes: 3 additions & 3 deletions src/main/resources/version.properties
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#Tue Aug 16 15:42:19 EDT 2016
#Tue Aug 16 18:32:29 EDT 2016
BUILD_VERSION=1.1.0
BUILD_DATE=2016-08-16T19\:42\:19Z
BUILD_NUMBER=518
BUILD_DATE=2016-08-16T22\:32\:29Z
BUILD_NUMBER=556
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package me.doubledutch.lazyjson.compressor;

import java.io.File;
import org.junit.*;
import static org.junit.Assert.*;
import java.util.List;
import java.util.ArrayList;
import java.net.*;
import java.nio.ByteBuffer;

import me.doubledutch.lazyjson.*;


/**
 * Round trip tests for {@link Compressor}.
 *
 * All tests share one file prefix and the Compressor constructor reloads any
 * state persisted under that prefix, so the state files are removed before
 * and after every test to keep the tests independent of each other and of
 * earlier (possibly failed) runs.
 */
public class CompressionTest{
	private static final String PREFIX="./ctest";

	/** Deletes every file a Compressor may have written for the test prefix. */
	private static void deleteStateFiles(){
		String[] suffixes={".templates",".templates-tmp",".dictionary",".dictionary-tmp"};
		for(String suffix:suffixes){
			// delete() simply returns false when the file does not exist
			new File(PREFIX+suffix).delete();
		}
	}

	@Before
	public void setUp(){
		deleteStateFiles();
	}

	@After
	public void tearDown(){
		// Runs even when an assertion fails, so no stale state is left behind
		deleteStateFiles();
	}

	/** A single small object survives compress followed by decompress. */
	@Test
	public void testInOut() throws Exception{
		String str="{\"foo\":42}";
		Compressor c=new Compressor(PREFIX,1000,0);
		byte[] out=c.compress(str);
		String str2=c.decompress(out);
		assertEquals(str,str2);
	}

	/** Many objects of the same shape all decompress to their own value. */
	@Test
	public void testSet() throws Exception{
		Compressor c=new Compressor(PREFIX,1000,3);
		List<byte[]> list=new ArrayList<byte[]>();
		for(int i=0;i<100;i++){
			String str="{\"foo\":"+i+"}";
			list.add(c.compress(str));
		}
		for(int i=0;i<100;i++){
			String str=c.decompress(list.get(i));
			LazyObject obj=new LazyObject(str);
			assertEquals(i,obj.getInt("foo"));
		}
	}

	/** Input that compresses poorly must still round trip unchanged. */
	@Test
	public void testBadCompression() throws Exception{
		String str="{\"foo\":[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]}";
		Compressor c=new Compressor(PREFIX,1000,0);
		byte[] out=c.compress(str);
		String str2=c.decompress(out);
		assertEquals(str,str2);
	}

	/** Data compressed before commit() can be decompressed by a new instance. */
	@Test
	public void testCommitAndReload() throws Exception{
		String str="{\"foo\":42,\"bar\":\"Hello World!\"}";
		Compressor c=new Compressor(PREFIX,10,0);
		byte[] out=c.compress(str);
		c.commit();
		// A fresh instance has to rebuild its state from the committed files
		c=new Compressor(PREFIX,10,0);
		String str2=c.decompress(out);
		assertEquals(str,str2);
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,21 @@ public void addValuesImmediately(){
assertEquals(i2,d.get("bar"));
assertEquals(i3,d.get("baz"));
}

@Test
public void serialize() throws IOException{
DictionaryCache d=new DictionaryCache(1000,0);
int i1=d.put("foo");
int i2=d.put("bar");
int i3=d.put("baz");
int i4=d.put("0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789");
byte[] buf=d.toByteArray();
DictionaryCache d2=new DictionaryCache(buf,1000,0);
ByteArrayOutputStream out=new ByteArrayOutputStream();
DataOutputStream dout=new DataOutputStream(out);
d.toDataOutputStream(dout);
byte[] bytes=out.toByteArray();
DictionaryCache d2=new DictionaryCache(1000,0);
DataInputStream din=new DataInputStream(new ByteArrayInputStream(bytes));
d2.fromDataInputStream(din);
assertEquals(i1,d2.get("foo"));
assertEquals(i2,d2.get("bar"));
assertEquals(i3,d2.get("baz"));
Expand Down

0 comments on commit e5d9714

Please sign in to comment.