Skip to content

Commit

Permalink
feat: Adds support for 0x, X'...', x'...' type hex strings in udf:enc…
Browse files Browse the repository at this point in the history
…ode (#6118)
  • Loading branch information
cprasad1 authored Sep 3, 2020
1 parent ba88f2a commit d492556
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ public String encode(
return ENCODER_MAP.get(encodedString).apply(str);
}


/**
 * Strategy for converting a string from one encoding into another.
 *
 * <p>The encode entry point looks an instance up in ENCODER_MAP and calls
 * {@link #apply(String)} on it. Implementations such as HexToAscii, HexToBase64 and
 * HexToUtf8 each handle one source/target encoding pair.
 */
interface Encoder {
/**
 * Converts {@code input} to the target encoding of this encoder.
 *
 * @param input the string in the source encoding
 * @return the converted string
 * @throws KsqlFunctionException if the input cannot be decoded; the hex-based
 *     implementations raise it when hex decoding fails
 */
String apply(String input) throws KsqlFunctionException;
}
Expand All @@ -85,7 +84,11 @@ static class HexToAscii implements Encoder {
@Override
public String apply(final String input) {
try {
final byte[] decoded = Hex.decodeHex(input);
// strip a leading "0x", or a leading "X'"/"x'" and trailing "'", from the hex string if present
final String processedInput;
processedInput = hexStrip(input);

final byte[] decoded = Hex.decodeHex(processedInput);
return new String(decoded, StandardCharsets.US_ASCII);
} catch (DecoderException e) {
throw new KsqlFunctionException(e.getMessage());
Expand All @@ -98,8 +101,11 @@ static class HexToBase64 implements Encoder {
@Override
public String apply(final String input) throws KsqlFunctionException {
final byte[] decodedHex;
// strip a leading "0x", or a leading "X'"/"x'" and trailing "'", from the hex string if present
final String processedInput;
processedInput = hexStrip(input);
try {
decodedHex = Hex.decodeHex(input);
decodedHex = Hex.decodeHex(processedInput);
} catch (DecoderException e) {
throw new KsqlFunctionException(e.getMessage());
}
Expand All @@ -114,8 +120,11 @@ static class HexToUtf8 implements Encoder {
@Override
public String apply(final String input) throws KsqlFunctionException {
final byte[] decodedHex;
// strip a leading "0x", or a leading "X'"/"x'" and trailing "'", from the hex string if present
final String processedInput;
processedInput = hexStrip(input);
try {
decodedHex = Hex.decodeHex(input);
decodedHex = Hex.decodeHex(processedInput);
} catch (DecoderException e) {
throw new KsqlFunctionException(e.getMessage());
}
Expand Down Expand Up @@ -203,4 +212,28 @@ public String apply(final String input) throws KsqlFunctionException {
return new String(decodedB64, StandardCharsets.US_ASCII);
}
}

/**
 * Removes literal-style decoration from a hex string so that only the digits remain.
 *
 * <ul>
 *   <li>{@code 0xAB79} loses its {@code 0x} prefix. When the input has an odd length, a
 *       single {@code 0} is put in front of the remaining digits so that they form whole
 *       bytes (for example {@code 0x9} becomes {@code 09}).</li>
 *   <li>{@code X'AB79'} and {@code x'AB79'} lose the two-character prefix and the closing
 *       quote. No padding is applied to this form.</li>
 *   <li>Anything else (for example {@code AB79}) is returned unchanged.</li>
 * </ul>
 *
 * @param hexString the hex string, with or without decoration
 * @return the hex string with any recognised decoration removed
 */
public static String hexStrip(final String hexString) {
  if (hexString.matches("0x.*")) {
    // Covers a bare "0x" as well as "0x....".
    final String digits = hexString.substring(2);

    // The prefix is two characters long, so the whole string and the digit run share the
    // same parity: an even total length means the digits already pair up into bytes.
    if (hexString.length() % 2 == 0) {
      return digits;
    }
    return "0" + digits;
  }

  if (hexString.matches("(x|X)\'.*\'")) {
    // Covers "x''", "X''", "x'....'" and "X'....'".
    return hexString.substring(2, hexString.length() - 1);
  }

  // No recognised decoration: hand the input back untouched.
  return hexString;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static org.hamcrest.Matchers.nullValue;

import io.confluent.ksql.function.KsqlFunctionException;
import org.junit.Assert;
import org.junit.Test;

public class EncodeTest {
Expand All @@ -41,6 +42,20 @@ public void shouldEncodeHexToAscii() {
assertThat(udf.encode("31202b2031203d2031", "hex", "ascii"), is("1 + 1 = 1"));
assertThat(udf.encode("ce95cebbcebbceacceb4ceb1", "hex", "ascii"), is("������������"));
assertThat(udf.encode("c39c6265726d656e736368", "hex", "ascii"), is("��bermensch"));

assertThat(udf.encode("0x48656c6c6f20576f726c6421", "hex", "ascii"), is("Hello World!"));
assertThat(udf.encode("0x9", "hex", "ascii"), is("\t"));
assertThat(udf.encode("0x", "hex", "ascii"), is(""));
assertThat(udf.encode("X'436c6f7564792a7e2a3f'", "hex", "ascii"), is("Cloudy*~*?"));
assertThat(udf.encode("x'4578616d706C6521'", "hex", "ascii"), is("Example!"));

assertThat(udf.encode("X''", "hex", "ascii"), is(""));
assertThat(udf.encode("x''", "hex", "ascii"), is(""));
assertThat(udf.encode("0x578616d706C6521", "hex", "ascii"), is("\u0005xample!"));
Assert.assertThrows(KsqlFunctionException.class, () -> udf.encode("578616d706C6521", "hex", "ascii"));
Assert.assertThrows(KsqlFunctionException.class, () -> udf.encode("X'578616d706C6521'", "hex", "ascii"));
Assert.assertThrows(KsqlFunctionException.class, () -> udf.encode("x'578616d706C6521'", "hex", "ascii"));

}

@Test
Expand All @@ -51,6 +66,13 @@ public void shouldEncodeHexToUtf8() {
assertThat(udf.encode("ce95cebbcebbceacceb4ceb1", "hex", "utf8"), is("Ελλάδα"));
assertThat(udf.encode("c39c6265726d656e736368", "hex", "utf8"), is("Übermensch"));

assertThat(udf.encode("0x4578616d706c6521", "hex", "utf8"), is("Example!"));
assertThat(udf.encode("0x", "hex", "utf8"), is(""));
assertThat(udf.encode("X'506C6174666F726D2D7C5F5F5F5F5F7C2D'", "hex", "utf8"), is("Platform-|_____|-"));
assertThat(udf.encode("x'31202b2031203d2031'", "hex", "utf8"), is("1 + 1 = 1"));

assertThat(udf.encode("X''", "hex", "utf8"), is(""));
assertThat(udf.encode("x''", "hex", "utf8"), is(""));
}

@Test
Expand All @@ -61,6 +83,12 @@ public void shouldEncodeHexToBase64() {
assertThat(udf.encode("ce95cebbcebbceacceb4ceb1", "hex", "base64"), is("zpXOu867zqzOtM6x"));
assertThat(udf.encode("c39c6265726d656e736368", "hex", "base64"), is("w5xiZXJtZW5zY2g="));

assertThat(udf.encode("0x4578616d706c6521", "hex", "base64"), is("RXhhbXBsZSE="));
assertThat(udf.encode("X'7e8a016abfff'", "hex", "base64"), is("fooBar//"));
assertThat(udf.encode("x'328ba7b5a8a75627b0'", "hex", "base64"), is("MountainView"));
assertThat(udf.encode("0x", "hex", "base64"), is(""));
assertThat(udf.encode("X''", "hex", "base64"), is(""));
assertThat(udf.encode("x''", "hex", "base64"), is(""));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,39 @@
{"topic": "test_topic", "value": {"input_string": "4578616d706C6521"}},
{"topic": "test_topic", "value": {"input_string": "ce95cebbcebbceacceb4ceb1"}},
{"topic": "test_topic", "value": {"input_string": "c39c6265726d656e736368"}},
{"topic": "test_topic", "value": {"input_string": null}}
{"topic": "test_topic", "value": {"input_string": null}},
{"topic": "test_topic", "value": {"input_string": "0x4578616d706C6521"}},
{"topic": "test_topic", "value": {"input_string": "X'4578616d706C6521'"}},
{"topic": "test_topic", "value": {"input_string": "x'4578616d706C6521'"}},
{"topic": "test_topic", "value": {"input_string": "0x"}},
{"topic": "test_topic", "value": {"input_string": "X''"}},
{"topic": "test_topic", "value": {"input_string": "x''"}},
{"topic": "test_topic", "value": {"input_string": "0x0x"}},
{"topic": "test_topic", "value": {"input_string": "X'"}},
{"topic": "test_topic", "value": {"input_string": "x'4578616d706C6521"}},
{"topic": "test_topic", "value": {"input_string": "x'578616d706C6521'"}},
{"topic": "test_topic", "value": {"input_string": "0x578616d706C6521"}},
{"topic": "test_topic", "value": {"input_string": "578616d706C6521"}}


],
"outputs": [
{"topic": "OUTPUT", "value": {"ASCII":"Example!", "UTF8": "Example!", "BASE64": "RXhhbXBsZSE="}},
{"topic": "OUTPUT", "value": {"ASCII":"������������", "UTF8": "Ελλάδα", "BASE64": "zpXOu867zqzOtM6x"}},
{"topic": "OUTPUT", "value": {"ASCII":"��bermensch", "UTF8": "Übermensch", "BASE64": "w5xiZXJtZW5zY2g="}},
{"topic": "OUTPUT", "value": {"HEX":null, "UTF8": null, "BASE64": null}}
{"topic": "OUTPUT", "value": {"HEX":null, "UTF8": null, "BASE64": null}},
{"topic": "OUTPUT", "value": {"ASCII":"Example!", "UTF8": "Example!", "BASE64": "RXhhbXBsZSE="}},
{"topic": "OUTPUT", "value": {"ASCII":"Example!", "UTF8": "Example!", "BASE64": "RXhhbXBsZSE="}},
{"topic": "OUTPUT", "value": {"ASCII":"Example!", "UTF8": "Example!", "BASE64": "RXhhbXBsZSE="}},
{"topic": "OUTPUT", "value": {"ASCII":"", "UTF8": "", "BASE64": ""}},
{"topic": "OUTPUT", "value": {"ASCII":"", "UTF8": "", "BASE64": ""}},
{"topic": "OUTPUT", "value": {"ASCII":"", "UTF8": "", "BASE64": ""}},
{"topic": "OUTPUT", "value": {"ASCII":null, "UTF8": null, "BASE64": null}},
{"topic": "OUTPUT", "value": {"ASCII":null, "UTF8": null, "BASE64": null}},
{"topic": "OUTPUT", "value": {"ASCII":null, "UTF8": null, "BASE64": null}},
{"topic": "OUTPUT", "value": {"ASCII":null, "UTF8": null, "BASE64": null}},
{"topic": "OUTPUT", "value": {"ASCII":"\u0005xample!", "UTF8": "\u0005xample!", "BASE64": "BXhhbXBsZSE="}},
{"topic": "OUTPUT", "value": {"ASCII":null, "UTF8": null, "BASE64": null}}
]
},
{
Expand Down

0 comments on commit d492556

Please sign in to comment.