Merge pull request opendistro-for-elasticsearch#9 from rupal-bq/benchmarking

benchmark updates
rupal-bq committed Oct 27, 2020
2 parents 9cfb51a + 902df52 commit 8876589
Showing 18 changed files with 269 additions and 111 deletions.
7 changes: 7 additions & 0 deletions benchmark/config.json
@@ -0,0 +1,7 @@
+{
+  "types": ["elasticsearch"],
+  "outputFile": "index.html",
+  "benchmarkPath": "",
+  "scaleFactors": [0.1],
+  "systemPassword": "password"
+}
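
Note: the five keys above must match BenchmarkService's EXPECTED_KEYS set exactly; parseFile (next file) rejects the config otherwise. Jackson maps the JSON number 0.1 to a Double, which is why this commit changes scaleFactors from List<Integer> to List<Double>. A minimal sketch of how the file is consumed, mirroring the parseFile code below (class name and path here are illustrative):

import com.fasterxml.jackson.databind.ObjectMapper;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.Map;

public class ConfigSketch {
  public static void main(String[] args) throws Exception {
    // Read and parse the config the same way parseFile does.
    String json = new String(
        Files.readAllBytes(Paths.get("benchmark/config.json").toAbsolutePath()));
    Map map = new ObjectMapper().readValue(json, Map.class);
    // Jackson yields [0.1] as a list of Doubles, not Integers.
    List scaleFactors = (List) map.get("scaleFactors");
    System.out.println(scaleFactors.get(0).getClass());  // class java.lang.Double
  }
}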
BenchmarkService.java
@@ -22,9 +22,9 @@
import com.amazon.opendistroforelasticsearch.sql.benchmark.utils.load.DataLoader;
import com.amazon.opendistroforelasticsearch.sql.benchmark.utils.load.DataTransformer;
import com.amazon.opendistroforelasticsearch.sql.benchmark.utils.load.DataUtilHolderFactory;
+import com.amazon.opendistroforelasticsearch.sql.benchmark.utils.query.Queries;
import com.amazon.opendistroforelasticsearch.sql.benchmark.utils.query.QueryGenerator;
import com.amazon.opendistroforelasticsearch.sql.benchmark.utils.query.ResultGrabber;
-import com.amazon.opendistroforelasticsearch.sql.benchmark.utils.query.TpchQueries;
import com.amazon.opendistroforelasticsearch.sql.benchmark.utils.results.BenchmarkResults;
import com.amazon.opendistroforelasticsearch.sql.benchmark.utils.results.BenchmarkResultsInterpreter;
import com.fasterxml.jackson.databind.ObjectMapper;
@@ -45,7 +45,7 @@ public class BenchmarkService {
private String outputFile;
private String benchmarkPath;
private String dataDirectoryPath;
-  private List<Integer> scaleFactors;
+  private List<Double> scaleFactors;
private String systemPassword;

private static final String TYPES = "types";
@@ -71,7 +71,7 @@ private BenchmarkService(final String filePath) throws Exception {
*/
private void runBenchmarks() throws Exception {
final List<BenchmarkResults> results = new ArrayList<>();
-    for (Integer sf: scaleFactors) {
+    for (Double sf: scaleFactors) {
DataGenerator.generateData(benchmarkPath, sf);
QueryGenerator.generateQueries(benchmarkPath, sf);
for (final String type: types) {
@@ -111,10 +111,10 @@ private void performDataLoad(final String type) throws Exception {
* @return BenchmarkResults for the query execution.
* @throws Exception Thrown if benchmarking fails.
*/
-  private BenchmarkResults performBenchmark(final String type, final Integer scaleFactor)
+  private BenchmarkResults performBenchmark(final String type, final Double scaleFactor)
throws Exception {
final ResultGrabber resultGrabber = new ResultGrabber(type, scaleFactor);
-    return resultGrabber.runQueries(TpchQueries.tpchQueries, benchmarkPath);
+    return resultGrabber.runQueries(Queries.queries, benchmarkPath);
}

/**
@@ -132,7 +132,7 @@ private void interpretResults(final List<BenchmarkResults> results) throws Exception {
* @throws Exception Thrown if config file parsing fails.
*/
private void parseFile(final String filePath) throws Exception {
-    final String jsonString = new String(Files.readAllBytes(Paths.get(filePath)));
+    final String jsonString = new String(Files.readAllBytes(Paths.get(filePath).toAbsolutePath()));
final ObjectMapper mapper = new ObjectMapper();
final Map map = mapper.readValue(jsonString, Map.class);
if (!map.keySet().equals(EXPECTED_KEYS)) {
@@ -141,12 +141,13 @@
+ "Instead it contained the following keys: '%s'.",
EXPECTED_KEYS.toString(), map.keySet().toString()));
}
-    types = getValueCheckType(map, TYPES, types.getClass());
-    outputFile = getValueCheckType(map, OUTPUT_FILE, outputFile.getClass());
-    benchmarkPath = getValueCheckType(map, BENCHMARK_PATH, benchmarkPath.getClass());
-    dataDirectoryPath = benchmarkPath + "data/";
-    scaleFactors = getValueCheckType(map, SCALE_FACTORS, scaleFactors.getClass());
-    systemPassword = getValueCheckType(map, SYSTEM_PASSWORD, systemPassword.getClass());
+    types = getValueCheckType(map, TYPES, ArrayList.class);
+    outputFile = getValueCheckType(map, OUTPUT_FILE, String.class);
+    final String basePath = getValueCheckType(map, BENCHMARK_PATH, String.class);
+    benchmarkPath = Paths.get(basePath).toAbsolutePath().toString() + "/";
+    dataDirectoryPath = Paths.get(basePath, "data").toString() + "/";
+    scaleFactors = getValueCheckType(map, SCALE_FACTORS, ArrayList.class);
+    systemPassword = getValueCheckType(map, SYSTEM_PASSWORD, String.class);
}
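
Note: passing literal class tokens (ArrayList.class, String.class) instead of field.getClass() also removes a latent NullPointerException risk: the fields carry no initializers in the lines shown, so calling getClass() on them before first assignment could fail. The body of getValueCheckType sits outside this hunk; a hypothetical reconstruction of its shape, for illustration only:

// Hypothetical sketch -- the real implementation is not part of this diff.
// Fetches a config value and checks its runtime type before casting.
@SuppressWarnings("unchecked")
private <T> T getValueCheckType(final Map map, final String key, final Class<T> clazz)
    throws Exception {
  final Object value = map.get(key);
  if (!clazz.isInstance(value)) {
    throw new Exception(String.format(
        "Expected key '%s' to have type %s, but found %s.",
        key, clazz.getSimpleName(),
        value == null ? "null" : value.getClass().getSimpleName()));
  }
  return (T) value;
}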

CommandExecution.java
@@ -15,12 +15,15 @@

package com.amazon.opendistroforelasticsearch.sql.benchmark.utils;

+import java.io.BufferedReader;
import java.io.IOException;
+import java.io.InputStreamReader;

/**
* Class to handle command execution.
*/
public class CommandExecution {

/**
* Function to execute commands.
*
@@ -34,5 +37,13 @@ public static void executeCommand(final String commands)
if (process != null) {
process.waitFor();
}
System.out.println("Output of " + commands);
try (BufferedReader input = new BufferedReader(
new InputStreamReader(process.getInputStream()))) {
String line;
while ((line = input.readLine()) != null) {
System.out.println(line);
}
}
}
}
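
Note: the new block echoes the child process's stdout, which makes failed curl uploads visible in the benchmark logs. One caveat, assuming the process is started via Runtime.exec (the creation line sits above this hunk): the stream is drained only after waitFor(), so a child writing more than the OS pipe buffer (commonly 64 KB) blocks on write while the parent blocks in waitFor(). A sketch of the deadlock-free ordering, not the committed code:

// Drain stdout before waiting, so a chatty child can never fill the pipe
// buffer and stall; the "bash -c" invocation here is an assumption.
Process process = Runtime.getRuntime().exec(new String[]{"bash", "-c", commands});
try (BufferedReader input = new BufferedReader(
    new InputStreamReader(process.getInputStream()))) {
  String line;
  while ((line = input.readLine()) != null) {
    System.out.println(line);
  }
}
process.waitFor();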
TpchSchema.java
@@ -11,88 +11,88 @@ public class TpchSchema {
schemaMap = new LinkedHashMap<>();

LinkedHashMap customerArgs = new LinkedHashMap<>();
customerArgs.put("custkey", "integer");
customerArgs.put("name", "keyword");
customerArgs.put("address", "keyword");
customerArgs.put("nationkey", "integer");
customerArgs.put("phone", "keyword");
customerArgs.put("acctbal", "double");
customerArgs.put("mktsegment", "keyword");
customerArgs.put("comment", "keyword");
customerArgs.put("c_custkey", "integer");
customerArgs.put("c_name", "keyword");
customerArgs.put("c_address", "keyword");
customerArgs.put("c_nationkey", "integer");
customerArgs.put("c_phone", "keyword");
customerArgs.put("c_acctbal", "double");
customerArgs.put("c_mktsegment", "keyword");
customerArgs.put("c_comment", "keyword");
schemaMap.put("customer", customerArgs);

LinkedHashMap lineitemArgs = new LinkedHashMap<>();
lineitemArgs.put("orderkey", "integer");
lineitemArgs.put("partkey", "integer");
lineitemArgs.put("suppkey", "integer");
lineitemArgs.put("linenumber", "integer");
lineitemArgs.put("quantity", "integer");
lineitemArgs.put("extendedprice", "double");
lineitemArgs.put("discount", "double");
lineitemArgs.put("tax", "double");
lineitemArgs.put("returnflag", "keyword");
lineitemArgs.put("linestatus", "keyword");
lineitemArgs.put("shipdate", "date");
lineitemArgs.put("commitdate", "date");
lineitemArgs.put("receiptdate", "date");
lineitemArgs.put("shipinstruct", "keyword");
lineitemArgs.put("shipmode", "keyword");
lineitemArgs.put("comment", "keyword");
lineitemArgs.put("l_orderkey", "integer");
lineitemArgs.put("l_partkey", "integer");
lineitemArgs.put("l_suppkey", "integer");
lineitemArgs.put("l_linenumber", "integer");
lineitemArgs.put("l_quantity", "integer");
lineitemArgs.put("l_extendedprice", "double");
lineitemArgs.put("l_discount", "double");
lineitemArgs.put("l_tax", "double");
lineitemArgs.put("l_returnflag", "keyword");
lineitemArgs.put("l_linestatus", "keyword");
lineitemArgs.put("l_shipdate", "date");
lineitemArgs.put("l_commitdate", "date");
lineitemArgs.put("l_receiptdate", "date");
lineitemArgs.put("l_shipinstruct", "keyword");
lineitemArgs.put("l_shipmode", "keyword");
lineitemArgs.put("l_comment", "keyword");
schemaMap.put("lineitem", lineitemArgs);

LinkedHashMap nationArgs = new LinkedHashMap<>();
nationArgs.put("nationkey", "integer");
nationArgs.put("name", "keyword");
nationArgs.put("regionkey", "integer");
nationArgs.put("comment", "keyword");
nationArgs.put("n_nationkey", "integer");
nationArgs.put("n_name", "keyword");
nationArgs.put("n_regionkey", "integer");
nationArgs.put("n_comment", "keyword");
schemaMap.put("nation", nationArgs);

LinkedHashMap ordersArgs = new LinkedHashMap<>();
ordersArgs.put("orderkey", "integer");
ordersArgs.put("custkey", "integer");
ordersArgs.put("orderstatus", "keyword");
ordersArgs.put("totalprice", "double");
ordersArgs.put("orderdate", "date");
ordersArgs.put("orderpriority", "keyword");
ordersArgs.put("clerk", "keyword");
ordersArgs.put("shippriority", "integer");
ordersArgs.put("comment", "keyword");
ordersArgs.put("o_orderkey", "integer");
ordersArgs.put("o_custkey", "integer");
ordersArgs.put("o_orderstatus", "keyword");
ordersArgs.put("o_totalprice", "double");
ordersArgs.put("o_orderdate", "date");
ordersArgs.put("o_orderpriority", "keyword");
ordersArgs.put("o_clerk", "keyword");
ordersArgs.put("o_shippriority", "integer");
ordersArgs.put("o_comment", "keyword");
schemaMap.put("orders", ordersArgs);

LinkedHashMap partArgs = new LinkedHashMap<>();
partArgs.put("partkey", "integer");
partArgs.put("name", "keyword");
partArgs.put("mfgr", "keyword");
partArgs.put("brand", "keyword");
partArgs.put("type", "keyword");
partArgs.put("size", "integer");
partArgs.put("container", "keyword");
partArgs.put("retailprice", "double");
partArgs.put("comment", "keyword");
partArgs.put("p_partkey", "integer");
partArgs.put("p_name", "keyword");
partArgs.put("p_mfgr", "keyword");
partArgs.put("p_brand", "keyword");
partArgs.put("p_type", "keyword");
partArgs.put("p_size", "integer");
partArgs.put("p_container", "keyword");
partArgs.put("p_retailprice", "double");
partArgs.put("p_comment", "keyword");
schemaMap.put("part", partArgs);

LinkedHashMap partsuppArgs = new LinkedHashMap<>();
partsuppArgs.put("partkey", "integer");
partsuppArgs.put("suppkey", "integer");
partsuppArgs.put("availqty", "integer");
partsuppArgs.put("supplycost", "double");
partsuppArgs.put("comment", "keyword");
partsuppArgs.put("ps_partkey", "integer");
partsuppArgs.put("ps_suppkey", "integer");
partsuppArgs.put("ps_availqty", "integer");
partsuppArgs.put("ps_supplycost", "double");
partsuppArgs.put("ps_comment", "keyword");
schemaMap.put("partsupp", partsuppArgs);

LinkedHashMap regionArgs = new LinkedHashMap<>();
regionArgs.put("regionkey", "integer");
regionArgs.put("name", "keyword");
regionArgs.put("comment", "keyword");
regionArgs.put("r_regionkey", "integer");
regionArgs.put("r_name", "keyword");
regionArgs.put("r_comment", "keyword");
schemaMap.put("region", regionArgs);

LinkedHashMap supplierArgs = new LinkedHashMap<>();
supplierArgs.put("suppkey", "integer");
supplierArgs.put("name", "keyword");
supplierArgs.put("address", "keyword");
supplierArgs.put("nationkey", "integer");
supplierArgs.put("phone", "keyword");
supplierArgs.put("acctbal", "double");
supplierArgs.put("comment", "keyword");
supplierArgs.put("s_suppkey", "integer");
supplierArgs.put("s_name", "keyword");
supplierArgs.put("s_address", "keyword");
supplierArgs.put("s_nationkey", "integer");
supplierArgs.put("s_phone", "keyword");
supplierArgs.put("s_acctbal", "double");
supplierArgs.put("s_comment", "keyword");
schemaMap.put("supplier", supplierArgs);
}
}
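
Note: the renamed fields follow the standard TPC-H column prefixes (c_ for customer, l_ for lineitem, ps_ for partsupp, and so on), so the generated indices accept stock TPC-H query text without column rewriting. For example, TPC-H Q6 filters on l_shipdate, l_discount, and l_quantity, all of which now exist verbatim in the lineitem mapping. An illustrative lookup (schemaMap is the static table-to-columns map built above, accessed elsewhere as TpchSchema.schemaMap):

LinkedHashMap lineitem = (LinkedHashMap) TpchSchema.schemaMap.get("lineitem");
System.out.println(lineitem.get("l_shipdate"));   // date
System.out.println(lineitem.get("l_discount"));   // double
System.out.println(lineitem.get("l_quantity"));   // integer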
ElasticsearchDataFormat.java
@@ -16,13 +16,17 @@
package com.amazon.opendistroforelasticsearch.sql.benchmark.utils.load.elasticsearch;

import com.amazon.opendistroforelasticsearch.sql.benchmark.utils.load.DataFormat;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.Map;

/**
* Data format for Elasticsearch database.
*/
public class ElasticsearchDataFormat extends DataFormat {

private String dataPath;
+  private Map<String, LinkedList<String>> tableDataFilesList = new LinkedHashMap<>();

public void setDataPath(String path) {
dataPath = path;
@@ -31,4 +35,20 @@ public void setDataPath(String path) {
public String getDataPath() {
return dataPath;
}

+  /**
+   * Add a file to tableDataFilesList.
+   * @param tablename Table name
+   * @param filename File name
+   */
+  public void addFile(String tablename, String filename) {
+    if (!tableDataFilesList.containsKey(tablename)) {
+      tableDataFilesList.put(tablename, new LinkedList<>());
+    }
+    tableDataFilesList.get(tablename).add(filename);
+  }
+
+  public Map<String, LinkedList<String>> getTableDataFilesList() {
+    return tableDataFilesList;
+  }
}
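
Note: the format object now carries the chunked data files per table, in insertion order (LinkedHashMap plus LinkedList), so the loader can replay them in the order the transformer wrote them. A usage sketch with illustrative values:

ElasticsearchDataFormat format = new ElasticsearchDataFormat();
format.setDataPath("/tmp/tpch/elasticsearch/");      // illustrative path
format.addFile("lineitem", "lineitem_data_1.json");  // first 10,000 rows
format.addFile("lineitem", "lineitem_data_2.json");  // next chunk
for (Map.Entry<String, LinkedList<String>> entry
    : format.getTableDataFilesList().entrySet()) {
  System.out.println(entry.getKey() + " -> " + entry.getValue());
}
// lineitem -> [lineitem_data_1.json, lineitem_data_2.json]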
ElasticsearchDataLoader.java
@@ -19,6 +19,8 @@
import com.amazon.opendistroforelasticsearch.sql.benchmark.utils.load.DataFormat;
import com.amazon.opendistroforelasticsearch.sql.benchmark.utils.load.DataLoader;
import com.amazon.opendistroforelasticsearch.sql.benchmark.utils.load.TpchSchema;
+import java.util.LinkedList;
+import java.util.Map;

/**
* Data loader for Elasticsearch database.
@@ -38,16 +40,26 @@ public void loadData(final DataFormat data) throws Exception {
}
String commands = "cd " + ((ElasticsearchDataFormat) data).getDataPath();

+    // Add table mappings
for (String tableName : TpchSchema.schemaMap.keySet()) {
commands += " && curl -H 'Content-Type: application/x-ndjson' -XPUT 'https://localhost:9200/"
+ tableName + "?pretty' -u admin:admin --insecure --data-binary @" + tableName
+ "_mappings.json"
+ " && curl -H 'Content-Type: application/x-ndjson' -XPOST 'https://localhost:9200/"
+ tableName + "/_bulk?pretty' -u admin:admin --insecure --data-binary @" + tableName
+ "_data.json >> "
+ tableName + "_upload.log";
+ "_mappings.json";
}

CommandExecution.executeCommand(commands);

+    // Add table data
+    Map<String, LinkedList<String>> tableDataFilesList = ((ElasticsearchDataFormat) data)
+        .getTableDataFilesList();
+    for (String tableName : tableDataFilesList.keySet()) {
+      for (String fileName : tableDataFilesList.get(tableName)) {
+
+        String dataCommand = "cd " + ((ElasticsearchDataFormat) data).getDataPath()
+            + " && curl -H 'Content-Type: application/x-ndjson' -XPOST 'https://localhost:9200/"
+            + tableName + "/_bulk?pretty' -u admin:admin --insecure --data-binary @" + fileName
+            + " >> " + fileName.replace(".json", "") + "_upload.log";
+        CommandExecution.executeCommand(dataCommand);
+      }
+    }
}
}
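
Note: mappings are still created in one combined shell command, but bulk uploads now run one executeCommand call per chunk file instead of a single giant POST per table. Keeping each _bulk body small matters because Elasticsearch rejects HTTP requests larger than http.max_content_length (100 MB by default), and oversized bulks also pressure the coordinating node's heap. With illustrative values substituted, each generated command expands to:

cd /tmp/tpch/elasticsearch/ && curl -H 'Content-Type: application/x-ndjson' -XPOST 'https://localhost:9200/lineitem/_bulk?pretty' -u admin:admin --insecure --data-binary @lineitem_data_1.json >> lineitem_data_1_upload.log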
ElasticsearchDataTransformer.java
@@ -51,6 +51,8 @@ public DataFormat transformData(final String dataPath) throws Exception {
throw new FileNotFoundException("Invalid Directory");
}

+    ElasticsearchDataFormat result = new ElasticsearchDataFormat();
+
for (String tableName : TpchSchema.schemaMap.keySet()) {
File table = new File(dataPath + tableName + ".tbl");
if (!table.exists() || !table.isFile()) {
@@ -65,10 +67,13 @@

// Create new json file for every table / .tbl file
transformedDataPath = dataPath + "elasticsearch/";
+      int tableDataFilesIndex = 1;
+      String filename = tableName + "_data_" + tableDataFilesIndex++ + ".json";
      BufferedWriter bufferedWriter = new BufferedWriter(
-          new FileWriter(transformedDataPath + tableName + "_data.json", true));
+          new FileWriter(transformedDataPath + filename, true));
+      long tableLineIndex = 1;
      try {
+        result.addFile(tableName, filename);
String line;
while ((line = bufferedReader.readLine()) != null) {
List<String> argsList = Arrays.asList(line.split("\\|"));
@@ -91,6 +96,14 @@
}
bufferedWriter.write(jsonLine.toJSONString());
bufferedWriter.newLine();

+          if (tableLineIndex == 10000 * (tableDataFilesIndex - 1)) {
+            bufferedWriter.close();
+            filename = tableName + "_data_" + tableDataFilesIndex++ + ".json";
+            bufferedWriter = new BufferedWriter(
+                new FileWriter(transformedDataPath + filename, true));
+            result.addFile(tableName, filename);
+          }
}
} finally {
bufferedWriter.close();
@@ -99,7 +112,6 @@
}

createTableMappings();
-    ElasticsearchDataFormat result = new ElasticsearchDataFormat();
result.setDataPath(transformedDataPath);
return result;
}
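Note on the rotation arithmetic: file _1 is created up front (tableDataFilesIndex advances to 2), and thereafter the condition tableLineIndex == 10000 * (tableDataFilesIndex - 1) trips at rows 10,000, 20,000, and so on, so each chunk holds 10,000 source rows. A standalone sketch of the rule (assuming tableLineIndex is incremented once per row in the elided loop body):

// Isolated sketch of the file-rotation rule for a 25,000-row table.
long tableLineIndex = 1;
int tableDataFilesIndex = 2;  // file _1 already open
for (; tableLineIndex <= 25000; tableLineIndex++) {
  if (tableLineIndex == 10000L * (tableDataFilesIndex - 1)) {
    System.out.println("row " + tableLineIndex + ": rotate to file _" + tableDataFilesIndex);
    tableDataFilesIndex++;
  }
}
// row 10000: rotate to file _2
// row 20000: rotate to file _3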
Queries.java (renamed from TpchQueries.java)
@@ -17,8 +17,8 @@

import java.util.LinkedList;

-public class TpchQueries {
+public class Queries {

  public static int tpchQueriesCountMax = 22;
-  public static LinkedList<String> tpchQueries = new LinkedList<>();
+  public static LinkedList<String> queries = new LinkedList<>();
}
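
Note: renaming TpchQueries/tpchQueries to Queries/queries decouples ResultGrabber.runQueries from the TPC-H suite; the runner simply executes whatever the static list holds. A hypothetical registration, purely for illustration:

// Hypothetical -- populate the shared list before BenchmarkService runs.
Queries.queries.add("select count(*) from lineitem");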