From a45eabbbacb331647c01cacc570eafaf824a18b6 Mon Sep 17 00:00:00 2001 From: libailin Date: Fri, 20 Sep 2024 11:44:24 +0800 Subject: [PATCH] [Feature-#1918][s3] Add support for reading all types of documents supported by Apache Tika, read excel format [Feature-#1918][s3] Add support for reading all types of documents supported by Apache Tika, read excel format --- .../chunjun-connector-s3/pom.xml | 12 + .../chunjun/connector/s3/config/S3Config.java | 21 + .../connector/s3/sink/S3DynamicTableSink.java | 2 +- .../connector/s3/sink/S3OutputFormat.java | 76 +- .../s3/source/S3DynamicTableSource.java | 8 +- .../connector/s3/source/S3InputFormat.java | 118 +- .../s3/table/S3DynamicTableFactory.java | 69 +- .../connector/s3/table/options/S3Options.java | 27 + .../chunjun/connector/s3/util/S3Util.java | 25 +- chunjun-formats/chunjun-format-excel/pom.xml | 59 + .../excel/client/ExcelReadListener.java | 86 + .../excel/client/ExcelReaderExecutor.java | 68 + .../client/ExcelSubExceptionCarrier.java | 31 + .../chunjun/format/excel/client/Row.java | 38 + .../format/excel/common/ExcelData.java | 36 + .../excel/config/ExcelFormatConfig.java | 40 + .../excel/options/ExcelFormatOptions.java | 42 + .../format/excel/source/ExcelInputFormat.java | 161 + .../chunjun-format-protobuf/pom.xml | 59 + .../format/protobuf/PbFormatFactory.java | 111 + .../format/protobuf/PbFormatOptions.java | 35 + .../chunjun/format/protobuf/PbFormatType.java | 113 + .../format/protobuf/PbMessageAdaptor.java | 96 + .../protobuf/deserialize/PbMessageGetter.java | 131 + .../format/protobuf/deserialize/PbParser.java | 53 + .../PbRowDataDeserializationSchema.java | 95 + .../deserialize/PbToRowDataPbConverter.java | 230 ++ .../protobuf/serialize/PbMessageSetter.java | 102 + .../PbRowDataSerializationSchema.java | 82 + .../serialize/RowDataToPbConverter.java | 328 ++ .../format/protobuf/util/FormatCheckUtil.java | 217 ++ .../format/protobuf/util/PbReflectUtil.java | 131 + 
.../protobuf/test/MessageTestOuterClass.java | 2816 +++++++++++++++++ .../protobuf/test/PbFormatFactoryTest.java | 143 + chunjun-formats/chunjun-format-tika/pom.xml | 70 + .../chunjun/format/tika/common/TikaData.java | 37 + .../format/tika/config/TikaReadConfig.java | 47 + .../format/tika/options/TikaOptions.java | 43 + .../format/tika/source/TikaInputFormat.java | 119 + .../tika/source/TikaReaderExecutor.java | 178 ++ chunjun-formats/pom.xml | 133 + pom.xml | 3 +- 42 files changed, 6270 insertions(+), 21 deletions(-) create mode 100644 chunjun-formats/chunjun-format-excel/pom.xml create mode 100644 chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/client/ExcelReadListener.java create mode 100644 chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/client/ExcelReaderExecutor.java create mode 100644 chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/client/ExcelSubExceptionCarrier.java create mode 100644 chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/client/Row.java create mode 100644 chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/common/ExcelData.java create mode 100644 chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/config/ExcelFormatConfig.java create mode 100644 chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/options/ExcelFormatOptions.java create mode 100644 chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/source/ExcelInputFormat.java create mode 100644 chunjun-formats/chunjun-format-protobuf/pom.xml create mode 100644 chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/PbFormatFactory.java create mode 100644 chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/PbFormatOptions.java create mode 100644 
chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/PbFormatType.java create mode 100644 chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/PbMessageAdaptor.java create mode 100644 chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/deserialize/PbMessageGetter.java create mode 100644 chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/deserialize/PbParser.java create mode 100644 chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/deserialize/PbRowDataDeserializationSchema.java create mode 100644 chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/deserialize/PbToRowDataPbConverter.java create mode 100644 chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/serialize/PbMessageSetter.java create mode 100644 chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/serialize/PbRowDataSerializationSchema.java create mode 100644 chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/serialize/RowDataToPbConverter.java create mode 100644 chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/util/FormatCheckUtil.java create mode 100644 chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/util/PbReflectUtil.java create mode 100644 chunjun-formats/chunjun-format-protobuf/src/test/java/com/dtstack/chunjun/format/protobuf/test/MessageTestOuterClass.java create mode 100644 chunjun-formats/chunjun-format-protobuf/src/test/java/com/dtstack/chunjun/format/protobuf/test/PbFormatFactoryTest.java create mode 100644 chunjun-formats/chunjun-format-tika/pom.xml create mode 100644 chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/common/TikaData.java create 
mode 100644 chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/config/TikaReadConfig.java create mode 100644 chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/options/TikaOptions.java create mode 100644 chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/source/TikaInputFormat.java create mode 100644 chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/source/TikaReaderExecutor.java create mode 100644 chunjun-formats/pom.xml diff --git a/chunjun-connectors/chunjun-connector-s3/pom.xml b/chunjun-connectors/chunjun-connector-s3/pom.xml index 51faa129a1..5f62bdf3bc 100644 --- a/chunjun-connectors/chunjun-connector-s3/pom.xml +++ b/chunjun-connectors/chunjun-connector-s3/pom.xml @@ -68,6 +68,18 @@ 1.11-8 test + + com.dtstack.chunjun + chunjun-format-tika + ${project.version} + provided + + + com.dtstack.chunjun + chunjun-format-excel + ${project.version} + provided + diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/config/S3Config.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/config/S3Config.java index 2fe73532da..2d71c4e59b 100644 --- a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/config/S3Config.java +++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/config/S3Config.java @@ -19,6 +19,8 @@ package com.dtstack.chunjun.connector.s3.config; import com.dtstack.chunjun.config.CommonConfig; +import com.dtstack.chunjun.format.excel.config.ExcelFormatConfig; +import com.dtstack.chunjun.format.tika.config.TikaReadConfig; import com.amazonaws.regions.Regions; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; @@ -86,4 +88,23 @@ public class S3Config extends CommonConfig implements Serializable { /** 生成的文件名后缀 */ private String suffix; + + /** 对象匹配规则 */ + private String 
objectsRegex; + + /** 是否使用文本限定符 */ + private boolean useTextQualifier = true; + + /** 是否开启每条记录生成一个对应的文件 */ + private boolean enableWriteSingleRecordAsFile = false; + + /** 保留原始文件名 */ + private boolean keepOriginalFilename = false; + + /** 禁用 Bucket 名称注入到 endpoint 前缀 */ + private boolean disableBucketNameInEndpoint = false; + + private TikaReadConfig tikaReadConfig = new TikaReadConfig(); + + private ExcelFormatConfig excelFormatConfig = new ExcelFormatConfig(); } diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3DynamicTableSink.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3DynamicTableSink.java index e50a2e1125..bea54bc953 100644 --- a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3DynamicTableSink.java +++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3DynamicTableSink.java @@ -84,6 +84,6 @@ public DynamicTableSink copy() { @Override public String asSummaryString() { - return "StreamDynamicTableSink"; + return S3DynamicTableSink.class.getName(); } } diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3OutputFormat.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3OutputFormat.java index 7abd7d4c87..94b8609a76 100644 --- a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3OutputFormat.java +++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3OutputFormat.java @@ -27,6 +27,7 @@ import com.dtstack.chunjun.sink.format.BaseRichOutputFormat; import com.dtstack.chunjun.throwable.ChunJunRuntimeException; import com.dtstack.chunjun.throwable.WriteRecordException; +import com.dtstack.chunjun.util.GsonUtil; import org.apache.flink.api.java.tuple.Tuple2; import 
org.apache.flink.table.data.RowData; @@ -34,15 +35,20 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.PartETag; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang3.StringUtils; import java.io.StringWriter; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.Objects; +import java.util.UUID; import java.util.stream.Collectors; +import static com.dtstack.chunjun.format.tika.config.TikaReadConfig.ORIGINAL_FILENAME; + /** The OutputFormat Implementation which write data to Amazon S3. */ @Slf4j public class S3OutputFormat extends BaseRichOutputFormat { @@ -137,7 +143,8 @@ private void checkOutputDir() { amazonS3, s3Config.getBucket(), s3Config.getObject(), - s3Config.getFetchSize()); + s3Config.getFetchSize(), + s3Config.getObjectsRegex()); } else { subObjects = S3Util.listObjectsByv1( @@ -166,11 +173,17 @@ private void nextBlock() { sw = new StringWriter(); } this.writerUtil = new WriterUtil(sw, s3Config.getFieldDelimiter()); + if (!s3Config.isUseTextQualifier()) { + writerUtil.setUseTextQualifier(false); + } this.currentPartNumber = this.currentPartNumber + 1; } /** Create file multipart upload ID */ private void createActionFinishedTag() { + if (s3Config.isEnableWriteSingleRecordAsFile()) { + return; + } if (!StringUtils.isNotBlank(currentUploadId)) { this.currentUploadId = S3Util.initiateMultipartUploadAndGetId( @@ -193,8 +206,11 @@ private void beforeWriteRecords() { } protected void flushDataInternal() { + if (sw == null) { + return; + } StringBuffer sb = sw.getBuffer(); - if (sb.length() > MIN_SIZE || willClose) { + if (sb.length() > MIN_SIZE || willClose || s3Config.isEnableWriteSingleRecordAsFile()) { byte[] byteArray; try { byteArray = sb.toString().getBytes(s3Config.getEncoding()); @@ -202,17 +218,23 @@ protected void flushDataInternal() { throw new ChunJunRuntimeException(e); } 
log.info("Upload part size:" + byteArray.length); - PartETag partETag = - S3Util.uploadPart( - amazonS3, - s3Config.getBucket(), - s3Config.getObject(), - this.currentUploadId, - this.currentPartNumber, - byteArray); - - MyPartETag myPartETag = new MyPartETag(partETag); - myPartETags.add(myPartETag); + + if (s3Config.isEnableWriteSingleRecordAsFile()) { + S3Util.putStringObject( + amazonS3, s3Config.getBucket(), s3Config.getObject(), sb.toString()); + } else { + PartETag partETag = + S3Util.uploadPart( + amazonS3, + s3Config.getBucket(), + s3Config.getObject(), + this.currentUploadId, + this.currentPartNumber, + byteArray); + + MyPartETag myPartETag = new MyPartETag(partETag); + myPartETags.add(myPartETag); + } log.debug( "task-{} upload etag:[{}]", @@ -225,6 +247,9 @@ protected void flushDataInternal() { } private void completeMultipartUploadFile() { + if (s3Config.isEnableWriteSingleRecordAsFile()) { + return; + } if (this.currentPartNumber > 10000) { throw new IllegalArgumentException("part can not bigger than 10000"); } @@ -282,7 +307,11 @@ protected void writeSingleRecordInternal(RowData rowData) throws WriteRecordExce // convert row to string stringRecord = (String[]) rowConverter.toExternal(rowData, stringRecord); try { - for (int i = 0; i < columnNameList.size(); ++i) { + int columnSize = columnNameList.size(); + if (s3Config.isEnableWriteSingleRecordAsFile()) { + columnSize = 1; + } + for (int i = 0; i < columnSize; ++i) { String column = stringRecord[i]; @@ -292,6 +321,25 @@ protected void writeSingleRecordInternal(RowData rowData) throws WriteRecordExce writerUtil.write(column); } writerUtil.endRecord(); + + if (s3Config.isEnableWriteSingleRecordAsFile()) { + Map metadataMap = + GsonUtil.GSON.fromJson(stringRecord[1], Map.class); + String key = FilenameUtils.getPath(s3Config.getObject()); + // 是否保留原始文件名 + if (s3Config.isKeepOriginalFilename()) { + key += metadataMap.get(ORIGINAL_FILENAME) + getExtension(); + } else { + key += + jobId + + "_" + + 
taskNumber + + "_" + + UUID.randomUUID().toString() + + getExtension(); + } + s3Config.setObject(key); + } flushDataInternal(); } catch (Exception ex) { String msg = "RowData2string error RowData(" + rowData + ")"; diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3DynamicTableSource.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3DynamicTableSource.java index 5ee4beec7b..01986de2b8 100644 --- a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3DynamicTableSource.java +++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3DynamicTableSource.java @@ -66,7 +66,13 @@ public ScanRuntimeProvider getScanRuntimeProvider(ScanContext runtimeProviderCon field.setName(column.getName()); field.setType( TypeConfig.fromString(column.getDataType().getLogicalType().asSummaryString())); - field.setIndex(i); + int index = + s3Config.getExcelFormatConfig().getColumnIndex() != null + ? 
s3Config.getExcelFormatConfig() + .getColumnIndex() + .get(columns.indexOf(column)) + : columns.indexOf(column); + field.setIndex(index); columnList.add(field); } s3Config.setColumn(columnList); diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3InputFormat.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3InputFormat.java index c450c96854..9b1ac7ee32 100644 --- a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3InputFormat.java +++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3InputFormat.java @@ -18,12 +18,17 @@ package com.dtstack.chunjun.connector.s3.source; +import com.dtstack.chunjun.config.FieldConfig; import com.dtstack.chunjun.config.RestoreConfig; import com.dtstack.chunjun.connector.s3.config.S3Config; import com.dtstack.chunjun.connector.s3.enums.CompressType; import com.dtstack.chunjun.connector.s3.util.ReaderUtil; import com.dtstack.chunjun.connector.s3.util.S3SimpleObject; import com.dtstack.chunjun.connector.s3.util.S3Util; +import com.dtstack.chunjun.format.excel.common.ExcelData; +import com.dtstack.chunjun.format.excel.source.ExcelInputFormat; +import com.dtstack.chunjun.format.tika.common.TikaData; +import com.dtstack.chunjun.format.tika.source.TikaInputFormat; import com.dtstack.chunjun.restore.FormatState; import com.dtstack.chunjun.source.format.BaseRichInputFormat; import com.dtstack.chunjun.throwable.ChunJunRuntimeException; @@ -38,6 +43,8 @@ import com.amazonaws.services.s3.model.S3Object; import com.amazonaws.services.s3.model.S3ObjectInputStream; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang3.StringUtils; import java.io.IOException; @@ -71,6 +78,12 @@ public class S3InputFormat extends BaseRichInputFormat { 
private RestoreConfig restoreConf; + private transient TikaData tikaData; + private TikaInputFormat tikaInputFormat; + + private transient ExcelData excelData; + private ExcelInputFormat excelInputFormat; + @Override public void openInputFormat() throws IOException { super.openInputFormat(); @@ -137,7 +150,31 @@ protected InputSplit[] createInputSplitsInternal(int minNumSplits) { protected RowData nextRecordInternal(RowData rowData) throws ReadRecordException { String[] fields; try { - fields = readerUtil.getValues(); + if (s3Config.getTikaReadConfig().isUseExtract() && tikaData != null) { + fields = tikaData.getData(); + } else if (s3Config.getExcelFormatConfig().isUseExcelFormat() && excelData != null) { + fields = excelData.getData(); + } else { + fields = readerUtil.getValues(); + } + // 处理字段配置了对应的列索引 + if (s3Config.getExcelFormatConfig().getColumnIndex() != null) { + List columns = s3Config.getColumn(); + String[] fieldsData = new String[columns.size()]; + for (int i = 0; i < CollectionUtils.size(columns); i++) { + FieldConfig fieldConfig = columns.get(i); + if (fieldConfig.getIndex() >= fields.length) { + String errorMessage = + String.format( + "The column index is greater than the data size." + + " The current column index is [%s], but the data size is [%s]. 
Data loss may occur.", + fieldConfig.getIndex(), fields.length); + throw new IllegalArgumentException(errorMessage); + } + fieldsData[i] = fields[fieldConfig.getIndex()]; + } + fields = fieldsData; + } rowData = rowConverter.toInternal(fields); } catch (IOException e) { throw new ChunJunRuntimeException(e); @@ -164,9 +201,82 @@ protected void closeInternal() { @Override public boolean reachedEnd() throws IOException { + if (s3Config.getTikaReadConfig().isUseExtract()) { + tikaData = getTikaData(); + return tikaData == null || tikaData.getData() == null; + } else if (s3Config.getExcelFormatConfig().isUseExcelFormat()) { + excelData = getExcelData(); + return excelData == null || excelData.getData() == null; + } return reachedEndWithoutCheckState(); } + public ExcelData getExcelData() { + if (excelInputFormat == null) { + nextExcelDataStream(); + } + if (excelInputFormat != null) { + if (!excelInputFormat.hasNext()) { + excelInputFormat.close(); + excelInputFormat = null; + return getExcelData(); + } + String[] record = excelInputFormat.nextRecord(); + return new ExcelData(record); + } else { + return null; + } + } + + private void nextExcelDataStream() { + if (splits.hasNext()) { + currentObject = splits.next(); + GetObjectRequest rangeObjectRequest = + new GetObjectRequest(s3Config.getBucket(), currentObject); + log.info("Current read file {}", currentObject); + S3Object o = amazonS3.getObject(rangeObjectRequest); + S3ObjectInputStream s3is = o.getObjectContent(); + excelInputFormat = new ExcelInputFormat(); + excelInputFormat.open(s3is, s3Config.getExcelFormatConfig()); + } else { + excelInputFormat = null; + } + } + + public TikaData getTikaData() { + if (tikaInputFormat == null) { + nextTikaDataStream(); + } + if (tikaInputFormat != null) { + if (!tikaInputFormat.hasNext()) { + tikaInputFormat.close(); + tikaInputFormat = null; + return getTikaData(); + } + String[] record = tikaInputFormat.nextRecord(); + return new TikaData(record); + } else { + return null; + 
} + } + + private void nextTikaDataStream() { + if (splits.hasNext()) { + currentObject = splits.next(); + GetObjectRequest rangeObjectRequest = + new GetObjectRequest(s3Config.getBucket(), currentObject); + log.info("Current read file {}", currentObject); + S3Object o = amazonS3.getObject(rangeObjectRequest); + S3ObjectInputStream s3is = o.getObjectContent(); + tikaInputFormat = + new TikaInputFormat( + s3Config.getTikaReadConfig(), s3Config.getFieldNameList().size()); + tikaInputFormat.open(s3is, FilenameUtils.getName(currentObject)); + } else { + tikaInputFormat = null; + } + } + public boolean reachedEndWithoutCheckState() throws IOException { // br is empty, indicating that a new file needs to be read if (readerUtil == null) { @@ -259,7 +369,11 @@ public List resolveObjects() { if (s3Config.isUseV2()) { subObjects = S3Util.listObjectsKeyByPrefix( - amazonS3, bucket, prefix, s3Config.getFetchSize()); + amazonS3, + bucket, + prefix, + s3Config.getFetchSize(), + s3Config.getObjectsRegex()); } else { subObjects = S3Util.listObjectsByv1( diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/table/S3DynamicTableFactory.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/table/S3DynamicTableFactory.java index fa05abaf7f..1d793e74fe 100644 --- a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/table/S3DynamicTableFactory.java +++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/table/S3DynamicTableFactory.java @@ -22,11 +22,17 @@ import com.dtstack.chunjun.connector.s3.sink.S3DynamicTableSink; import com.dtstack.chunjun.connector.s3.source.S3DynamicTableSource; import com.dtstack.chunjun.connector.s3.table.options.S3Options; +import com.dtstack.chunjun.format.excel.config.ExcelFormatConfig; +import com.dtstack.chunjun.format.excel.options.ExcelFormatOptions; +import 
com.dtstack.chunjun.format.tika.config.TikaReadConfig; +import com.dtstack.chunjun.format.tika.options.TikaOptions; import com.dtstack.chunjun.table.options.SinkOptions; import com.dtstack.chunjun.util.GsonUtil; import org.apache.flink.configuration.ConfigOption; import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.table.catalog.Column; +import org.apache.flink.table.catalog.ResolvedSchema; import org.apache.flink.table.connector.sink.DynamicTableSink; import org.apache.flink.table.connector.source.DynamicTableSource; import org.apache.flink.table.factories.DynamicTableSinkFactory; @@ -34,9 +40,14 @@ import org.apache.flink.table.factories.FactoryUtil; import com.google.common.collect.Sets; +import org.apache.commons.lang3.StringUtils; import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.IntStream; public class S3DynamicTableFactory implements DynamicTableSourceFactory, DynamicTableSinkFactory { private static final String IDENTIFIER = "s3-x"; @@ -61,7 +72,46 @@ public DynamicTableSource createDynamicTableSource(Context context) { s3Config.setFirstLineHeader(options.get(S3Options.IS_FIRST_LINE_HEADER)); s3Config.setEndpoint(options.get(S3Options.ENDPOINT)); s3Config.setCompress(options.get(S3Options.COMPRESS)); - return new S3DynamicTableSource(context.getCatalogTable().getResolvedSchema(), s3Config); + s3Config.setObjectsRegex(options.get(S3Options.OBJECTS_REGEX)); + s3Config.setDisableBucketNameInEndpoint( + options.get(S3Options.DISABLE_BUCKET_NAME_IN_ENDPOINT)); + TikaReadConfig tikaReadConfig = new TikaReadConfig(); + tikaReadConfig.setUseExtract(options.get(TikaOptions.USE_EXTRACT)); + tikaReadConfig.setOverlapRatio(options.get(TikaOptions.OVERLAP_RATIO)); + tikaReadConfig.setChunkSize(options.get(TikaOptions.CHUNK_SIZE)); + s3Config.setTikaReadConfig(tikaReadConfig); + ResolvedSchema resolvedSchema = 
context.getCatalogTable().getResolvedSchema(); + List columns = resolvedSchema.getColumns(); + ExcelFormatConfig excelFormatConfig = new ExcelFormatConfig(); + excelFormatConfig.setUseExcelFormat(options.get(ExcelFormatOptions.USE_EXCEL_FORMAT)); + excelFormatConfig.setFirstLineHeader(options.get(S3Options.IS_FIRST_LINE_HEADER)); + if (StringUtils.isNotBlank(options.get(ExcelFormatOptions.SHEET_NO))) { + List sheetNo = + Arrays.stream(options.get(ExcelFormatOptions.SHEET_NO).split(",")) + .map(Integer::parseInt) + .collect(Collectors.toList()); + excelFormatConfig.setSheetNo(sheetNo); + } + if (StringUtils.isNotBlank(options.get(ExcelFormatOptions.COLUMN_INDEX))) { + List columnIndex = + Arrays.stream(options.get(ExcelFormatOptions.COLUMN_INDEX).split(",")) + .map(Integer::parseInt) + .collect(Collectors.toList()); + excelFormatConfig.setColumnIndex(columnIndex); + } + final String[] fields = new String[columns.size()]; + IntStream.range(0, fields.length).forEach(i -> fields[i] = columns.get(i).getName()); + excelFormatConfig.setFields(fields); + s3Config.setExcelFormatConfig(excelFormatConfig); + if (s3Config.getExcelFormatConfig().getColumnIndex() != null + && columns.size() != s3Config.getExcelFormatConfig().getColumnIndex().size()) { + throw new IllegalArgumentException( + String.format( + "The number of fields (%s) is inconsistent with the number of indexes (%s).", + columns.size(), + s3Config.getExcelFormatConfig().getColumnIndex().size())); + } + return new S3DynamicTableSource(resolvedSchema, s3Config); } @Override @@ -94,6 +144,17 @@ public Set> optionalOptions() { options.add(S3Options.SUFFIX); options.add(SinkOptions.SINK_PARALLELISM); options.add(S3Options.WRITE_MODE); + options.add(S3Options.OBJECTS_REGEX); + options.add(S3Options.USE_TEXT_QUALIFIER); + options.add(S3Options.ENABLE_WRITE_SINGLE_RECORD_AS_FILE); + options.add(S3Options.KEEP_ORIGINAL_FILENAME); + options.add(S3Options.DISABLE_BUCKET_NAME_IN_ENDPOINT); + 
options.add(TikaOptions.USE_EXTRACT); + options.add(TikaOptions.CHUNK_SIZE); + options.add(TikaOptions.OVERLAP_RATIO); + options.add(ExcelFormatOptions.SHEET_NO); + options.add(ExcelFormatOptions.COLUMN_INDEX); + options.add(ExcelFormatOptions.USE_EXCEL_FORMAT); return options; } @@ -121,6 +182,12 @@ public DynamicTableSink createDynamicTableSink(Context context) { s3Config.setSuffix(options.get(S3Options.SUFFIX)); s3Config.setParallelism(options.get(SinkOptions.SINK_PARALLELISM)); s3Config.setWriteMode(options.get(S3Options.WRITE_MODE)); + s3Config.setUseTextQualifier(options.get(S3Options.USE_TEXT_QUALIFIER)); + s3Config.setEnableWriteSingleRecordAsFile( + options.get(S3Options.ENABLE_WRITE_SINGLE_RECORD_AS_FILE)); + s3Config.setKeepOriginalFilename(options.get(S3Options.KEEP_ORIGINAL_FILENAME)); + s3Config.setDisableBucketNameInEndpoint( + options.get(S3Options.DISABLE_BUCKET_NAME_IN_ENDPOINT)); return new S3DynamicTableSink(context.getCatalogTable().getResolvedSchema(), s3Config); } diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/table/options/S3Options.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/table/options/S3Options.java index 1f6236438f..a8fff32659 100644 --- a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/table/options/S3Options.java +++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/table/options/S3Options.java @@ -95,4 +95,31 @@ public class S3Options { public static final ConfigOption WRITE_MODE = key("writeMode").stringType().defaultValue("overwrite").withDescription("writeMode"); + + public static final ConfigOption OBJECTS_REGEX = + key("objectsRegex").stringType().noDefaultValue().withDescription("objects regex rule"); + + public static final ConfigOption USE_TEXT_QUALIFIER = + key("useTextQualifier") + .booleanType() + .defaultValue(true) + .withDescription("use 
text qualifier"); + + public static final ConfigOption ENABLE_WRITE_SINGLE_RECORD_AS_FILE = + key("enableWriteSingleRecordAsFile") + .booleanType() + .defaultValue(false) + .withDescription("enable write single record as each file"); + + public static final ConfigOption KEEP_ORIGINAL_FILENAME = + key("keepOriginalFilename") + .booleanType() + .defaultValue(false) + .withDescription("keep original filename"); + + public static final ConfigOption DISABLE_BUCKET_NAME_IN_ENDPOINT = + key("disableBucketNameInEndpoint") + .booleanType() + .defaultValue(false) + .withDescription("disable Bucket Name In Endpoint"); } diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/util/S3Util.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/util/S3Util.java index a7d849a4a0..02d708f17e 100644 --- a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/util/S3Util.java +++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/util/S3Util.java @@ -30,6 +30,7 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.amazonaws.services.s3.S3ClientOptions; import com.amazonaws.services.s3.model.AbortMultipartUploadRequest; import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; import com.amazonaws.services.s3.model.DeleteObjectsRequest; @@ -46,12 +47,14 @@ import com.amazonaws.services.s3.model.UploadPartRequest; import com.amazonaws.services.s3.model.UploadPartResult; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang3.StringUtils; import java.io.ByteArrayInputStream; import java.io.InputStream; import java.util.ArrayList; import java.util.List; +import java.util.regex.Pattern; @Slf4j public class S3Util { @@ -75,6 +78,10 @@ public static AmazonS3 
getS3Client(S3Config s3Config) { } else { builder = builder.withRegion(clientRegion.getName()); } + // 禁用 Bucket 名称注入到 endpoint 前缀 + if (s3Config.isDisableBucketNameInEndpoint()) { + builder = builder.withPathStyleAccessEnabled(true); + } return builder.build(); } else { @@ -89,6 +96,11 @@ public static AmazonS3 getS3Client(S3Config s3Config) { } AmazonS3Client client = new AmazonS3Client(cred, ccfg); client.setEndpoint(s3Config.getEndpoint()); + // 禁用 Bucket 名称注入到 endpoint 前缀 + if (s3Config.isDisableBucketNameInEndpoint()) { + client.setS3ClientOptions( + S3ClientOptions.builder().setPathStyleAccess(true).build()); + } return client; } } else { @@ -103,18 +115,29 @@ public static PutObjectResult putStringObject( } public static List listObjectsKeyByPrefix( - AmazonS3 s3Client, String bucketName, String prefix, int fetchSize) { + AmazonS3 s3Client, String bucketName, String prefix, int fetchSize, String regex) { List objects = new ArrayList<>(fetchSize); ListObjectsV2Request req = new ListObjectsV2Request().withBucketName(bucketName).withMaxKeys(fetchSize); if (StringUtils.isNotBlank(prefix)) { req.setPrefix(prefix); } + // 定义正则表达式 + Pattern pattern = null; + if (StringUtils.isNotBlank(regex)) { + pattern = Pattern.compile(regex); + } + ListObjectsV2Result result; do { result = s3Client.listObjectsV2(req); for (S3ObjectSummary objectSummary : result.getObjectSummaries()) { + // 如果对象键与正则表达式匹配,则进行相应处理 + if (pattern != null + && !pattern.matcher(FilenameUtils.getName(objectSummary.getKey())).find()) { + continue; + } objects.add(objectSummary.getKey()); } String token = result.getNextContinuationToken(); diff --git a/chunjun-formats/chunjun-format-excel/pom.xml b/chunjun-formats/chunjun-format-excel/pom.xml new file mode 100644 index 0000000000..035d992ed5 --- /dev/null +++ b/chunjun-formats/chunjun-format-excel/pom.xml @@ -0,0 +1,59 @@ + + + + + 4.0.0 + + com.dtstack.chunjun + chunjun-formats + ${revision} + + + chunjun-format-excel + ChunJun : Formats : Excel + + + 
excel + + + + + com.alibaba + easyexcel + 3.2.0 + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + org.apache.maven.plugins + maven-antrun-plugin + + + + + diff --git a/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/client/ExcelReadListener.java b/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/client/ExcelReadListener.java new file mode 100644 index 0000000000..677dd37297 --- /dev/null +++ b/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/client/ExcelReadListener.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.chunjun.format.excel.client; + +import com.dtstack.chunjun.util.DateUtil; + +import com.alibaba.excel.context.AnalysisContext; +import com.alibaba.excel.read.listener.ReadListener; + +import java.time.LocalDateTime; +import java.util.Map; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; + +public class ExcelReadListener implements ReadListener> { + + private final BlockingQueue queue = new LinkedBlockingQueue<>(4096); + + @Override + public void invoke(Map data, AnalysisContext context) { + String[] piece = new String[data.size()]; + for (Map.Entry entry : data.entrySet()) { + String value = ""; + if (entry.getValue() != null) { + if (entry.getValue() instanceof LocalDateTime) { + value = + DateUtil.timestampToString( + DateUtil.localDateTimetoDate((LocalDateTime) entry.getValue())); + } else { + value = String.valueOf(entry.getValue()); + } + } + piece[entry.getKey()] = value; + } + Row row = + new Row( + piece, + context.readSheetHolder().getSheetNo(), + context.readRowHolder().getRowIndex(), + false); + try { + queue.put(row); + } catch (InterruptedException e) { + throw new RuntimeException( + "because the current thread was interrupted, adding data to the queue failed", + e); + } + } + + @Override + public void doAfterAllAnalysed(AnalysisContext context) { + Row row = + new Row( + new String[0], + context.readSheetHolder().getSheetNo(), + context.readRowHolder().getRowIndex(), + true); + try { + queue.put(row); + } catch (InterruptedException e) { + throw new RuntimeException( + "because the current thread was interrupted, adding data to the queue failed", + e); + } + } + + public BlockingQueue getQueue() { + return queue; + } +} diff --git a/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/client/ExcelReaderExecutor.java b/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/client/ExcelReaderExecutor.java new file mode 
100644 index 0000000000..e129a5e1fb --- /dev/null +++ b/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/client/ExcelReaderExecutor.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.chunjun.format.excel.client; + +import com.dtstack.chunjun.format.excel.config.ExcelFormatConfig; + +import com.alibaba.excel.ExcelReader; +import com.alibaba.excel.read.metadata.ReadSheet; + +import java.util.ArrayList; +import java.util.List; + +public class ExcelReaderExecutor implements Runnable { + + private final ExcelReader reader; + private ExcelSubExceptionCarrier ec; + private ExcelFormatConfig config; + + public ExcelReaderExecutor( + ExcelReader reader, ExcelSubExceptionCarrier ec, ExcelFormatConfig config) { + this.reader = reader; + this.ec = ec; + this.config = config; + } + + @Override + public void run() { + try { + if (config.getSheetNo() != null) { + List readSheetList = new ArrayList<>(); + for (int i = 0; i < config.getSheetNo().size(); i++) { + readSheetList.add(new ReadSheet(config.getSheetNo().get(i))); + } + reader.read(readSheetList); + } else { + reader.readAll(); + } + } catch (Exception e) { + ec.setThrowable(e); + } finally { + 
close(); + } + } + + private void close() { + if (reader != null) { + // Don’t forget to close it here. A temporary file will be + // created when you read it, and the disk will crash. + reader.finish(); + } + } +} diff --git a/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/client/ExcelSubExceptionCarrier.java b/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/client/ExcelSubExceptionCarrier.java new file mode 100644 index 0000000000..8a8abd68aa --- /dev/null +++ b/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/client/ExcelSubExceptionCarrier.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.chunjun.format.excel.client; + +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + +@NoArgsConstructor +@Getter +@Setter +public class ExcelSubExceptionCarrier { + + private Exception throwable; +} diff --git a/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/client/Row.java b/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/client/Row.java new file mode 100644 index 0000000000..f11039280d --- /dev/null +++ b/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/client/Row.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.chunjun.format.excel.client; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +import java.io.Serializable; + +@NoArgsConstructor +@AllArgsConstructor +@Data +public class Row implements Serializable { + + private static final long serialVersionUID = 7560468513368962794L; + + private String[] data; + private int sheetIndex; + private int rowIndex; + private boolean end; +} diff --git a/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/common/ExcelData.java b/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/common/ExcelData.java new file mode 100644 index 0000000000..6e08177a7a --- /dev/null +++ b/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/common/ExcelData.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.chunjun.format.excel.common; + +import lombok.Data; + +@Data +public class ExcelData { + private String[] data; + private boolean end; + + public ExcelData(String[] data, boolean end) { + this.data = data; + this.end = end; + } + + public ExcelData(String[] data) { + this.data = data; + } +} diff --git a/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/config/ExcelFormatConfig.java b/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/config/ExcelFormatConfig.java new file mode 100644 index 0000000000..f1632c5352 --- /dev/null +++ b/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/config/ExcelFormatConfig.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.chunjun.format.excel.config; + +import lombok.Data; + +import java.io.Serializable; +import java.util.List; + +@Data +public class ExcelFormatConfig implements Serializable { + + private static final long serialVersionUID = 4014091971538738019L; + private boolean isFirstLineHeader; + private String[] fields; + + /** 工作表 */ + public List sheetNo; + + /** 字段对应的列索引 */ + public List columnIndex; + + public boolean useExcelFormat = false; +} diff --git a/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/options/ExcelFormatOptions.java b/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/options/ExcelFormatOptions.java new file mode 100644 index 0000000000..03cf776ff5 --- /dev/null +++ b/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/options/ExcelFormatOptions.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.chunjun.format.excel.options; + +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ConfigOptions; + +public class ExcelFormatOptions { + + public static final ConfigOption USE_EXCEL_FORMAT = + ConfigOptions.key("use-excel-format") + .booleanType() + .defaultValue(false) + .withDescription("use excel format"); + + public static final ConfigOption SHEET_NO = + ConfigOptions.key("sheet-no") + .stringType() + .noDefaultValue() + .withDescription("sheet no, Multiple numbers separated by commas(,)"); + public static final ConfigOption COLUMN_INDEX = + ConfigOptions.key("column-index") + .stringType() + .noDefaultValue() + .withDescription("column index, Multiple numbers separated by commas(,)"); +} diff --git a/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/source/ExcelInputFormat.java b/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/source/ExcelInputFormat.java new file mode 100644 index 0000000000..83aa1dba53 --- /dev/null +++ b/chunjun-formats/chunjun-format-excel/src/main/java/com/dtstack/chunjun/format/excel/source/ExcelInputFormat.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.chunjun.format.excel.source; + +import com.dtstack.chunjun.format.excel.client.ExcelReadListener; +import com.dtstack.chunjun.format.excel.client.ExcelReaderExecutor; +import com.dtstack.chunjun.format.excel.client.ExcelSubExceptionCarrier; +import com.dtstack.chunjun.format.excel.client.Row; +import com.dtstack.chunjun.format.excel.config.ExcelFormatConfig; + +import com.alibaba.excel.EasyExcel; +import com.alibaba.excel.ExcelReader; +import com.alibaba.excel.read.builder.ExcelReaderBuilder; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.concurrent.BasicThreadFactory; + +import java.io.Closeable; +import java.io.InputStream; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +import static com.alibaba.excel.enums.ReadDefaultReturnEnum.ACTUAL_DATA; +import static java.util.concurrent.TimeUnit.NANOSECONDS; + +@Slf4j +public class ExcelInputFormat implements Closeable { + + private BlockingQueue queue; + private ThreadPoolExecutor executorService; + /** The number of cells per row in the Excel file. */ + private Integer cellCount = 0; + /** The number of sheet in the Excel file. 
*/ + private int sheetNum; + + private ExcelSubExceptionCarrier ec; + private Row row; + + public void open(InputStream inputStream, ExcelFormatConfig config) { + this.cellCount = config.getFields().length; + ExcelReadListener listener = new ExcelReadListener(); + this.queue = listener.getQueue(); + this.ec = new ExcelSubExceptionCarrier(); + ExcelReaderBuilder builder = EasyExcel.read(inputStream, listener); + if (!config.isFirstLineHeader()) { + builder.headRowNumber(0); + } + builder.ignoreEmptyRow(true); + builder.autoCloseStream(true); + // @since 3.2.0 + // STRING:会返回一个Map的数组,返回值就是你在excel里面不点击单元格看到的内容 + // ACTUAL_DATA:会返回一个Map的数组,返回实际上存储的数据,会帮自动转换类型,Object类型为BigDecimal、Boolean、String、LocalDateTime、null,中的一个, + // READ_CELL_DATA: 会返回一个Map>的数组,其中?类型参照ACTUAL_DATA的 + builder.readDefaultReturn(ACTUAL_DATA); + ExcelReader reader = builder.build(); + + this.sheetNum = reader.excelExecutor().sheetList().size(); + this.executorService = + new ThreadPoolExecutor( + 1, + 1, + 0, + NANOSECONDS, + new LinkedBlockingDeque<>(2), + new BasicThreadFactory.Builder() + .namingPattern("excel-schedule-pool-%d") + .daemon(false) + .build()); + ExcelReaderExecutor executor = new ExcelReaderExecutor(reader, ec, config); + executorService.execute(executor); + } + + public boolean hasNext() { + if (ec.getThrowable() != null) { + throw new RuntimeException("Read file error.", ec.getThrowable()); + } + try { + row = queue.poll(3000L, TimeUnit.MILLISECONDS); + + if (row == null) { + return false; + } + + if (row.isEnd()) { + return row.getSheetIndex() < sheetNum - 1; + } else { + return true; + } + } catch (InterruptedException e) { + throw new RuntimeException( + "cannot get data from the queue because the current thread is interrupted.", e); + } + } + + public String[] nextRecord() { + String[] data; + if (row.isEnd()) { + try { + Row head = queue.poll(3000L, TimeUnit.MILLISECONDS); + if (head != null) { + data = head.getData(); + } else { + return null; + } + } catch 
(InterruptedException e) { + throw new RuntimeException( + "cannot get data from the queue because the current thread is interrupted.", + e); + } + } else { + data = row.getData(); + } + if (cellCount == data.length) { + return data; + } + if (cellCount < data.length) { + cellCount = data.length; + } + return formatValue(data); + } + + @Override + public void close() { + if (executorService != null) { + executorService.shutdown(); + queue.clear(); + } + } + + private String[] formatValue(String[] data) { + String[] record = initDataContainer(cellCount, ""); + // because cellCount is always >= data.length + System.arraycopy(data, 0, record, 0, data.length); + return record; + } + + private String[] initDataContainer(int capacity, String defValue) { + String[] container = new String[capacity]; + for (int i = 0; i < capacity; i++) { + container[i] = defValue; + } + return container; + } +} diff --git a/chunjun-formats/chunjun-format-protobuf/pom.xml b/chunjun-formats/chunjun-format-protobuf/pom.xml new file mode 100644 index 0000000000..7fbc649824 --- /dev/null +++ b/chunjun-formats/chunjun-format-protobuf/pom.xml @@ -0,0 +1,59 @@ + + + + + + chunjun-formats + com.dtstack.chunjun + ${revision} + + 4.0.0 + + chunjun-format-protobuf + ChunJun : Formats : Protobuf + + + protobuf + + + + + com.google.protobuf + protobuf-java + 3.20.1-rc-1 + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + org.apache.maven.plugins + maven-antrun-plugin + + + + + diff --git a/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/PbFormatFactory.java b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/PbFormatFactory.java new file mode 100644 index 0000000000..4d3cecc17b --- /dev/null +++ b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/PbFormatFactory.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.chunjun.format.protobuf; + +import com.dtstack.chunjun.format.protobuf.deserialize.PbRowDataDeserializationSchema; +import com.dtstack.chunjun.format.protobuf.serialize.PbRowDataSerializationSchema; + +import org.apache.flink.api.common.serialization.DeserializationSchema; +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.table.connector.ChangelogMode; +import org.apache.flink.table.connector.format.DecodingFormat; +import org.apache.flink.table.connector.format.EncodingFormat; +import org.apache.flink.table.connector.sink.DynamicTableSink; +import org.apache.flink.table.connector.source.DynamicTableSource; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.factories.DeserializationFormatFactory; +import org.apache.flink.table.factories.DynamicTableFactory; +import org.apache.flink.table.factories.FactoryUtil; +import org.apache.flink.table.factories.SerializationFormatFactory; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.RowType; + 
+import java.util.HashSet; +import java.util.Set; + +/** @author liuliu 2022/4/8 */ +public class PbFormatFactory implements DeserializationFormatFactory, SerializationFormatFactory { + + public static final String IDENTIFIER = "protobuf-x"; + + @Override + public DecodingFormat> createDecodingFormat( + DynamicTableFactory.Context context, ReadableConfig formatOptions) { + FactoryUtil.validateFactoryOptions(this, formatOptions); + + String messageClassName = formatOptions.get(PbFormatOptions.MESSAGE_CLASS_NAME); + return new DecodingFormat>() { + @Override + public DeserializationSchema createRuntimeDecoder( + DynamicTableSource.Context context, DataType physicalDataType) { + final TypeInformation rowDataTypeInfo = + context.createTypeInformation(physicalDataType); + final RowType rowType = (RowType) physicalDataType.getLogicalType(); + return new PbRowDataDeserializationSchema( + rowType, rowDataTypeInfo, messageClassName); + } + + @Override + public ChangelogMode getChangelogMode() { + return ChangelogMode.insertOnly(); + } + }; + } + + @Override + public EncodingFormat> createEncodingFormat( + DynamicTableFactory.Context context, ReadableConfig formatOptions) { + FactoryUtil.validateFactoryOptions(this, formatOptions); + + String messageClassName = formatOptions.get(PbFormatOptions.MESSAGE_CLASS_NAME); + return new EncodingFormat>() { + @Override + public SerializationSchema createRuntimeEncoder( + DynamicTableSink.Context context, DataType physicalDataType) { + final RowType rowType = (RowType) physicalDataType.getLogicalType(); + return new PbRowDataSerializationSchema(rowType, messageClassName); + } + + @Override + public ChangelogMode getChangelogMode() { + return ChangelogMode.insertOnly(); + } + }; + } + + @Override + public String factoryIdentifier() { + return IDENTIFIER; + } + + @Override + public Set> requiredOptions() { + Set> options = new HashSet<>(); + options.add(PbFormatOptions.MESSAGE_CLASS_NAME); + return options; + } + + @Override + public 
Set> optionalOptions() { + return new HashSet<>(); + } +} diff --git a/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/PbFormatOptions.java b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/PbFormatOptions.java new file mode 100644 index 0000000000..5c9f93c994 --- /dev/null +++ b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/PbFormatOptions.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.chunjun.format.protobuf; + +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ConfigOptions; + +/** @author liuliu 2022/4/18 */ +public class PbFormatOptions { + private PbFormatOptions() {} + + public static final ConfigOption MESSAGE_CLASS_NAME = + ConfigOptions.key("message-class-name") + .stringType() + .noDefaultValue() + .withDescription( + "Required option to specify the full name of protobuf message class. 
The protobuf class " + + "must be located in the classpath both in client and task side"); +} diff --git a/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/PbFormatType.java b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/PbFormatType.java new file mode 100644 index 0000000000..e3e12e2658 --- /dev/null +++ b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/PbFormatType.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.chunjun.format.protobuf; + +import com.dtstack.chunjun.throwable.ChunJunRuntimeException; +import com.dtstack.chunjun.throwable.UnsupportedTypeException; + +import com.google.protobuf.AbstractMessage; +import com.google.protobuf.Descriptors; + +/** @author liuliu 2022/4/11 */ +public enum PbFormatType { + MAP, + ENUM, + ARRAY, + MESSAGE, + BOOLEAN, + INT, + LONG, + FLOAT, + DOUBLE, + STRING, + BYTE_STRING; + + PbFormatType() {} + + public static PbFormatType getTypeByFieldDescriptor( + Descriptors.FieldDescriptor fieldDescriptor) { + if (fieldDescriptor.isMapField()) { + return MAP; + } else if (fieldDescriptor.isRepeated()) { + return ARRAY; + } else { + return getTypeByTypeName(fieldDescriptor.getJavaType().name()); + } + } + + public static PbFormatType getArrayInnerTypeByFieldDescriptor( + Descriptors.FieldDescriptor fieldDescriptor) { + return getTypeByTypeName(fieldDescriptor.getJavaType().name()); + } + + public static PbFormatType getProtoTypeForMapKey(Class clazz) { + switch (clazz.getName()) { + case "java.lang.String": + return STRING; + case "java.lang.Integer": + case "int": + return INT; + case "java.lang.Long": + case "long": + return LONG; + default: + throw new ChunJunRuntimeException( + String.format( + "Map key expect any scalar type except for floating point types and bytes,but it is %s", + clazz.getName())); + } + } + + public static PbFormatType getProtoTypeForMapValue(Class clazz) { + if (com.google.protobuf.ProtocolMessageEnum.class.isAssignableFrom(clazz)) { + return ENUM; + } else if (AbstractMessage.class.isAssignableFrom(clazz)) { + return MESSAGE; + } + switch (clazz.getName()) { + case "java.lang.String": + return STRING; + case "java.lang.Integer": + case "int": + return INT; + case "java.lang.Double": + case "double": + return DOUBLE; + case "java.lang.Long": + case "long": + return LONG; + case "java.lang.Float": + case "float": + return FLOAT; + case "com.google.protobuf.ByteString": + return 
BYTE_STRING; + default: + throw new UnsupportedOperationException(clazz.getName()); + } + } + + public static PbFormatType getTypeByTypeName(String typeName) { + for (PbFormatType protoType : PbFormatType.values()) { + if (typeName.equalsIgnoreCase(protoType.name())) { + return protoType; + } + } + throw new UnsupportedTypeException(typeName); + } +} diff --git a/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/PbMessageAdaptor.java b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/PbMessageAdaptor.java new file mode 100644 index 0000000000..ce328124d2 --- /dev/null +++ b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/PbMessageAdaptor.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.chunjun.format.protobuf; + +import com.dtstack.chunjun.throwable.ChunJunRuntimeException; + +import com.google.protobuf.AbstractMessage; +import com.google.protobuf.Descriptors; + +import java.lang.reflect.Method; +import java.util.Arrays; +import java.util.List; + +/** @author liuliu 2022/4/27 */ +public class PbMessageAdaptor { + protected final int[] normalToOneof; + protected final int[] oneOfLastIndex; + protected int size; + + public PbMessageAdaptor( + List fieldDescriptorList, + List oneofDescriptorList) { + size = fieldDescriptorList.size(); + oneOfLastIndex = new int[oneofDescriptorList.size()]; + normalToOneof = new int[fieldDescriptorList.size()]; + Arrays.fill(normalToOneof, -1); + for (int i = 0; i < oneofDescriptorList.size(); i++) { + Descriptors.OneofDescriptor oneofDescriptor = oneofDescriptorList.get(i); + for (Descriptors.FieldDescriptor fieldDescriptor : oneofDescriptor.getFields()) { + normalToOneof[fieldDescriptor.getIndex()] = i; + oneOfLastIndex[i] = fieldDescriptor.getIndex(); + size--; + } + size++; + } + } + + /** Check whether the protobuf field corresponding to index belongs to a oneof field */ + public boolean isOneOf(int index) { + return normalToOneof[index] != -1; + } + + /** + * If the protobuf field corresponding to index belongs to a oneof field, then return the index + * of the last field of the oneof + */ + public Integer getLastOneOfIndex(int index) { + int oneofIndex = normalToOneof[index]; + if (oneofIndex != -1) { + return oneOfLastIndex[oneofIndex]; + } + throw new ChunJunRuntimeException(""); + } + + public Method obtainMethod(String methodName, Class clazz) { + try { + Method method = + Arrays.stream(clazz.getDeclaredMethods()) + .filter(m -> m.getName().equalsIgnoreCase(methodName)) + .filter( + m -> + m.getParameterTypes().length == 0 + || !AbstractMessage.Builder.class + .isAssignableFrom( + m.getParameterTypes()[0])) + .toArray(Method[]::new)[0]; + method.setAccessible(true); + return 
method; + } catch (Exception e) { + throw new ChunJunRuntimeException( + String.format( + "failed to obtain getMethod[%s] from class[%s]", + methodName, clazz.getName())); + } + } + + public int getSize() { + return size; + } +} diff --git a/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/deserialize/PbMessageGetter.java b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/deserialize/PbMessageGetter.java new file mode 100644 index 0000000000..d5a5f61cfe --- /dev/null +++ b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/deserialize/PbMessageGetter.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.chunjun.format.protobuf.deserialize; + +import com.dtstack.chunjun.format.protobuf.PbMessageAdaptor; +import com.dtstack.chunjun.throwable.ChunJunRuntimeException; + +import com.google.protobuf.AbstractMessage; +import com.google.protobuf.Descriptors; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.List; + +/** + * Reflect all GET methods in the order of the fields in the protobuf message, then we can get the + * values of all the fields in the Message object by index. + * + * @author liuliu 2022/4/13 + */ +public class PbMessageGetter extends PbMessageAdaptor { + + private Method[] normalMethods; + private Method[] oneofCaseMethods; + private Method[] oneOfNumberMethods; + + public PbMessageGetter( + List fieldDescriptorList, + List oneofDescriptorList, + Class clazz) { + super(fieldDescriptorList, oneofDescriptorList); + initMethods(fieldDescriptorList, oneofDescriptorList, clazz); + } + + /** + * Get the value in the protocolMessage object by fieldDescriptor index + * + * @param object protocolMessage + * @param index index of fieldDescriptor + */ + public Object getByIndex(Object object, int index) { + try { + return normalMethods[index].invoke(object); + } catch (IllegalAccessException | InvocationTargetException e) { + throw new ChunJunRuntimeException( + String.format( + "failed to get filed from flink type[%s],index[%s]", object, index), + e); + } + } + + /** + * If the index is a member of oneof,returns the corresponding index of the valid value in oneof + * + * @param object protocolMessage + * @param index index of fieldDescriptor + */ + public Integer getOneofCase(Object object, int index) + throws InvocationTargetException, IllegalAccessException { + int oneofIndex = normalToOneof[index]; + return (Integer) + oneOfNumberMethods[oneofIndex].invoke(oneofCaseMethods[oneofIndex].invoke(object)); + } + + public void initMethods( + List fieldDescriptorList, + List 
oneofDescriptorList, + Class clazz) { + this.normalMethods = + fieldDescriptorList.stream() + .map(fieldDescriptor -> obtainNormalGetMethod(fieldDescriptor, clazz)) + .toArray(Method[]::new); + this.oneofCaseMethods = + oneofDescriptorList.stream() + .map(oneofDescriptor -> obtainOneofCaseMethod(oneofDescriptor, clazz)) + .toArray(Method[]::new); + this.oneOfNumberMethods = + oneofDescriptorList.stream() + .map(oneofDescriptor -> obtainOneofNumberMethod(oneofDescriptor, clazz)) + .toArray(Method[]::new); + } + + public Method obtainOneofNumberMethod( + Descriptors.OneofDescriptor oneofDescriptor, Class clazz) { + Class caseClass; + try { + caseClass = Class.forName(clazz.getName() + "$" + oneofDescriptor.getName() + "Case"); + } catch (ClassNotFoundException e) { + throw new ChunJunRuntimeException( + String.format( + "failed to get OneOfCase Class,caseName=%s", + oneofDescriptor.getName())); + } + return obtainMethod("getNumber", caseClass); + } + + public Method obtainOneofCaseMethod( + Descriptors.OneofDescriptor oneofDescriptor, Class clazz) { + return obtainMethod("get" + oneofDescriptor.getName() + "Case", clazz); + } + + /** + * obtain get Method . methodName = get+descriptorName. 
if repeated,methodName = + * get+descriptorName+List + */ + public Method obtainNormalGetMethod( + Descriptors.FieldDescriptor fieldDescriptor, Class clazz) { + StringBuilder stringBuilder = new StringBuilder("get"); + stringBuilder.append(fieldDescriptor.getName()); + if (!fieldDescriptor.isMapField() && fieldDescriptor.isRepeated()) { + stringBuilder.append("List"); + } + return obtainMethod(stringBuilder.toString(), clazz); + } +} diff --git a/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/deserialize/PbParser.java b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/deserialize/PbParser.java new file mode 100644 index 0000000000..1a4f3c594a --- /dev/null +++ b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/deserialize/PbParser.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.chunjun.format.protobuf.deserialize; + +import com.dtstack.chunjun.format.protobuf.util.PbReflectUtil; +import com.dtstack.chunjun.throwable.ChunJunRuntimeException; + +import java.lang.reflect.Method; + +/** + * trans byte[] to protobuf object by protobuf parseFrom method + * + * @author liuliu 2022/4/8 + */ +public class PbParser { + + private Class messageClass; + private Method parseMethod; + + public PbParser(String messageClassName) throws NoSuchMethodException { + messageClass = PbReflectUtil.getClassByClassName(messageClassName); + parseMethod = messageClass.getMethod("parseFrom", byte[].class); + parseMethod.setAccessible(true); + } + + public Object parse(byte[] bytes) { + try { + return parseMethod.invoke(messageClass, bytes); + } catch (Exception e) { + throw new ChunJunRuntimeException( + String.format( + "Failed to deserialize protocol data from byte[] to message object,messageClass=%s", + messageClass), + e); + } + } +} diff --git a/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/deserialize/PbRowDataDeserializationSchema.java b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/deserialize/PbRowDataDeserializationSchema.java new file mode 100644 index 0000000000..aea96a9b6d --- /dev/null +++ b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/deserialize/PbRowDataDeserializationSchema.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.chunjun.format.protobuf.deserialize; + +import com.dtstack.chunjun.format.protobuf.util.FormatCheckUtil; +import com.dtstack.chunjun.throwable.ChunJunRuntimeException; + +import org.apache.flink.api.common.serialization.DeserializationSchema; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.RowType; + +import java.util.Objects; + +/** @author liuliu 2022/4/8 */ +public class PbRowDataDeserializationSchema implements DeserializationSchema { + + private static final long serialVersionUID = 1L; + + private PbParser protoMessageTransformer; + private PbToRowDataPbConverter.ProtoToRowDataConverter runtimeConverter; + private TypeInformation typeInformation; + private String messageClassName; + private RowType rowType; + + public PbRowDataDeserializationSchema( + RowType rowType, TypeInformation typeInformation, String protoOutClassName) { + this.rowType = rowType; + this.typeInformation = typeInformation; + this.messageClassName = protoOutClassName; + new FormatCheckUtil(rowType, messageClassName).isValid(); + } + + @Override + public void open(InitializationContext context) throws Exception { + protoMessageTransformer = new PbParser(messageClassName); + runtimeConverter = PbToRowDataPbConverter.initMessageDataConverter(messageClassName); + } + + @Override + public RowData deserialize(byte[] message) { + if (message == null) { + return null; + } + Object protoMessage = protoMessageTransformer.parse(message); + try { + return 
(RowData) runtimeConverter.convert(protoMessage); + } catch (Exception e) { + throw new ChunJunRuntimeException("Failed to convert protobuf record to RowData", e); + } + } + + @Override + public boolean isEndOfStream(RowData nextElement) { + return false; + } + + @Override + public TypeInformation getProducedType() { + return typeInformation; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + PbRowDataDeserializationSchema that = (PbRowDataDeserializationSchema) o; + return messageClassName.equals(that.messageClassName); + } + + @Override + public int hashCode() { + return Objects.hash(messageClassName); + } +} diff --git a/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/deserialize/PbToRowDataPbConverter.java b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/deserialize/PbToRowDataPbConverter.java new file mode 100644 index 0000000000..6076551c76 --- /dev/null +++ b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/deserialize/PbToRowDataPbConverter.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.chunjun.format.protobuf.deserialize; + +import com.dtstack.chunjun.format.protobuf.PbFormatType; +import com.dtstack.chunjun.throwable.ChunJunRuntimeException; + +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.api.java.tuple.Tuple4; +import org.apache.flink.table.data.GenericArrayData; +import org.apache.flink.table.data.GenericMapData; +import org.apache.flink.table.data.GenericRowData; +import org.apache.flink.table.data.StringData; + +import com.google.protobuf.AbstractMessage; +import com.google.protobuf.ByteString; +import com.google.protobuf.Descriptors; + +import java.io.Serializable; +import java.lang.reflect.InvocationTargetException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static com.dtstack.chunjun.format.protobuf.util.PbReflectUtil.getClassByFieldDescriptor; +import static com.dtstack.chunjun.format.protobuf.util.PbReflectUtil.getDescriptorByMessageClassName; +import static com.dtstack.chunjun.format.protobuf.util.PbReflectUtil.getMapTypeTuple; + +/** @author liuliu 2022/4/11 Converte from proto to RowData */ +public class PbToRowDataPbConverter { + + /** proto class full name example as com.dtstack.messageOutClass */ + protected static String PROTO_CLASS_NAME; + /** proto class package name example as com.dtstack */ + protected static String PROTO_PACKAGE_NAME; + + public interface ProtoToRowDataConverter extends Serializable { + Object convert(Object object) throws InvocationTargetException, IllegalAccessException; + } + + /** + * create a runtime converter for row by specific protoMessageClassName and init {@link + * PbToRowDataPbConverter#PROTO_PACKAGE_NAME} and {@link + * PbToRowDataPbConverter#PROTO_CLASS_NAME} + */ + public static ProtoToRowDataConverter initMessageDataConverter(String messageClassName) { + try { + PROTO_CLASS_NAME = 
messageClassName.substring(0, messageClassName.lastIndexOf("$")); + PROTO_PACKAGE_NAME = PROTO_CLASS_NAME.substring(0, PROTO_CLASS_NAME.lastIndexOf(".")); + return createMessageDataConverter(messageClassName); + } catch (IndexOutOfBoundsException e) { + throw new ChunJunRuntimeException( + String.format("Incorrect proto message class name:%s", messageClassName), e); + } + } + + /** + * create a runtime converter for row by specific messageClassName example as + * com.dtstack.MessageOutClass$Message + */ + public static ProtoToRowDataConverter createMessageDataConverter(String protoMessageClassName) { + Tuple2> + descriptorByMessageClassName = + getDescriptorByMessageClassName(protoMessageClassName); + + return createMessageDataConverter( + descriptorByMessageClassName.f0.getFields(), + descriptorByMessageClassName.f0.getOneofs(), + descriptorByMessageClassName.f1); + } + + /** create a runtime converter for row by specific fieldDescriptor */ + public static ProtoToRowDataConverter createMessageDataConverter( + Descriptors.FieldDescriptor fieldDescriptor) { + + Descriptors.Descriptor messageType = fieldDescriptor.getMessageType(); + return createMessageDataConverter( + messageType.getFields(), + messageType.getOneofs(), + getClassByFieldDescriptor( + PROTO_CLASS_NAME, PROTO_PACKAGE_NAME, fieldDescriptor, null)); + } + + public static ProtoToRowDataConverter createMessageDataConverter( + List fieldDescriptorList, + List oneofDescriptorList, + Class clazz) { + PbMessageGetter pbMessageGetter = + new PbMessageGetter(fieldDescriptorList, oneofDescriptorList, clazz); + ProtoToRowDataConverter[] protoToRowDataConverters = + fieldDescriptorList.stream() + .map(PbToRowDataPbConverter::createNullableConverter) + .toArray(ProtoToRowDataConverter[]::new); + return object -> { + int size = pbMessageGetter.getSize(); + GenericRowData genericRowData = new GenericRowData(size); + for (int i = 0, index = 0; i < protoToRowDataConverters.length && index < size; ) { + if 
(pbMessageGetter.isOneOf(i)) { + int lastIndex = pbMessageGetter.getLastOneOfIndex(i); + GenericRowData oneofGenericRowData = new GenericRowData(lastIndex - i + 2); + oneofGenericRowData.setField(0, pbMessageGetter.getOneofCase(object, i)); + int currentCase = 1; + while (currentCase < oneofGenericRowData.getArity()) { + Object o = pbMessageGetter.getByIndex(object, i); + oneofGenericRowData.setField( + currentCase++, protoToRowDataConverters[i++].convert(o)); + } + genericRowData.setField(index++, oneofGenericRowData); + } else { + Object o = pbMessageGetter.getByIndex(object, i); + genericRowData.setField(index++, protoToRowDataConverters[i++].convert(o)); + } + } + return genericRowData; + }; + } + + /** Creates a runtime converter which is null safe. */ + private static ProtoToRowDataConverter createNullableConverter( + Descriptors.FieldDescriptor descriptor) { + return createNullableConverter( + descriptor, PbFormatType.getTypeByFieldDescriptor(descriptor)); + } + + private static ProtoToRowDataConverter createNullableConverter( + Descriptors.FieldDescriptor descriptor, PbFormatType protoType) { + final ProtoToRowDataConverter converter = createConverter(descriptor, protoType); + return protoObject -> { + if (protoObject == null) { + return null; + } + return converter.convert(protoObject); + }; + } + + /** + * In standard ProtocolBuffers, mapKey can be any integral or string type (so, any scalar type + * except for floating point types and bytes), mapValue not can be mapType and arrayType + * + * @return a runtime map converter + */ + public static ProtoToRowDataConverter createMapConverter( + Descriptors.FieldDescriptor fieldDescriptor) { + // get builder method + Tuple4 mapTypeTuple = + getMapTypeTuple(PROTO_CLASS_NAME, PROTO_PACKAGE_NAME, fieldDescriptor); + // create keyConverter and valueConverter + final ProtoToRowDataConverter keyConverter = createNullableConverter(null, mapTypeTuple.f0); + final ProtoToRowDataConverter valueConverter; + if 
(mapTypeTuple.f1 == PbFormatType.MESSAGE) { + valueConverter = createMessageDataConverter(mapTypeTuple.f3.getName()); + } else { + // scalar type/enum + valueConverter = createConverter(null, mapTypeTuple.f1); + } + // create runtime map converter + return object -> { + final Map map = (Map) object; + Map result = new HashMap<>(); + for (Map.Entry entry : map.entrySet()) { + Object key = keyConverter.convert(entry.getKey()); + Object value = valueConverter.convert(entry.getValue()); + result.put(key, value); + } + return new GenericMapData(result); + }; + } + + /** + * In standard ProtocolBuffers, element can only be map、message、scalar + * + * @return a runtime array converter + */ + public static ProtoToRowDataConverter createArrayConverter( + Descriptors.FieldDescriptor fieldDescriptor) { + PbFormatType protoType = PbFormatType.getArrayInnerTypeByFieldDescriptor(fieldDescriptor); + final ProtoToRowDataConverter elementConverter = + createConverter(fieldDescriptor, protoType); + return object -> { + final List list = (List) object; + final int length = list.size(); + final Object[] array = new Object[length]; + for (int i = 0; i < length; ++i) { + array[i] = elementConverter.convert(list.get(i)); + } + return new GenericArrayData(array); + }; + } + + public static ProtoToRowDataConverter createConverter( + Descriptors.FieldDescriptor fieldDescriptor, PbFormatType protoType) { + switch (protoType) { + case ENUM: + case STRING: + return object -> StringData.fromString(object.toString()); + case INT: + case LONG: + case FLOAT: + case DOUBLE: + case BOOLEAN: + return object -> object; + case BYTE_STRING: + return object -> ((ByteString) object).toByteArray(); + case MAP: + return createMapConverter(fieldDescriptor); + case MESSAGE: + return createMessageDataConverter(fieldDescriptor); + case ARRAY: + return createArrayConverter(fieldDescriptor); + default: + throw new UnsupportedOperationException(protoType.name()); + } + } +} diff --git 
a/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/serialize/PbMessageSetter.java b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/serialize/PbMessageSetter.java new file mode 100644 index 0000000000..0d67ddbd7c --- /dev/null +++ b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/serialize/PbMessageSetter.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.chunjun.format.protobuf.serialize; + +import com.dtstack.chunjun.format.protobuf.PbMessageAdaptor; +import com.dtstack.chunjun.throwable.ChunJunRuntimeException; + +import com.google.protobuf.AbstractMessage; +import com.google.protobuf.Descriptors; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.Arrays; +import java.util.List; + +/** + * Reflect all Builder#Set methods in the order of the fields in the Protobuf Message, then we can + * set the values by index. 
+ * + * @author liuliu 2022/4/26 + */ +public class PbMessageSetter extends PbMessageAdaptor { + + private AbstractMessage.Builder builder; + private Method[] normalMethods; + + public PbMessageSetter( + List fieldDescriptorList, + List oneofDescriptorList, + Class clazz) { + super(fieldDescriptorList, oneofDescriptorList); + initBuilder(clazz); + initMethods(fieldDescriptorList, builder.getClass()); + } + + private void initBuilder(Class builderClass) { + try { + Method newBuilder = builderClass.getMethod("newBuilder"); + this.builder = (AbstractMessage.Builder) newBuilder.invoke(builderClass); + } catch (Exception e) { + e.printStackTrace(); + } + } + + public void setByIndex(Object o, int index) throws ChunJunRuntimeException { + try { + + normalMethods[index].invoke(builder, o); + } catch (InvocationTargetException + | IllegalAccessException + | IllegalArgumentException exception) { + System.out.println(normalMethods[index].getName()); + Arrays.stream(normalMethods[index].getParameterTypes()) + .forEach(p -> System.out.println(p.getName())); + System.out.println(o); + throw new ChunJunRuntimeException( + String.format( + "failed to set field by protobuf builder,builder class[%s],index[%s],filed[%s]", + builder.getClass().getName(), index, o), + exception); + } + } + + public Object build() { + Object result = builder.build(); + builder.clear(); + return result; + } + + private void initMethods(List fieldDescriptorList, Class clazz) { + normalMethods = + fieldDescriptorList.stream() + .map( + f -> { + String action = "set"; + if (f.isMapField()) { + action = "putAll"; + } else if (f.isRepeated()) { + action = "addAll"; + } + return action + f.getName(); + }) + .map(methodName -> obtainMethod(methodName, clazz)) + .toArray(Method[]::new); + } +} diff --git a/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/serialize/PbRowDataSerializationSchema.java 
b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/serialize/PbRowDataSerializationSchema.java new file mode 100644 index 0000000000..22fb685bda --- /dev/null +++ b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/serialize/PbRowDataSerializationSchema.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.chunjun.format.protobuf.serialize; + +import com.dtstack.chunjun.format.protobuf.util.FormatCheckUtil; +import com.dtstack.chunjun.throwable.ChunJunRuntimeException; + +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.RowType; + +import com.google.protobuf.AbstractMessage; + +import java.util.Objects; + +/** @author liuliu 2022/4/26 */ +public class PbRowDataSerializationSchema implements SerializationSchema { + private static final long serialVersionUID = 1L; + + private final RowType rowType; + private final String messageClassName; + private RowDataToPbConverter.RowDataToProtoConverter runtimeConverter; + + public PbRowDataSerializationSchema(RowType rowType, String messageClassName) { + this.rowType = rowType; + this.messageClassName = messageClassName; + new FormatCheckUtil(rowType, messageClassName).isValid(); + } + + @Override + public void open(InitializationContext context) { + runtimeConverter = RowDataToPbConverter.initMessageDataConverter(rowType, messageClassName); + } + + @Override + public byte[] serialize(RowData element) { + if (element == null) { + return null; + } + try { + return ((AbstractMessage) runtimeConverter.convert(element)).toByteArray(); + } catch (Exception e) { + throw new ChunJunRuntimeException( + String.format( + "Failed to convert RowData to protobuf record,RowData:%s", element), + e); + } + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + PbRowDataSerializationSchema that = (PbRowDataSerializationSchema) o; + return messageClassName.equals(that.messageClassName); + } + + @Override + public int hashCode() { + return Objects.hash(messageClassName); + } +} diff --git 
a/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/serialize/RowDataToPbConverter.java b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/serialize/RowDataToPbConverter.java new file mode 100644 index 0000000000..625ffe6223 --- /dev/null +++ b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/serialize/RowDataToPbConverter.java @@ -0,0 +1,328 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.chunjun.format.protobuf.serialize; + +import com.dtstack.chunjun.format.protobuf.PbFormatType; +import com.dtstack.chunjun.throwable.ChunJunRuntimeException; + +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.api.java.tuple.Tuple4; +import org.apache.flink.table.data.GenericArrayData; +import org.apache.flink.table.data.GenericMapData; +import org.apache.flink.table.data.GenericRowData; +import org.apache.flink.table.data.binary.BinaryArrayData; +import org.apache.flink.table.data.binary.BinaryMapData; +import org.apache.flink.table.types.logical.ArrayType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.MapType; +import org.apache.flink.table.types.logical.RowType; + +import com.google.protobuf.AbstractMessage; +import com.google.protobuf.ByteString; +import com.google.protobuf.Descriptors; +import com.google.protobuf.ProtocolMessageEnum; + +import java.io.Serializable; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static com.dtstack.chunjun.format.protobuf.util.PbReflectUtil.getClassByClassName; +import static com.dtstack.chunjun.format.protobuf.util.PbReflectUtil.getClassByFieldDescriptor; +import static com.dtstack.chunjun.format.protobuf.util.PbReflectUtil.getClassNameByFullName; +import static com.dtstack.chunjun.format.protobuf.util.PbReflectUtil.getDescriptorByMessageClassName; +import static com.dtstack.chunjun.format.protobuf.util.PbReflectUtil.getMapTypeTuple; + +/** @author liuliu 2022/4/26 */ +public class RowDataToPbConverter { + + /** proto class full name example as com.dtstack.messageOutClass */ + protected static String PROTO_CLASS_NAME; + /** proto class package name example as com.dtstack */ + protected static String PROTO_PACKAGE_NAME; + + public interface RowDataToProtoConverter extends 
Serializable { + Object convert(Object object) throws InvocationTargetException, IllegalAccessException; + } + + /** + * create a runtime converter for row by specific protoMessageClassName and init {@link + * RowDataToPbConverter#PROTO_PACKAGE_NAME} and {@link RowDataToPbConverter#PROTO_CLASS_NAME} + */ + public static RowDataToProtoConverter initMessageDataConverter( + LogicalType logicalType, String messageClassName) { + try { + PROTO_CLASS_NAME = messageClassName.substring(0, messageClassName.lastIndexOf("$")); + PROTO_PACKAGE_NAME = PROTO_CLASS_NAME.substring(0, PROTO_CLASS_NAME.lastIndexOf(".")); + return createMessageDataConverter(logicalType, messageClassName); + } catch (IndexOutOfBoundsException e) { + throw new ChunJunRuntimeException( + String.format("Incorrect proto message class name:%s", messageClassName), e); + } + } + + /** + * create a runtime converter for row by specific messageClassName example as + * com.dtstack.MessageOutClass$Message + */ + private static RowDataToProtoConverter createMessageDataConverter( + LogicalType logicalType, String protoMessageClassName) { + Tuple2> + descriptorByMessageClassName = + getDescriptorByMessageClassName(protoMessageClassName); + return createMessageDataConverter( + logicalType, + descriptorByMessageClassName.f0.getFields(), + descriptorByMessageClassName.f0.getOneofs(), + descriptorByMessageClassName.f1); + } + + /** create a runtime converter for row by specific fieldDescriptor */ + public static RowDataToProtoConverter createMessageDataConverter( + LogicalType logicalType, Descriptors.FieldDescriptor fieldDescriptor) { + + Descriptors.Descriptor messageType = fieldDescriptor.getMessageType(); + return createMessageDataConverter( + logicalType, + messageType.getFields(), + messageType.getOneofs(), + getClassByFieldDescriptor( + PROTO_CLASS_NAME, PROTO_PACKAGE_NAME, fieldDescriptor, null)); + } + + private static RowDataToProtoConverter createMessageDataConverter( + LogicalType logicalType, + List 
fieldDescriptorList, + List oneofDescriptorList, + Class clazz) { + RowDataToProtoConverter[] rowDataToProtoConverters = + new RowDataToProtoConverter[fieldDescriptorList.size()]; + PbMessageSetter pbMessageSetter = + new PbMessageSetter(fieldDescriptorList, oneofDescriptorList, clazz); + int currentIndex = 0; + for (LogicalType childLogicalType : logicalType.getChildren()) { + if (childLogicalType instanceof RowType && pbMessageSetter.isOneOf(currentIndex)) { + for (int i = 1; i < ((RowType) childLogicalType).getFieldCount(); i++) { + rowDataToProtoConverters[currentIndex] = + createNullableConverter( + ((RowType) childLogicalType).getTypeAt(i), + fieldDescriptorList.get(currentIndex++)); + } + } else { + rowDataToProtoConverters[currentIndex] = + createNullableConverter( + childLogicalType, fieldDescriptorList.get(currentIndex++)); + } + } + return object -> { + GenericRowData genericRowData = (GenericRowData) object; + for (int i = 0, index = 0; + i < fieldDescriptorList.size() && index < genericRowData.getArity(); + i++) { + if (pbMessageSetter.isOneOf(i)) { + GenericRowData oneofGenericRowData = + (GenericRowData) genericRowData.getField(index++); + int oneofCase = oneofGenericRowData.getInt(0); + int caseInRow = oneofCase - i; + pbMessageSetter.setByIndex( + rowDataToProtoConverters[oneofCase - 1].convert( + oneofGenericRowData.getField(caseInRow)), + oneofCase - 1); + i = pbMessageSetter.getLastOneOfIndex(i); + } else { + pbMessageSetter.setByIndex( + rowDataToProtoConverters[i].convert(genericRowData.getField(index++)), + i); + } + } + return pbMessageSetter.build(); + }; + } + + /** Creates a runtime converter which is null safe. 
*/ + private static RowDataToProtoConverter createNullableConverter( + LogicalType logicalType, Descriptors.FieldDescriptor descriptor) { + return createNullableConverter( + logicalType, descriptor, PbFormatType.getTypeByFieldDescriptor(descriptor)); + } + + private static RowDataToProtoConverter createNullableConverter( + LogicalType logicalType, + Descriptors.FieldDescriptor descriptor, + PbFormatType protoType) { + final RowDataToProtoConverter converter = + createConverter(logicalType, descriptor, protoType); + return protoObject -> { + if (protoObject == null) { + return null; + } + return converter.convert(protoObject); + }; + } + + public static RowDataToProtoConverter createConverter( + LogicalType logicalType, + Descriptors.FieldDescriptor fieldDescriptor, + PbFormatType protoType) { + switch (protoType) { + case ENUM: + return createEnumConverter(fieldDescriptor); + case STRING: + return Object::toString; + case INT: + case LONG: + case FLOAT: + case DOUBLE: + case BOOLEAN: + return object -> object; + case BYTE_STRING: + return object -> ByteString.copyFrom((byte[]) object); + case MAP: + return createMapConverter(logicalType, fieldDescriptor); + case MESSAGE: + return createMessageDataConverter(logicalType, fieldDescriptor); + case ARRAY: + return createArrayConverter(logicalType, fieldDescriptor); + default: + throw new UnsupportedOperationException(protoType.name()); + } + } + + /** + * In standard ProtocolBuffers, mapKey can be any integral or string type (so, any scalar type + * except for floating point types and bytes), mapValue not can be mapType and arrayType + * + * @return a runtime map converter + */ + private static RowDataToProtoConverter createMapConverter( + LogicalType logicalType, Descriptors.FieldDescriptor fieldDescriptor) { + Tuple4 mapTypeTuple = + getMapTypeTuple(PROTO_CLASS_NAME, PROTO_PACKAGE_NAME, fieldDescriptor); + LogicalType keyType = ((MapType) logicalType).getKeyType(); + LogicalType valueType = ((MapType) 
logicalType).getValueType(); + // create keyConverter and valueConverter + final RowDataToProtoConverter keyConverter = + createNullableConverter(keyType, null, mapTypeTuple.f0); + final RowDataToProtoConverter valueConverter; + if (mapTypeTuple.f1 == PbFormatType.MESSAGE) { + valueConverter = createMessageDataConverter(valueType, mapTypeTuple.f3.getName()); + } else { + // scalar type/enum + valueConverter = createConverter(valueType, null, mapTypeTuple.f1); + } + + return object -> { + Map result = new HashMap<>(); + Object[] keyArray; + Object[] valueArray; + if (object instanceof BinaryMapData) { + keyArray = ((BinaryMapData) object).keyArray().toObjectArray(keyType); + valueArray = ((BinaryMapData) object).valueArray().toObjectArray(valueType); + } else if (object instanceof GenericMapData) { + keyArray = + ((GenericArrayData) ((GenericMapData) object).keyArray()).toObjectArray(); + valueArray = + ((GenericArrayData) ((GenericMapData) object).valueArray()).toObjectArray(); + } else { + throw new UnsupportedOperationException( + "protobuf-x format Map serializer only support BinaryMapData and GenericMapData"); + } + for (int i = 0; i < keyArray.length; i++) { + result.put( + keyConverter.convert(keyArray[i]), valueConverter.convert(valueArray[i])); + } + return result; + }; + } + + /** + * In standard ProtocolBuffers, element can only be map、message、scalar + * + * @return a runtime array converter + */ + private static RowDataToProtoConverter createArrayConverter( + LogicalType logicalType, Descriptors.FieldDescriptor fieldDescriptor) { + PbFormatType protoType = PbFormatType.getArrayInnerTypeByFieldDescriptor(fieldDescriptor); + LogicalType elementType = ((ArrayType) logicalType).getElementType(); + final RowDataToProtoConverter elementConverter = + createConverter(elementType, fieldDescriptor, protoType); + return object -> { + List list = new ArrayList<>(); + Object[] elements; + if (object instanceof BinaryArrayData) { + elements = ((BinaryArrayData) 
object).toObjectArray(elementType); + } else if (object instanceof GenericArrayData) { + elements = ((GenericArrayData) object).toObjectArray(); + } else { + throw new UnsupportedOperationException( + "protobuf-x format Repeated serializer only support BinaryArrayData and GenericArrayData"); + } + for (Object o : elements) { + list.add(elementConverter.convert(o)); + } + return list; + }; + } + + private static RowDataToProtoConverter createEnumConverter( + Descriptors.FieldDescriptor fieldDescriptor) { + EnumMessageAssemblers assemblers = new EnumMessageAssemblers(fieldDescriptor); + return object -> assemblers.valueOf(object.toString()); + } + + private static class EnumMessageAssemblers { + + private final Method valueOfMethod; + private final String enumClassName; + private final Class enumClass; + + public EnumMessageAssemblers(Descriptors.FieldDescriptor fieldDescriptor) { + this.enumClassName = + getClassNameByFullName( + PROTO_CLASS_NAME, + PROTO_PACKAGE_NAME, + fieldDescriptor.getEnumType().getFullName()); + enumClass = getClassByClassName(enumClassName); + try { + this.valueOfMethod = enumClass.getDeclaredMethod("valueOf", String.class); + valueOfMethod.setAccessible(true); + } catch (NoSuchMethodException e) { + throw new ChunJunRuntimeException( + String.format( + "Failed to get valueOf method from enum class [%s]", + enumClassName)); + } + } + + public Object valueOf(String enumString) { + try { + return valueOfMethod.invoke(enumClass, enumString); + } catch (Exception e) { + throw new ChunJunRuntimeException( + String.format( + "Failed to generate an enum object of class [%s] from the string [%s]", + enumClassName, enumString), + e); + } + } + } +} diff --git a/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/util/FormatCheckUtil.java b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/util/FormatCheckUtil.java new file mode 100644 index 0000000000..c63aee12ec --- 
/dev/null +++ b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/util/FormatCheckUtil.java @@ -0,0 +1,217 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.chunjun.format.protobuf.util; + +import com.dtstack.chunjun.format.protobuf.PbFormatType; +import com.dtstack.chunjun.format.protobuf.PbMessageAdaptor; +import com.dtstack.chunjun.throwable.ChunJunRuntimeException; + +import org.apache.flink.api.java.tuple.Tuple4; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.types.logical.ArrayType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.MapType; +import org.apache.flink.table.types.logical.RowType; + +import com.google.protobuf.Descriptors; + +import java.util.ArrayList; +import java.util.List; + +import static com.dtstack.chunjun.format.protobuf.util.PbReflectUtil.getMapTypeTuple; + +/** @author liuliu 2022/5/11 */ +public class FormatCheckUtil { + + /** proto class full name example as com.dtstack.messageOutClass */ + protected String PROTO_CLASS_NAME; + /** proto class package name example as com.dtstack */ + protected String PROTO_PACKAGE_NAME; + + protected 
RowType rowType; + protected String messageClassName; + + public FormatCheckUtil(RowType rowType, String messageClassName) { + this.PROTO_CLASS_NAME = messageClassName.substring(0, messageClassName.lastIndexOf("$")); + this.PROTO_PACKAGE_NAME = PROTO_CLASS_NAME.substring(0, PROTO_CLASS_NAME.lastIndexOf(".")); + this.rowType = rowType; + this.messageClassName = messageClassName; + } + + /** Checks whether the DDL statement matches the Protobuf message */ + public void isValid() { + RowType messageLogicalType = createMessageLogicalType(messageClassName); + if (!rowTypeEqual(messageLogicalType, rowType)) { + throw new ChunJunRuntimeException( + String.format( + "protobuf class %s except rowType like\n" + " %s\n" + "but\n %s", + messageClassName, messageLogicalType, rowType)); + } + } + + private boolean logicalTypeEqual(LogicalType logicalType, LogicalType target) { + boolean flag; + if (logicalType instanceof RowType && target instanceof RowType) { + flag = rowTypeEqual((RowType) logicalType, (RowType) target); + } else if (logicalType instanceof MapType && target instanceof MapType) { + flag = mapTypeEqual((MapType) logicalType, (MapType) target); + } else if (logicalType instanceof ArrayType && target instanceof ArrayType) { + flag = arrayTypeEqual((ArrayType) logicalType, (ArrayType) target); + } else { + flag = logicalType.getClass() == target.getClass(); + } + return flag; + } + + private boolean arrayTypeEqual(ArrayType arrayType, ArrayType target) { + return logicalTypeEqual(arrayType.getElementType(), target.getElementType()); + } + + private boolean mapTypeEqual(MapType mapType, MapType target) { + return logicalTypeEqual(mapType.getKeyType(), target.getKeyType()) + && logicalTypeEqual(mapType.getValueType(), target.getValueType()); + } + + public boolean rowTypeEqual(RowType rowType, RowType target) { + List fields = rowType.getFields(); + List targetFields = target.getFields(); + int size = fields.size(); + if (size == targetFields.size()) { + for (int i = 
0; i < size; i++) { + RowType.RowField field = fields.get(i); + RowType.RowField targetField = targetFields.get(i); + if (!logicalTypeEqual(field.getType(), targetField.getType())) { + return false; + } + } + return true; + } else { + return false; + } + } + + /** create rowType by protoMessageClass */ + public RowType createMessageLogicalType(String protoMessageClass) { + Descriptors.Descriptor descriptor = + PbReflectUtil.getDescriptorByMessageClassName(protoMessageClass).f0; + return createMessageLogicalType(descriptor); + } + + private RowType createMessageLogicalType(Descriptors.Descriptor descriptor) { + return createMessageLogicalType(descriptor.getFields(), descriptor.getOneofs()); + } + + private RowType createMessageLogicalType(Descriptors.FieldDescriptor descriptor) { + return createMessageLogicalType( + descriptor.getMessageType().getFields(), descriptor.getMessageType().getOneofs()); + } + + private RowType createMessageLogicalType( + List descriptorList, + List oneofDescriptorList) { + PbMessageAdaptor pbMessageAdaptor = + new PbMessageAdaptor(descriptorList, oneofDescriptorList); + + List rowFieldList = new ArrayList<>(); + int indexOfOneof = 0; + for (int i = 0; i < descriptorList.size(); ) { + if (pbMessageAdaptor.isOneOf(i)) { + + List oneofRowFieldList = new ArrayList<>(); + oneofRowFieldList.add( + new RowType.RowField("case", DataTypes.INT().getLogicalType())); + Integer lastOneOfIndex = pbMessageAdaptor.getLastOneOfIndex(i); + while (i <= lastOneOfIndex) { + Descriptors.FieldDescriptor descriptor = descriptorList.get(i++); + oneofRowFieldList.add( + new RowType.RowField( + descriptor.getName(), createLogicalType(descriptor))); + } + rowFieldList.add( + new RowType.RowField( + oneofDescriptorList.get(indexOfOneof++).getName(), + new RowType(oneofRowFieldList))); + + } else { + Descriptors.FieldDescriptor descriptor = descriptorList.get(i++); + rowFieldList.add( + new RowType.RowField(descriptor.getName(), createLogicalType(descriptor))); + } + 
} + return new RowType(rowFieldList); + } + + private LogicalType createLogicalType(Descriptors.FieldDescriptor fieldDescriptor) { + return createLogicalType( + fieldDescriptor, PbFormatType.getTypeByFieldDescriptor(fieldDescriptor)); + } + + private LogicalType createLogicalType(PbFormatType protoType) { + return createLogicalType(null, protoType); + } + + private LogicalType createLogicalType( + Descriptors.FieldDescriptor fieldDescriptor, PbFormatType protoType) { + switch (protoType) { + case ENUM: + case STRING: + return DataTypes.STRING().getLogicalType(); + case INT: + return DataTypes.INT().getLogicalType(); + case LONG: + return DataTypes.BIGINT().getLogicalType(); + case FLOAT: + return DataTypes.FLOAT().getLogicalType(); + case DOUBLE: + return DataTypes.DOUBLE().getLogicalType(); + case BOOLEAN: + return DataTypes.BOOLEAN().getLogicalType(); + case BYTE_STRING: + return DataTypes.BYTES().getLogicalType(); + case MAP: + return createMapLogicalType(fieldDescriptor); + case MESSAGE: + return createMessageLogicalType(fieldDescriptor); + case ARRAY: + return createArrayLogicalType(fieldDescriptor); + default: + throw new UnsupportedOperationException(protoType.name()); + } + } + + private LogicalType createArrayLogicalType(Descriptors.FieldDescriptor fieldDescriptor) { + PbFormatType protoType = PbFormatType.getArrayInnerTypeByFieldDescriptor(fieldDescriptor); + return new ArrayType(createLogicalType(fieldDescriptor, protoType)); + } + + private LogicalType createMapLogicalType(Descriptors.FieldDescriptor fieldDescriptor) { + // get builder method + Tuple4 mapTypeTuple = + getMapTypeTuple(PROTO_CLASS_NAME, PROTO_PACKAGE_NAME, fieldDescriptor); + LogicalType keyLogicalType = createLogicalType(mapTypeTuple.f0); + LogicalType valueLogicalType; + if (mapTypeTuple.f1 == PbFormatType.MESSAGE) { + valueLogicalType = createMessageLogicalType(mapTypeTuple.f3.getName()); + } else { + // scalar type/enum + valueLogicalType = createLogicalType(mapTypeTuple.f1); + } + 
return new MapType(keyLogicalType, valueLogicalType); + } +} diff --git a/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/util/PbReflectUtil.java b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/util/PbReflectUtil.java new file mode 100644 index 0000000000..222fdd990e --- /dev/null +++ b/chunjun-formats/chunjun-format-protobuf/src/main/java/com/dtstack/chunjun/format/protobuf/util/PbReflectUtil.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.chunjun.format.protobuf.util; + +import com.dtstack.chunjun.format.protobuf.PbFormatType; +import com.dtstack.chunjun.throwable.ChunJunRuntimeException; + +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.api.java.tuple.Tuple4; + +import com.google.protobuf.AbstractMessage; +import com.google.protobuf.Descriptors; + +import java.lang.reflect.Method; +import java.util.Arrays; + +/** @author liuliu 2022/4/26 */ +public abstract class PbReflectUtil { + + public static Tuple2> + getDescriptorByMessageClassName(String protoMessageClassName) { + Class clazz; + try { + clazz = getClassByClassName(protoMessageClassName); + return Tuple2.of(getDescriptorByClass(clazz), clazz); + } catch (Exception e) { + throw new ChunJunRuntimeException("failed to get proto Descriptor", e); + } + } + + public static Class getClassByClassName(String protoMessageClassName) { + try { + return Class.forName(protoMessageClassName); + } catch (Exception e) { + throw new ChunJunRuntimeException( + String.format( + "failed to get proto class by className %s", protoMessageClassName), + e); + } + } + + public static Descriptors.Descriptor getDescriptorByClass( + Class clazz) { + try { + Method getDescriptor = clazz.getMethod("getDescriptor"); + getDescriptor.setAccessible(true); + return (Descriptors.Descriptor) getDescriptor.invoke(clazz); + } catch (Exception e) { + throw new ChunJunRuntimeException( + String.format( + "failed to get proto Descriptor,final class name=%s", clazz.getName()), + e); + } + } + + public static String getClassNameByFullName( + String protoClassName, String protoPackageName, String fullName) { + return protoClassName + + fullName.substring(protoPackageName.length()).replaceAll("\\.", "\\$"); + } + + /** @param suffix example as $Builder */ + public static Class getClassByFieldDescriptor( + String protoClassName, + String protoPackageName, + Descriptors.FieldDescriptor fieldDescriptor, + String suffix) { + String className 
= null; + try { + className = + getClassNameByFullName( + protoClassName, + protoPackageName, + fieldDescriptor.getMessageType().getFullName()); + if (suffix != null) { + int index = + className.lastIndexOf("$") == -1 + ? className.lastIndexOf(".") + : className.lastIndexOf("$"); + className = className.substring(0, index) + suffix; + } + return Class.forName(className); + } catch (Exception e) { + throw new ChunJunRuntimeException( + String.format( + "cannot found class by fieldDescriptor,messageFullName=%s,final className=%s}", + fieldDescriptor.getMessageType().getFullName(), className)); + } + } + + public static Tuple4 getMapTypeTuple( + String protoClassName, + String protoPackageName, + Descriptors.FieldDescriptor fieldDescriptor) { + // get builder method + Class builderClass = + getClassByFieldDescriptor( + protoClassName, protoPackageName, fieldDescriptor, "$Builder"); + Method[] methods = + Arrays.stream(builderClass.getMethods()) + .filter( + method -> + method.getName() + .equalsIgnoreCase( + "put" + fieldDescriptor.getName())) + .toArray(Method[]::new); + // get map key and value type by builder method + assert methods.length == 1; + Class keyClass = methods[0].getParameterTypes()[0]; + Class valueClass = methods[0].getParameterTypes()[1]; + PbFormatType keyProtoType = PbFormatType.getProtoTypeForMapKey(keyClass); + PbFormatType valueProtoType = PbFormatType.getProtoTypeForMapValue(valueClass); + return Tuple4.of(keyProtoType, valueProtoType, keyClass, valueClass); + } +} diff --git a/chunjun-formats/chunjun-format-protobuf/src/test/java/com/dtstack/chunjun/format/protobuf/test/MessageTestOuterClass.java b/chunjun-formats/chunjun-format-protobuf/src/test/java/com/dtstack/chunjun/format/protobuf/test/MessageTestOuterClass.java new file mode 100644 index 0000000000..9e89b2649e --- /dev/null +++ b/chunjun-formats/chunjun-format-protobuf/src/test/java/com/dtstack/chunjun/format/protobuf/test/MessageTestOuterClass.java @@ -0,0 +1,2816 @@ +/* + * Licensed 
to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Generated by the protocol buffer compiler. DO NOT EDIT! +// source: MessageTest.proto + +package com.dtstack.chunjun.format.protobuf.test; + +public final class MessageTestOuterClass { + private MessageTestOuterClass() {} + + public static void registerAllExtensions(com.google.protobuf.ExtensionRegistryLite registry) {} + + public static void registerAllExtensions(com.google.protobuf.ExtensionRegistry registry) { + registerAllExtensions((com.google.protobuf.ExtensionRegistryLite) registry); + } + + public interface VariantOrBuilder + extends + // @@protoc_insertion_point(interface_extends:com.dtstack.chunjun.format.protobuf.test.Variant) + com.google.protobuf.MessageOrBuilder { + + /** bool boolx = 1; */ + boolean getBoolx(); + + /** bool ValueBool2 = 2; */ + boolean getValueBool2(); + + /** int32 ValueInt32 = 3; */ + int getValueInt32(); + + /** bool booly = 4; */ + boolean getBooly(); + + public Variant.Value2Case getValue2Case(); + } + /** Protobuf type {@code com.dtstack.chunjun.format.protobuf.test.Variant} */ + public static final class Variant extends com.google.protobuf.GeneratedMessageV3 + implements + // 
@@protoc_insertion_point(message_implements:com.dtstack.chunjun.format.protobuf.test.Variant) + VariantOrBuilder { + // Use Variant.newBuilder() to construct. + private Variant(com.google.protobuf.GeneratedMessageV3.Builder builder) { + super(builder); + } + + private Variant() { + boolx_ = false; + booly_ = false; + } + + @Override + public final com.google.protobuf.UnknownFieldSet getUnknownFields() { + return com.google.protobuf.UnknownFieldSet.getDefaultInstance(); + } + + private Variant( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + this(); + int mutable_bitField0_ = 0; + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: + { + if (!input.skipField(tag)) { + done = true; + } + break; + } + case 8: + { + boolx_ = input.readBool(); + break; + } + case 16: + { + value2Case_ = 2; + value2_ = input.readBool(); + break; + } + case 24: + { + value2Case_ = 3; + value2_ = input.readInt32(); + break; + } + case 32: + { + booly_ = input.readBool(); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException(e) + .setUnfinishedMessage(this); + } finally { + makeExtensionsImmutable(); + } + } + + public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { + return MessageTestOuterClass + .internal_static_com_dtstack_chunjun_format_protobuf_test_Variant_descriptor; + } + + protected FieldAccessorTable internalGetFieldAccessorTable() { + return MessageTestOuterClass + .internal_static_com_dtstack_chunjun_format_protobuf_test_Variant_fieldAccessorTable + .ensureFieldAccessorsInitialized(Variant.class, Builder.class); + } + + private int value2Case_ = 0; + private Object value2_; + 
+ public enum Value2Case implements com.google.protobuf.Internal.EnumLite { + VALUEBOOL2(2), + VALUEINT32(3), + VALUE2_NOT_SET(0); + private final int value; + + private Value2Case(int value) { + this.value = value; + } + /** @deprecated Use {@link #forNumber(int)} instead. */ + @Deprecated + public static Value2Case valueOf(int value) { + return forNumber(value); + } + + public static Value2Case forNumber(int value) { + switch (value) { + case 2: + return VALUEBOOL2; + case 3: + return VALUEINT32; + case 0: + return VALUE2_NOT_SET; + default: + return null; + } + } + + public int getNumber() { + return this.value; + } + }; + + public Value2Case getValue2Case() { + return Value2Case.forNumber(value2Case_); + } + + public static final int BOOLX_FIELD_NUMBER = 1; + private boolean boolx_; + /** bool boolx = 1; */ + public boolean getBoolx() { + return boolx_; + } + + public static final int VALUEBOOL2_FIELD_NUMBER = 2; + /** bool ValueBool2 = 2; */ + public boolean getValueBool2() { + if (value2Case_ == 2) { + return (Boolean) value2_; + } + return false; + } + + public static final int VALUEINT32_FIELD_NUMBER = 3; + /** int32 ValueInt32 = 3; */ + public int getValueInt32() { + if (value2Case_ == 3) { + return (Integer) value2_; + } + return 0; + } + + public static final int BOOLY_FIELD_NUMBER = 4; + private boolean booly_; + /** bool booly = 4; */ + public boolean getBooly() { + return booly_; + } + + private byte memoizedIsInitialized = -1; + + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized == 1) return true; + if (isInitialized == 0) return false; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + if (boolx_ != false) { + output.writeBool(1, boolx_); + } + if (value2Case_ == 2) { + output.writeBool(2, (boolean) ((Boolean) value2_)); + } + if (value2Case_ == 3) { + output.writeInt32(3, (int) ((Integer) 
value2_)); + } + if (booly_ != false) { + output.writeBool(4, booly_); + } + } + + public int getSerializedSize() { + int size = memoizedSize; + if (size != -1) return size; + + size = 0; + if (boolx_ != false) { + size += com.google.protobuf.CodedOutputStream.computeBoolSize(1, boolx_); + } + if (value2Case_ == 2) { + size += + com.google.protobuf.CodedOutputStream.computeBoolSize( + 2, (boolean) ((Boolean) value2_)); + } + if (value2Case_ == 3) { + size += + com.google.protobuf.CodedOutputStream.computeInt32Size( + 3, (int) ((Integer) value2_)); + } + if (booly_ != false) { + size += com.google.protobuf.CodedOutputStream.computeBoolSize(4, booly_); + } + memoizedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + + @Override + public boolean equals(final Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof Variant)) { + return super.equals(obj); + } + Variant other = (Variant) obj; + + boolean result = true; + result = result && (getBoolx() == other.getBoolx()); + result = result && (getBooly() == other.getBooly()); + result = result && getValue2Case().equals(other.getValue2Case()); + if (!result) return false; + switch (value2Case_) { + case 2: + result = result && (getValueBool2() == other.getValueBool2()); + break; + case 3: + result = result && (getValueInt32() == other.getValueInt32()); + break; + case 0: + default: + } + return result; + } + + @Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptor().hashCode(); + hash = (37 * hash) + BOOLX_FIELD_NUMBER; + hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(getBoolx()); + hash = (37 * hash) + BOOLY_FIELD_NUMBER; + hash = (53 * hash) + com.google.protobuf.Internal.hashBoolean(getBooly()); + switch (value2Case_) { + case 2: + hash = (37 * hash) + VALUEBOOL2_FIELD_NUMBER; + hash = (53 * hash) + 
com.google.protobuf.Internal.hashBoolean(getValueBool2()); + break; + case 3: + hash = (37 * hash) + VALUEINT32_FIELD_NUMBER; + hash = (53 * hash) + getValueInt32(); + break; + case 0: + default: + } + hash = (29 * hash) + unknownFields.hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static Variant parseFrom(java.nio.ByteBuffer data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + + public static Variant parseFrom( + java.nio.ByteBuffer data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + + public static Variant parseFrom(com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + + public static Variant parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + + public static Variant parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + + public static Variant parseFrom( + byte[] data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + + public static Variant parseFrom(java.io.InputStream input) throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input); + } + + public static Variant parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseWithIOException( + PARSER, input, extensionRegistry); + } + + public static Variant 
parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseDelimitedWithIOException( + PARSER, input); + } + + public static Variant parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseDelimitedWithIOException( + PARSER, input, extensionRegistry); + } + + public static Variant parseFrom(com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input); + } + + public static Variant parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseWithIOException( + PARSER, input, extensionRegistry); + } + + public Builder newBuilderForType() { + return newBuilder(); + } + + public static Builder newBuilder() { + return DEFAULT_INSTANCE.toBuilder(); + } + + public static Builder newBuilder(Variant prototype) { + return DEFAULT_INSTANCE.toBuilder().mergeFrom(prototype); + } + + public Builder toBuilder() { + return this == DEFAULT_INSTANCE ? 
new Builder() : new Builder().mergeFrom(this); + } + + @Override + protected Builder newBuilderForType(BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** Protobuf type {@code com.dtstack.chunjun.format.protobuf.test.Variant} */ + public static final class Builder + extends com.google.protobuf.GeneratedMessageV3.Builder + implements + // @@protoc_insertion_point(builder_implements:com.dtstack.chunjun.format.protobuf.test.Variant) + VariantOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { + return MessageTestOuterClass + .internal_static_com_dtstack_chunjun_format_protobuf_test_Variant_descriptor; + } + + protected FieldAccessorTable internalGetFieldAccessorTable() { + return MessageTestOuterClass + .internal_static_com_dtstack_chunjun_format_protobuf_test_Variant_fieldAccessorTable + .ensureFieldAccessorsInitialized(Variant.class, Builder.class); + } + + // Construct using + // com.dtstack.chunjun.format.protobuf.test.MessageTestOuterClass.Variant.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder(BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessageV3.alwaysUseFieldBuilders) {} + } + + public Builder clear() { + super.clear(); + boolx_ = false; + + booly_ = false; + + value2Case_ = 0; + value2_ = null; + return this; + } + + public com.google.protobuf.Descriptors.Descriptor getDescriptorForType() { + return MessageTestOuterClass + .internal_static_com_dtstack_chunjun_format_protobuf_test_Variant_descriptor; + } + + public Variant getDefaultInstanceForType() { + return Variant.getDefaultInstance(); + } + + public Variant build() { + Variant result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public Variant buildPartial() { + 
Variant result = new Variant(this); + result.boolx_ = boolx_; + if (value2Case_ == 2) { + result.value2_ = value2_; + } + if (value2Case_ == 3) { + result.value2_ = value2_; + } + result.booly_ = booly_; + result.value2Case_ = value2Case_; + onBuilt(); + return result; + } + + public Builder clone() { + return (Builder) super.clone(); + } + + public Builder setField( + com.google.protobuf.Descriptors.FieldDescriptor field, Object value) { + return (Builder) super.setField(field, value); + } + + public Builder clearField(com.google.protobuf.Descriptors.FieldDescriptor field) { + return (Builder) super.clearField(field); + } + + public Builder clearOneof(com.google.protobuf.Descriptors.OneofDescriptor oneof) { + return (Builder) super.clearOneof(oneof); + } + + public Builder setRepeatedField( + com.google.protobuf.Descriptors.FieldDescriptor field, + int index, + Object value) { + return (Builder) super.setRepeatedField(field, index, value); + } + + public Builder addRepeatedField( + com.google.protobuf.Descriptors.FieldDescriptor field, Object value) { + return (Builder) super.addRepeatedField(field, value); + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof Variant) { + return mergeFrom((Variant) other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(Variant other) { + if (other == Variant.getDefaultInstance()) return this; + if (other.getBoolx() != false) { + setBoolx(other.getBoolx()); + } + if (other.getBooly() != false) { + setBooly(other.getBooly()); + } + switch (other.getValue2Case()) { + case VALUEBOOL2: + { + setValueBool2(other.getValueBool2()); + break; + } + case VALUEINT32: + { + setValueInt32(other.getValueInt32()); + break; + } + case VALUE2_NOT_SET: + { + break; + } + } + onChanged(); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + 
com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + Variant parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (Variant) e.getUnfinishedMessage(); + throw e.unwrapIOException(); + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + + private int value2Case_ = 0; + private Object value2_; + + public Value2Case getValue2Case() { + return Value2Case.forNumber(value2Case_); + } + + public Builder clearValue2() { + value2Case_ = 0; + value2_ = null; + onChanged(); + return this; + } + + private boolean boolx_; + /** bool boolx = 1; */ + public boolean getBoolx() { + return boolx_; + } + /** bool boolx = 1; */ + public Builder setBoolx(boolean value) { + + boolx_ = value; + onChanged(); + return this; + } + /** bool boolx = 1; */ + public Builder clearBoolx() { + + boolx_ = false; + onChanged(); + return this; + } + + /** bool ValueBool2 = 2; */ + public boolean getValueBool2() { + if (value2Case_ == 2) { + return (Boolean) value2_; + } + return false; + } + /** bool ValueBool2 = 2; */ + public Builder setValueBool2(boolean value) { + value2Case_ = 2; + value2_ = value; + onChanged(); + return this; + } + /** bool ValueBool2 = 2; */ + public Builder clearValueBool2() { + if (value2Case_ == 2) { + value2Case_ = 0; + value2_ = null; + onChanged(); + } + return this; + } + + /** int32 ValueInt32 = 3; */ + public int getValueInt32() { + if (value2Case_ == 3) { + return (Integer) value2_; + } + return 0; + } + /** int32 ValueInt32 = 3; */ + public Builder setValueInt32(int value) { + value2Case_ = 3; + value2_ = value; + onChanged(); + return this; + } + /** int32 ValueInt32 = 3; */ + public Builder clearValueInt32() { + if (value2Case_ == 3) { + value2Case_ = 0; + value2_ = null; + onChanged(); + } + return this; + } + + private boolean booly_; + /** bool 
booly = 4; */ + public boolean getBooly() { + return booly_; + } + /** bool booly = 4; */ + public Builder setBooly(boolean value) { + + booly_ = value; + onChanged(); + return this; + } + /** bool booly = 4; */ + public Builder clearBooly() { + + booly_ = false; + onChanged(); + return this; + } + + public final Builder setUnknownFields( + final com.google.protobuf.UnknownFieldSet unknownFields) { + return this; + } + + public final Builder mergeUnknownFields( + final com.google.protobuf.UnknownFieldSet unknownFields) { + return this; + } + + // @@protoc_insertion_point(builder_scope:com.dtstack.chunjun.format.protobuf.test.Variant) + } + + // @@protoc_insertion_point(class_scope:com.dtstack.chunjun.format.protobuf.test.Variant) + private static final Variant DEFAULT_INSTANCE; + + static { + DEFAULT_INSTANCE = new Variant(); + } + + public static Variant getDefaultInstance() { + return DEFAULT_INSTANCE; + } + + private static final com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public Variant parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new Variant(input, extensionRegistry); + } + }; + + public static com.google.protobuf.Parser parser() { + return PARSER; + } + + @Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + public Variant getDefaultInstanceForType() { + return DEFAULT_INSTANCE; + } + } + + public interface MessageItemOrBuilder + extends + // @@protoc_insertion_point(interface_extends:com.dtstack.chunjun.format.protobuf.test.MessageItem) + com.google.protobuf.MessageOrBuilder { + + /** + * + * + *
+         * Optional by default
+         * 
+ * + * string TagName = 1; + */ + String getTagName(); + /** + * + * + *
+         * Optional by default
+         * 
+ * + * string TagName = 1; + */ + com.google.protobuf.ByteString getTagNameBytes(); + + /** .com.dtstack.chunjun.format.protobuf.test.Variant TagValue = 2; */ + boolean hasTagValue(); + /** .com.dtstack.chunjun.format.protobuf.test.Variant TagValue = 2; */ + Variant getTagValue(); + /** .com.dtstack.chunjun.format.protobuf.test.Variant TagValue = 2; */ + VariantOrBuilder getTagValueOrBuilder(); + + /** map<string, string> ExValues = 3; */ + int getExValuesCount(); + /** map<string, string> ExValues = 3; */ + boolean containsExValues(String key); + /** Use {@link #getExValuesMap()} instead. */ + @Deprecated + java.util.Map getExValues(); + /** map<string, string> ExValues = 3; */ + java.util.Map getExValuesMap(); + /** map<string, string> ExValues = 3; */ + String getExValuesOrDefault(String key, String defaultValue); + /** map<string, string> ExValues = 3; */ + String getExValuesOrThrow(String key); + } + /** Protobuf type {@code com.dtstack.chunjun.format.protobuf.test.MessageItem} */ + public static final class MessageItem extends com.google.protobuf.GeneratedMessageV3 + implements + // @@protoc_insertion_point(message_implements:com.dtstack.chunjun.format.protobuf.test.MessageItem) + MessageItemOrBuilder { + // Use MessageItem.newBuilder() to construct. 
+ private MessageItem(com.google.protobuf.GeneratedMessageV3.Builder builder) { + super(builder); + } + + private MessageItem() { + tagName_ = ""; + } + + @Override + public final com.google.protobuf.UnknownFieldSet getUnknownFields() { + return com.google.protobuf.UnknownFieldSet.getDefaultInstance(); + } + + private MessageItem( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + this(); + int mutable_bitField0_ = 0; + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: + { + if (!input.skipField(tag)) { + done = true; + } + break; + } + case 10: + { + String s = input.readStringRequireUtf8(); + + tagName_ = s; + break; + } + case 18: + { + Variant.Builder subBuilder = null; + if (tagValue_ != null) { + subBuilder = tagValue_.toBuilder(); + } + tagValue_ = input.readMessage(Variant.parser(), extensionRegistry); + if (subBuilder != null) { + subBuilder.mergeFrom(tagValue_); + tagValue_ = subBuilder.buildPartial(); + } + + break; + } + case 26: + { + if (!((mutable_bitField0_ & 0x00000004) == 0x00000004)) { + exValues_ = + com.google.protobuf.MapField.newMapField( + ExValuesDefaultEntryHolder.defaultEntry); + mutable_bitField0_ |= 0x00000004; + } + com.google.protobuf.MapEntry exValues__ = + input.readMessage( + ExValuesDefaultEntryHolder.defaultEntry + .getParserForType(), + extensionRegistry); + exValues_ + .getMutableMap() + .put(exValues__.getKey(), exValues__.getValue()); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException(e) + .setUnfinishedMessage(this); + } finally { + makeExtensionsImmutable(); + } + } + + public static final com.google.protobuf.Descriptors.Descriptor 
getDescriptor() { + return MessageTestOuterClass + .internal_static_com_dtstack_chunjun_format_protobuf_test_MessageItem_descriptor; + } + + @SuppressWarnings({"rawtypes"}) + protected com.google.protobuf.MapField internalGetMapField(int number) { + switch (number) { + case 3: + return internalGetExValues(); + default: + throw new RuntimeException("Invalid map field number: " + number); + } + } + + protected FieldAccessorTable internalGetFieldAccessorTable() { + return MessageTestOuterClass + .internal_static_com_dtstack_chunjun_format_protobuf_test_MessageItem_fieldAccessorTable + .ensureFieldAccessorsInitialized(MessageItem.class, Builder.class); + } + + private int bitField0_; + public static final int TAGNAME_FIELD_NUMBER = 1; + private volatile Object tagName_; + /** + * + * + *
+         * Optional by default
+         * 
+ * + * string TagName = 1; + */ + public String getTagName() { + Object ref = tagName_; + if (ref instanceof String) { + return (String) ref; + } else { + com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; + String s = bs.toStringUtf8(); + tagName_ = s; + return s; + } + } + /** + * + * + *
+         * Optional by default
+         * 
+ * + * string TagName = 1; + */ + public com.google.protobuf.ByteString getTagNameBytes() { + Object ref = tagName_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((String) ref); + tagName_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + public static final int TAGVALUE_FIELD_NUMBER = 2; + private Variant tagValue_; + /** .com.dtstack.chunjun.format.protobuf.test.Variant TagValue = 2; */ + public boolean hasTagValue() { + return tagValue_ != null; + } + /** .com.dtstack.chunjun.format.protobuf.test.Variant TagValue = 2; */ + public Variant getTagValue() { + return tagValue_ == null ? Variant.getDefaultInstance() : tagValue_; + } + /** .com.dtstack.chunjun.format.protobuf.test.Variant TagValue = 2; */ + public VariantOrBuilder getTagValueOrBuilder() { + return getTagValue(); + } + + public static final int EXVALUES_FIELD_NUMBER = 3; + + private static final class ExValuesDefaultEntryHolder { + static final com.google.protobuf.MapEntry defaultEntry = + com.google.protobuf.MapEntry.newDefaultInstance( + MessageTestOuterClass + .internal_static_com_dtstack_chunjun_format_protobuf_test_MessageItem_ExValuesEntry_descriptor, + com.google.protobuf.WireFormat.FieldType.STRING, + "", + com.google.protobuf.WireFormat.FieldType.STRING, + ""); + } + + private com.google.protobuf.MapField exValues_; + + private com.google.protobuf.MapField internalGetExValues() { + if (exValues_ == null) { + return com.google.protobuf.MapField.emptyMapField( + ExValuesDefaultEntryHolder.defaultEntry); + } + return exValues_; + } + + public int getExValuesCount() { + return internalGetExValues().getMap().size(); + } + /** map<string, string> ExValues = 3; */ + public boolean containsExValues(String key) { + if (key == null) { + throw new NullPointerException(); + } + return internalGetExValues().getMap().containsKey(key); + } + /** Use {@link #getExValuesMap()} instead. 
*/ + @Deprecated + public java.util.Map getExValues() { + return getExValuesMap(); + } + /** map<string, string> ExValues = 3; */ + public java.util.Map getExValuesMap() { + return internalGetExValues().getMap(); + } + /** map<string, string> ExValues = 3; */ + public String getExValuesOrDefault(String key, String defaultValue) { + if (key == null) { + throw new NullPointerException(); + } + java.util.Map map = internalGetExValues().getMap(); + return map.containsKey(key) ? map.get(key) : defaultValue; + } + /** map<string, string> ExValues = 3; */ + public String getExValuesOrThrow(String key) { + if (key == null) { + throw new NullPointerException(); + } + java.util.Map map = internalGetExValues().getMap(); + if (!map.containsKey(key)) { + throw new IllegalArgumentException(); + } + return map.get(key); + } + + private byte memoizedIsInitialized = -1; + + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized == 1) return true; + if (isInitialized == 0) return false; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + if (!getTagNameBytes().isEmpty()) { + com.google.protobuf.GeneratedMessageV3.writeString(output, 1, tagName_); + } + if (tagValue_ != null) { + output.writeMessage(2, getTagValue()); + } + com.google.protobuf.GeneratedMessageV3.serializeStringMapTo( + output, internalGetExValues(), ExValuesDefaultEntryHolder.defaultEntry, 3); + } + + public int getSerializedSize() { + int size = memoizedSize; + if (size != -1) return size; + + size = 0; + if (!getTagNameBytes().isEmpty()) { + size += com.google.protobuf.GeneratedMessageV3.computeStringSize(1, tagName_); + } + if (tagValue_ != null) { + size += com.google.protobuf.CodedOutputStream.computeMessageSize(2, getTagValue()); + } + for (java.util.Map.Entry entry : + internalGetExValues().getMap().entrySet()) { + com.google.protobuf.MapEntry exValues__ = + 
ExValuesDefaultEntryHolder.defaultEntry + .newBuilderForType() + .setKey(entry.getKey()) + .setValue(entry.getValue()) + .build(); + size += com.google.protobuf.CodedOutputStream.computeMessageSize(3, exValues__); + } + memoizedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + + @Override + public boolean equals(final Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof MessageItem)) { + return super.equals(obj); + } + MessageItem other = (MessageItem) obj; + + boolean result = true; + result = result && getTagName().equals(other.getTagName()); + result = result && (hasTagValue() == other.hasTagValue()); + if (hasTagValue()) { + result = result && getTagValue().equals(other.getTagValue()); + } + result = result && internalGetExValues().equals(other.internalGetExValues()); + return result; + } + + @Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptor().hashCode(); + hash = (37 * hash) + TAGNAME_FIELD_NUMBER; + hash = (53 * hash) + getTagName().hashCode(); + if (hasTagValue()) { + hash = (37 * hash) + TAGVALUE_FIELD_NUMBER; + hash = (53 * hash) + getTagValue().hashCode(); + } + if (!internalGetExValues().getMap().isEmpty()) { + hash = (37 * hash) + EXVALUES_FIELD_NUMBER; + hash = (53 * hash) + internalGetExValues().hashCode(); + } + hash = (29 * hash) + unknownFields.hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static MessageItem parseFrom(java.nio.ByteBuffer data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + + public static MessageItem parseFrom( + java.nio.ByteBuffer data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + + public static MessageItem parseFrom(com.google.protobuf.ByteString data) + 
throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + + public static MessageItem parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + + public static MessageItem parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + + public static MessageItem parseFrom( + byte[] data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + + public static MessageItem parseFrom(java.io.InputStream input) throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input); + } + + public static MessageItem parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseWithIOException( + PARSER, input, extensionRegistry); + } + + public static MessageItem parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseDelimitedWithIOException( + PARSER, input); + } + + public static MessageItem parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseDelimitedWithIOException( + PARSER, input, extensionRegistry); + } + + public static MessageItem parseFrom(com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input); + } + + public static MessageItem parseFrom( + com.google.protobuf.CodedInputStream input, + 
com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseWithIOException( + PARSER, input, extensionRegistry); + } + + public Builder newBuilderForType() { + return newBuilder(); + } + + public static Builder newBuilder() { + return DEFAULT_INSTANCE.toBuilder(); + } + + public static Builder newBuilder(MessageItem prototype) { + return DEFAULT_INSTANCE.toBuilder().mergeFrom(prototype); + } + + public Builder toBuilder() { + return this == DEFAULT_INSTANCE ? new Builder() : new Builder().mergeFrom(this); + } + + @Override + protected Builder newBuilderForType(BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** Protobuf type {@code com.dtstack.chunjun.format.protobuf.test.MessageItem} */ + public static final class Builder + extends com.google.protobuf.GeneratedMessageV3.Builder + implements + // @@protoc_insertion_point(builder_implements:com.dtstack.chunjun.format.protobuf.test.MessageItem) + MessageItemOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { + return MessageTestOuterClass + .internal_static_com_dtstack_chunjun_format_protobuf_test_MessageItem_descriptor; + } + + @SuppressWarnings({"rawtypes"}) + protected com.google.protobuf.MapField internalGetMapField(int number) { + switch (number) { + case 3: + return internalGetExValues(); + default: + throw new RuntimeException("Invalid map field number: " + number); + } + } + + @SuppressWarnings({"rawtypes"}) + protected com.google.protobuf.MapField internalGetMutableMapField(int number) { + switch (number) { + case 3: + return internalGetMutableExValues(); + default: + throw new RuntimeException("Invalid map field number: " + number); + } + } + + protected FieldAccessorTable internalGetFieldAccessorTable() { + return MessageTestOuterClass + .internal_static_com_dtstack_chunjun_format_protobuf_test_MessageItem_fieldAccessorTable + 
.ensureFieldAccessorsInitialized(MessageItem.class, Builder.class); + } + + // Construct using + // com.dtstack.chunjun.format.protobuf.test.MessageTestOuterClass.MessageItem.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder(BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessageV3.alwaysUseFieldBuilders) {} + } + + public Builder clear() { + super.clear(); + tagName_ = ""; + + if (tagValueBuilder_ == null) { + tagValue_ = null; + } else { + tagValue_ = null; + tagValueBuilder_ = null; + } + internalGetMutableExValues().clear(); + return this; + } + + public com.google.protobuf.Descriptors.Descriptor getDescriptorForType() { + return MessageTestOuterClass + .internal_static_com_dtstack_chunjun_format_protobuf_test_MessageItem_descriptor; + } + + public MessageItem getDefaultInstanceForType() { + return MessageItem.getDefaultInstance(); + } + + public MessageItem build() { + MessageItem result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public MessageItem buildPartial() { + MessageItem result = new MessageItem(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + result.tagName_ = tagName_; + if (tagValueBuilder_ == null) { + result.tagValue_ = tagValue_; + } else { + result.tagValue_ = tagValueBuilder_.build(); + } + result.exValues_ = internalGetExValues(); + result.exValues_.makeImmutable(); + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder clone() { + return (Builder) super.clone(); + } + + public Builder setField( + com.google.protobuf.Descriptors.FieldDescriptor field, Object value) { + return (Builder) super.setField(field, value); + } + + public Builder clearField(com.google.protobuf.Descriptors.FieldDescriptor field) { + return (Builder) 
super.clearField(field); + } + + public Builder clearOneof(com.google.protobuf.Descriptors.OneofDescriptor oneof) { + return (Builder) super.clearOneof(oneof); + } + + public Builder setRepeatedField( + com.google.protobuf.Descriptors.FieldDescriptor field, + int index, + Object value) { + return (Builder) super.setRepeatedField(field, index, value); + } + + public Builder addRepeatedField( + com.google.protobuf.Descriptors.FieldDescriptor field, Object value) { + return (Builder) super.addRepeatedField(field, value); + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof MessageItem) { + return mergeFrom((MessageItem) other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(MessageItem other) { + if (other == MessageItem.getDefaultInstance()) return this; + if (!other.getTagName().isEmpty()) { + tagName_ = other.tagName_; + onChanged(); + } + if (other.hasTagValue()) { + mergeTagValue(other.getTagValue()); + } + internalGetMutableExValues().mergeFrom(other.internalGetExValues()); + onChanged(); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + MessageItem parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (MessageItem) e.getUnfinishedMessage(); + throw e.unwrapIOException(); + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + + private int bitField0_; + + private Object tagName_ = ""; + /** + * + * + *
+             * Optional by default
+             * 
+ * + * string TagName = 1; + */ + public String getTagName() { + Object ref = tagName_; + if (!(ref instanceof String)) { + com.google.protobuf.ByteString bs = (com.google.protobuf.ByteString) ref; + String s = bs.toStringUtf8(); + tagName_ = s; + return s; + } else { + return (String) ref; + } + } + /** + * + * + *
+             * Optional by default
+             * 
+ * + * string TagName = 1; + */ + public com.google.protobuf.ByteString getTagNameBytes() { + Object ref = tagName_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((String) ref); + tagName_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + /** + * + * + *
+             * Optional by default
+             * 
+ * + * string TagName = 1; + */ + public Builder setTagName(String value) { + if (value == null) { + throw new NullPointerException(); + } + + tagName_ = value; + onChanged(); + return this; + } + /** + * + * + *
+             * Optional by default
+             * 
+ * + * string TagName = 1; + */ + public Builder clearTagName() { + + tagName_ = getDefaultInstance().getTagName(); + onChanged(); + return this; + } + /** + * + * + *
+             * Optional by default
+             * 
+ * + * string TagName = 1; + */ + public Builder setTagNameBytes(com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + checkByteStringIsUtf8(value); + + tagName_ = value; + onChanged(); + return this; + } + + private Variant tagValue_ = null; + private com.google.protobuf.SingleFieldBuilderV3< + Variant, Variant.Builder, VariantOrBuilder> + tagValueBuilder_; + /** .com.dtstack.chunjun.format.protobuf.test.Variant TagValue = 2; */ + public boolean hasTagValue() { + return tagValueBuilder_ != null || tagValue_ != null; + } + /** .com.dtstack.chunjun.format.protobuf.test.Variant TagValue = 2; */ + public Variant getTagValue() { + if (tagValueBuilder_ == null) { + return tagValue_ == null ? Variant.getDefaultInstance() : tagValue_; + } else { + return tagValueBuilder_.getMessage(); + } + } + /** .com.dtstack.chunjun.format.protobuf.test.Variant TagValue = 2; */ + public Builder setTagValue(Variant value) { + if (tagValueBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + tagValue_ = value; + onChanged(); + } else { + tagValueBuilder_.setMessage(value); + } + + return this; + } + /** .com.dtstack.chunjun.format.protobuf.test.Variant TagValue = 2; */ + public Builder setTagValue(Variant.Builder builderForValue) { + if (tagValueBuilder_ == null) { + tagValue_ = builderForValue.build(); + onChanged(); + } else { + tagValueBuilder_.setMessage(builderForValue.build()); + } + + return this; + } + /** .com.dtstack.chunjun.format.protobuf.test.Variant TagValue = 2; */ + public Builder mergeTagValue(Variant value) { + if (tagValueBuilder_ == null) { + if (tagValue_ != null) { + tagValue_ = Variant.newBuilder(tagValue_).mergeFrom(value).buildPartial(); + } else { + tagValue_ = value; + } + onChanged(); + } else { + tagValueBuilder_.mergeFrom(value); + } + + return this; + } + /** .com.dtstack.chunjun.format.protobuf.test.Variant TagValue = 2; */ + public Builder clearTagValue() { + if 
(tagValueBuilder_ == null) { + tagValue_ = null; + onChanged(); + } else { + tagValue_ = null; + tagValueBuilder_ = null; + } + + return this; + } + /** .com.dtstack.chunjun.format.protobuf.test.Variant TagValue = 2; */ + public Variant.Builder getTagValueBuilder() { + + onChanged(); + return getTagValueFieldBuilder().getBuilder(); + } + /** .com.dtstack.chunjun.format.protobuf.test.Variant TagValue = 2; */ + public VariantOrBuilder getTagValueOrBuilder() { + if (tagValueBuilder_ != null) { + return tagValueBuilder_.getMessageOrBuilder(); + } else { + return tagValue_ == null ? Variant.getDefaultInstance() : tagValue_; + } + } + /** .com.dtstack.chunjun.format.protobuf.test.Variant TagValue = 2; */ + private com.google.protobuf.SingleFieldBuilderV3< + Variant, Variant.Builder, VariantOrBuilder> + getTagValueFieldBuilder() { + if (tagValueBuilder_ == null) { + tagValueBuilder_ = + new com.google.protobuf.SingleFieldBuilderV3< + Variant, Variant.Builder, VariantOrBuilder>( + getTagValue(), getParentForChildren(), isClean()); + tagValue_ = null; + } + return tagValueBuilder_; + } + + private com.google.protobuf.MapField exValues_; + + private com.google.protobuf.MapField internalGetExValues() { + if (exValues_ == null) { + return com.google.protobuf.MapField.emptyMapField( + ExValuesDefaultEntryHolder.defaultEntry); + } + return exValues_; + } + + private com.google.protobuf.MapField internalGetMutableExValues() { + onChanged(); + ; + if (exValues_ == null) { + exValues_ = + com.google.protobuf.MapField.newMapField( + ExValuesDefaultEntryHolder.defaultEntry); + } + if (!exValues_.isMutable()) { + exValues_ = exValues_.copy(); + } + return exValues_; + } + + public int getExValuesCount() { + return internalGetExValues().getMap().size(); + } + /** map<string, string> ExValues = 3; */ + public boolean containsExValues(String key) { + if (key == null) { + throw new NullPointerException(); + } + return internalGetExValues().getMap().containsKey(key); + } + /** Use {@link 
#getExValuesMap()} instead. */ + @Deprecated + public java.util.Map getExValues() { + return getExValuesMap(); + } + /** map<string, string> ExValues = 3; */ + public java.util.Map getExValuesMap() { + return internalGetExValues().getMap(); + } + /** map<string, string> ExValues = 3; */ + public String getExValuesOrDefault(String key, String defaultValue) { + if (key == null) { + throw new NullPointerException(); + } + java.util.Map map = internalGetExValues().getMap(); + return map.containsKey(key) ? map.get(key) : defaultValue; + } + /** map<string, string> ExValues = 3; */ + public String getExValuesOrThrow(String key) { + if (key == null) { + throw new NullPointerException(); + } + java.util.Map map = internalGetExValues().getMap(); + if (!map.containsKey(key)) { + throw new IllegalArgumentException(); + } + return map.get(key); + } + + public Builder clearExValues() { + internalGetMutableExValues().getMutableMap().clear(); + return this; + } + /** map<string, string> ExValues = 3; */ + public Builder removeExValues(String key) { + if (key == null) { + throw new NullPointerException(); + } + internalGetMutableExValues().getMutableMap().remove(key); + return this; + } + /** Use alternate mutation accessors instead. 
*/ + @Deprecated + public java.util.Map getMutableExValues() { + return internalGetMutableExValues().getMutableMap(); + } + /** map<string, string> ExValues = 3; */ + public Builder putExValues(String key, String value) { + if (key == null) { + throw new NullPointerException(); + } + if (value == null) { + throw new NullPointerException(); + } + internalGetMutableExValues().getMutableMap().put(key, value); + return this; + } + /** map<string, string> ExValues = 3; */ + public Builder putAllExValues(java.util.Map values) { + internalGetMutableExValues().getMutableMap().putAll(values); + return this; + } + + public final Builder setUnknownFields( + final com.google.protobuf.UnknownFieldSet unknownFields) { + return this; + } + + public final Builder mergeUnknownFields( + final com.google.protobuf.UnknownFieldSet unknownFields) { + return this; + } + + // @@protoc_insertion_point(builder_scope:com.dtstack.chunjun.format.protobuf.test.MessageItem) + } + + // @@protoc_insertion_point(class_scope:com.dtstack.chunjun.format.protobuf.test.MessageItem) + private static final MessageItem DEFAULT_INSTANCE; + + static { + DEFAULT_INSTANCE = new MessageItem(); + } + + public static MessageItem getDefaultInstance() { + return DEFAULT_INSTANCE; + } + + private static final com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public MessageItem parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new MessageItem(input, extensionRegistry); + } + }; + + public static com.google.protobuf.Parser parser() { + return PARSER; + } + + @Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + public MessageItem getDefaultInstanceForType() { + return DEFAULT_INSTANCE; + } + } + + public interface MessageTestOrBuilder + extends + // 
@@protoc_insertion_point(interface_extends:com.dtstack.chunjun.format.protobuf.test.MessageTest) + com.google.protobuf.MessageOrBuilder { + + /** map<string, string> GroupInfo = 1; */ + int getGroupInfoCount(); + /** map<string, string> GroupInfo = 1; */ + boolean containsGroupInfo(String key); + /** Use {@link #getGroupInfoMap()} instead. */ + @Deprecated + java.util.Map getGroupInfo(); + /** map<string, string> GroupInfo = 1; */ + java.util.Map getGroupInfoMap(); + /** map<string, string> GroupInfo = 1; */ + String getGroupInfoOrDefault(String key, String defaultValue); + /** map<string, string> GroupInfo = 1; */ + String getGroupInfoOrThrow(String key); + + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + */ + java.util.List getMessagesList(); + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + */ + MessageItem getMessages(int index); + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + */ + int getMessagesCount(); + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + */ + java.util.List getMessagesOrBuilderList(); + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + */ + MessageItemOrBuilder getMessagesOrBuilder(int index); + } + /** Protobuf type {@code com.dtstack.chunjun.format.protobuf.test.MessageTest} */ + public static final class MessageTest extends com.google.protobuf.GeneratedMessageV3 + implements + // @@protoc_insertion_point(message_implements:com.dtstack.chunjun.format.protobuf.test.MessageTest) + MessageTestOrBuilder { + // Use MessageTest.newBuilder() to construct. 
+ private MessageTest(com.google.protobuf.GeneratedMessageV3.Builder builder) { + super(builder); + } + + private MessageTest() { + messages_ = java.util.Collections.emptyList(); + } + + @Override + public final com.google.protobuf.UnknownFieldSet getUnknownFields() { + return com.google.protobuf.UnknownFieldSet.getDefaultInstance(); + } + + private MessageTest( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + this(); + int mutable_bitField0_ = 0; + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: + { + if (!input.skipField(tag)) { + done = true; + } + break; + } + case 10: + { + if (!((mutable_bitField0_ & 0x00000001) == 0x00000001)) { + groupInfo_ = + com.google.protobuf.MapField.newMapField( + GroupInfoDefaultEntryHolder.defaultEntry); + mutable_bitField0_ |= 0x00000001; + } + com.google.protobuf.MapEntry groupInfo__ = + input.readMessage( + GroupInfoDefaultEntryHolder.defaultEntry + .getParserForType(), + extensionRegistry); + groupInfo_ + .getMutableMap() + .put(groupInfo__.getKey(), groupInfo__.getValue()); + break; + } + case 18: + { + if (!((mutable_bitField0_ & 0x00000002) == 0x00000002)) { + messages_ = new java.util.ArrayList(); + mutable_bitField0_ |= 0x00000002; + } + messages_.add( + input.readMessage(MessageItem.parser(), extensionRegistry)); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException(e) + .setUnfinishedMessage(this); + } finally { + if (((mutable_bitField0_ & 0x00000002) == 0x00000002)) { + messages_ = java.util.Collections.unmodifiableList(messages_); + } + makeExtensionsImmutable(); + } + } + + public static final com.google.protobuf.Descriptors.Descriptor 
getDescriptor() { + return MessageTestOuterClass + .internal_static_com_dtstack_chunjun_format_protobuf_test_MessageTest_descriptor; + } + + @SuppressWarnings({"rawtypes"}) + protected com.google.protobuf.MapField internalGetMapField(int number) { + switch (number) { + case 1: + return internalGetGroupInfo(); + default: + throw new RuntimeException("Invalid map field number: " + number); + } + } + + protected FieldAccessorTable internalGetFieldAccessorTable() { + return MessageTestOuterClass + .internal_static_com_dtstack_chunjun_format_protobuf_test_MessageTest_fieldAccessorTable + .ensureFieldAccessorsInitialized(MessageTest.class, Builder.class); + } + + public static final int GROUPINFO_FIELD_NUMBER = 1; + + private static final class GroupInfoDefaultEntryHolder { + static final com.google.protobuf.MapEntry defaultEntry = + com.google.protobuf.MapEntry.newDefaultInstance( + MessageTestOuterClass + .internal_static_com_dtstack_chunjun_format_protobuf_test_MessageTest_GroupInfoEntry_descriptor, + com.google.protobuf.WireFormat.FieldType.STRING, + "", + com.google.protobuf.WireFormat.FieldType.STRING, + ""); + } + + private com.google.protobuf.MapField groupInfo_; + + private com.google.protobuf.MapField internalGetGroupInfo() { + if (groupInfo_ == null) { + return com.google.protobuf.MapField.emptyMapField( + GroupInfoDefaultEntryHolder.defaultEntry); + } + return groupInfo_; + } + + public int getGroupInfoCount() { + return internalGetGroupInfo().getMap().size(); + } + /** map<string, string> GroupInfo = 1; */ + public boolean containsGroupInfo(String key) { + if (key == null) { + throw new NullPointerException(); + } + return internalGetGroupInfo().getMap().containsKey(key); + } + /** Use {@link #getGroupInfoMap()} instead. 
*/ + @Deprecated + public java.util.Map getGroupInfo() { + return getGroupInfoMap(); + } + /** map<string, string> GroupInfo = 1; */ + public java.util.Map getGroupInfoMap() { + return internalGetGroupInfo().getMap(); + } + /** map<string, string> GroupInfo = 1; */ + public String getGroupInfoOrDefault(String key, String defaultValue) { + if (key == null) { + throw new NullPointerException(); + } + java.util.Map map = internalGetGroupInfo().getMap(); + return map.containsKey(key) ? map.get(key) : defaultValue; + } + /** map<string, string> GroupInfo = 1; */ + public String getGroupInfoOrThrow(String key) { + if (key == null) { + throw new NullPointerException(); + } + java.util.Map map = internalGetGroupInfo().getMap(); + if (!map.containsKey(key)) { + throw new IllegalArgumentException(); + } + return map.get(key); + } + + public static final int MESSAGES_FIELD_NUMBER = 2; + private java.util.List messages_; + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + */ + public java.util.List getMessagesList() { + return messages_; + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + */ + public java.util.List getMessagesOrBuilderList() { + return messages_; + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + */ + public int getMessagesCount() { + return messages_.size(); + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + */ + public MessageItem getMessages(int index) { + return messages_.get(index); + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + */ + public MessageItemOrBuilder getMessagesOrBuilder(int index) { + return messages_.get(index); + } + + private byte memoizedIsInitialized = -1; + + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized == 1) return true; + if (isInitialized == 0) return false; + + 
memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + com.google.protobuf.GeneratedMessageV3.serializeStringMapTo( + output, internalGetGroupInfo(), GroupInfoDefaultEntryHolder.defaultEntry, 1); + for (int i = 0; i < messages_.size(); i++) { + output.writeMessage(2, messages_.get(i)); + } + } + + public int getSerializedSize() { + int size = memoizedSize; + if (size != -1) return size; + + size = 0; + for (java.util.Map.Entry entry : + internalGetGroupInfo().getMap().entrySet()) { + com.google.protobuf.MapEntry groupInfo__ = + GroupInfoDefaultEntryHolder.defaultEntry + .newBuilderForType() + .setKey(entry.getKey()) + .setValue(entry.getValue()) + .build(); + size += com.google.protobuf.CodedOutputStream.computeMessageSize(1, groupInfo__); + } + for (int i = 0; i < messages_.size(); i++) { + size += + com.google.protobuf.CodedOutputStream.computeMessageSize( + 2, messages_.get(i)); + } + memoizedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + + @Override + public boolean equals(final Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof MessageTest)) { + return super.equals(obj); + } + MessageTest other = (MessageTest) obj; + + boolean result = true; + result = result && internalGetGroupInfo().equals(other.internalGetGroupInfo()); + result = result && getMessagesList().equals(other.getMessagesList()); + return result; + } + + @Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptor().hashCode(); + if (!internalGetGroupInfo().getMap().isEmpty()) { + hash = (37 * hash) + GROUPINFO_FIELD_NUMBER; + hash = (53 * hash) + internalGetGroupInfo().hashCode(); + } + if (getMessagesCount() > 0) { + hash = (37 * hash) + MESSAGES_FIELD_NUMBER; + hash = (53 * hash) + getMessagesList().hashCode(); + } + hash = (29 * hash) + 
unknownFields.hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static MessageTest parseFrom(java.nio.ByteBuffer data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + + public static MessageTest parseFrom( + java.nio.ByteBuffer data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + + public static MessageTest parseFrom(com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + + public static MessageTest parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + + public static MessageTest parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + + public static MessageTest parseFrom( + byte[] data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + + public static MessageTest parseFrom(java.io.InputStream input) throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input); + } + + public static MessageTest parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseWithIOException( + PARSER, input, extensionRegistry); + } + + public static MessageTest parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseDelimitedWithIOException( + PARSER, input); + } + + public static 
MessageTest parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseDelimitedWithIOException( + PARSER, input, extensionRegistry); + } + + public static MessageTest parseFrom(com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input); + } + + public static MessageTest parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return com.google.protobuf.GeneratedMessageV3.parseWithIOException( + PARSER, input, extensionRegistry); + } + + public Builder newBuilderForType() { + return newBuilder(); + } + + public static Builder newBuilder() { + return DEFAULT_INSTANCE.toBuilder(); + } + + public static Builder newBuilder(MessageTest prototype) { + return DEFAULT_INSTANCE.toBuilder().mergeFrom(prototype); + } + + public Builder toBuilder() { + return this == DEFAULT_INSTANCE ? 
new Builder() : new Builder().mergeFrom(this); + } + + @Override + protected Builder newBuilderForType(BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** Protobuf type {@code com.dtstack.chunjun.format.protobuf.test.MessageTest} */ + public static final class Builder + extends com.google.protobuf.GeneratedMessageV3.Builder + implements + // @@protoc_insertion_point(builder_implements:com.dtstack.chunjun.format.protobuf.test.MessageTest) + MessageTestOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { + return MessageTestOuterClass + .internal_static_com_dtstack_chunjun_format_protobuf_test_MessageTest_descriptor; + } + + @SuppressWarnings({"rawtypes"}) + protected com.google.protobuf.MapField internalGetMapField(int number) { + switch (number) { + case 1: + return internalGetGroupInfo(); + default: + throw new RuntimeException("Invalid map field number: " + number); + } + } + + @SuppressWarnings({"rawtypes"}) + protected com.google.protobuf.MapField internalGetMutableMapField(int number) { + switch (number) { + case 1: + return internalGetMutableGroupInfo(); + default: + throw new RuntimeException("Invalid map field number: " + number); + } + } + + protected FieldAccessorTable internalGetFieldAccessorTable() { + return MessageTestOuterClass + .internal_static_com_dtstack_chunjun_format_protobuf_test_MessageTest_fieldAccessorTable + .ensureFieldAccessorsInitialized(MessageTest.class, Builder.class); + } + + // Construct using + // com.dtstack.chunjun.format.protobuf.test.MessageTestOuterClass.MessageTest.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder(BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessageV3.alwaysUseFieldBuilders) { + getMessagesFieldBuilder(); + } + } + + public Builder clear() { + super.clear(); 
+ internalGetMutableGroupInfo().clear(); + if (messagesBuilder_ == null) { + messages_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000002); + } else { + messagesBuilder_.clear(); + } + return this; + } + + public com.google.protobuf.Descriptors.Descriptor getDescriptorForType() { + return MessageTestOuterClass + .internal_static_com_dtstack_chunjun_format_protobuf_test_MessageTest_descriptor; + } + + public MessageTest getDefaultInstanceForType() { + return MessageTest.getDefaultInstance(); + } + + public MessageTest build() { + MessageTest result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + public MessageTest buildPartial() { + MessageTest result = new MessageTest(this); + int from_bitField0_ = bitField0_; + result.groupInfo_ = internalGetGroupInfo(); + result.groupInfo_.makeImmutable(); + if (messagesBuilder_ == null) { + if (((bitField0_ & 0x00000002) == 0x00000002)) { + messages_ = java.util.Collections.unmodifiableList(messages_); + bitField0_ = (bitField0_ & ~0x00000002); + } + result.messages_ = messages_; + } else { + result.messages_ = messagesBuilder_.build(); + } + onBuilt(); + return result; + } + + public Builder clone() { + return (Builder) super.clone(); + } + + public Builder setField( + com.google.protobuf.Descriptors.FieldDescriptor field, Object value) { + return (Builder) super.setField(field, value); + } + + public Builder clearField(com.google.protobuf.Descriptors.FieldDescriptor field) { + return (Builder) super.clearField(field); + } + + public Builder clearOneof(com.google.protobuf.Descriptors.OneofDescriptor oneof) { + return (Builder) super.clearOneof(oneof); + } + + public Builder setRepeatedField( + com.google.protobuf.Descriptors.FieldDescriptor field, + int index, + Object value) { + return (Builder) super.setRepeatedField(field, index, value); + } + + public Builder addRepeatedField( + 
com.google.protobuf.Descriptors.FieldDescriptor field, Object value) { + return (Builder) super.addRepeatedField(field, value); + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof MessageTest) { + return mergeFrom((MessageTest) other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(MessageTest other) { + if (other == MessageTest.getDefaultInstance()) return this; + internalGetMutableGroupInfo().mergeFrom(other.internalGetGroupInfo()); + if (messagesBuilder_ == null) { + if (!other.messages_.isEmpty()) { + if (messages_.isEmpty()) { + messages_ = other.messages_; + bitField0_ = (bitField0_ & ~0x00000002); + } else { + ensureMessagesIsMutable(); + messages_.addAll(other.messages_); + } + onChanged(); + } + } else { + if (!other.messages_.isEmpty()) { + if (messagesBuilder_.isEmpty()) { + messagesBuilder_.dispose(); + messagesBuilder_ = null; + messages_ = other.messages_; + bitField0_ = (bitField0_ & ~0x00000002); + messagesBuilder_ = + com.google.protobuf.GeneratedMessageV3.alwaysUseFieldBuilders + ? 
getMessagesFieldBuilder() + : null; + } else { + messagesBuilder_.addAllMessages(other.messages_); + } + } + } + onChanged(); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + MessageTest parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (MessageTest) e.getUnfinishedMessage(); + throw e.unwrapIOException(); + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + + private int bitField0_; + + private com.google.protobuf.MapField groupInfo_; + + private com.google.protobuf.MapField internalGetGroupInfo() { + if (groupInfo_ == null) { + return com.google.protobuf.MapField.emptyMapField( + GroupInfoDefaultEntryHolder.defaultEntry); + } + return groupInfo_; + } + + private com.google.protobuf.MapField internalGetMutableGroupInfo() { + onChanged(); + ; + if (groupInfo_ == null) { + groupInfo_ = + com.google.protobuf.MapField.newMapField( + GroupInfoDefaultEntryHolder.defaultEntry); + } + if (!groupInfo_.isMutable()) { + groupInfo_ = groupInfo_.copy(); + } + return groupInfo_; + } + + public int getGroupInfoCount() { + return internalGetGroupInfo().getMap().size(); + } + /** map<string, string> GroupInfo = 1; */ + public boolean containsGroupInfo(String key) { + if (key == null) { + throw new NullPointerException(); + } + return internalGetGroupInfo().getMap().containsKey(key); + } + /** Use {@link #getGroupInfoMap()} instead. 
*/ + @Deprecated + public java.util.Map getGroupInfo() { + return getGroupInfoMap(); + } + /** map<string, string> GroupInfo = 1; */ + public java.util.Map getGroupInfoMap() { + return internalGetGroupInfo().getMap(); + } + /** map<string, string> GroupInfo = 1; */ + public String getGroupInfoOrDefault(String key, String defaultValue) { + if (key == null) { + throw new NullPointerException(); + } + java.util.Map map = internalGetGroupInfo().getMap(); + return map.containsKey(key) ? map.get(key) : defaultValue; + } + /** map<string, string> GroupInfo = 1; */ + public String getGroupInfoOrThrow(String key) { + if (key == null) { + throw new NullPointerException(); + } + java.util.Map map = internalGetGroupInfo().getMap(); + if (!map.containsKey(key)) { + throw new IllegalArgumentException(); + } + return map.get(key); + } + + public Builder clearGroupInfo() { + internalGetMutableGroupInfo().getMutableMap().clear(); + return this; + } + /** map<string, string> GroupInfo = 1; */ + public Builder removeGroupInfo(String key) { + if (key == null) { + throw new NullPointerException(); + } + internalGetMutableGroupInfo().getMutableMap().remove(key); + return this; + } + /** Use alternate mutation accessors instead. 
*/ + @Deprecated + public java.util.Map getMutableGroupInfo() { + return internalGetMutableGroupInfo().getMutableMap(); + } + /** map<string, string> GroupInfo = 1; */ + public Builder putGroupInfo(String key, String value) { + if (key == null) { + throw new NullPointerException(); + } + if (value == null) { + throw new NullPointerException(); + } + internalGetMutableGroupInfo().getMutableMap().put(key, value); + return this; + } + /** map<string, string> GroupInfo = 1; */ + public Builder putAllGroupInfo(java.util.Map values) { + internalGetMutableGroupInfo().getMutableMap().putAll(values); + return this; + } + + private java.util.List messages_ = java.util.Collections.emptyList(); + + private void ensureMessagesIsMutable() { + if (!((bitField0_ & 0x00000002) == 0x00000002)) { + messages_ = new java.util.ArrayList(messages_); + bitField0_ |= 0x00000002; + } + } + + private com.google.protobuf.RepeatedFieldBuilderV3< + MessageItem, MessageItem.Builder, MessageItemOrBuilder> + messagesBuilder_; + + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + * + */ + public java.util.List getMessagesList() { + if (messagesBuilder_ == null) { + return java.util.Collections.unmodifiableList(messages_); + } else { + return messagesBuilder_.getMessageList(); + } + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + * + */ + public int getMessagesCount() { + if (messagesBuilder_ == null) { + return messages_.size(); + } else { + return messagesBuilder_.getCount(); + } + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + * + */ + public MessageItem getMessages(int index) { + if (messagesBuilder_ == null) { + return messages_.get(index); + } else { + return messagesBuilder_.getMessage(index); + } + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + * + */ + public Builder setMessages(int index, MessageItem value) { + if 
(messagesBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + ensureMessagesIsMutable(); + messages_.set(index, value); + onChanged(); + } else { + messagesBuilder_.setMessage(index, value); + } + return this; + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + * + */ + public Builder setMessages(int index, MessageItem.Builder builderForValue) { + if (messagesBuilder_ == null) { + ensureMessagesIsMutable(); + messages_.set(index, builderForValue.build()); + onChanged(); + } else { + messagesBuilder_.setMessage(index, builderForValue.build()); + } + return this; + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + * + */ + public Builder addMessages(MessageItem value) { + if (messagesBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + ensureMessagesIsMutable(); + messages_.add(value); + onChanged(); + } else { + messagesBuilder_.addMessage(value); + } + return this; + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + * + */ + public Builder addMessages(int index, MessageItem value) { + if (messagesBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + ensureMessagesIsMutable(); + messages_.add(index, value); + onChanged(); + } else { + messagesBuilder_.addMessage(index, value); + } + return this; + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + * + */ + public Builder addMessages(MessageItem.Builder builderForValue) { + if (messagesBuilder_ == null) { + ensureMessagesIsMutable(); + messages_.add(builderForValue.build()); + onChanged(); + } else { + messagesBuilder_.addMessage(builderForValue.build()); + } + return this; + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + * + */ + public Builder addMessages(int index, MessageItem.Builder builderForValue) { + if 
(messagesBuilder_ == null) { + ensureMessagesIsMutable(); + messages_.add(index, builderForValue.build()); + onChanged(); + } else { + messagesBuilder_.addMessage(index, builderForValue.build()); + } + return this; + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + * + */ + public Builder addAllMessages(Iterable values) { + if (messagesBuilder_ == null) { + ensureMessagesIsMutable(); + com.google.protobuf.AbstractMessageLite.Builder.addAll(values, messages_); + onChanged(); + } else { + messagesBuilder_.addAllMessages(values); + } + return this; + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + * + */ + public Builder clearMessages() { + if (messagesBuilder_ == null) { + messages_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000002); + onChanged(); + } else { + messagesBuilder_.clear(); + } + return this; + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + * + */ + public Builder removeMessages(int index) { + if (messagesBuilder_ == null) { + ensureMessagesIsMutable(); + messages_.remove(index); + onChanged(); + } else { + messagesBuilder_.remove(index); + } + return this; + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + * + */ + public MessageItem.Builder getMessagesBuilder(int index) { + return getMessagesFieldBuilder().getBuilder(index); + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + * + */ + public MessageItemOrBuilder getMessagesOrBuilder(int index) { + if (messagesBuilder_ == null) { + return messages_.get(index); + } else { + return messagesBuilder_.getMessageOrBuilder(index); + } + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + * + */ + public java.util.List getMessagesOrBuilderList() { + if (messagesBuilder_ != null) { + return messagesBuilder_.getMessageOrBuilderList(); + } 
else { + return java.util.Collections.unmodifiableList(messages_); + } + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + * + */ + public MessageItem.Builder addMessagesBuilder() { + return getMessagesFieldBuilder().addBuilder(MessageItem.getDefaultInstance()); + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + * + */ + public MessageItem.Builder addMessagesBuilder(int index) { + return getMessagesFieldBuilder() + .addBuilder(index, MessageItem.getDefaultInstance()); + } + /** + * repeated .com.dtstack.chunjun.format.protobuf.test.MessageItem Messages = 2; + * + */ + public java.util.List getMessagesBuilderList() { + return getMessagesFieldBuilder().getBuilderList(); + } + + private com.google.protobuf.RepeatedFieldBuilderV3< + MessageItem, MessageItem.Builder, MessageItemOrBuilder> + getMessagesFieldBuilder() { + if (messagesBuilder_ == null) { + messagesBuilder_ = + new com.google.protobuf.RepeatedFieldBuilderV3< + MessageItem, MessageItem.Builder, MessageItemOrBuilder>( + messages_, + ((bitField0_ & 0x00000002) == 0x00000002), + getParentForChildren(), + isClean()); + messages_ = null; + } + return messagesBuilder_; + } + + public final Builder setUnknownFields( + final com.google.protobuf.UnknownFieldSet unknownFields) { + return this; + } + + public final Builder mergeUnknownFields( + final com.google.protobuf.UnknownFieldSet unknownFields) { + return this; + } + + // @@protoc_insertion_point(builder_scope:com.dtstack.chunjun.format.protobuf.test.MessageTest) + } + + // @@protoc_insertion_point(class_scope:com.dtstack.chunjun.format.protobuf.test.MessageTest) + private static final MessageTest DEFAULT_INSTANCE; + + static { + DEFAULT_INSTANCE = new MessageTest(); + } + + public static MessageTest getDefaultInstance() { + return DEFAULT_INSTANCE; + } + + private static final com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public MessageTest 
parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new MessageTest(input, extensionRegistry); + } + }; + + public static com.google.protobuf.Parser parser() { + return PARSER; + } + + @Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + public MessageTest getDefaultInstanceForType() { + return DEFAULT_INSTANCE; + } + } + + private static final com.google.protobuf.Descriptors.Descriptor + internal_static_com_dtstack_chunjun_format_protobuf_test_Variant_descriptor; + private static final com.google.protobuf.GeneratedMessageV3.FieldAccessorTable + internal_static_com_dtstack_chunjun_format_protobuf_test_Variant_fieldAccessorTable; + private static final com.google.protobuf.Descriptors.Descriptor + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageItem_descriptor; + private static final com.google.protobuf.GeneratedMessageV3.FieldAccessorTable + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageItem_fieldAccessorTable; + private static final com.google.protobuf.Descriptors.Descriptor + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageItem_ExValuesEntry_descriptor; + private static final com.google.protobuf.GeneratedMessageV3.FieldAccessorTable + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageItem_ExValuesEntry_fieldAccessorTable; + private static final com.google.protobuf.Descriptors.Descriptor + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageTest_descriptor; + private static final com.google.protobuf.GeneratedMessageV3.FieldAccessorTable + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageTest_fieldAccessorTable; + private static final com.google.protobuf.Descriptors.Descriptor + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageTest_GroupInfoEntry_descriptor; + private static final 
com.google.protobuf.GeneratedMessageV3.FieldAccessorTable + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageTest_GroupInfoEntry_fieldAccessorTable; + + public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() { + return descriptor; + } + + private static com.google.protobuf.Descriptors.FileDescriptor descriptor; + + static { + String[] descriptorData = { + "\n\021MessageTest.proto\022(com.dtstack.chunjun" + + ".format.protobuf.test\"]\n\007Variant\022\r\n\005bool" + + "x\030\001 \001(\010\022\024\n\nValueBool2\030\002 \001(\010H\000\022\024\n\nValueIn" + + "t32\030\003 \001(\005H\000\022\r\n\005booly\030\004 \001(\010B\010\n\006Value2\"\353\001\n" + + "\013MessageItem\022\017\n\007TagName\030\001 \001(\t\022C\n\010TagValu" + + "e\030\002 \001(\01321.com.dtstack.chunjun.format.pro" + + "tobuf.test.Variant\022U\n\010ExValues\030\003 \003(\0132C.c" + + "om.dtstack.chunjun.format.protobuf.test." + + "MessageItem.ExValuesEntry\032/\n\rExValuesEnt" + + "ry\022\013\n\003key\030\001 \001(\t\022\r\n\005value\030\002 \001(\t:\0028\001\"\341\001\n\013M", + "essageTest\022W\n\tGroupInfo\030\001 \003(\0132D.com.dtst" + + "ack.chunjun.format.protobuf.test.Message" + + "Test.GroupInfoEntry\022G\n\010Messages\030\002 \003(\01325." 
+ + "com.dtstack.chunjun.format.protobuf.test" + + ".MessageItem\0320\n\016GroupInfoEntry\022\013\n\003key\030\001 " + + "\001(\t\022\r\n\005value\030\002 \001(\t:\0028\001b\006proto3" + }; + com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = + new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { + public com.google.protobuf.ExtensionRegistry assignDescriptors( + com.google.protobuf.Descriptors.FileDescriptor root) { + descriptor = root; + return null; + } + }; + com.google.protobuf.Descriptors.FileDescriptor.internalBuildGeneratedFileFrom( + descriptorData, new com.google.protobuf.Descriptors.FileDescriptor[] {}, assigner); + internal_static_com_dtstack_chunjun_format_protobuf_test_Variant_descriptor = + getDescriptor().getMessageTypes().get(0); + internal_static_com_dtstack_chunjun_format_protobuf_test_Variant_fieldAccessorTable = + new com.google.protobuf.GeneratedMessageV3.FieldAccessorTable( + internal_static_com_dtstack_chunjun_format_protobuf_test_Variant_descriptor, + new String[] { + "Boolx", "ValueBool2", "ValueInt32", "Booly", "Value2", + }); + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageItem_descriptor = + getDescriptor().getMessageTypes().get(1); + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageItem_fieldAccessorTable = + new com.google.protobuf.GeneratedMessageV3.FieldAccessorTable( + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageItem_descriptor, + new String[] { + "TagName", "TagValue", "ExValues", + }); + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageItem_ExValuesEntry_descriptor = + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageItem_descriptor + .getNestedTypes() + .get(0); + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageItem_ExValuesEntry_fieldAccessorTable = + new com.google.protobuf.GeneratedMessageV3.FieldAccessorTable( + 
internal_static_com_dtstack_chunjun_format_protobuf_test_MessageItem_ExValuesEntry_descriptor, + new String[] { + "Key", "Value", + }); + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageTest_descriptor = + getDescriptor().getMessageTypes().get(2); + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageTest_fieldAccessorTable = + new com.google.protobuf.GeneratedMessageV3.FieldAccessorTable( + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageTest_descriptor, + new String[] { + "GroupInfo", "Messages", + }); + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageTest_GroupInfoEntry_descriptor = + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageTest_descriptor + .getNestedTypes() + .get(0); + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageTest_GroupInfoEntry_fieldAccessorTable = + new com.google.protobuf.GeneratedMessageV3.FieldAccessorTable( + internal_static_com_dtstack_chunjun_format_protobuf_test_MessageTest_GroupInfoEntry_descriptor, + new String[] { + "Key", "Value", + }); + } + + // @@protoc_insertion_point(outer_class_scope) +} diff --git a/chunjun-formats/chunjun-format-protobuf/src/test/java/com/dtstack/chunjun/format/protobuf/test/PbFormatFactoryTest.java b/chunjun-formats/chunjun-format-protobuf/src/test/java/com/dtstack/chunjun/format/protobuf/test/PbFormatFactoryTest.java new file mode 100644 index 0000000000..7f8d7bacfe --- /dev/null +++ b/chunjun-formats/chunjun-format-protobuf/src/test/java/com/dtstack/chunjun/format/protobuf/test/PbFormatFactoryTest.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.chunjun.format.protobuf.test; + +import com.dtstack.chunjun.format.protobuf.PbFormatFactory; +import com.dtstack.chunjun.format.protobuf.util.FormatCheckUtil; + +import org.apache.flink.api.common.serialization.DeserializationSchema; +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.table.connector.format.DecodingFormat; +import org.apache.flink.table.connector.format.EncodingFormat; +import org.apache.flink.table.connector.sink.DynamicTableSink; +import org.apache.flink.table.connector.source.DynamicTableSource; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.factories.DynamicTableFactory; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.RowType; + +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.powermock.api.mockito.PowerMockito; +import org.powermock.core.classloader.annotations.PrepareForTest; +import org.powermock.modules.junit4.PowerMockRunner; + +import static com.dtstack.chunjun.format.protobuf.PbFormatOptions.MESSAGE_CLASS_NAME; + +/** @author liuliu 2022/5/4 */ +@RunWith(PowerMockRunner.class) +@PrepareForTest({PbFormatFactory.class}) +public class PbFormatFactoryTest { + + DecodingFormat> decodingFormat; + EncodingFormat> encodingFormat; + + DeserializationSchema runtimeDecoder; + SerializationSchema runtimeEncoder; + + @Before + public void init() throws Exception { + DynamicTableFactory.Context 
context = PowerMockito.mock(DynamicTableFactory.Context.class); + ReadableConfig readableConfig = PowerMockito.mock(ReadableConfig.class); + PowerMockito.when(readableConfig.get(MESSAGE_CLASS_NAME)) + .thenReturn( + "com.dtstack.chunjun.format.protobuf.test.MessageTestOuterClass$MessageTest"); + + PbFormatFactory pbFormatFactory = PowerMockito.mock(PbFormatFactory.class); + PowerMockito.when(pbFormatFactory, "createDecodingFormat", context, readableConfig) + .thenCallRealMethod(); + PowerMockito.when(pbFormatFactory, "createEncodingFormat", context, readableConfig) + .thenCallRealMethod(); + decodingFormat = pbFormatFactory.createDecodingFormat(context, readableConfig); + encodingFormat = pbFormatFactory.createEncodingFormat(context, readableConfig); + + DynamicTableSource.Context sourceContext = + PowerMockito.mock(DynamicTableSource.Context.class); + DynamicTableSink.Context sinkContext = PowerMockito.mock(DynamicTableSink.Context.class); + + FormatCheckUtil formatCheckUtil = + new FormatCheckUtil( + null, + "com.dtstack.chunjun.format.protobuf.test.MessageTestOuterClass$MessageTest"); + RowType messageLogicalType = + formatCheckUtil.createMessageLogicalType( + "com.dtstack.chunjun.format.protobuf.test.MessageTestOuterClass$MessageTest"); + DataType physicalDataType = PowerMockito.mock(DataType.class); + PowerMockito.when(physicalDataType.getLogicalType()).thenReturn(messageLogicalType); + PowerMockito.when(sourceContext.createTypeInformation(physicalDataType)).thenReturn(null); + PowerMockito.when(sinkContext.createTypeInformation(physicalDataType)).thenReturn(null); + + runtimeDecoder = decodingFormat.createRuntimeDecoder(sourceContext, physicalDataType); + + runtimeEncoder = encodingFormat.createRuntimeEncoder(sinkContext, physicalDataType); + } + + @Test + public void serializeTest() throws Exception { + + MessageTestOuterClass.MessageTest messageGroup = getMessageGroup(); + // serialize + runtimeDecoder.open(null); + RowData deserialize = 
runtimeDecoder.deserialize(messageGroup.toByteArray()); + assert deserialize.getMap(0).keyArray().getString(0).toString().equals("group"); + assert deserialize.getArray(1).getRow(0, 0).getRow(1, 0).getRow(1, 0).getInt(0) == 3; + } + + @Test + public void deserializeTest() throws Exception { + + MessageTestOuterClass.MessageTest messageGroup = getMessageGroup(); + runtimeDecoder.open(null); + RowData rowData = runtimeDecoder.deserialize(messageGroup.toByteArray()); + + runtimeEncoder.open(null); + assert messageGroup.equals( + MessageTestOuterClass.MessageTest.parseFrom(runtimeEncoder.serialize(rowData))); + } + + public static MessageTestOuterClass.MessageTest getMessageGroup() { + MessageTestOuterClass.MessageTest.Builder builder = + MessageTestOuterClass.MessageTest.newBuilder(); + builder.putGroupInfo("group", "test"); + builder.addMessages(0, getMessageItem()); + return builder.build(); + } + + public static MessageTestOuterClass.MessageItem getMessageItem() { + MessageTestOuterClass.MessageItem.Builder builder = + MessageTestOuterClass.MessageItem.newBuilder(); + + builder.setTagName("tag"); + builder.setTagValue(getVariant()); + builder.putExValues("2", "3"); + + return builder.build(); + } + + public static MessageTestOuterClass.Variant getVariant() { + MessageTestOuterClass.Variant.Builder builder = MessageTestOuterClass.Variant.newBuilder(); + builder.setBoolx(true); + builder.setValueInt32(1); + builder.setBooly(false); + + return builder.build(); + } +} diff --git a/chunjun-formats/chunjun-format-tika/pom.xml b/chunjun-formats/chunjun-format-tika/pom.xml new file mode 100644 index 0000000000..f370e4c56f --- /dev/null +++ b/chunjun-formats/chunjun-format-tika/pom.xml @@ -0,0 +1,70 @@ + + + + + + chunjun-formats + com.dtstack.chunjun + ${revision} + + 4.0.0 + + chunjun-format-tika + ChunJun : Formats : Tika + + + tika + 2.8.0 + + + + + + org.apache.tika + tika-core + ${tika.version} + provided + + + + + org.apache.tika + tika-parsers-standard-package + 
${tika.version} + provided + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + org.apache.maven.plugins + maven-antrun-plugin + + + + + diff --git a/chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/common/TikaData.java b/chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/common/TikaData.java new file mode 100644 index 0000000000..8e89c4077d --- /dev/null +++ b/chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/common/TikaData.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.chunjun.format.tika.common; + +import lombok.Data; + +@Data +public class TikaData { + + private String[] data; + private boolean end; + + public TikaData(String[] data, boolean end) { + this.data = data; + this.end = end; + } + + public TikaData(String[] data) { + this.data = data; + } +} diff --git a/chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/config/TikaReadConfig.java b/chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/config/TikaReadConfig.java new file mode 100644 index 0000000000..eac2a1556a --- /dev/null +++ b/chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/config/TikaReadConfig.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.chunjun.format.tika.config; + +import lombok.Data; + +import java.io.Serializable; + +@Data +public class TikaReadConfig implements Serializable { + + public static final String ORIGINAL_FILENAME = "_ORIGINAL_FILENAME"; + + private static final long serialVersionUID = 9142075335239994317L; + + /** 是否启用tika提取 */ + private boolean useExtract = false; + + /** 内容重合度比例值 0-100 */ + private int overlapRatio = 0; + + /** 是否启动分块 */ + private boolean enableChunk = false; + + /** 分块大小 */ + private int chunkSize = -1; + + public boolean isEnableChunk() { + return chunkSize > 0 ? true : false; + } +} diff --git a/chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/options/TikaOptions.java b/chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/options/TikaOptions.java new file mode 100644 index 0000000000..6b33451aa8 --- /dev/null +++ b/chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/options/TikaOptions.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.chunjun.format.tika.options; + +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ConfigOptions; + +public class TikaOptions { + + public static final ConfigOption USE_EXTRACT = + ConfigOptions.key("tika-use-extract") + .booleanType() + .defaultValue(false) + .withDescription("use tika extract"); + + public static final ConfigOption OVERLAP_RATIO = + ConfigOptions.key("tika-overlap-ratio") + .intType() + .defaultValue(0) + .withDescription("content overlap ratio"); + + public static final ConfigOption CHUNK_SIZE = + ConfigOptions.key("tika-chunk-size") + .intType() + .defaultValue(-1) + .withDescription("chunk size"); +} diff --git a/chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/source/TikaInputFormat.java b/chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/source/TikaInputFormat.java new file mode 100644 index 0000000000..7923f8567d --- /dev/null +++ b/chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/source/TikaInputFormat.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.chunjun.format.tika.source; + +import com.dtstack.chunjun.format.tika.common.TikaData; +import com.dtstack.chunjun.format.tika.config.TikaReadConfig; + +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.concurrent.BasicThreadFactory; + +import java.io.Closeable; +import java.io.InputStream; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +import static java.util.concurrent.TimeUnit.NANOSECONDS; + +@Slf4j +public class TikaInputFormat implements Closeable { + private ThreadPoolExecutor executorService; + private final BlockingQueue queue = new LinkedBlockingQueue<>(4096); + private TikaReadConfig tikaReadConfig; + private TikaData row; + private int fieldCount; + + public TikaInputFormat(TikaReadConfig tikaReadConfig, int fieldCount) { + this.tikaReadConfig = tikaReadConfig; + this.fieldCount = fieldCount; + } + + public void open(InputStream inputStream, String originalFilename) { + this.executorService = + new ThreadPoolExecutor( + 1, + 1, + 0, + NANOSECONDS, + new LinkedBlockingDeque<>(2), + new BasicThreadFactory.Builder() + .namingPattern("tika-schedule-pool-%d") + .daemon(false) + .build()); + TikaReaderExecutor executor = + new TikaReaderExecutor(tikaReadConfig, queue, inputStream, originalFilename); + executorService.execute(executor); + } + + public boolean hasNext() { + try { + row = queue.poll(3000L, TimeUnit.MILLISECONDS); + // 如果没有数据,则继续等待 + if (row == null) { + log.warn("Waiting for queue get tika data"); + hasNext(); + } + if (row != null && row.isEnd()) { + return false; + } + return true; + } catch (InterruptedException e) { + throw new RuntimeException( + "cannot get data from the queue because the current thread is interrupted.", e); + } + } + + /** 根据声明的字段个数,对数据进行补全 */ + public String[] nextRecord() { + String[] data = 
row.getData(); + if (fieldCount == data.length) { + return data; + } + if (fieldCount < data.length) { + fieldCount = data.length; + } + return formatValue(data); + } + + private String[] formatValue(String[] data) { + String[] record = initDataContainer(fieldCount, ""); + // because fieldCount is always >= data.length + System.arraycopy(data, 0, record, 0, data.length); + return record; + } + + private String[] initDataContainer(int capacity, String defValue) { + String[] container = new String[capacity]; + for (int i = 0; i < capacity; i++) { + container[i] = defValue; + } + return container; + } + + @Override + public void close() { + if (executorService != null) { + executorService.shutdown(); + queue.clear(); + } + } +} diff --git a/chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/source/TikaReaderExecutor.java b/chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/source/TikaReaderExecutor.java new file mode 100644 index 0000000000..302d697e98 --- /dev/null +++ b/chunjun-formats/chunjun-format-tika/src/main/java/com/dtstack/chunjun/format/tika/source/TikaReaderExecutor.java @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.chunjun.format.tika.source; + +import com.dtstack.chunjun.format.tika.common.TikaData; +import com.dtstack.chunjun.format.tika.config.TikaReadConfig; +import com.dtstack.chunjun.util.GsonUtil; + +import org.apache.commons.lang3.StringUtils; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.AutoDetectParser; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; +import org.apache.tika.sax.BodyContentHandler; +import org.apache.tika.sax.ContentHandlerDecorator; +import org.xml.sax.ContentHandler; + +import java.io.BufferedInputStream; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.BlockingQueue; + +import static com.dtstack.chunjun.format.tika.config.TikaReadConfig.ORIGINAL_FILENAME; + +public class TikaReaderExecutor implements Runnable { + + private TikaReadConfig tikaReadConfig; + private BlockingQueue queue; + private Map metaData = new HashMap<>(); + private String metaDataString; + private String originalFilename; + private InputStream in; + + public TikaReaderExecutor( + TikaReadConfig tikaReadConfig, + BlockingQueue queue, + InputStream in, + String originalFilename) { + this.tikaReadConfig = tikaReadConfig; + this.queue = queue; + this.in = in; + this.originalFilename = originalFilename; + } + + @Override + public void run() { + // 抽取文档内容 + Parser parser = new AutoDetectParser(); + // 内容处理器,用来收集结果,Tika可以将解析结果包装成XHTML SAX + // event进行分发,通过ContentHandler处理这些event就可以得到文本内容和其他有用的信息 + ContentHandler contentHandler = new BodyContentHandler(-1); + // 元数据,既是输入也是输出,可以将文件名或者可能的文件类型传入,tika解析时可以根据这些信息判断文件类型, + // 再调用相应的解析器进行处理;另外,tika也会将一些额外的信息保存到Metadata中,如文件修改日期,作者,编辑工具等 + Metadata metadata = new Metadata(); + // 解析上下文,用来控制解析过程,比如是否提取Office文档里面的宏等 + ParseContext context = new ParseContext(); + + // tika官方文档提供的分块处理思路, 但是测试发现比如同类型word(doc)两个文件,有的可以正常分块,有的不能分块。 + // 
还有txt类型文件未能分块读取, pdf文件暂时测试。 + // 因此暂时不建议使用 + final List chunks = new ArrayList<>(); + chunks.add(""); + ContentHandlerDecorator trunkHandler = + new ContentHandlerDecorator() { + @Override + public void characters(char[] ch, int start, int length) { + String chunkContent = ""; + String lastChunk = chunks.get(chunks.size() - 1); + String thisStr = new String(ch, start, length); + if (lastChunk.length() + length > tikaReadConfig.getChunkSize()) { + chunks.add(thisStr); + chunkContent = thisStr; + } else { + String chunkString = lastChunk + thisStr; + chunks.set(chunks.size() - 1, chunkString); + if (StringUtils.isNotBlank(chunkString)) { + chunkContent = chunkString; + } + } + if (metaData.isEmpty()) { + for (String name : metadata.names()) { + metaData.put(name, metadata.get(name)); + } + metaData.put(ORIGINAL_FILENAME, originalFilename); + metaDataString = GsonUtil.GSON.toJson(metaData); + } + if (StringUtils.isNotBlank(chunkContent)) { + try { + queue.put( + new TikaData( + new String[] {chunkContent, metaDataString}, + false)); + } catch (InterruptedException e) { + throw new RuntimeException( + "because the current thread was interrupted, adding data to the queue failed", + e); + } + } + } + }; + + // InputStream in 待解析的文档,以字节流形式传入,可以避免tika占用太多内存 + try (BufferedInputStream bufferedInputStream = new BufferedInputStream(in)) { + // 如何想要使用官方的分块处理方式,需要将contentHandler替换成trunkHandler + parser.parse(bufferedInputStream, contentHandler, metadata, context); + String content = contentHandler.toString(); + for (String name : metadata.names()) { + metaData.put(name, metadata.get(name)); + } + metaData.put(ORIGINAL_FILENAME, originalFilename); + metaDataString = GsonUtil.GSON.toJson(metaData); + if (tikaReadConfig.getChunkSize() > 0) { + // 对整个抽取出来的内容进行分块、内容重复度处理 + List chunkList = + getChunkList( + content, + tikaReadConfig.getChunkSize(), + tikaReadConfig.getOverlapRatio()); + for (String chunk : chunkList) { + queue.put(new TikaData(new String[] {chunk, metaDataString}, 
false)); + } + } else { + queue.put(new TikaData(new String[] {content, metaDataString}, false)); + } + queue.put(new TikaData(null, true)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + public List getChunkList(String content, int chunkSize, int overlapRatio) { + List chunks = new ArrayList<>(); + int length = content.length(); + int startIndex = 0; + int step = chunkSize; + int endIndex = startIndex + step; + int increment = step * overlapRatio / 100; + if (step >= length) { + chunks.add(content); + } else { + while (endIndex <= length) { + // 确保截取的字符串不会超过原始字符串的长度 + if (startIndex + step > length) { + endIndex = length; + } + String substring = content.substring(startIndex, endIndex); + chunks.add(substring); + // 更新起始和结束位置 + startIndex = endIndex - increment; + endIndex = startIndex + step; + } + if (endIndex > length && startIndex + increment < length) { + String substring = content.substring(startIndex, length); + chunks.add(substring); + } + } + return chunks; + } +} diff --git a/chunjun-formats/pom.xml b/chunjun-formats/pom.xml new file mode 100644 index 0000000000..94c83fc18a --- /dev/null +++ b/chunjun-formats/pom.xml @@ -0,0 +1,133 @@ + + + + + + chunjun + com.dtstack.chunjun + ${revision} + + 4.0.0 + + chunjun-formats + ChunJun : Formats + pom + + + formats + formats + + + + chunjun-format-protobuf + chunjun-format-tika + chunjun-format-excel + + + + + com.dtstack.chunjun + chunjun-core + ${project.version} + provided + + + ch.qos.logback + logback-classic + + + ch.qos.logback + logback-core + + + org.apache.flink + flink-shaded-guava + + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + package + + shade + + + + + org.slf4j:slf4j-api + log4j:log4j + ch.qos.logback:* + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + org.apache.maven.plugins + maven-antrun-plugin + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + diff --git a/pom.xml b/pom.xml index 
dc31571dc4..0c0d8a2bb7 100755 --- a/pom.xml +++ b/pom.xml @@ -43,7 +43,8 @@ chunjun-e2e chunjun-local-test chunjun-server - + chunjun-formats + UTF-8