Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade to Arrow 4.0.0 #198

Merged
merged 7 commits into from
Jun 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 0 additions & 33 deletions cmake/FindBrotli.cmake

This file was deleted.

10 changes: 5 additions & 5 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ include(GenerateExportHeader)

find_package(Arrow REQUIRED)
find_package(Boost COMPONENTS filesystem regex system REQUIRED)
find_package(Brotli REQUIRED)
find_package(unofficial-brotli CONFIG REQUIRED)
find_package(BZip2 REQUIRED)
find_package(double-conversion CONFIG REQUIRED)
find_package(gflags CONFIG REQUIRED)
find_package(glog CONFIG REQUIRED)
find_package(lz4 CONFIG REQUIRED)
find_package(OpenSSL REQUIRED)
find_package(re2 CONFIG REQUIRED)
find_package(Snappy CONFIG REQUIRED)
find_package(Thrift CONFIG REQUIRED)
find_package(utf8proc REQUIRED)
Expand Down Expand Up @@ -86,17 +86,17 @@ include_directories(
${PROJECT_BINARY_DIR}
${ParquetCpp_INCLUDE_DIRS})

target_link_libraries(ParquetSharpNative
target_link_libraries(ParquetSharpNative PRIVATE
${ParquetCpp_LIBRARIES}
${Arrow_LIBRARIES}
${Boost_LIBRARIES}
${Brotli_LIBRARIES}
unofficial::brotli::brotlidec-static unofficial::brotli::brotlienc-static unofficial::brotli::brotlicommon-static
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are now using vcpkg's brotli.

BZip2::BZip2
${Crypto_LIBRARIES}
double-conversion::double-conversion
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Arrow 4.0.0 has its own double-conversion.

glog::glog
lz4::lz4
${SSL_LIBRARIES}
re2::re2
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Arrow 4.0.0 now requires re2.

Snappy::snappy
thrift::thrift
${utf8proc_LIBRARIES}
Expand Down
2 changes: 1 addition & 1 deletion cpp/ColumnDecryptionProperties.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "CString.h"
#include "ExceptionInfo.h"

#include <parquet/encryption.h>
#include <parquet/encryption/encryption.h>

using namespace parquet;

Expand Down
2 changes: 1 addition & 1 deletion cpp/ColumnDecryptionPropertiesBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "CString.h"
#include "ExceptionInfo.h"

#include <parquet/encryption.h>
#include <parquet/encryption/encryption.h>

using namespace parquet;

Expand Down
2 changes: 1 addition & 1 deletion cpp/ColumnEncryptionProperties.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "CString.h"
#include "ExceptionInfo.h"

#include <parquet/encryption.h>
#include <parquet/encryption/encryption.h>

using namespace parquet;

Expand Down
2 changes: 1 addition & 1 deletion cpp/ColumnEncryptionPropertiesBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "CString.h"
#include "ExceptionInfo.h"

#include <parquet/encryption.h>
#include <parquet/encryption/encryption.h>

using namespace parquet;

Expand Down
2 changes: 1 addition & 1 deletion cpp/Enums.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ namespace
static_assert(Encoding::BYTE_STREAM_SPLIT == 9);
static_assert(Encoding::UNDEFINED == 10);

static_assert(LogicalType::Type::UNKNOWN == 0);
static_assert(LogicalType::Type::UNDEFINED == 0);
static_assert(LogicalType::Type::STRING == 1);
static_assert(LogicalType::Type::MAP == 2);
static_assert(LogicalType::Type::LIST == 3);
Expand Down
2 changes: 1 addition & 1 deletion cpp/FileDecryptionProperties.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "ManagedAadPrefixVerifier.h"
#include "ManagedDecryptionKeyRetriever.h"

#include <parquet/encryption.h>
#include <parquet/encryption/encryption.h>

using namespace parquet;

Expand Down
2 changes: 1 addition & 1 deletion cpp/FileDecryptionPropertiesBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "ManagedAadPrefixVerifier.h"
#include "ManagedDecryptionKeyRetriever.h"

#include <parquet/encryption.h>
#include <parquet/encryption/encryption.h>

using namespace parquet;

Expand Down
2 changes: 1 addition & 1 deletion cpp/FileEncryptionProperties.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "CString.h"
#include "ExceptionInfo.h"

#include <parquet/encryption.h>
#include <parquet/encryption/encryption.h>

using namespace parquet;

Expand Down
2 changes: 1 addition & 1 deletion cpp/FileEncryptionPropertiesBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "CString.h"
#include "ExceptionInfo.h"

#include <parquet/encryption.h>
#include <parquet/encryption/encryption.h>

using namespace parquet;

Expand Down
3 changes: 2 additions & 1 deletion cpp/KeyValueMetadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "cpp/ParquetSharpExport.h"
#include "ExceptionInfo.h"

#include <arrow/util/key_value_metadata.h>
#include <parquet/metadata.h>

using namespace parquet;
Expand Down Expand Up @@ -76,7 +77,7 @@ extern "C"

PARQUETSHARP_EXPORT void KeyValueMetadata_Free_Entries(const std::shared_ptr<const KeyValueMetadata>* key_value_metadata, const char** keys, const char** values)
{
int64_t size = (*key_value_metadata)->size();
const int64_t size = (*key_value_metadata)->size();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change is unrelated to the Arrow 4.0.0 upgrade: `size` is never modified, so declaring it `const` is cleaner.


for (int i = 0; i != size; ++i)
{
Expand Down
5 changes: 0 additions & 5 deletions cpp/LogicalType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,6 @@ extern "C"
TRYCATCH(*logical_type = new std::shared_ptr<const LogicalType>(LogicalType::None());)
}

PARQUETSHARP_EXPORT ExceptionInfo* LogicalType_Unknown(const std::shared_ptr<const LogicalType>** logical_type)
{
TRYCATCH(*logical_type = new std::shared_ptr<const LogicalType>(LogicalType::Unknown());)
}

Comment on lines -114 to -118
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In Arrow 4.0.0, UNKNOWN has been renamed to UNDEFINED and is not a real logical type, so the `LogicalType_Unknown` export is removed.

// Typed properties
PARQUETSHARP_EXPORT ExceptionInfo* DecimalLogicalType_Precision(const std::shared_ptr<const DecimalLogicalType>* logical_type, int32_t* precision)
{
Expand Down
2 changes: 1 addition & 1 deletion cpp/ManagedAadPrefixVerifier.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

#pragma once

#include <parquet/encryption.h>
#include <parquet/encryption/encryption.h>

using namespace parquet;

Expand Down
4 changes: 2 additions & 2 deletions cpp/ManagedDecryptionKeyRetriever.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

#pragma once

#include <parquet/encryption.h>
#include <parquet/encryption/encryption.h>
#include <stdexcept>

using namespace parquet;
Expand Down Expand Up @@ -35,7 +35,7 @@ class ManagedDecryptionKeyRetriever final : public DecryptionKeyRetriever
free_gc_handle_(Handle);
}

std::string GetKey(const std::string& key_metadata) const override
std::string GetKey(const std::string& key_metadata) override
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Method signature changed in Arrow 4.0.0.

{
const char* exception = nullptr;
AesKey key;
Expand Down
26 changes: 0 additions & 26 deletions cpp/TypedColumnReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,32 +29,6 @@ extern "C"
values_read);) \
} \
\
PARQUETSHARP_EXPORT ExceptionInfo* TypedColumnReader_ReadBatchSpaced_##ParquetType( \
std::shared_ptr<ColumnReader>* columnReader, \
int64_t batch_size, \
int16_t* def_levels, \
int16_t* rep_levels, \
NativeType* values, \
uint8_t* valid_bits, \
int64_t valid_bits_offset, \
int64_t* levels_read, \
int64_t* values_read, \
int64_t* null_count, \
int64_t* return_value) \
{ \
TRYCATCH( \
*levels_read = static_cast<ParquetType##Reader&>(**columnReader).ReadBatchSpaced( \
batch_size, \
def_levels, \
rep_levels, \
values, \
valid_bits, \
valid_bits_offset, \
levels_read, \
values_read, \
null_count);) \
} \
\
Comment on lines -32 to -57
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ReadBatchSpaced has been deprecated in Arrow 4.0.0 because it was buggy and no one was using it.
We are simply removing it altogether, assuming no one was using it in ParquetSharp either.

PARQUETSHARP_EXPORT ExceptionInfo* TypedColumnReader_Skip_##ParquetType( \
std::shared_ptr<ColumnReader>* columnReader, \
int64_t num_rows_to_skip, \
Expand Down
4 changes: 2 additions & 2 deletions csharp.test/TestPhysicalTypeRoundtrip.cs
Original file line number Diff line number Diff line change
Expand Up @@ -118,14 +118,14 @@ private static void AssertReadRoundtrip(ResizableBuffer buffer, ExpectedColumn[]

var numRows = expectedColumns.First().Values.Length;

Assert.AreEqual("parquet-cpp version 1.5.1-SNAPSHOT", fileMetaData.CreatedBy);
Assert.AreEqual("parquet-cpp-arrow version 4.0.0", fileMetaData.CreatedBy);
Assert.AreEqual(new Dictionary<string, string> {{"case", "Test"}, {"Awesome", "true"}}, fileMetaData.KeyValueMetadata);
Assert.AreEqual(expectedColumns.Length, fileMetaData.NumColumns);
Assert.AreEqual(numRows, fileMetaData.NumRows);
Assert.AreEqual(1, fileMetaData.NumRowGroups);
Assert.AreEqual(1 + expectedColumns.Length, fileMetaData.NumSchemaElements);
Assert.AreEqual(ParquetVersion.PARQUET_1_0, fileMetaData.Version);
Assert.AreEqual("parquet-cpp version 1.5.1", fileMetaData.WriterVersion.ToString());
Assert.AreEqual("parquet-cpp-arrow version 4.0.0", fileMetaData.WriterVersion.ToString());

using var rowGroupReader = fileReader.RowGroup(0);
var rowGroupMetaData = rowGroupReader.MetaData;
Expand Down
2 changes: 1 addition & 1 deletion csharp.test/TestWriterProperties.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public static void TestDefaultProperties()
{
var p = WriterProperties.GetDefaultWriterProperties();

Assert.AreEqual("parquet-cpp version 1.5.1-SNAPSHOT", p.CreatedBy);
Assert.AreEqual("parquet-cpp-arrow version 4.0.0", p.CreatedBy);
Assert.AreEqual(Compression.Uncompressed, p.Compression(new ColumnPath("anypath")));
Assert.AreEqual(int.MinValue, p.CompressionLevel(new ColumnPath("anypath")));
Assert.AreEqual(1024*1024, p.DataPageSize);
Expand Down
Loading