diff --git a/.github/ubsan-suppressions.txt b/.github/ubsan-suppressions.txt new file mode 100644 index 0000000000..8987a43145 --- /dev/null +++ b/.github/ubsan-suppressions.txt @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# Add specific undefined behavior suppressions here if needed +# Format: +# symbol:SymbolName +# src:source_file.cc \ No newline at end of file diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index a74ec3716a..5c3ec960f2 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -46,6 +46,7 @@ jobs: os: - debian11 - debian12 + - debian13 - ubuntu24 - oraclelinux8 - oraclelinux9 @@ -68,7 +69,6 @@ jobs: - ubuntu-22.04 - ubuntu-24.04 - ubuntu-24.04-arm - - macos-13 - macos-14 - macos-15 java: @@ -81,7 +81,9 @@ jobs: java: 17 cxx: g++ - os: ubuntu-latest - java: 25-ea + java: 25 + - os: macos-26 + java: 21 env: MAVEN_OPTS: -Xmx2g MAVEN_SKIP_RC: true @@ -97,7 +99,7 @@ jobs: - name: "Test" run: | mkdir -p ~/.m2 - if [ "${{ matrix.java }}" = "25-ea" ]; then + if [ "${{ matrix.java }}" = "25" ]; then cd java # JDK 25 Build ./mvnw package -DskipTests @@ -173,7 +175,7 @@ jobs: mkdir build && cd build cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DBUILD_JAVA=OFF cmake --build . - - uses: cpp-linter/cpp-linter-action@v2.13.3 + - uses: cpp-linter/cpp-linter-action@f91c446a32ae3eb9f98fef8c9ed4c7cb613a4f8a id: linter continue-on-error: true env: @@ -210,7 +212,7 @@ jobs: strategy: fail-fast: false matrix: - version: [13, 14, 15] + version: [14, 15] runs-on: macos-${{ matrix.version }} steps: - name: Checkout repository @@ -237,7 +239,6 @@ jobs: - ubuntu-22.04 - ubuntu-24.04 - ubuntu-24.04-arm - - macos-13 - macos-14 - macos-15 steps: diff --git a/.github/workflows/asan_test.yml b/.github/workflows/sanitizer_test.yml similarity index 76% rename from .github/workflows/asan_test.yml rename to .github/workflows/sanitizer_test.yml index a02522f9ef..6eee91a55a 100644 --- a/.github/workflows/asan_test.yml +++ b/.github/workflows/sanitizer_test.yml @@ -30,8 +30,8 @@ concurrency: cancel-in-progress: true jobs: - asan-test: - name: "ASAN with ${{ matrix.compiler }} on Ubuntu" + sanitizer-test: + name: "Sanitizer with ${{ matrix.compiler }} on Ubuntu" runs-on: ubuntu-latest strategy: fail-fast: false @@ -58,12 +58,18 @@ jobs: run: | mkdir -p build && cd build cmake .. -DCMAKE_BUILD_TYPE=Debug -DENABLE_ASAN=ON -DENABLE_UBSAN=ON -DBUILD_ENABLE_AVX512=ON -DBUILD_CPP_ENABLE_METRICS=ON -DBUILD_JAVA=OFF - make + cmake --build . --verbose - name: Run Tests working-directory: build env: - ASAN_OPTIONS: detect_leaks=1:symbolize=1:strict_string_checks=1:halt_on_error=0:detect_container_overflow=0 + ASAN_OPTIONS: log_path=out.log:detect_leaks=1:symbolize=1:strict_string_checks=1:halt_on_error=1:detect_container_overflow=0 LSAN_OPTIONS: suppressions=${{ github.workspace }}/.github/lsan-suppressions.txt - UBSAN_OPTIONS: print_stacktrace=1 + UBSAN_OPTIONS: log_path=out.log:halt_on_error=1:print_stacktrace=1:suppressions=${{ github.workspace }}/.github/ubsan-suppressions.txt run: | - ctest --output-on-failure + ctest -V --output-on-failure + - name: Save the test output + if: always() + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: test-output + path: "**/out.log*" \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 72a5b2efc6..341a4c3796 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,7 @@ project(ORC C CXX) # Version number of package SET(CPACK_PACKAGE_VERSION_MAJOR "2") SET(CPACK_PACKAGE_VERSION_MINOR "2") -SET(CPACK_PACKAGE_VERSION_PATCH "0") +SET(CPACK_PACKAGE_VERSION_PATCH "1") SET(ORC_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}") set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # For clang-tidy. list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules") @@ -129,9 +129,9 @@ endif() # We require a C++17 compliant compiler set(CMAKE_CXX_STANDARD_REQUIRED ON) if (NOT MSVC) - set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fno-omit-frame-pointer") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG -fno-omit-frame-pointer") - set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") + set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fno-omit-frame-pointer -Wno-unknown-warning-option -Wno-stringop-overread -Wno-array-bounds") + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG -fno-omit-frame-pointer -Wno-unknown-warning-option -Wno-stringop-overread -Wno-array-bounds") + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -Wno-unknown-warning-option -Wno-stringop-overread -Wno-array-bounds") endif () message(STATUS "compiler ${CMAKE_CXX_COMPILER_ID} version ${CMAKE_CXX_COMPILER_VERSION}") if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") @@ -181,15 +181,23 @@ endif() # Configure Undefined Behavior Sanitizer if enabled if (ENABLE_UBSAN) - if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr,function -fno-sanitize-recover=all") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr,function -fno-sanitize-recover=all") + if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr -fno-sanitize-recover=all") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr -fno-sanitize-recover=all") message(STATUS "Undefined Behavior Sanitizer enabled") else() message(WARNING "Undefined Behavior Sanitizer is only supported for GCC and Clang compilers") endif() endif() +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (ENABLE_ASAN OR ENABLE_UBSAN) + set(CMAKE_THREAD_LIBS_INIT "-lpthread") + set(THREADS_PREFER_PTHREAD_FLAG ON) + message(STATUS "Forcing pthread linking for GCC with sanitizers") + endif() +endif() + enable_testing() INCLUDE(GNUInstallDirs) # Put it before ThirdpartyToolchain to make CMAKE_INSTALL_LIBDIR available. diff --git a/README.md b/README.md index 2ddf0849b9..2520e213bd 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ The subdirectories are: * Install java 17 or higher * Install maven 3.9.9 or higher * Install cmake 3.12 or higher +* Install meson 1.3.0 or higher (Optional) To build a release version with debug information: diff --git a/c++/src/Compression.cc b/c++/src/Compression.cc index f373a75bff..b552324ce3 100644 --- a/c++/src/Compression.cc +++ b/c++/src/Compression.cc @@ -1156,8 +1156,13 @@ namespace orc { }; uint64_t ZSTDCompressionStream::doBlockCompression() { - return ZSTD_compressCCtx(cctx_, compressorBuffer.data(), compressorBuffer.size(), - rawInputBuffer.data(), static_cast(bufferSize), level); + auto ret = ZSTD_compressCCtx(cctx_, compressorBuffer.data(), compressorBuffer.size(), + rawInputBuffer.data(), static_cast(bufferSize), level); + if (ZSTD_isError(ret)) { + throw CompressionError(std::string("Error while calling ZSTD_compressCCtx(), error: ") + + ZSTD_getErrorName(ret)); + } + return ret; } DIAGNOSTIC_PUSH @@ -1213,8 +1218,12 @@ namespace orc { uint64_t ZSTDDecompressionStream::decompress(const char* inputPtr, uint64_t length, char* output, size_t maxOutputLength) { - return static_cast( - ZSTD_decompressDCtx(dctx_, output, maxOutputLength, inputPtr, length)); + auto ret = ZSTD_decompressDCtx(dctx_, output, maxOutputLength, inputPtr, length); + if (ZSTD_isError(ret)) { + throw CompressionError(std::string("Error while calling ZSTD_decompressDCtx(), error: ") + + ZSTD_getErrorName(ret)); + } + return static_cast(ret); } DIAGNOSTIC_PUSH diff --git a/c++/src/Geospatial.cc b/c++/src/Geospatial.cc index 6d7d268703..2b110cacb6 100644 --- a/c++/src/Geospatial.cc +++ b/c++/src/Geospatial.cc @@ -66,8 +66,8 @@ namespace orc::geospatial { #if defined(_MSC_VER) #include // IWYU pragma: keep -#define ORC_BYTE_SWAP64 _byteSwap_uint64 -#define ORC_BYTE_SWAP32 _byteSwap_ulong +#define ORC_BYTE_SWAP64 _byteswap_uint64 +#define ORC_BYTE_SWAP32 _byteswap_ulong #else #define ORC_BYTE_SWAP64 __builtin_bswap64 #define ORC_BYTE_SWAP32 __builtin_bswap32 diff --git a/c++/src/Timezone.cc b/c++/src/Timezone.cc index 384f8ea99f..bc56efa0dd 100644 --- a/c++/src/Timezone.cc +++ b/c++/src/Timezone.cc @@ -33,8 +33,24 @@ namespace orc { // default location of the timezone files static const char DEFAULT_TZDIR[] = "/usr/share/zoneinfo"; - // location of a symlink to the local timezone - static const char LOCAL_TIMEZONE[] = "/etc/localtime"; + // location of a symlink to the local timezone is /etc/localtime + static const char LOCAL_TIMEZONE_DIR[] = "/etc"; + static const char LOCAL_TIMEZONE[] = "localtime"; + + // US aliases from https://data.iana.org/time-zones/tzdb/backward + static const std::map TZ_ALIASES = { + {"US/Alaska", "America/Anchorage"}, + {"US/Aleutian", "America/Adak"}, + {"US/Arizona", "America/Phoenix"}, + {"US/Central", "America/Chicago"}, + {"US/East-Indiana", "America/Indiana/Indianapolis"}, + {"US/Eastern", "America/New_York"}, + {"US/Hawaii", "Pacific/Honolulu"}, + {"US/Indiana-Starke", "America/Indiana/Knox"}, + {"US/Michigan", "America/Detroit"}, + {"US/Mountain", "America/Denver"}, + {"US/Pacific", "America/Los_Angeles"}, + {"US/Samoa", "Pacific/Pago_Pago"}}; enum TransitionKind { TRANSITION_JULIAN, TRANSITION_DAY, TRANSITION_MONTH }; @@ -734,14 +750,26 @@ namespace orc { * Get a timezone by absolute filename. * Results are cached. */ - const Timezone& getTimezoneByFilename(const std::string& filename) { + const Timezone& getTimezoneByFilename(const std::string& dir, const std::string& zone) { + std::string filename(dir); + filename += "/"; + filename += zone; // ORC-110 std::lock_guard timezone_lock(timezone_mutex); std::map >::iterator itr = timezoneCache.find(filename); if (itr != timezoneCache.end()) { return *(itr->second).get(); } - timezoneCache[filename] = std::make_shared(filename); + auto it = TZ_ALIASES.find(zone); + if (it == TZ_ALIASES.end()) { + timezoneCache[filename] = std::make_shared(filename); + } else { + std::string newfilename(dir); + newfilename += "/"; + newfilename += it->second; + timezoneCache[newfilename] = std::make_shared(newfilename); + timezoneCache[filename] = timezoneCache[newfilename]; + } return *timezoneCache[filename].get(); } @@ -752,7 +780,7 @@ namespace orc { #ifdef _MSC_VER return getTimezoneByName("UTC"); #else - return getTimezoneByFilename(LOCAL_TIMEZONE); + return getTimezoneByFilename(LOCAL_TIMEZONE_DIR, LOCAL_TIMEZONE); #endif } @@ -761,10 +789,7 @@ namespace orc { * Results are cached. */ const Timezone& getTimezoneByName(const std::string& zone) { - std::string filename(getTimezoneDirectory()); - filename += "/"; - filename += zone; - return getTimezoneByFilename(filename); + return getTimezoneByFilename(getTimezoneDirectory(), zone); } /** diff --git a/c++/src/io/InputStream.cc b/c++/src/io/InputStream.cc index 06ef40bd4c..5e1dc00ccd 100644 --- a/c++/src/io/InputStream.cc +++ b/c++/src/io/InputStream.cc @@ -112,8 +112,8 @@ namespace orc { return false; } - google::protobuf::int64 SeekableArrayInputStream::ByteCount() const { - return static_cast(position_); + int64_t SeekableArrayInputStream::ByteCount() const { + return static_cast(position_); } void SeekableArrayInputStream::seek(PositionProvider& seekPosition) { diff --git a/c++/src/io/InputStream.hh b/c++/src/io/InputStream.hh index 07aa623b5f..8b251c9301 100644 --- a/c++/src/io/InputStream.hh +++ b/c++/src/io/InputStream.hh @@ -72,7 +72,7 @@ namespace orc { virtual bool Next(const void** data, int* size) override; virtual void BackUp(int count) override; virtual bool Skip(int count) override; - virtual google::protobuf::int64 ByteCount() const override; + virtual int64_t ByteCount() const override; virtual void seek(PositionProvider& position) override; virtual std::string getName() const override; }; diff --git a/c++/src/io/OutputStream.cc b/c++/src/io/OutputStream.cc index fbf1ca61dd..a55050d122 100644 --- a/c++/src/io/OutputStream.cc +++ b/c++/src/io/OutputStream.cc @@ -65,8 +65,8 @@ namespace orc { // PASS } - google::protobuf::int64 BufferedOutputStream::ByteCount() const { - return static_cast(dataBuffer_->size()); + int64_t BufferedOutputStream::ByteCount() const { + return static_cast(dataBuffer_->size()); } bool BufferedOutputStream::WriteAliasedRaw(const void*, int) { diff --git a/c++/src/io/OutputStream.hh b/c++/src/io/OutputStream.hh index 6319de96d6..b029818125 100644 --- a/c++/src/io/OutputStream.hh +++ b/c++/src/io/OutputStream.hh @@ -61,7 +61,7 @@ namespace orc { virtual bool Next(void** data, int* size) override; virtual void BackUp(int count) override; - virtual google::protobuf::int64 ByteCount() const override; + virtual int64_t ByteCount() const override; virtual bool WriteAliasedRaw(const void* data, int size) override; virtual bool AllowsAliasing() const override; diff --git a/c++/test/TestCompression.cc b/c++/test/TestCompression.cc index e95a6f0169..6a5c4a8565 100644 --- a/c++/test/TestCompression.cc +++ b/c++/test/TestCompression.cc @@ -60,6 +60,7 @@ namespace orc { ++pos; } } + EXPECT_EQ(size, pos); } void compressAndVerify(CompressionKind kind, OutputStream* outStream, @@ -369,4 +370,31 @@ namespace orc { testSeekDecompressionStream(CompressionKind_LZ4); testSeekDecompressionStream(CompressionKind_SNAPPY); } + + TEST(Compression, ZstdDecompressStreamCorrupted) { + MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE); + MemoryPool* pool = getDefaultPool(); + CompressionKind kind = CompressionKind_ZSTD; + + uint64_t capacity = 1024; + uint64_t block = 128; + + char testData[] = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; + // generate valid compressed data from testData + compressAndVerify(kind, &memStream, CompressionStrategy_SPEED, capacity, block, *pool, testData, + sizeof(testData)); + + // Corrupt the compressed data by flipping the 2nd byte counting from the end + std::string corruptedData(memStream.getData(), memStream.getLength()); + size_t corruptedPos = corruptedData.size() - 2; + corruptedData.at(corruptedPos) ^= 0x1; + + // create a new memStream with the corrupted data + MemoryOutputStream memStream2(DEFAULT_MEM_STREAM_SIZE); + memStream2.write(corruptedData.data(), corruptedData.size()); + + // The corruption shall be detected correctly. + EXPECT_THROW(decompressAndVerify(memStream2, kind, testData, sizeof(testData), *pool, capacity), + CompressionError); + } } // namespace orc diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake index c77d3f1f51..851cdc4c22 100644 --- a/cmake_modules/ThirdpartyToolchain.cmake +++ b/cmake_modules/ThirdpartyToolchain.cmake @@ -19,11 +19,11 @@ set(ORC_VENDOR_DEPENDENCIES) set(ORC_SYSTEM_DEPENDENCIES) set(ORC_INSTALL_INTERFACE_TARGETS) -set(ORC_FORMAT_VERSION "1.1.0") +set(ORC_FORMAT_VERSION "1.1.1") set(LZ4_VERSION "1.10.0") set(SNAPPY_VERSION "1.2.2") set(ZLIB_VERSION "1.3.1") -set(GTEST_VERSION "1.12.1") +set(GTEST_VERSION "1.17.0") set(PROTOBUF_VERSION "3.5.1") set(ZSTD_VERSION "1.5.7") set(SPARSEHASH_VERSION "2.11.1") @@ -146,7 +146,7 @@ else() endif() ExternalProject_Add (orc-format_ep URL ${ORC_FORMAT_SOURCE_URL} - URL_HASH SHA256=d4a7ac76c5442abf7119e2cb84e71b677e075aff53518aa866055e2ead0450d7 + URL_HASH SHA256=584dfe2a4202946178fd8fc7d1239be7805b9ed4596ab2042dee739e7880992b CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" @@ -441,7 +441,7 @@ if (BUILD_CPP_TESTS) set(GTEST_INCLUDE_DIR "${GTEST_PREFIX}/include") set(GMOCK_STATIC_LIB "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gmock${CMAKE_STATIC_LIBRARY_SUFFIX}") set(GTEST_STATIC_LIB "${GTEST_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}gtest${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(GTEST_SRC_URL "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz") + set(GTEST_SRC_URL "https://github.com/google/googletest/archive/refs/tags/v${GTEST_VERSION}.tar.gz") if(APPLE) set(GTEST_CMAKE_CXX_FLAGS " -DGTEST_USE_OWN_TR1_TUPLE=1 -Wno-unused-value -Wno-ignored-attributes") else() diff --git a/docker/README.md b/docker/README.md index 2247cea966..f475022d0a 100644 --- a/docker/README.md +++ b/docker/README.md @@ -2,7 +2,7 @@ ## Supported OSes -* Debian 11 and 12 +* Debian 11, 12, and 13 * Fedora 37 * Ubuntu 22 and 24 * Oracle Linux 9 diff --git a/docker/debian13/Dockerfile b/docker/debian13/Dockerfile new file mode 100644 index 0000000000..859f9b1d52 --- /dev/null +++ b/docker/debian13/Dockerfile @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ORC compile for Debian 13 +# + +FROM debian:13.1 +LABEL org.opencontainers.image.authors="Apache ORC project " +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.ref.name="Apache ORC on Debian 13" +LABEL org.opencontainers.image.version="" +ARG jdk=21 + +RUN apt-get update +RUN apt-get install -y \ + cmake \ + gcc \ + g++ \ + git \ + libsasl2-dev \ + libssl-dev \ + make \ + curl \ + maven \ + openjdk-${jdk}-jdk + +WORKDIR /root + +VOLUME /root/.m2/repository + +CMD if [ ! -d orc ]; then \ + echo "No volume provided, building from apache main."; \ + echo "Pass '-v`pwd`:/root/orc' to docker run to build local source."; \ + git clone https://github.com/apache/orc.git -b main; \ + fi && \ + mkdir build && \ + cd build && \ + cmake ../orc && \ + make package test-out diff --git a/docker/os-list.txt b/docker/os-list.txt index e138aaf493..65ff27719e 100644 --- a/docker/os-list.txt +++ b/docker/os-list.txt @@ -1,5 +1,6 @@ debian11 debian12 +debian13 ubuntu22 ubuntu24 oraclelinux8 diff --git a/java/bench/core/pom.xml b/java/bench/core/pom.xml index c85805114e..fe132d3b28 100644 --- a/java/bench/core/pom.xml +++ b/java/bench/core/pom.xml @@ -17,7 +17,7 @@ org.apache.orc orc-benchmarks - 2.2.0 + 2.2.1 .. diff --git a/java/bench/hive/pom.xml b/java/bench/hive/pom.xml index 64f7d74cc9..994f698843 100644 --- a/java/bench/hive/pom.xml +++ b/java/bench/hive/pom.xml @@ -17,7 +17,7 @@ org.apache.orc orc-benchmarks - 2.2.0 + 2.2.1 .. diff --git a/java/bench/pom.xml b/java/bench/pom.xml index 7d5f6f9478..12f1ee159c 100644 --- a/java/bench/pom.xml +++ b/java/bench/pom.xml @@ -17,7 +17,7 @@ org.apache.orc orc - 2.2.0 + 2.2.1 ../pom.xml @@ -34,14 +34,14 @@ 1.12.0 - 4.0.1 + 4.1.0 1.37 - 5.12.2 + 5.13.4 ${project.version} - 1.15.2 + 1.16.0 2.13 2.13.16 - 4.0.0 + 4.0.1 @@ -73,7 +73,7 @@ com.google.code.gson gson - 2.13.0 + 2.13.2 com.google.guava @@ -82,7 +82,7 @@ commons-cli commons-cli - 1.9.0 + ${commons-cli.version} io.airlift @@ -108,7 +108,7 @@ org.apache.commons commons-csv - 1.14.0 + 1.14.1 org.apache.hadoop diff --git a/java/bench/spark/pom.xml b/java/bench/spark/pom.xml index 9807b310bb..4a119b093d 100644 --- a/java/bench/spark/pom.xml +++ b/java/bench/spark/pom.xml @@ -17,7 +17,7 @@ org.apache.orc orc-benchmarks - 2.2.0 + 2.2.1 .. diff --git a/java/core/pom.xml b/java/core/pom.xml index e293805685..456874b5b6 100644 --- a/java/core/pom.xml +++ b/java/core/pom.xml @@ -17,7 +17,7 @@ org.apache.orc orc - 2.2.0 + 2.2.1 ../pom.xml diff --git a/java/core/src/java/org/apache/orc/DateColumnStatistics.java b/java/core/src/java/org/apache/orc/DateColumnStatistics.java index 564037645b..f6d22af28b 100644 --- a/java/core/src/java/org/apache/orc/DateColumnStatistics.java +++ b/java/core/src/java/org/apache/orc/DateColumnStatistics.java @@ -54,6 +54,7 @@ public interface DateColumnStatistics extends ColumnStatistics { * @return minimum value * @deprecated Use #getMinimumLocalDate instead */ + @Deprecated Date getMinimum(); /** @@ -61,5 +62,6 @@ public interface DateColumnStatistics extends ColumnStatistics { * @return maximum value * @deprecated Use #getMaximumLocalDate instead */ + @Deprecated Date getMaximum(); } diff --git a/java/core/src/java/org/apache/orc/FileMetadata.java b/java/core/src/java/org/apache/orc/FileMetadata.java index f8b054fb03..9a927b94e1 100644 --- a/java/core/src/java/org/apache/orc/FileMetadata.java +++ b/java/core/src/java/org/apache/orc/FileMetadata.java @@ -26,6 +26,7 @@ * ORC stop depending on them too. Luckily, they shouldn't be very big. * @deprecated Use {@link org.apache.orc.impl.OrcTail} instead */ +@Deprecated public interface FileMetadata { boolean isOriginalFormat(); diff --git a/java/core/src/java/org/apache/orc/MemoryManager.java b/java/core/src/java/org/apache/orc/MemoryManager.java index 40eb19cbcf..fa478b477a 100644 --- a/java/core/src/java/org/apache/orc/MemoryManager.java +++ b/java/core/src/java/org/apache/orc/MemoryManager.java @@ -65,6 +65,7 @@ void addWriter(Path path, long requestedAllocation, * @throws IOException * @deprecated Use {@link MemoryManager#checkMemory} instead */ + @Deprecated void addedRow(int rows) throws IOException; /** diff --git a/java/core/src/java/org/apache/orc/OrcFile.java b/java/core/src/java/org/apache/orc/OrcFile.java index 160aaf1f9e..2daba452c6 100644 --- a/java/core/src/java/org/apache/orc/OrcFile.java +++ b/java/core/src/java/org/apache/orc/OrcFile.java @@ -357,6 +357,7 @@ public KeyProvider getKeyProvider() { /** * @deprecated Use {@link #orcTail(OrcTail)} instead. */ + @Deprecated public ReaderOptions fileMetadata(final FileMetadata metadata) { fileMetadata = metadata; return this; diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java index aac9bd77f0..4c7218f502 100644 --- a/java/core/src/java/org/apache/orc/Reader.java +++ b/java/core/src/java/org/apache/orc/Reader.java @@ -145,6 +145,7 @@ public interface Reader extends Closeable { * @deprecated use getSchema instead * @since 1.1.0 */ + @Deprecated List getTypes(); /** @@ -756,6 +757,7 @@ public Options rowBatchSize(int value) { * @deprecated Use {@link #getStripeStatistics()} instead. * @since 1.1.0 */ + @Deprecated List getOrcProtoStripeStatistics(); /** @@ -779,6 +781,7 @@ public Options rowBatchSize(int value) { * @deprecated Use {@link #getStatistics()} instead. * @since 1.1.0 */ + @Deprecated List getOrcProtoFileStatistics(); /** diff --git a/java/core/src/java/org/apache/orc/Writer.java b/java/core/src/java/org/apache/orc/Writer.java index b63c9c86d9..7e06212394 100644 --- a/java/core/src/java/org/apache/orc/Writer.java +++ b/java/core/src/java/org/apache/orc/Writer.java @@ -139,6 +139,7 @@ void appendStripe(byte[] stripe, int offset, int length, * @deprecated use {@link #addUserMetadata(String, ByteBuffer)} instead * @since 1.1.0 */ + @Deprecated void appendUserMetadata(List userMetadata); /** diff --git a/java/core/src/java/org/apache/orc/impl/MemoryManagerImpl.java b/java/core/src/java/org/apache/orc/impl/MemoryManagerImpl.java index 0a1f8efae5..cb06e7fd95 100644 --- a/java/core/src/java/org/apache/orc/impl/MemoryManagerImpl.java +++ b/java/core/src/java/org/apache/orc/impl/MemoryManagerImpl.java @@ -134,6 +134,7 @@ public void addedRow(int rows) throws IOException { * Obsolete method left for Hive, which extends this class. * @deprecated remove this method */ + @Deprecated public void notifyWriters() throws IOException { // PASS } diff --git a/java/core/src/java/org/apache/orc/impl/OrcTail.java b/java/core/src/java/org/apache/orc/impl/OrcTail.java index 9297f953c4..be0d8ffd3b 100644 --- a/java/core/src/java/org/apache/orc/impl/OrcTail.java +++ b/java/core/src/java/org/apache/orc/impl/OrcTail.java @@ -207,6 +207,7 @@ public OrcProto.FileTail getMinimalFileTail() { * @return the stripe statistics * @deprecated the user should use Reader.getStripeStatistics instead. */ + @Deprecated public List getStripeStatistics() throws IOException { if (reader == null) { LOG.warn("Please use Reader.getStripeStatistics or give `Reader` to OrcTail constructor."); diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java index 9e018157f6..f1c6b146bd 100644 --- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java +++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java @@ -480,6 +480,7 @@ protected static void ensureOrcFooter(FSDataInputStream in, * @param buffer the tail of the file * @deprecated Use {@link ReaderImpl#ensureOrcFooter(FSDataInputStream, Path, int, ByteBuffer)} instead. */ + @Deprecated protected static void ensureOrcFooter(ByteBuffer buffer, int psLen) throws IOException { int magicLength = OrcFile.MAGIC.length(); int fullLength = magicLength + 1; @@ -717,6 +718,7 @@ private static void read(FSDataInputStream file, * @deprecated Use {@link ReaderImpl#extractFileTail(FileSystem, Path, long)} instead. * This is for backward compatibility. */ + @Deprecated public static OrcTail extractFileTail(ByteBuffer buffer) throws IOException { return extractFileTail(buffer, -1,-1); @@ -738,6 +740,7 @@ public static int getCompressionBlockSize(OrcProto.PostScript postScript) { * @deprecated Use {@link ReaderImpl#extractFileTail(FileSystem, Path, long)} instead. * This is for backward compatibility. */ + @Deprecated public static OrcTail extractFileTail(ByteBuffer buffer, long fileLen, long modificationTime) throws IOException { OrcProto.PostScript ps; diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java index 5bd9809253..b2120db86c 100644 --- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java +++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java @@ -1119,6 +1119,7 @@ public static class SargApplier { /** * @deprecated Use the constructor having full parameters. This exists for backward compatibility. */ + @Deprecated public SargApplier(SearchArgument sarg, long rowIndexStride, SchemaEvolution evolution, diff --git a/java/core/src/test/org/apache/orc/impl/TestSerializationUtils.java b/java/core/src/test/org/apache/orc/impl/TestSerializationUtils.java index 206306513a..cda818893d 100644 --- a/java/core/src/test/org/apache/orc/impl/TestSerializationUtils.java +++ b/java/core/src/test/org/apache/orc/impl/TestSerializationUtils.java @@ -135,21 +135,21 @@ public void testSubtractionOverflowGuava() { LongMath.checkedSubtract(22222222222L, Long.MIN_VALUE); fail("expected ArithmeticException for overflow"); } catch (ArithmeticException ex) { - assertEquals("overflow: checkedSubtract(22222222222, -9223372036854775808)", ex.getMessage()); + assertEquals("long overflow", ex.getMessage()); } try { LongMath.checkedSubtract(-22222222222L, Long.MAX_VALUE); fail("expected ArithmeticException for overflow"); } catch (ArithmeticException ex) { - assertEquals("overflow: checkedSubtract(-22222222222, 9223372036854775807)", ex.getMessage()); + assertEquals("long overflow", ex.getMessage()); } try { LongMath.checkedSubtract(Long.MIN_VALUE, Long.MAX_VALUE); fail("expected ArithmeticException for overflow"); } catch (ArithmeticException ex) { - assertEquals("overflow: checkedSubtract(-9223372036854775808, 9223372036854775807)", ex.getMessage()); + assertEquals("long overflow", ex.getMessage()); } assertEquals(-8106206116692740190L, diff --git a/java/examples/pom.xml b/java/examples/pom.xml index ee37c28bdb..5856d7a541 100644 --- a/java/examples/pom.xml +++ b/java/examples/pom.xml @@ -17,7 +17,7 @@ org.apache.orc orc - 2.2.0 + 2.2.1 ../pom.xml diff --git a/java/mapreduce/pom.xml b/java/mapreduce/pom.xml index df30923633..9e674e4626 100644 --- a/java/mapreduce/pom.xml +++ b/java/mapreduce/pom.xml @@ -17,7 +17,7 @@ org.apache.orc orc - 2.2.0 + 2.2.1 ../pom.xml diff --git a/java/pom.xml b/java/pom.xml index 69a7ceeeb2..6112649444 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -21,7 +21,7 @@ org.apache.orc orc - 2.2.0 + 2.2.1 pom Apache ORC @@ -62,24 +62,26 @@ 1.80 1.18.0 - 10.26.1 + 11.0.1 + 1.10.0 ${project.basedir}/../../examples - 3.4.1 + 3.4.2 17 ${project.basedir}/../target/javadoc 1.20.0 - 5.13.1 + 5.13.4 3.7.1 3.8.1 + 3.6.1 3.6.0 17 false 3.9.11 5.10.0 - 1.1.0 + 1.1.1 - 2025-07-26T22:16:01Z + 2025-09-27T18:58:03Z 3.25.8 2.0.17 2.8.1 @@ -99,7 +101,7 @@ org.apache.orc orc-shims - 2.2.0 + 2.2.1 org.apache.hadoop @@ -114,17 +116,17 @@ org.apache.orc orc-core - 2.2.0 + 2.2.1 org.apache.orc orc-mapreduce - 2.2.0 + 2.2.1 org.apache.orc orc-tools - 2.2.0 + 2.2.1 @@ -136,7 +138,7 @@ com.google.code.gson gson - 2.13.0 + 2.13.2 com.google.protobuf @@ -146,7 +148,7 @@ commons-cli commons-cli - 1.9.0 + ${commons-cli.version} org.apache.commons @@ -219,7 +221,7 @@ com.google.guava guava - 33.4.0-jre + 33.5.0-jre test @@ -607,7 +609,7 @@ org.apache.maven.plugins maven-enforcer-plugin - 3.6.0 + ${maven-enforcer-plugin.version} org.codehaus.mojo @@ -716,7 +718,7 @@ false -Xmx2048m -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED --enable-native-access=ALL-UNNAMED -XX:+EnableDynamicAgentLoading - US/Pacific + America/Los_Angeles en_US.UTF-8 false diff --git a/java/shims/pom.xml b/java/shims/pom.xml index 811525e215..f5fa7562bd 100644 --- a/java/shims/pom.xml +++ b/java/shims/pom.xml @@ -17,7 +17,7 @@ org.apache.orc orc - 2.2.0 + 2.2.1 ../pom.xml diff --git a/java/tools/pom.xml b/java/tools/pom.xml index b0aac7dfa0..b60cf07b0d 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -17,7 +17,7 @@ org.apache.orc orc - 2.2.0 + 2.2.1 ../pom.xml diff --git a/java/tools/src/test/resources/orc-file-dump-column-type.out b/java/tools/src/test/resources/orc-file-dump-column-type.out index 73267e7287..f5784d91ce 100644 --- a/java/tools/src/test/resources/orc-file-dump-column-type.out +++ b/java/tools/src/test/resources/orc-file-dump-column-type.out @@ -58,7 +58,7 @@ File Statistics: Column 14: count: 2048 hasNull: false bytesOnDisk: 1277 min: 10000 max: 9999 sum: 10240 Stripes: - Stripe: offset: 3 data: 15540 rows: 2048 tail: 225 index: 464 + Stripe: offset: 3 data: 15540 rows: 2048 tail: 238 index: 464 Stream: column 0 section ROW_INDEX start: 3 length 12 Stream: column 1 section ROW_INDEX start: 15 length 24 Stream: column 2 section ROW_INDEX start: 39 length 28 diff --git a/meson.build b/meson.build index db23c300aa..a88c147537 100644 --- a/meson.build +++ b/meson.build @@ -18,7 +18,7 @@ project( 'orc', 'cpp', - version: '2.2.0-SNAPSHOT', + version: '2.2.1', license: 'Apache-2.0', meson_version: '>=1.3.0', default_options: [ diff --git a/site/_docs/building.md b/site/_docs/building.md index d10be485c2..967cde2e69 100644 --- a/site/_docs/building.md +++ b/site/_docs/building.md @@ -9,8 +9,8 @@ dockerUrl: https://github.com/apache/orc/blob/main/docker The C++ library is supported on the following operating systems: -* MacOS 13 to 15 -* Debian 11 to 12 +* MacOS 14 to 26 +* Debian 11 to 13 * Ubuntu 22.04 to 24.04 * Oracle Linux 8 to 9 * Amazon Linux 2023 @@ -28,6 +28,7 @@ is in the docker subdirectory, for the list of packages required to build ORC: * [Debian 11]({{ page.dockerUrl }}/debian11/Dockerfile) * [Debian 12]({{ page.dockerUrl }}/debian12/Dockerfile) +* [Debian 13]({{ page.dockerUrl }}/debian13/Dockerfile) * [Ubuntu 22]({{ page.dockerUrl }}/ubuntu22/Dockerfile) * [Ubuntu 24]({{ page.dockerUrl }}/ubuntu24/Dockerfile) * [Oracle Linux 8]({{ page.dockerUrl }}/oraclelinux8/Dockerfile) diff --git a/subprojects/gtest.wrap b/subprojects/gtest.wrap index a0e99a1a24..2406c2c1ce 100644 --- a/subprojects/gtest.wrap +++ b/subprojects/gtest.wrap @@ -16,14 +16,15 @@ # under the License. [wrap-file] -directory = googletest-release-1.12.1 -source_url = https://github.com/google/googletest/archive/release-1.12.1.tar.gz -source_filename = gtest-1.12.1.tar.gz -source_hash = 81964fe578e9bd7c94dfdb09c8e4d6e6759e19967e397dbea48d1c10e45d0df2 -patch_filename = gtest_1.12.1-1_patch.zip -patch_url = https://wrapdb.mesonbuild.com/v2/gtest_1.12.1-1/get_patch -patch_hash = 75143f11e174952bc768699fde3176511fe8e33b25dc6f6347d89e41648e99cf -wrapdb_version = 1.12.1-1 +directory = googletest-1.17.0 +source_url = https://github.com/google/googletest/archive/refs/tags/v1.17.0.tar.gz +source_filename = googletest-1.17.0.tar.gz +source_hash = 65fab701d9829d38cb77c14acdc431d2108bfdbf8979e40eb8ae567edf10b27c +patch_filename = gtest_1.17.0-3_patch.zip +patch_url = https://wrapdb.mesonbuild.com/v2/gtest_1.17.0-3/get_patch +patch_hash = 3e2799683f27c6dce138b7bae823416581c467ddde755c9a516c0863225f0ceb +source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/gtest_1.17.0-3/googletest-1.17.0.tar.gz +wrapdb_version = 1.17.0-3 [provide] gtest = gtest_dep diff --git a/tools/src/FileMetadata.cc b/tools/src/FileMetadata.cc index 94b4a678d5..afc46edfe0 100644 --- a/tools/src/FileMetadata.cc +++ b/tools/src/FileMetadata.cc @@ -28,6 +28,7 @@ // #include "Adaptor.hh" #include "wrap/orc-proto-wrapper.hh" +#include void printStripeInformation(std::ostream& out, uint64_t index, uint64_t columns, std::unique_ptr stripe, bool verbose) { @@ -82,7 +83,10 @@ void printRawTail(std::ostream& out, const char* filename) { if (!tail.ParseFromString(reader->getSerializedFileTail())) { throw orc::ParseError("Failed to parse the file tail from string"); } - out << tail.DebugString(); + google::protobuf::TextFormat::Printer printer; + std::string text_output; + printer.PrintToString(tail, &text_output); + out << text_output; } void printAttributes(std::ostream& out, const orc::Type& type, const std::string& name,