Skip to content

Commit 0389645

Browse files
authored
GH-47905: [C++][Parquet] MakeColumnStats should use user-provided memory pool (#47894)
### Rationale for this change

Looks like we forgot to pass the user-provided memory pool in `ColumnChunkMetaData::statistics`.

### What changes are included in this PR?

Pass the memory pool from `ReaderProperties`.

### Are these changes tested?

By existing tests.

### Are there any user-facing changes?

No.

* GitHub Issue: #47905

Authored-by: Smith Cruise <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
1 parent 25b351f commit 0389645

File tree

1 file changed

+16
-13
lines changed

1 file changed

+16
-13
lines changed

cpp/src/parquet/metadata.cc

Lines changed: 16 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,8 @@ std::string ParquetVersionToString(ParquetVersion::type ver) {
9191

9292
template <typename DType>
9393
static std::shared_ptr<Statistics> MakeTypedColumnStats(
94-
const format::ColumnMetaData& metadata, const ColumnDescriptor* descr) {
94+
const format::ColumnMetaData& metadata, const ColumnDescriptor* descr,
95+
::arrow::MemoryPool* pool) {
9596
std::optional<bool> min_exact =
9697
metadata.statistics.__isset.is_min_value_exact
9798
? std::optional<bool>(metadata.statistics.is_min_value_exact)
@@ -108,7 +109,7 @@ static std::shared_ptr<Statistics> MakeTypedColumnStats(
108109
metadata.statistics.null_count, metadata.statistics.distinct_count,
109110
metadata.statistics.__isset.max_value && metadata.statistics.__isset.min_value,
110111
metadata.statistics.__isset.null_count,
111-
metadata.statistics.__isset.distinct_count, min_exact, max_exact);
112+
metadata.statistics.__isset.distinct_count, min_exact, max_exact, pool);
112113
}
113114
// Default behavior
114115
return MakeStatistics<DType>(
@@ -117,7 +118,7 @@ static std::shared_ptr<Statistics> MakeTypedColumnStats(
117118
metadata.statistics.null_count, metadata.statistics.distinct_count,
118119
metadata.statistics.__isset.max && metadata.statistics.__isset.min,
119120
metadata.statistics.__isset.null_count, metadata.statistics.__isset.distinct_count,
120-
min_exact, max_exact);
121+
min_exact, max_exact, pool);
121122
}
122123

123124
namespace {
@@ -134,7 +135,8 @@ std::shared_ptr<geospatial::GeoStatistics> MakeColumnGeometryStats(
134135
}
135136

136137
std::shared_ptr<Statistics> MakeColumnStats(const format::ColumnMetaData& meta_data,
137-
const ColumnDescriptor* descr) {
138+
const ColumnDescriptor* descr,
139+
::arrow::MemoryPool* pool) {
138140
auto metadata_type = LoadEnumSafe(&meta_data.type);
139141
if (descr->physical_type() != metadata_type) {
140142
throw ParquetException(
@@ -143,21 +145,21 @@ std::shared_ptr<Statistics> MakeColumnStats(const format::ColumnMetaData& meta_d
143145
}
144146
switch (metadata_type) {
145147
case Type::BOOLEAN:
146-
return MakeTypedColumnStats<BooleanType>(meta_data, descr);
148+
return MakeTypedColumnStats<BooleanType>(meta_data, descr, pool);
147149
case Type::INT32:
148-
return MakeTypedColumnStats<Int32Type>(meta_data, descr);
150+
return MakeTypedColumnStats<Int32Type>(meta_data, descr, pool);
149151
case Type::INT64:
150-
return MakeTypedColumnStats<Int64Type>(meta_data, descr);
152+
return MakeTypedColumnStats<Int64Type>(meta_data, descr, pool);
151153
case Type::INT96:
152-
return MakeTypedColumnStats<Int96Type>(meta_data, descr);
154+
return MakeTypedColumnStats<Int96Type>(meta_data, descr, pool);
153155
case Type::DOUBLE:
154-
return MakeTypedColumnStats<DoubleType>(meta_data, descr);
156+
return MakeTypedColumnStats<DoubleType>(meta_data, descr, pool);
155157
case Type::FLOAT:
156-
return MakeTypedColumnStats<FloatType>(meta_data, descr);
158+
return MakeTypedColumnStats<FloatType>(meta_data, descr, pool);
157159
case Type::BYTE_ARRAY:
158-
return MakeTypedColumnStats<ByteArrayType>(meta_data, descr);
160+
return MakeTypedColumnStats<ByteArrayType>(meta_data, descr, pool);
159161
case Type::FIXED_LEN_BYTE_ARRAY:
160-
return MakeTypedColumnStats<FLBAType>(meta_data, descr);
162+
return MakeTypedColumnStats<FLBAType>(meta_data, descr, pool);
161163
case Type::UNDEFINED:
162164
break;
163165
}
@@ -363,7 +365,8 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl {
363365
if (is_stats_set()) {
364366
const std::lock_guard<std::mutex> guard(stats_mutex_);
365367
if (possible_stats_ == nullptr) {
366-
possible_stats_ = MakeColumnStats(*column_metadata_, descr_);
368+
possible_stats_ =
369+
MakeColumnStats(*column_metadata_, descr_, properties_.memory_pool());
367370
}
368371
return possible_stats_;
369372
}

0 commit comments

Comments
 (0)