Skip to content

Commit a9644bf

Browse files
authored
Small cleanup for the model IO. (#11602)

- Remove binary-IO related code.
- Split up the approx tree SHAP implementation.
1 parent ab0c714 commit a9644bf

File tree

14 files changed

+135
-262
lines changed

14 files changed

+135
-262
lines changed

R-package/src/Makevars.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ OBJECTS= \
8585
$(PKGROOT)/src/data/iterative_dmatrix.o \
8686
$(PKGROOT)/src/predictor/predictor.o \
8787
$(PKGROOT)/src/predictor/cpu_predictor.o \
88-
$(PKGROOT)/src/predictor/cpu_treeshap.o \
88+
$(PKGROOT)/src/predictor/treeshap.o \
8989
$(PKGROOT)/src/tree/constraints.o \
9090
$(PKGROOT)/src/tree/param.o \
9191
$(PKGROOT)/src/tree/fit_stump.o \

R-package/src/Makevars.win.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ OBJECTS= \
8484
$(PKGROOT)/src/data/iterative_dmatrix.o \
8585
$(PKGROOT)/src/predictor/predictor.o \
8686
$(PKGROOT)/src/predictor/cpu_predictor.o \
87-
$(PKGROOT)/src/predictor/cpu_treeshap.o \
87+
$(PKGROOT)/src/predictor/treeshap.o \
8888
$(PKGROOT)/src/tree/constraints.o \
8989
$(PKGROOT)/src/tree/param.o \
9090
$(PKGROOT)/src/tree/fit_stump.o \

include/xgboost/linalg.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515

1616
#include <algorithm>
1717
#include <cassert>
18-
#include <cinttypes> // for int32_t
19-
#include <cstddef> // for size_t
18+
#include <cstddef> // for size_t
19+
#include <cstdint> // for int32_t
2020
#include <limits>
2121
#include <string>
2222
#include <tuple> // for make_tuple

include/xgboost/tree_model.h

Lines changed: 16 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
/**
22
* Copyright 2014-2025, XGBoost Contributors
3-
* \file tree_model.h
3+
*
44
* \brief model structure for tree
55
* \author Tianqi Chen
66
*/
77
#ifndef XGBOOST_TREE_MODEL_H_
88
#define XGBOOST_TREE_MODEL_H_
99

10-
#include <dmlc/io.h>
11-
#include <dmlc/parameter.h>
1210
#include <xgboost/base.h>
1311
#include <xgboost/data.h>
1412
#include <xgboost/feature_map.h>
@@ -28,67 +26,24 @@
2826
namespace xgboost {
2927
class Json;
3028

31-
// FIXME(trivialfis): Once binary IO is gone, make this parameter internal as it should
32-
// not be configured by users.
33-
/*! \brief meta parameters of the tree */
34-
struct TreeParam : public dmlc::Parameter<TreeParam> {
35-
/*! \brief (Deprecated) number of start root */
36-
int deprecated_num_roots{1};
37-
/*! \brief total number of nodes */
38-
int num_nodes{1};
39-
/*!\brief number of deleted nodes */
40-
int num_deleted{0};
41-
/*! \brief maximum depth, this is a statistics of the tree */
42-
int deprecated_max_depth{0};
43-
/*! \brief number of features used for tree construction */
29+
/** @brief meta parameters of the tree */
30+
struct TreeParam {
31+
/** @brief The number of nodes */
32+
bst_node_t num_nodes{1};
33+
/** @brief The number of deleted nodes */
34+
bst_node_t num_deleted{0};
35+
/** @brief The number of features used for tree construction */
4436
bst_feature_t num_feature{0};
45-
/*!
46-
* \brief leaf vector size, used for vector tree
47-
* used to store more than one dimensional information in tree
48-
*/
37+
/** @brief leaf vector size. Used by the vector leaf. */
4938
bst_target_t size_leaf_vector{1};
50-
/*! \brief reserved part, make sure alignment works for 64bit */
51-
int reserved[31];
52-
/*! \brief constructor */
53-
TreeParam() {
54-
// assert compact alignment
55-
static_assert(sizeof(TreeParam) == (31 + 6) * sizeof(int), "TreeParam: 64 bit align");
56-
std::memset(reserved, 0, sizeof(reserved));
57-
}
58-
59-
// Swap byte order for all fields. Useful for transporting models between machines with different
60-
// endianness (big endian vs little endian)
61-
[[nodiscard]] TreeParam ByteSwap() const {
62-
TreeParam x = *this;
63-
dmlc::ByteSwap(&x.deprecated_num_roots, sizeof(x.deprecated_num_roots), 1);
64-
dmlc::ByteSwap(&x.num_nodes, sizeof(x.num_nodes), 1);
65-
dmlc::ByteSwap(&x.num_deleted, sizeof(x.num_deleted), 1);
66-
dmlc::ByteSwap(&x.deprecated_max_depth, sizeof(x.deprecated_max_depth), 1);
67-
dmlc::ByteSwap(&x.num_feature, sizeof(x.num_feature), 1);
68-
dmlc::ByteSwap(&x.size_leaf_vector, sizeof(x.size_leaf_vector), 1);
69-
dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0]));
70-
return x;
71-
}
72-
73-
// declare the parameters
74-
DMLC_DECLARE_PARAMETER(TreeParam) {
75-
// only declare the parameters that can be set by the user.
76-
// other arguments are set by the algorithm.
77-
DMLC_DECLARE_FIELD(num_nodes).set_lower_bound(1).set_default(1);
78-
DMLC_DECLARE_FIELD(num_feature)
79-
.set_default(0)
80-
.describe("Number of features used in tree construction.");
81-
DMLC_DECLARE_FIELD(num_deleted).set_default(0);
82-
DMLC_DECLARE_FIELD(size_leaf_vector)
83-
.set_lower_bound(0)
84-
.set_default(1)
85-
.describe("Size of leaf vector, reserved for vector tree");
86-
}
8739

8840
bool operator==(const TreeParam& b) const {
8941
return num_nodes == b.num_nodes && num_deleted == b.num_deleted &&
9042
num_feature == b.num_feature && size_leaf_vector == b.size_leaf_vector;
9143
}
44+
45+
void FromJson(Json const& in);
46+
void ToJson(Json* p_out) const;
9247
};
9348

9449
/*! \brief node statistics used in regression tree */
@@ -109,16 +64,6 @@ struct RTreeNodeStat {
10964
return loss_chg == b.loss_chg && sum_hess == b.sum_hess &&
11065
base_weight == b.base_weight && leaf_child_cnt == b.leaf_child_cnt;
11166
}
112-
// Swap byte order for all fields. Useful for transporting models between machines with different
113-
// endianness (big endian vs little endian)
114-
[[nodiscard]] RTreeNodeStat ByteSwap() const {
115-
RTreeNodeStat x = *this;
116-
dmlc::ByteSwap(&x.loss_chg, sizeof(x.loss_chg), 1);
117-
dmlc::ByteSwap(&x.sum_hess, sizeof(x.sum_hess), 1);
118-
dmlc::ByteSwap(&x.base_weight, sizeof(x.base_weight), 1);
119-
dmlc::ByteSwap(&x.leaf_child_cnt, sizeof(x.leaf_child_cnt), 1);
120-
return x;
121-
}
12267
};
12368

12469
/**
@@ -166,12 +111,11 @@ class RegTree : public Model {
166111
public:
167112
XGBOOST_DEVICE Node() {
168113
// assert compact alignment
169-
static_assert(sizeof(Node) == 4 * sizeof(int) + sizeof(Info),
170-
"Node: 64 bit align");
114+
static_assert(sizeof(Node) == 4 * sizeof(int) + sizeof(Info), "Node: 64 bit align");
171115
}
172-
Node(int32_t cleft, int32_t cright, int32_t parent,
173-
uint32_t split_ind, float split_cond, bool default_left) :
174-
parent_{parent}, cleft_{cleft}, cright_{cright} {
116+
Node(int32_t cleft, int32_t cright, int32_t parent, uint32_t split_ind, float split_cond,
117+
bool default_left)
118+
: parent_{parent}, cleft_{cleft}, cright_{cright} {
175119
this->SetParent(parent_);
176120
this->SetSplit(split_ind, split_cond, default_left);
177121
}
@@ -261,16 +205,6 @@ class RegTree : public Model {
261205
info_.leaf_value == b.info_.leaf_value;
262206
}
263207

264-
[[nodiscard]] Node ByteSwap() const {
265-
Node x = *this;
266-
dmlc::ByteSwap(&x.parent_, sizeof(x.parent_), 1);
267-
dmlc::ByteSwap(&x.cleft_, sizeof(x.cleft_), 1);
268-
dmlc::ByteSwap(&x.cright_, sizeof(x.cright_), 1);
269-
dmlc::ByteSwap(&x.sindex_, sizeof(x.sindex_), 1);
270-
dmlc::ByteSwap(&x.info_, sizeof(x.info_), 1);
271-
return x;
272-
}
273-
274208
private:
275209
/*!
276210
* \brief in leaf node, we have weights, in non-leaf nodes,
@@ -320,7 +254,6 @@ class RegTree : public Model {
320254
}
321255

322256
RegTree() {
323-
param_.Init(Args{});
324257
nodes_.resize(param_.num_nodes);
325258
stats_.resize(param_.num_nodes);
326259
split_types_.resize(param_.num_nodes, FeatureType::kNumerical);
@@ -589,14 +522,6 @@ class RegTree : public Model {
589522
bool has_missing_;
590523
};
591524

592-
/*!
593-
* \brief calculate the approximate feature contributions for the given root
594-
* \param feat dense feature vector, if the feature is missing the field is set to NaN
595-
* \param out_contribs output vector to hold the contributions
596-
*/
597-
void CalculateContributionsApprox(const RegTree::FVec& feat,
598-
std::vector<float>* mean_values,
599-
bst_float* out_contribs) const;
600525
/*!
601526
* \brief dump the model in the requested format as a text string
602527
* \param fmap feature map that may help give interpretations of feature

src/gbm/gblinear_model.cc

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@
66
#include "xgboost/json.h"
77
#include "gblinear_model.h"
88

9-
namespace xgboost {
10-
namespace gbm {
11-
9+
namespace xgboost::gbm {
1210
void GBLinearModel::SaveModel(Json* p_out) const {
1311
auto& out = *p_out;
1412

@@ -42,7 +40,4 @@ void GBLinearModel::LoadModel(Json const& in) {
4240
this->num_boosted_rounds = 0;
4341
}
4442
}
45-
46-
DMLC_REGISTER_PARAMETER(DeprecatedGBLinearModelParam);
47-
} // namespace gbm
48-
} // namespace xgboost
43+
} // namespace xgboost::gbm

src/gbm/gblinear_model.h

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -14,40 +14,14 @@
1414
#include "xgboost/feature_map.h"
1515
#include "xgboost/model.h"
1616
#include "xgboost/json.h"
17-
#include "xgboost/parameter.h"
1817

1918
namespace xgboost {
2019
class Json;
2120
namespace gbm {
22-
// Deprecated in 1.0.0. model parameter. Only staying here for compatible binary model IO.
23-
struct DeprecatedGBLinearModelParam : public dmlc::Parameter<DeprecatedGBLinearModelParam> {
24-
// number of feature dimension
25-
uint32_t deprecated_num_feature;
26-
// deprecated. use learner_model_param_->num_output_group.
27-
int32_t deprecated_num_output_group;
28-
// reserved field
29-
int32_t reserved[32];
30-
// constructor
31-
DeprecatedGBLinearModelParam() {
32-
static_assert(sizeof(*this) == sizeof(int32_t) * 34,
33-
"Model parameter size can not be changed.");
34-
std::memset(this, 0, sizeof(DeprecatedGBLinearModelParam));
35-
}
36-
37-
DMLC_DECLARE_PARAMETER(DeprecatedGBLinearModelParam) {
38-
DMLC_DECLARE_FIELD(deprecated_num_feature);
39-
DMLC_DECLARE_FIELD(deprecated_num_output_group);
40-
}
41-
};
42-
4321
// model for linear booster
4422
class GBLinearModel : public Model {
45-
private:
46-
// Deprecated in 1.0.0
47-
DeprecatedGBLinearModelParam param_;
48-
4923
public:
50-
int32_t num_boosted_rounds{0};
24+
std::int32_t num_boosted_rounds{0};
5125
LearnerModelParam const* learner_model_param;
5226

5327
public:

src/gbm/gbtree_model.h

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -42,21 +42,13 @@ struct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {
4242
/**
4343
* \brief number of trees
4444
*/
45-
std::int32_t num_trees;
45+
std::int32_t num_trees{0};
4646
/**
4747
* \brief Number of trees for a forest.
4848
*/
49-
std::int32_t num_parallel_tree;
50-
/*! \brief reserved parameters */
51-
int32_t reserved[38];
52-
53-
/*! \brief constructor */
54-
GBTreeModelParam() {
55-
std::memset(this, 0, sizeof(GBTreeModelParam)); // FIXME(trivialfis): Why?
56-
static_assert(sizeof(GBTreeModelParam) == (4 + 2 + 2 + 32) * sizeof(int32_t),
57-
"64/32 bit compatibility issue");
58-
num_parallel_tree = 1;
59-
}
49+
std::int32_t num_parallel_tree{1};
50+
51+
GBTreeModelParam() = default;
6052

6153
// declare parameters, only declare those that need to be set.
6254
DMLC_DECLARE_PARAMETER(GBTreeModelParam) {
@@ -69,16 +61,6 @@ struct GBTreeModelParam : public dmlc::Parameter<GBTreeModelParam> {
6961
"Number of parallel trees constructed during each iteration."
7062
" This option is used to support boosted random forest.");
7163
}
72-
73-
// Swap byte order for all fields. Useful for transporting models between machines with different
74-
// endianness (big endian vs little endian)
75-
GBTreeModelParam ByteSwap() const {
76-
GBTreeModelParam x = *this;
77-
dmlc::ByteSwap(&x.num_trees, sizeof(x.num_trees), 1);
78-
dmlc::ByteSwap(&x.num_parallel_tree, sizeof(x.num_parallel_tree), 1);
79-
dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0]));
80-
return x;
81-
}
8264
};
8365

8466
struct GBTreeModel : public Model {

src/learner.cc

Lines changed: 10 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@
4141
#include "common/random.h" // for GlobalRandom
4242
#include "common/timer.h" // for Monitor
4343
#include "common/version.h" // for Version
44-
#include "dmlc/endian.h" // for ByteSwap, DMLC_IO_NO_ENDIAN_SWAP
4544
#include "xgboost/base.h" // for Args, bst_float, GradientPair, bst_feature_t, ...
4645
#include "xgboost/context.h" // for Context
4746
#include "xgboost/data.h" // for DMatrix, MetaInfo
@@ -84,22 +83,22 @@ T& UsePtr(T& ptr) { // NOLINT
8483
*/
8584
struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy> {
8685
/* \brief global bias */
87-
bst_float base_score;
86+
bst_float base_score{ObjFunction::DefaultBaseScore()};
8887
/* \brief number of features */
89-
bst_feature_t num_feature;
88+
bst_feature_t num_feature{0};
9089
/* \brief number of classes, if it is multi-class classification */
91-
std::int32_t num_class;
90+
std::int32_t num_class{0};
9291
/*! \brief Model contain additional properties */
93-
int32_t contain_extra_attrs;
92+
int32_t contain_extra_attrs{0};
9493
/*! \brief Model contain eval metrics */
95-
int32_t contain_eval_metrics;
94+
int32_t contain_eval_metrics{0};
9695
/*! \brief the version of XGBoost. */
97-
std::uint32_t major_version;
98-
std::uint32_t minor_version;
96+
std::int32_t major_version{std::get<0>(Version::Self())};
97+
std::int32_t minor_version{std::get<1>(Version::Self())};
9998
/**
10099
* \brief Number of target variables.
101100
*/
102-
bst_target_t num_target;
101+
bst_target_t num_target{1};
103102
/**
104103
* \brief Whether we should calculate the base score from training data.
105104
*
@@ -110,19 +109,8 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
110109
* of bool for the ease of serialization.
111110
*/
112111
std::int32_t boost_from_average{true};
113-
/*! \brief reserved field */
114-
int reserved[25];
115-
/*! \brief constructor */
116-
LearnerModelParamLegacy() {
117-
std::memset(this, 0, sizeof(LearnerModelParamLegacy));
118-
base_score = ObjFunction::DefaultBaseScore();
119-
num_target = 1;
120-
major_version = std::get<0>(Version::Self());
121-
minor_version = std::get<1>(Version::Self());
122-
boost_from_average = true;
123-
static_assert(sizeof(LearnerModelParamLegacy) == 136,
124-
"Do not change the size of this struct, as it will break binary IO.");
125-
}
112+
113+
LearnerModelParamLegacy() = default;
126114

127115
// Skip other legacy fields.
128116
[[nodiscard]] Json ToJson() const {
@@ -175,21 +163,6 @@ struct LearnerModelParamLegacy : public dmlc::Parameter<LearnerModelParamLegacy>
175163
from_chars(str.c_str(), str.c_str() + str.size(), base_score);
176164
}
177165

178-
[[nodiscard]] LearnerModelParamLegacy ByteSwap() const {
179-
LearnerModelParamLegacy x = *this;
180-
dmlc::ByteSwap(&x.base_score, sizeof(x.base_score), 1);
181-
dmlc::ByteSwap(&x.num_feature, sizeof(x.num_feature), 1);
182-
dmlc::ByteSwap(&x.num_class, sizeof(x.num_class), 1);
183-
dmlc::ByteSwap(&x.contain_extra_attrs, sizeof(x.contain_extra_attrs), 1);
184-
dmlc::ByteSwap(&x.contain_eval_metrics, sizeof(x.contain_eval_metrics), 1);
185-
dmlc::ByteSwap(&x.major_version, sizeof(x.major_version), 1);
186-
dmlc::ByteSwap(&x.minor_version, sizeof(x.minor_version), 1);
187-
dmlc::ByteSwap(&x.num_target, sizeof(x.num_target), 1);
188-
dmlc::ByteSwap(&x.boost_from_average, sizeof(x.boost_from_average), 1);
189-
dmlc::ByteSwap(x.reserved, sizeof(x.reserved[0]), sizeof(x.reserved) / sizeof(x.reserved[0]));
190-
return x;
191-
}
192-
193166
template <typename Container>
194167
Args UpdateAllowUnknown(Container const& kwargs) {
195168
// Detect whether user has made their own base score.

0 commit comments

Comments (0)