Commit dcefbdb

[doc] Add R code blocks for the intercept introduction. (#11685)
1 parent 910c34b commit dcefbdb

File tree

2 files changed: +154 -55 lines changed

doc/tutorials/advanced_custom_obj.rst

Lines changed: 11 additions & 11 deletions
@@ -185,7 +185,7 @@ Convince yourself that the implementation is correct:
 def gen_random_dirichlet(rng: np.random.Generator, m: int, k: int):
     alpha = np.exp(rng.standard_normal(size=k))
     return rng.dirichlet(alpha, size=m)
-
+
 def test_dirichlet_fun_grad_hess():
     k = 3
     m = 10

@@ -233,12 +233,12 @@ Convince yourself that the implementation is correct:
 alpha <- exp(rnorm(k))
 y <- rdirichlet(m, alpha)
 x0 <- rnorm(k)
-
+
 for (row in seq_len(m)) {
   logpdf <- dirichlet.fun(matrix(x0, nrow=1), y[row,,drop=F])
   ref_logpdf <- ddirichlet(y[row,,drop=F], exp(x0), log = T)
   expect_equal(logpdf, -ref_logpdf)
-
+
   eps <- 1e-7
   grad_num <- numeric(k)
   for (col in seq_len(k)) {

@@ -249,10 +249,10 @@ Convince yourself that the implementation is correct:
       - dirichlet.fun(matrix(x0, nrow=1), y[row,,drop=F])
     ) / eps
   }
-
+
   grad <- dirichlet.grad(matrix(x0, nrow=1), y[row,,drop=F])
   expect_equal(grad |> as.vector(), grad_num, tolerance=1e-6)
-
+
   H_numeric <- array(dim=c(k, k))
   for (ii in seq_len(k)) {
     xplus <- x0

@@ -264,7 +264,7 @@ Convince yourself that the implementation is correct:
       ) / eps
     }
   }
-
+
   H <- dirichlet.hess(matrix(xplus, nrow=1), y[row,,drop=F])
   expect_equal(H[1,,], H_numeric, tolerance=1e-6)
 }

@@ -346,7 +346,7 @@ point, which means it will be a minimum rather than a maximum or saddle point).
 alpha <- exp(x0)
 n.samples <- 5e6
 y.samples <- rdirichlet(n.samples, alpha)
-
+
 x.broadcast <- rep(x0, n.samples) |> matrix(ncol=k, byrow=T)
 grad.samples <- dirichlet.grad(x.broadcast, y.samples)
 ref <- crossprod(grad.samples) / n.samples

@@ -420,7 +420,7 @@ required for XGBoost's custom objectives:
     .. code-tab:: r R

         library(xgboost)
-
+
         dirichlet.xgb.objective <- function(pred, dtrain) {
           y <- getinfo(dtrain, "label")
           return(

@@ -473,7 +473,7 @@ The data:

 .. tabs::
     .. code-tab:: py
-
+
         # depth
         X = np.array([
             10.4,11.7,12.8,13,15.7,16.3,18,18.7,20.7,22.1,

@@ -508,9 +508,9 @@ Fitting an XGBoost model and making predictions:

 .. tabs::
     .. code-tab:: py
-
+
         from typing import Dict, List
-
+
         dtrain = xgb.DMatrix(X, label=Y)
         results: Dict[str, Dict[str, List[float]]] = {}
         booster = xgb.train(
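
Note: the hunks above only strip trailing whitespace, but the surrounding tutorial code validates the custom Dirichlet objective with forward-difference gradient checks. A minimal, generic sketch of that pattern in R (the helper name numeric.grad and the toy function are illustrative, not part of the tutorial):

    # Forward-difference approximation of the gradient of a scalar-valued
    # function f at x0; the same pattern the R test code above relies on.
    numeric.grad <- function(f, x0, eps = 1e-7) {
        vapply(seq_along(x0), function(j) {
            xplus <- x0
            xplus[j] <- x0[j] + eps
            (f(xplus) - f(x0)) / eps
        }, numeric(1))
    }

    # Example: the gradient of sum(x^2) at (1, 2, 3) is roughly (2, 4, 6).
    numeric.grad(function(x) sum(x^2), c(1, 2, 3))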

doc/tutorials/intercept.rst

Lines changed: 143 additions & 44 deletions
@@ -9,12 +9,28 @@ automatically based on targets upon training. The behavior can be controlled by
 ``base_score`` to a constant value. The following snippet disables the automatic
 estimation:

-.. code-block:: python
+.. tabs::
+    .. code-tab:: py

-    import xgboost as xgb
+        import xgboost as xgb

-    reg = xgb.XGBRegressor()
-    reg.set_params(base_score=0.5)
+        clf = xgb.XGBClassifier(n_estimators=10)
+        clf.set_params(base_score=0.5)
+
+    .. code-tab:: r R
+
+        library(xgboost)
+
+        # Load built-in dataset
+        data(agaricus.train, package = "xgboost")
+
+        # Set base_score parameter directly
+        model <- xgboost(
+            x = agaricus.train$data,
+            y = factor(agaricus.train$label),
+            base_score = 0.5,
+            nrounds = 10
+        )

 In addition, here 0.5 represents the value after applying the inverse link function. See
 the end of the document for a description.
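
Note: the remark about the inverse link function in this hunk can be checked with base R alone. For the logistic link used in binary classification, a base_score of 0.5 on the probability scale corresponds to a raw margin of 0. A minimal sketch (uses only qlogis/plogis from base R; not part of the changed file):

    # Probability 0.5 maps to margin 0 under the logit link, and back.
    stopifnot(isTRUE(all.equal(qlogis(0.5), 0)))   # logit(0.5) == 0
    stopifnot(isTRUE(all.equal(plogis(0), 0.5)))   # inverse logit of 0 == 0.5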
@@ -24,22 +40,53 @@ Other than the ``base_score``, users can also provide global bias via the data f
 and multi-class, the ``base_margin`` is a matrix with size ``(n_samples, n_targets)`` or
 ``(n_samples, n_classes)``.

-.. code-block:: python
+.. tabs::
+    .. code-tab:: py
+
+        import xgboost as xgb
+        from sklearn.datasets import make_classification
+
+        X, y = make_classification()
+
+        clf = xgb.XGBClassifier()
+        clf.fit(X, y)
+        # Request for raw prediction
+        m = clf.predict(X, output_margin=True)
+
+        clf_1 = xgb.XGBClassifier()
+        # Feed the prediction into the next model
+        # Using base margin overrides the base score, see below sections.
+        clf_1.fit(X, y, base_margin=m)
+        clf_1.predict(X, base_margin=m)
+
+    .. code-tab:: r R
+
+        library(xgboost)
+
+        # Load built-in dataset
+        data(agaricus.train, package = "xgboost")

-    import xgboost as xgb
-    from sklearn.datasets import make_regression
+        # Train first model
+        model_1 <- xgboost(
+            x = agaricus.train$data,
+            y = factor(agaricus.train$label),
+            nrounds = 10
+        )

-    X, y = make_regression()
+        # Request for raw prediction
+        m <- predict(model_1, agaricus.train$data, type = "raw")

-    reg = xgb.XGBRegressor()
-    reg.fit(X, y)
-    # Request for raw prediction
-    m = reg.predict(X, output_margin=True)
+        # Feed the prediction into the next model using base_margin
+        # Using base margin overrides the base score, see below sections.
+        model_2 <- xgboost(
+            x = agaricus.train$data,
+            y = factor(agaricus.train$label),
+            base_margin = m,
+            nrounds = 10
+        )

-    reg_1 = xgb.XGBRegressor()
-    # Feed the prediction into the next model
-    reg_1.fit(X, y, base_margin=m)
-    reg_1.predict(X, base_margin=m)
+        # Make predictions with base_margin
+        pred <- predict(model_2, agaricus.train$data, base_margin = m)


 It specifies the bias for each sample and can be used for stacking an XGBoost model on top
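
Note: for the multi-output and multi-class cases mentioned in this hunk's context, ``base_margin`` is expected as a matrix of shape (n_samples, n_targets) or (n_samples, n_classes) rather than a vector. A shape-only sketch in R (the 3-class setup and variable names are hypothetical):

    # Hypothetical 3-class problem with 100 samples: one margin column per class.
    n_samples <- 100
    n_classes <- 3
    margin <- matrix(0, nrow = n_samples, ncol = n_classes)
    dim(margin)  # 100 3; a matrix of this shape would be supplied as base_margin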
@@ -145,49 +192,101 @@ Example
 The following example shows the relationship between ``base_score`` and ``base_margin``
 using binary logistic with a `logit` link function:

-.. code-block:: python
+.. tabs::
+    .. code-tab:: py

-    import numpy as np
-    from scipy.special import logit
-    from sklearn.datasets import make_classification
-    from xgboost import train, DMatrix
+        import numpy as np
+        from scipy.special import logit
+        from sklearn.datasets import make_classification

-    X, y = make_classification(random_state=2025)
+        import xgboost as xgb
+
+        X, y = make_classification(random_state=2025)
+
+    .. code-tab:: r R
+
+        library(xgboost)
+
+        # Load built-in dataset
+        data(agaricus.train, package = "xgboost")
+        X <- agaricus.train$data
+        y <- agaricus.train$label

 The intercept is a valid probability (0.5). It's used as the initial estimation of the
 probability of obtaining a positive sample.

-.. code-block:: python
+.. tabs::
+    .. code-tab:: py
+
+        intercept = 0.5
+
+    .. code-tab:: r R

-    intercept = 0.5
+        intercept <- 0.5

 First we use the intercept to train a model:

-.. code-block:: python
+.. tabs::
+    .. code-tab:: py

-    booster = train(
-        {"base_score": intercept, "objective": "binary:logistic"},
-        dtrain=DMatrix(X, y),
-        num_boost_round=1,
-    )
-    predt_0 = booster.predict(DMatrix(X, y))
+        booster = xgb.train(
+            {"base_score": intercept, "objective": "binary:logistic"},
+            dtrain=xgb.DMatrix(X, y),
+            num_boost_round=1,
+        )
+        predt_0 = booster.predict(xgb.DMatrix(X, y))

-Apply :py:func:`~scipy.special.logit` to obtain the "margin":
+    .. code-tab:: r R
+
+        # First model with base_score
+        model_0 <- xgboost(
+            x = X, y = factor(y),
+            base_score = intercept,
+            objective = "binary:logistic",
+            nrounds = 1
+        )
+        predt_0 <- predict(model_0, X)

-.. code-block:: python
+Apply :py:func:`~scipy.special.logit` to obtain the "margin":

-    margin = np.full(y.shape, fill_value=logit(intercept), dtype=np.float32)
-    Xy = DMatrix(X, y, base_margin=margin)
-    # 0.2 is a dummy value to show that `base_margin` overrides `base_score`.
-    booster = train(
-        {"base_score": 0.2, "objective": "binary:logistic"},
-        dtrain=Xy,
-        num_boost_round=1,
-    )
-    predt_1 = booster.predict(Xy)
+.. tabs::
+    .. code-tab:: py
+
+        # Apply logit function to obtain the "margin"
+        margin = np.full(y.shape, fill_value=logit(intercept), dtype=np.float32)
+        Xy = xgb.DMatrix(X, y, base_margin=margin)
+        # Second model with base_margin
+        # 0.2 is a dummy value to show that `base_margin` overrides `base_score`.
+        booster = xgb.train(
+            {"base_score": 0.2, "objective": "binary:logistic"},
+            dtrain=Xy,
+            num_boost_round=1,
+        )
+        predt_1 = booster.predict(Xy)
+
+    .. code-tab:: r R
+
+        # Apply logit function to obtain the "margin"
+        logit_intercept <- log(intercept / (1 - intercept))
+        margin <- rep(logit_intercept, length(y))
+        # Second model with base_margin
+        # 0.2 is a dummy value to show that `base_margin` overrides `base_score`
+        model_1 <- xgboost(
+            x = X, y = factor(y),
+            base_margin = margin,
+            base_score = 0.2,
+            objective = "binary:logistic",
+            nrounds = 1
+        )
+        predt_1 <- predict(model_1, X, base_margin = margin)

 Compare the results:

-.. code-block:: python
+.. tabs::
+    .. code-tab:: py
+
+        np.testing.assert_allclose(predt_0, predt_1)
+
+    .. code-tab:: r R

-    np.testing.assert_allclose(predt_0, predt_1)
+        all.equal(predt_0, predt_1, tolerance = 1e-6)
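
Note: since the logit link is monotone, the equivalence demonstrated above should also hold on the raw margin scale. A short follow-up check (reuses predt_0 and predt_1 from the R tab above together with base R's qlogis; not part of the original example):

    # Map the probability predictions back to the log-odds scale and compare there.
    all.equal(qlogis(predt_0), qlogis(predt_1), tolerance = 1e-6)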
