Skip to content

Commit 39634b2

Browse files
authored
fix(security): address reported unsafe pickle.load usages (#2099)
1 parent 16acb76 commit 39634b2

File tree

7 files changed

+18
-14
lines changed

7 files changed

+18
-14
lines changed

qlib/contrib/rolling/ddgda.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from qlib.model.trainer import TrainerR
1515
from qlib.typehint import Literal
1616
from qlib.utils import init_instance_by_config
17+
from qlib.utils.pickle_utils import restricted_pickle_load
1718
from qlib.workflow import R
1819
from qlib.workflow.task.utils import replace_task_handler_with_cache
1920

@@ -298,7 +299,7 @@ def _train_meta_model(self, fill_method="max"):
298299
# but their task test segments are not aligned! It worked in my previous experiment.
299300
# So the misalignment will not affect the effectiveness of the method.
300301
with self._internal_data_path.open("rb") as f:
301-
internal_data = pickle.load(f)
302+
internal_data = restricted_pickle_load(f)
302303

303304
md = MetaDatasetDS(exp_name=internal_data, **kwargs)
304305

@@ -360,7 +361,7 @@ def get_task_list(self):
360361
)
361362

362363
with self._internal_data_path.open("rb") as f:
363-
internal_data = pickle.load(f)
364+
internal_data = restricted_pickle_load(f)
364365
mds = MetaDatasetDS(exp_name=internal_data, **kwargs)
365366

366367
# 3) meta model make inference and get new qlib task

qlib/contrib/tuner/tuner.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import yaml
99
import json
1010
import copy
11-
import pickle
1211
import logging
1312
import importlib
1413
import subprocess
@@ -18,6 +17,7 @@
1817
from abc import abstractmethod
1918

2019
from ...log import get_module_logger, TimeInspector
20+
from ...utils.pickle_utils import restricted_pickle_load
2121
from hyperopt import fmin, tpe
2222
from hyperopt import STATUS_OK, STATUS_FAIL
2323

@@ -136,7 +136,7 @@ def fetch_result(self):
136136
exp_result_dir = os.path.join(self.ex_dir, QLibTuner.EXP_RESULT_DIR.format(estimator_ex_id))
137137
exp_result_path = os.path.join(exp_result_dir, QLibTuner.EXP_RESULT_NAME)
138138
with open(exp_result_path, "rb") as fp:
139-
analysis_df = pickle.load(fp)
139+
analysis_df = restricted_pickle_load(fp)
140140

141141
# 4. Get the backtest factor that the user wants to optimize; if the user wants to maximize the factor, then reverse the result
142142
res = analysis_df.loc[self.optim_config.report_type].loc[self.optim_config.report_factor]

qlib/data/cache.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
normalize_cache_fields,
3131
normalize_cache_instruments,
3232
)
33+
from ..utils.pickle_utils import restricted_pickle_load
3334

3435
from ..log import get_module_logger
3536
from .base import Feature
@@ -225,7 +226,7 @@ def visit(cache_path: Union[str, Path]):
225226
cache_path = Path(cache_path)
226227
meta_path = cache_path.with_suffix(".meta")
227228
with meta_path.open("rb") as f:
228-
d = pickle.load(f)
229+
d = restricted_pickle_load(f)
229230
with meta_path.open("wb") as f:
230231
try:
231232
d["meta"]["last_visit"] = str(time.time())
@@ -592,7 +593,7 @@ def update(self, sid, cache_uri, freq: str = "day"):
592593

593594
with CacheUtils.writer_lock(self.r, f"{str(C.dpm.get_data_uri())}:expression-{cache_uri}"):
594595
with meta_path.open("rb") as f:
595-
d = pickle.load(f)
596+
d = restricted_pickle_load(f)
596597
instrument = d["info"]["instrument"]
597598
field = d["info"]["field"]
598599
freq = d["info"]["freq"]
@@ -959,7 +960,7 @@ def update(self, cache_uri, freq: str = "day"):
959960
im = DiskDatasetCache.IndexManager(cp_cache_uri)
960961
with CacheUtils.writer_lock(self.r, f"{str(C.dpm.get_data_uri())}:dataset-{cache_uri}"):
961962
with meta_path.open("rb") as f:
962-
d = pickle.load(f)
963+
d = restricted_pickle_load(f)
963964
instruments = d["info"]["instruments"]
964965
fields = d["info"]["fields"]
965966
freq = d["info"]["freq"]

qlib/data/dataset/loader.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
# Licensed under the MIT License.
33

44
import abc
5-
import pickle
65
from pathlib import Path
76
import warnings
87
import pandas as pd
@@ -11,6 +10,7 @@
1110

1211
from qlib.data import D
1312
from qlib.utils import load_dataset, init_instance_by_config, time_to_slc_point
13+
from qlib.utils.pickle_utils import restricted_pickle_load
1414
from qlib.log import get_module_logger
1515
from qlib.utils.serial import Serializable
1616

@@ -283,7 +283,7 @@ def _maybe_load_raw_data(self):
283283
self._data = pd.read_parquet(self._config, engine="pyarrow")
284284
else:
285285
with Path(self._config).open("rb") as f:
286-
self._data = pickle.load(f)
286+
self._data = restricted_pickle_load(f)
287287
elif isinstance(self._config, pd.DataFrame):
288288
self._data = self._config
289289

qlib/utils/mod.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import importlib
1212
import os
1313
from pathlib import Path
14-
import pickle
1514
import pkgutil
1615
import re
1716
import sys
@@ -20,6 +19,7 @@
2019
from urllib.parse import urlparse
2120

2221
from qlib.typehint import InstConf
22+
from qlib.utils.pickle_utils import restricted_pickle_load
2323

2424

2525
def get_module_by_module_path(module_path: Union[str, ModuleType]):
@@ -168,10 +168,10 @@ def init_instance_by_config(
168168

169169
pr_path = os.path.join(pr.netloc, path) if bool(pr.path) else pr.netloc
170170
with open(os.path.normpath(pr_path), "rb") as f:
171-
return pickle.load(f)
171+
return restricted_pickle_load(f)
172172
else:
173173
with config.open("rb") as f:
174-
return pickle.load(f)
174+
return restricted_pickle_load(f)
175175

176176
klass, cls_kwargs = get_callable_kwargs(config, default_module=default_module)
177177

qlib/utils/objm.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from pathlib import Path
77

88
from qlib.config import C
9+
from qlib.utils.pickle_utils import restricted_pickle_load
910

1011

1112
class ObjManager:
@@ -116,7 +117,7 @@ def save_objs(self, obj_name_l):
116117

117118
def load_obj(self, name):
118119
with (self.path / name).open("rb") as f:
119-
return pickle.load(f)
120+
return restricted_pickle_load(f)
120121

121122
def exists(self, name):
122123
return (self.path / name).exists()

qlib/workflow/task/manage.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
from .utils import get_mongodb
3030
from ...config import C
31+
from ...utils.pickle_utils import restricted_pickle_loads
3132

3233

3334
class TaskManager:
@@ -131,7 +132,7 @@ def _decode_task(self, task):
131132
for prefix in self.ENCODE_FIELDS_PREFIX:
132133
for k in list(task.keys()):
133134
if k.startswith(prefix):
134-
task[k] = pickle.loads(task[k])
135+
task[k] = restricted_pickle_loads(task[k])
135136
return task
136137

137138
def _dict_to_str(self, flt):

0 commit comments

Comments
 (0)