2 changes: 1 addition & 1 deletion .github/workflows/check_registry.yml
@@ -24,7 +24,7 @@ jobs:
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
-      - uses: actions/cache@v1
+      - uses: julia-actions/cache@v1
         env:
           cache-name: cache-artifacts
         with:
3 changes: 1 addition & 2 deletions .github/workflows/ci.yml
@@ -17,7 +17,6 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.6'
           - '1.10'
           - '1'
         os:
@@ -30,7 +29,7 @@
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
-      - uses: actions/cache@v1
+      - uses: julia-actions/cache@v1
         env:
           cache-name: cache-artifacts
         with:
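Both workflow changes swap the generic `actions/cache@v1` for `julia-actions/cache@v1`, a Julia-aware action that caches the `~/.julia` depot (packages, artifacts, compiled code) without manual path configuration. As a rough sketch of how that step typically sits in a Julia CI job (the surrounding steps are illustrative, not copied from this PR):

```yaml
steps:
  - uses: actions/checkout@v4
  - uses: julia-actions/setup-julia@v1
    with:
      version: '1'
  # no `with:` keys are needed for basic use; the action picks
  # cache paths under ~/.julia and restores them on later runs
  - uses: julia-actions/cache@v1
  - uses: julia-actions/julia-buildpkg@v1
  - uses: julia-actions/julia-runtest@v1
```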
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
 name = "MLJModels"
 uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
 authors = ["Anthony D. Blaom <[email protected]>"]
-version = "0.17.8"
+version = "0.17.9"
 
 [deps]
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
8 changes: 4 additions & 4 deletions src/registry/Metadata.toml
@@ -6282,9 +6282,9 @@
 ":supports_weights" = "`true`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = "Microsoft LightGBM FFI wrapper: Classifier"
+":docstring" = """```\nLGBMClassifier\n```\n\nA model type for constructing a LightGBM classifier, based on [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLGBMClassifier = @load LGBMClassifier pkg=LightGBM\n```\n\nDo `model = LGBMClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LGBMClassifier(boosting=...)`.\n\nLightGBM, short for light gradient-boosting machine, is a framework for gradient boosting based on decision tree algorithms and used for classification and other machine learning tasks, with a focus on performance and scalability. This model in particular is used for various types of classification tasks.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\nmach = machine(model, X, y)\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`; alternatively, `X` is any `AbstractMatrix` with `Continuous` elements; check the scitype with `scitype(X)`.\n * `y` is a vector of targets whose items are of scitype `Finite`. Check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Hyper-parameters\n\nSee https://lightgbm.readthedocs.io/en/v3.3.5/Parameters.html.\n\nCurrently, the following parameters and their defaults are supported:\n\n * `boosting::String = \"gbdt\"`,\n * `num_iterations::Int = 100::(_ >= 0)`,\n * `learning_rate::Float64 = 0.1::(_ > 0.)`,\n * `num_leaves::Int = 31::(1 < _ <= 131072)`,\n * `max_depth::Int = -1`,\n * `tree_learner::String = \"serial\"`,\n * `histogram_pool_size::Float64 = -1.0`,\n * `min_data_in_leaf::Int = 20::(_ >= 0)`,\n * `min_sum_hessian_in_leaf::Float64 = 1e-3::(_ >= 0.0)`,\n * `max_delta_step::Float64 = 0.0`,\n * `lambda_l1::Float64 = 0.0::(_ >= 0.0)`,\n * `lambda_l2::Float64 = 0.0::(_ >= 0.0)`,\n * `min_gain_to_split::Float64 = 0.0::(_ >= 0.0)`,\n * `feature_fraction::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `feature_fraction_bynode::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `feature_fraction_seed::Int = 2`,\n * `bagging_fraction::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `bagging_freq::Int = 0::(_ >= 0)`,\n * `bagging_seed::Int = 3`,\n * `early_stopping_round::Int = 0`,\n * `extra_trees::Bool = false`,\n * `extra_seed::Int = 6`,\n * `max_bin::Int = 255::(_ > 1)`,\n * `bin_construct_sample_cnt = 200000::(_ > 0)`,\n * `drop_rate::Float64 = 0.1::(0.0 <= _ <= 1.0)`,\n * `max_drop::Int = 50`,\n * `skip_drop::Float64 = 0.5::(0.0 <= _ <= 1)`,\n * `xgboost_dart_mode::Bool = false`,\n * `uniform_drop::Bool = false`,\n * `drop_seed::Int = 4`,\n * `top_rate::Float64 = 0.2::(0.0 <= _ <= 1.0)`,\n * `other_rate::Float64 = 0.1::(0.0 <= _ <= 1.0)`,\n * `min_data_per_group::Int = 100::(_ > 0)`,\n * `max_cat_threshold::Int = 32::(_ > 0)`,\n * `cat_l2::Float64 = 10.0::(_ >= 0)`,\n * `cat_smooth::Float64 = 10.0::(_ >= 0)`,\n * `objective::String = \"multiclass\"`,\n * `categorical_feature::Vector{Int} = Vector{Int}()`,\n * `data_random_seed::Int = 1`,\n * `is_sparse::Bool = true`,\n * `is_unbalance::Bool = false`,\n * `boost_from_average::Bool = true`,\n * `use_missing::Bool = true`,\n * `linear_tree::Bool = false`,\n * `feature_pre_filter::Bool = true`,\n * `metric::Vector{String} = [\"none\"]`,\n * `metric_freq::Int = 1::(_ > 0)`,\n * `is_provide_training_metric::Bool = false`,\n * `eval_at::Vector{Int} = Vector{Int}([1, 2, 3, 4, 5])::(all(_ .> 0))`,\n * `num_machines::Int = 1::(_ > 0)`,\n * `num_threads::Int = 0::(_ >= 0)`,\n * `local_listen_port::Int = 12400::(_ > 0)`,\n * `time_out::Int = 120::(_ > 0)`,\n * `machine_list_file::String = \"\"`,\n * `save_binary::Bool = false`,\n * `device_type::String = \"cpu\"`,\n * `gpu_use_dp::Bool = false`,\n * `gpu_platform_id::Int = -1`,\n * `gpu_device_id::Int = -1`,\n * `num_gpu::Int = 1`,\n * `force_col_wise::Bool = false`,\n * `force_row_wise::Bool = false`,\n * `truncate_booster::Bool = true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: Fitted model information, contains a `LGBMClassification` object, a `CategoricalArray` of the input class names, and the classifier with all its parameters\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_metrics`: A dictionary containing all training metrics.\n * `importance`: A `namedtuple` containing:\n\n * `gain`: The total gain of each split used by the model\n * `split`: The number of times each feature is used by the model.\n\n# Examples\n\n```julia\n\nusing DataFrames\nusing MLJ\n\n# load the model\nLGBMClassifier = @load LGBMClassifier pkg=LightGBM\n\nX, y = @load_iris\nX = DataFrame(X)\ntrain, test = partition(collect(eachindex(y)), 0.70, shuffle=true)\n\nfirst(X, 3)\nlgb = LGBMClassifier() # initialise a model with default params\nmach = machine(lgb, X[train, :], y[train]) |> fit!\n\npredict(mach, X[test, :])\n\n# access feature importances\nmodel_report = report(mach)\ngain_importance = model_report.importance.gain\nsplit_importance = model_report.importance.split\n```\n\nSee also [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl) and the unwrapped model type [`LightGBM.LGBMClassification`](@ref)\n"""
 ":name" = "LGBMClassifier"
-":human_name" = "lgbm classifier"
+":human_name" = "LightGBM classifier"
 ":is_supervised" = "`true`"
 ":prediction_type" = ":probabilistic"
 ":abstract_type" = "`MLJModelInterface.Probabilistic`"
@@ -6318,9 +6318,9 @@
 ":supports_weights" = "`true`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = "Microsoft LightGBM FFI wrapper: Regressor"
+":docstring" = """```\nLGBMRegressor\n```\n\nA model type for constructing a LightGBM regressor, based on [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLGBMRegressor = @load LGBMRegressor pkg=LightGBM\n```\n\nDo `model = LGBMRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LGBMRegressor(boosting=...)`.\n\nLightGBM, short for light gradient-boosting machine, is a framework for gradient boosting based on decision tree algorithms and used for classification, regression and other machine learning tasks, with a focus on performance and scalability. This model in particular is used for various types of regression tasks.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\nmach = machine(model, X, y)\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`; alternatively, `X` is any `AbstractMatrix` with `Continuous` elements; check the scitype with `scitype(X)`.\n * `y` is a vector of targets whose items are of scitype `Continuous`. Check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Hyper-parameters\n\nSee https://lightgbm.readthedocs.io/en/v3.3.5/Parameters.html.\n\nCurrently, the following parameters and their defaults are supported:\n\n * `boosting::String = \"gbdt\"`,\n * `num_iterations::Int = 100::(_ >= 0)`,\n * `learning_rate::Float64 = 0.1::(_ > 0.)`,\n * `num_leaves::Int = 31::(1 < _ <= 131072)`,\n * `max_depth::Int = -1`,\n * `tree_learner::String = \"serial\"`,\n * `histogram_pool_size::Float64 = -1.0`,\n * `min_data_in_leaf::Int = 20::(_ >= 0)`,\n * `min_sum_hessian_in_leaf::Float64 = 1e-3::(_ >= 0.0)`,\n * `max_delta_step::Float64 = 0.0`,\n * `lambda_l1::Float64 = 0.0::(_ >= 0.0)`,\n * `lambda_l2::Float64 = 0.0::(_ >= 0.0)`,\n * `min_gain_to_split::Float64 = 0.0::(_ >= 0.0)`,\n * `feature_fraction::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `feature_fraction_bynode::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `feature_fraction_seed::Int = 2`,\n * `bagging_fraction::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `pos_bagging_fraction::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `neg_bagging_fraction::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `bagging_freq::Int = 0::(_ >= 0)`,\n * `bagging_seed::Int = 3`,\n * `early_stopping_round::Int = 0`,\n * `extra_trees::Bool = false`,\n * `extra_seed::Int = 6`,\n * `max_bin::Int = 255::(_ > 1)`,\n * `bin_construct_sample_cnt = 200000::(_ > 0)`,\n * `drop_rate::Float64 = 0.1::(0.0 <= _ <= 1.0)`,\n * `max_drop::Int = 50`,\n * `skip_drop::Float64 = 0.5::(0.0 <= _ <= 1)`,\n * `xgboost_dart_mode::Bool = false`,\n * `uniform_drop::Bool = false`,\n * `drop_seed::Int = 4`,\n * `top_rate::Float64 = 0.2::(0.0 <= _ <= 1.0)`,\n * `other_rate::Float64 = 0.1::(0.0 <= _ <= 1.0)`,\n * `min_data_per_group::Int = 100::(_ > 0)`,\n * `max_cat_threshold::Int = 32::(_ > 0)`,\n * `cat_l2::Float64 = 10.0::(_ >= 0)`,\n * `cat_smooth::Float64 = 10.0::(_ >= 0)`,\n * `objective::String = \"regression\"`,\n * `categorical_feature::Vector{Int} = Vector{Int}()`,\n * `data_random_seed::Int = 1`,\n * `is_sparse::Bool = true`,\n * `is_unbalance::Bool = false`,\n * `boost_from_average::Bool = true`,\n * `scale_pos_weight::Float64 = 1.0`,\n * `use_missing::Bool = true`,\n * `linear_tree::Bool = false`,\n * `feature_pre_filter::Bool = true`,\n * `alpha::Float64 = 0.9::(_ > 0.0)`,\n * `metric::Vector{String} = [\"l2\"]`,\n * `metric_freq::Int = 1::(_ > 0)`,\n * `is_provide_training_metric::Bool = false`,\n * `eval_at::Vector{Int} = Vector{Int}([1, 2, 3, 4, 5])::(all(_ .> 0))`,\n * `num_machines::Int = 1::(_ > 0)`,\n * `num_threads::Int = 0::(_ >= 0)`,\n * `local_listen_port::Int = 12400::(_ > 0)`,\n * `time_out::Int = 120::(_ > 0)`,\n * `machine_list_file::String = \"\"`,\n * `save_binary::Bool = false`,\n * `device_type::String = \"cpu\"`,\n * `gpu_use_dp::Bool = false`,\n * `gpu_platform_id::Int = -1`,\n * `gpu_device_id::Int = -1`,\n * `num_gpu::Int = 1`,\n * `force_col_wise::Bool = false`,\n * `force_row_wise::Bool = false`,\n * `truncate_booster::Bool = true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: Fitted model information, contains a `LGBMRegression` object, an empty vector, and the regressor with all its parameters\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_metrics`: A dictionary containing all training metrics.\n * `importance`: A `namedtuple` containing:\n\n * `gain`: The total gain of each split used by the model\n * `split`: The number of times each feature is used by the model.\n\n# Examples\n\n```julia\n\nusing DataFrames\nusing MLJ\n\n# load the model\nLGBMRegressor = @load LGBMRegressor pkg=LightGBM\n\nX, y = @load_boston # a table and a vector\nX = DataFrame(X)\ntrain, test = partition(collect(eachindex(y)), 0.70, shuffle=true)\n\nfirst(X, 3)\nlgb = LGBMRegressor() # initialise a model with default params\nmach = machine(lgb, X[train, :], y[train]) |> fit!\n\npredict(mach, X[test, :])\n\n# access feature importances\nmodel_report = report(mach)\ngain_importance = model_report.importance.gain\nsplit_importance = model_report.importance.split\n```\n\nSee also [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl) and the unwrapped model type [`LightGBM.LGBMRegression`](@ref)\n"""
 ":name" = "LGBMRegressor"
-":human_name" = "lgbm regressor"
+":human_name" = "LightGBM regressor"
 ":is_supervised" = "`true`"
 ":prediction_type" = ":deterministic"
 ":abstract_type" = "`MLJModelInterface.Deterministic`"
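Once the bumped release (0.17.9) ships the regenerated `Metadata.toml`, the updated registry entries can be inspected from MLJ without installing LightGBM.jl itself. A minimal sketch using MLJ's model-registry queries (the output comments reflect the values added in this diff):

```julia
using MLJ

# look up the registry metadata updated by this PR
meta = info("LGBMClassifier", pkg="LightGBM")
meta.human_name     # "LightGBM classifier" (was "lgbm classifier")
meta.is_supervised  # true

# render the full docstring recorded in Metadata.toml
doc("LGBMClassifier", pkg="LightGBM")
```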