diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml index 42bd9b2..1b8dc09 100644 --- a/src/registry/Metadata.toml +++ b/src/registry/Metadata.toml @@ -542,9 +542,9 @@ [BetaML.NeuralNetworkClassifier] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Count, ScientificTypesBase.Finite}}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractVector{<:Union{ScientificTypesBase.Count, ScientificTypesBase.Finite}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":target_in_fit" = "`true`" @@ -719,6 +719,222 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" +[MLJTransforms.TargetEncoder] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Table`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":target_in_fit" = "`true`" +":is_pure_julia" = "`true`" +":package_name" = "MLJTransforms" +":package_license" = "unknown" +":load_path" = "MLJTransforms.TargetEncoder" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nTargetEncoder\n```\n\nA model type for constructing a target encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nTargetEncoder = @load TargetEncoder pkg=MLJTransforms\n```\n\nDo `model = TargetEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `TargetEncoder(features=...)`.\n\n`TargetEncoder` implements target encoding as defined in [1] to encode categorical variables into continuous ones using statistics from the target variable.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. 
Use `schema(X)` to check scitypes.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous` or `Count` for regression problems and `Multiclass` or `OrderedFactor` for classification problems; check the scitype with `schema(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns `true` for features to be included/excluded\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` features or ignore them\n * `λ`: Shrinkage hyperparameter used to mix between posterior and prior statistics as described in [1]\n * `m`: An integer hyperparameter used to compute shrinkage as described in [1]. If `m=:auto`, then `m` is computed using empirical Bayes estimation as described in [1]\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply target encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `task`: Whether the task is `Classification` or `Regression`\n * `y_statistic_given_feat_level`: A dictionary with the necessary statistics to encode each categorical feature. It maps each level in each categorical feature to a statistic computed over the target.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Define the target variable \ny = [\"c1\", \"c2\", \"c3\", \"c1\", \"c2\",]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\ny = coerce(y, Multiclass)\n\nencoder = TargetEncoder(ordered_factor = false, lambda = 1.0, m = 0,)\nmach = fit!(machine(encoder, X, y))\nXnew = transform(mach, X)\n\njulia> schema(Xnew)\n┌───────┬──────────────────┬─────────────────────────────────┐\n│ names │ scitypes │ types │\n├───────┼──────────────────┼─────────────────────────────────┤\n│ A_1 │ Continuous │ Float64 │\n│ A_2 │ Continuous │ Float64 │\n│ A_3 │ Continuous │ Float64 │\n│ B │ Continuous │ Float64 │\n│ C_1 │ Continuous │ Float64 │\n│ C_2 │ Continuous │ Float64 │\n│ C_3 │ Continuous │ Float64 │\n│ D_1 │ Continuous │ Float64 │\n│ D_2 │ Continuous │ Float64 │\n│ D_3 │ Continuous │ Float64 │\n│ E │ OrderedFactor{5} │ CategoricalValue{Int64, UInt32} │\n└───────┴──────────────────┴─────────────────────────────────┘\n```\n\n# Reference\n\n[1] Micci-Barreca, Daniele. “A preprocessing scheme for high-cardinality categorical attributes in classification and prediction problems” SIGKDD Explor. Newsl.
3, 1 (July 2001), 27–32.\n\nSee also [`OneHotEncoder`](@ref)\n""" +":name" = "TargetEncoder" +":human_name" = "target encoder" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :lambda, :m)`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Real\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[MLJTransforms.MissingnessEncoder] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Table`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":target_in_fit" = "`false`" +":is_pure_julia" = "`true`" +":package_name" = "MLJTransforms" +":package_license" = "unknown" +":load_path" = "MLJTransforms.MissingnessEncoder" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nMissingnessEncoder\n```\n\nA model type for constructing a missingness encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMissingnessEncoder = @load MissingnessEncoder pkg=MLJTransforms\n```\n\nDo `model = MissingnessEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MissingnessEncoder(features=...)`.\n\n`MissingnessEncoder` maps any missing value of a categorical feature into a new level (e.g., \"Missing\"). In this way, missingness is treated as an ordinary level by any subsequent model. This assumes that the categorical features have raw types that subtype `Char`, `AbstractString`, or `Number`.\n\n# Training data\n\nIn MLJ (or MLJBase), bind an instance `model` of this unsupervised model to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`.
Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns `true` for features to be included/excluded\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` features or ignore them\n * `label_for_missing::Dict{<:Type, <:Any} = Dict(AbstractString => \"missing\", Char => 'm')`: A dictionary whose keys are chosen from the types `Char`, `AbstractString`, and `Number`, and whose values specify the new level to map into, given the column's raw supertype. By default, if the raw type of the column subtypes `AbstractString`, missing values are replaced with `\"missing\"`; if it subtypes `Char`, the new value is `'m'`; and if it subtypes `Number`, the new value is the lowest value in the column minus 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply missingness encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `label_for_missing_given_feature`: A dictionary that, for each column, maps `missing` into some value according to `label_for_missing`\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define a table with missing values\nXm = (\n A = categorical([\"Ben\", \"John\", missing, missing, \"Mary\", \"John\", missing]),\n B = [1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C = categorical([7, 5, missing, missing, 10, 0, missing]),\n D = [23, 23, 44, 66, 14, 23, 11],\n E = categorical([missing, 'g', 'r', missing, 'r', 'g', 'p'])\n)\n\nencoder = MissingnessEncoder()\nmach = fit!(machine(encoder, Xm))\nXnew = transform(mach, Xm)\n\njulia> Xnew\n(A = [\"Ben\", \"John\", \"missing\", \"missing\", \"Mary\", \"John\", \"missing\"],\n B = Union{Missing, Float64}[1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C = [7, 5, -1, -1, 10, 0, -1],\n D = [23, 23, 44, 66, 14, 23, 11],\n E = ['m', 'g', 'r', 'm', 'r', 'g', 'p'],)\n\n```\n\nSee also [`CardinalityReducer`](@ref)\n""" +":name" = "MissingnessEncoder" +":human_name" = "missingness encoder" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :label_for_missing)`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Dict{T} where T<:Type\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[MLJTransforms.ContrastEncoder] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Table`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" =
"`Tuple{ScientificTypesBase.Table}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":target_in_fit" = "`false`" +":is_pure_julia" = "`true`" +":package_name" = "MLJTransforms" +":package_license" = "unknown" +":load_path" = "MLJTransforms.ContrastEncoder" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nContrastEncoder\n```\n\nA model type for constructing a contrast encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContrastEncoder = @load ContrastEncoder pkg=MLJTransforms\n```\n\nDo `model = ContrastEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContrastEncoder(features=...)`.\n\n`ContrastEncoder` implements the following contrast encoding methods for categorical features: dummy, sum, backward/forward difference, and Helmert coding. More generally, users can specify a custom contrast or hypothesis matrix, and each feature can be encoded using a different method.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * `mode=:dummy`: The type of encoding to use. Can be one of `:contrast`, `:dummy`, `:sum`, `:backward_diff`, `:forward_diff`, `:helmert` or `:hypothesis`.\n\nIf `ignore=false` (features to be encoded are listed explictly in `features`), then this can be a vector of the same length as `features` to specify a different contrast encoding scheme for each feature\n\n * `buildmatrix=nothing`: A function or other callable with signature `buildmatrix(colname, k)`,\n\nwhere `colname` is the name of the feature levels and `k` is it's length, and which returns contrast or hypothesis matrix with row/column ordering consistent with the ordering of `levels(col)`. Only relevant if `mode` is `:contrast` or `:hypothesis`.\n\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply contrast encoding to selected `Multiclass` or `OrderedFactor features of`Xnew`specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vector_given_value_given_feature`: A dictionary that maps each level for each column in a subset of the categorical features of `X` into its contrast vector.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical dataset\nX = (\n name = categorical([\"Ben\", \"John\", \"Mary\", \"John\"]),\n height = [1.85, 1.67, 1.5, 1.67],\n favnum = categorical([7, 5, 10, 1]),\n age = [23, 23, 14, 23],\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = ContrastEncoder(\n features = [:name, :favnum],\n ignore = false, \n mode = [:dummy, :helmert],\n)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> Xnew\n (name_John = [1.0, 0.0, 0.0, 0.0],\n name_Mary = [0.0, 1.0, 0.0, 1.0],\n height = [1.85, 1.67, 1.5, 1.67],\n favnum_5 = [0.0, 1.0, 0.0, -1.0],\n favnum_7 = [2.0, -1.0, 0.0, -1.0],\n favnum_10 = [-1.0, -1.0, 3.0, -1.0],\n age = [23, 23, 14, 23],)\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" +":name" = "ContrastEncoder" +":human_name" = "contrast encoder" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":hyperparameters" = "`(:features, :ignore, :mode, :buildmatrix, :ordered_factor)`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Union{Symbol, AbstractVector{Symbol}}\", \"Any\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[MLJTransforms.FrequencyEncoder] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Table`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":target_in_fit" = "`false`" +":is_pure_julia" = "`true`" +":package_name" = "MLJTransforms" +":package_license" = "unknown" +":load_path" = "MLJTransforms.FrequencyEncoder" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nFrequencyEncoder\n```\n\nA model type for constructing a frequency encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFrequencyEncoder = @load FrequencyEncoder pkg=MLJTransforms\n```\n\nDo `model = FrequencyEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FrequencyEncoder(features=...)`.\n\n`FrequencyEncoder` implements frequency encoding, which replaces the categorical values in the specified categorical features with their (normalized or raw) frequencies of occurrence in the dataset.
\n\n# Training data\n\nIn MLJ (or MLJBase), bind an instance `model` of this unsupervised model to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns `true` for features to be included/excluded\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` features or ignore them\n * `normalize=false`: Whether to use normalized frequencies that sum to 1 over category values or to use raw counts.\n * `output_type=Float32`: The type of the output values. The default is `Float32`, but you can set it to `Float64` or any other type that can hold the frequency values.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply frequency encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `statistic_given_feat_val`: A dictionary that maps each level for each column in a subset of the categorical features of `X` into its frequency.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = FrequencyEncoder(ordered_factor = false, normalize=true)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> Xnew\n (A = [2, 1, 2, 2, 2],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [4, 4, 4, 1, 4],\n D = [3, 2, 3, 2, 3],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" +":name" = "FrequencyEncoder" +":human_name" = "frequency encoder" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :normalize, :output_type)`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Bool\", \"Type\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[MLJTransforms.CardinalityReducer] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Table`" +":target_scitype" =
"`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":target_in_fit" = "`false`" +":is_pure_julia" = "`true`" +":package_name" = "MLJTransforms" +":package_license" = "unknown" +":load_path" = "MLJTransforms.CardinalityReducer" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nCardinalityReducer\n```\n\nA model type for constructing a cardinality reducer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nCardinalityReducer = @load CardinalityReducer pkg=MLJTransforms\n```\n\nDo `model = CardinalityReducer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CardinalityReducer(features=...)`.\n\n`CardinalityReducer` maps any level of a categorical feature that occurs with frequency < `min_frequency` into a new level (e.g., \"Other\"). This is useful when some categorical features have high cardinality and many levels are infrequent. This assumes that the categorical features have raw types that are in `Union{AbstractString, Char, Number}`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `min_frequency::Real=3`: Any level of a categorical feature that occurs with frequency < `min_frequency` will be mapped to a new level. Could be\n\nan integer or a float which decides whether raw counts or normalized frequencies are used.\n\n * `label_for_infrequent::Dict{<:Type, <:Any}()= Dict( AbstractString => \"Other\", Char => 'O', )`: A\n\ndictionary where the possible values for keys are the types in `Char`, `AbstractString`, and `Number` and each value signifies the new level to map into given a column raw super type. By default, if the raw type of the column subtypes `AbstractString` then the new value is `\"Other\"` and if the raw type subtypes `Char` then the new value is `'O'` and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply cardinality reduction to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `new_cat_given_col_val`: A dictionary that maps each level in a categorical feature to a new level (either itself or the new level specified in `label_for_infrequent`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define categorical features\nA = [ [\"a\" for i in 1:100]..., \"b\", \"b\", \"b\", \"c\", \"d\"]\nB = [ [0 for i in 1:100]..., 1, 2, 3, 4, 4]\n\n# Combine into a named tuple\nX = (A = A, B = B)\n\n# Coerce A and B to multiclass\nX = coerce(X,\n:A => Multiclass,\n:B => Multiclass\n)\n\nencoder = CardinalityReducer(ordered_factor = false, min_frequency=3)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> proportionmap(Xnew.A)\nDict{CategoricalArrays.CategoricalValue{String, UInt32}, Float64} with 3 entries:\n \"Other\" => 0.0190476\n \"b\" => 0.0285714\n \"a\" => 0.952381\n\njulia> proportionmap(Xnew.B)\nDict{CategoricalArrays.CategoricalValue{Int64, UInt32}, Float64} with 2 entries:\n 0 => 0.952381\n -1 => 0.047619\n```\n\nSee also [`FrequencyEncoder`](@ref)\n""" +":name" = "CardinalityReducer" +":human_name" = "cardinality reducer" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :min_frequency, :label_for_infrequent)`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Dict{T} where T<:Type\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[MLJTransforms.OrdinalEncoder] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Table`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":target_in_fit" = "`false`" +":is_pure_julia" = "`true`" +":package_name" = "MLJTransforms" +":package_license" = "unknown" +":load_path" = "MLJTransforms.OrdinalEncoder" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nOrdinalEncoder\n```\n\nA model type for constructing an ordinal encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOrdinalEncoder = @load OrdinalEncoder pkg=MLJTransforms\n```\n\nDo `model = OrdinalEncoder()` to construct an instance with default hyper-parameters.
Provide keyword arguments to override hyper-parameter defaults, as in `OrdinalEncoder(features=...)`.\n\n`OrdinalEncoder` implements ordinal encoding, which replaces the categorical values in the specified categorical features with integers (ordered arbitrarily). This creates an implicit ordering between categories, which may not be a valid modelling assumption.\n\n# Training data\n\nIn MLJ (or MLJBase), bind an instance `model` of this unsupervised model to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns `true` for features to be included/excluded\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` features or ignore them\n * `output_type`: The numerical concrete type of the encoded features. Default is `Float32`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply ordinal encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `index_given_feat_level`: A dictionary that maps each level for each column in a subset of the categorical features of `X` into an integer.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = OrdinalEncoder(ordered_factor = false)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> Xnew\n (A = [2, 1, 2, 3, 3],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [1, 1, 1, 2, 1],\n D = [2, 1, 2, 1, 2],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" +":name" = "OrdinalEncoder" +":human_name" = "ordinal encoder" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :output_type)`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Type\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + [CatBoost.CatBoostRegressor] ":input_scitype" =
"`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" @@ -854,7 +1070,7 @@ ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mode"] ":hyperparameters" = "`(:K, :algorithm, :metric, :leafsize, :reorder, :weights, :output_type)`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\", \"Type{<:Union{AbstractDict{<:AbstractString, <:AbstractVector}, AbstractDict{Symbol, <:AbstractVector}, NamedTuple{names, T} where {N, names, T<:NTuple{N, AbstractVector}}}}\")`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\", \"Type{<:Union{AbstractDict{<:AbstractString, <:AbstractVector}, AbstractDict{Symbol, <:AbstractVector}, NamedTuple{names, T} where {N, names, T<:Tuple{Vararg{AbstractVector, N}}}}}\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" @@ -4626,7 +4842,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nUnion{Types...}\n```\n\nA `Union` type is an abstract type which includes all instances of any of its argument types. This means that `T <: Union{T,S}` and `S <: Union{T,S}`.\n\nLike other abstract types, it cannot be instantiated, even if all of its arguments are non abstract.\n\n# Examples\n\n```jldoctest\njulia> IntOrString = Union{Int,AbstractString}\nUnion{Int64, AbstractString}\n\njulia> 1 isa IntOrString # instance of Int is included in the union\ntrue\n\njulia> \"Hello!\" isa IntOrString # String is also included\ntrue\n\njulia> 1.0 isa IntOrString # Float64 is not included because it is neither Int nor AbstractString\nfalse\n```\n\n# Extended Help\n\nUnlike most other parametric types, unions are covariant in their parameters. For example, `Union{Real, String}` is a subtype of `Union{Number, AbstractString}`.\n\nThe empty union [`Union{}`](@ref) is the bottom type of Julia.\n""" +":docstring" = """```\nUnion{Types...}\n```\n\nA type union is an abstract type which includes all instances of any of its argument types. 
The empty union [`Union{}`](@ref) is the bottom type of Julia.\n\n# Examples\n\n```jldoctest\njulia> IntOrString = Union{Int,AbstractString}\nUnion{Int64, AbstractString}\n\njulia> 1 isa IntOrString\ntrue\n\njulia> \"Hello!\" isa IntOrString\ntrue\n\njulia> 1.0 isa IntOrString\nfalse\n```\n""" ":name" = "Stack" ":human_name" = "probabilistic stack" ":is_supervised" = "`true`" @@ -6156,13 +6372,13 @@ ":constructor" = "`nothing`" [MLJText.TfidfTransformer] -":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" +":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" ":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" +":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" +":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":target_in_fit" = "`false`" ":is_pure_julia" = "`true`" ":package_name" = "MLJText" @@ -6192,13 +6408,13 @@ ":constructor" = "`nothing`" [MLJText.CountTransformer] -":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" +":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" ":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} 
where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" +":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" +":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":target_in_fit" = "`false`" ":is_pure_julia" = "`true`" ":package_name" = "MLJText" @@ -6228,13 +6444,13 @@ ":constructor" = "`nothing`" [MLJText.BM25Transformer] -":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" +":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" ":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" +":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" +":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":target_in_fit" = "`false`" ":is_pure_julia" = "`true`" ":package_name" = "MLJText" @@ -6282,16 +6498,16 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = 
"""```\nLGBMClassifier\n```\n\nA model type for constructing a LightGBM classifier, based on [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLGBMClassifier = @load LGBMClassifier pkg=LightGBM\n```\n\nDo `model = LGBMClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LGBMClassifier(boosting=...)`.\n\n`LightGBM, short for light gradient-boosting machine, is a framework for gradient boosting based on decision tree algorithms and used for classification and other machine learning tasks, with a focus on performance and scalability. This model in particular is used for various types of classification tasks.\n\n# Training data In MLJ or MLJBase, bind an instance `model` to data with\n\nmach = machine(model, X, y) \n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`; alternatively, `X` is any `AbstractMatrix` with `Continuous` elements; check the scitype with `scitype(X)`.\n * y is a vector of targets whose items are of scitype `Continuous`. Check the scitype with scitype(y).\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Hyper-parameters\n\nSee https://lightgbm.readthedocs.io/en/v3.3.5/Parameters.html.\n\nCurrently, the following parameters and their defaults are supported:\n\n * `boosting::String = \"gbdt\"`,\n * `num_iterations::Int = 100::(_ >= 0)`,\n * `learning_rate::Float64 = 0.1::(_ > 0.)`,\n * `num_leaves::Int = 31::(1 < _ <= 131072)`,\n * `max_depth::Int = -1`,\n * `tree_learner::String = \"serial\"`,\n * `histogram_pool_size::Float64 = -1.0`,\n * `min_data_in_leaf::Int = 20::(_ >= 0)`,\n * `min_sum_hessian_in_leaf::Float64 = 1e-3::(_ >= 0.0)`,\n * `max_delta_step::Float64 = 0.0`,\n * `lambda_l1::Float64 = 0.0::(_ >= 0.0)`,\n * `lambda_l2::Float64 = 0.0::(_ >= 0.0)`,\n * `min_gain_to_split::Float64 = 0.0::(_ >= 0.0)`,\n * `feature_fraction::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `feature_fraction_bynode::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `feature_fraction_seed::Int = 2`,\n * `bagging_fraction::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `bagging_freq::Int = 0::(_ >= 0)`,\n * `bagging_seed::Int = 3`,\n * `early_stopping_round::Int = 0`,\n * `extra_trees::Bool = false`,\n * `extra_seed::Int = 6`,\n * `max_bin::Int = 255::(_ > 1)`,\n * `bin_construct_sample_cnt = 200000::(_ > 0)`,\n * `drop_rate::Float64 = 0.1::(0.0 <= _ <= 1.0)`,\n * `max_drop::Int = 50`,\n * `skip_drop:: Float64 = 0.5::(0.0 <= _ <= 1)`,\n * `xgboost_dart_mode::Bool = false`,\n * `uniform_drop::Bool = false`,\n * `drop_seed::Int = 4`,\n * `top_rate::Float64 = 0.2::(0.0 <= _ <= 1.0)`,\n * `other_rate::Float64 = 0.1::(0.0 <= _ <= 1.0)`,\n * `min_data_per_group::Int = 100::(_ > 0)`,\n * `max_cat_threshold::Int = 32::(_ > 0)`,\n * `cat_l2::Float64 = 10.0::(_ >= 0)`,\n * `cat_smooth::Float64 = 10.0::(_ >= 0)`,\n * `objective::String = \"multiclass\"`,\n * `categorical_feature::Vector{Int} = Vector{Int}()`,\n * `data_random_seed::Int = 1`,\n * `is_sparse::Bool = true`,\n * `is_unbalance::Bool = false`,\n * `boost_from_average::Bool = true`,\n * `use_missing::Bool = true`,\n * `linear_tree::Bool = false`,\n * `feature_pre_filter::Bool = true`,\n * `metric::Vector{String} = 
[\"none\"]`,\n * `metric_freq::Int = 1::(_ > 0)`,\n * `is_provide_training_metric::Bool = false`,\n * `eval_at::Vector{Int} = Vector{Int}([1, 2, 3, 4, 5])::(all(_ .> 0))`,\n * `num_machines::Int = 1::(_ > 0)`,\n * `num_threads::Int = 0::(_ >= 0)`,\n * `local_listen_port::Int = 12400::(_ > 0)`,\n * `time_out::Int = 120::(_ > 0)`,\n * `machine_list_file::String = \"\"`,\n * `save_binary::Bool = false`,\n * `device_type::String = \"cpu\"`,\n * `gpu_use_dp::Bool = false`,\n * `gpu_platform_id::Int = -1`,\n * `gpu_device_id::Int = -1`,\n * `num_gpu::Int = 1`,\n * `force_col_wise::Bool = false`,\n * `force_row_wise::Bool = false`,\n * `truncate_booster::Bool = true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: Fitted model information, contains a `LGBMClassification` object, a `CategoricalArray` of the input class names, and the classifier with all its parameters\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_metrics`: A dictionary containing all training metrics.\n * `importance`: A `namedtuple` containing:\n\n * `gain`: The total gain of each split used by the model\n * `split`: The number of times each feature is used by the model.\n\n# Examples\n\n```julia\n\nusing DataFrames\nusing MLJ\n\n# load the model\nLGBMClassifier = @load LGBMClassifier pkg=LightGBM \n\nX, y = @load_iris \nX = DataFrame(X)\ntrain, test = partition(collect(eachindex(y)), 0.70, shuffle=true)\n\nfirst(X, 3)\nlgb = LGBMClassifier() # initialise a model with default params\nmach = machine(lgb, X[train, :], y[train]) |> fit!\n\npredict(mach, X[test, :])\n\n# access feature importances\nmodel_report = report(mach)\ngain_importance = model_report.importance.gain\nsplit_importance = model_report.importance.split\n```\n\nSee also [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl) and the unwrapped model type [`LightGBM.LGBMClassification`](@ref)\n""" +":docstring" = """```\nLGBMClassifier\n```\n\nA model type for constructing a LightGBM classifier, based on [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLGBMClassifier = @load LGBMClassifier pkg=LightGBM\n```\n\nDo `model = LGBMClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LGBMClassifier(objective=...)`.\n\n`LightGBM, short for light gradient-boosting machine, is a framework for gradient boosting based on decision tree algorithms and used for classification and other machine learning tasks, with a focus on performance and scalability. This model in particular is used for various types of classification tasks.\n\n# Training data In MLJ or MLJBase, bind an instance `model` to data with\n\nmach = machine(model, X, y) \n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`; alternatively, `X` is any `AbstractMatrix` with `Continuous` elements; check the scitype with `scitype(X)`.\n * y is a vector of targets whose items are of scitype `Continuous`. 
Check the scitype with scitype(y).\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Hyper-parameters\n\nSee https://lightgbm.readthedocs.io/en/v3.3.5/Parameters.html.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: Fitted model information, contains a `LGBMClassification` object, a `CategoricalArray` of the input class names, and the classifier with all its parameters\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_metrics`: A dictionary containing all training metrics.\n * `importance`: A `namedtuple` containing:\n\n * `gain`: The total gain of each split used by the model\n * `split`: The number of times each feature is used by the model.\n\n# Examples\n\n```julia\n\nusing DataFrames\nusing MLJ\n\n# load the model\nLGBMClassifier = @load LGBMClassifier pkg=LightGBM \n\nX, y = @load_iris \nX = DataFrame(X)\ntrain, test = partition(collect(eachindex(y)), 0.70, shuffle=true)\n\nfirst(X, 3)\nlgb = LGBMClassifier() # initialise a model with default params\nmach = machine(lgb, X[train, :], y[train]) |> fit!\n\npredict(mach, X[test, :])\n\n# access feature importances\nmodel_report = report(mach)\ngain_importance = model_report.importance.gain\nsplit_importance = model_report.importance.split\n```\n\nSee also [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl) and the unwrapped model type [`LightGBM.LGBMClassification`](@ref)\n""" ":name" = "LGBMClassifier" ":human_name" = "LightGBM classifier" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":predict", ":update"] -":hyperparameters" = "`(:boosting, :num_iterations, :learning_rate, :num_leaves, :max_depth, :tree_learner, :histogram_pool_size, :min_data_in_leaf, :min_sum_hessian_in_leaf, :max_delta_step, :lambda_l1, :lambda_l2, :min_gain_to_split, :feature_fraction, :feature_fraction_bynode, :feature_fraction_seed, :bagging_fraction, :pos_bagging_fraction, :neg_bagging_fraction, :bagging_freq, :bagging_seed, :early_stopping_round, :extra_trees, :extra_seed, :max_bin, :bin_construct_sample_cnt, :drop_rate, :max_drop, :skip_drop, :xgboost_dart_mode, :uniform_drop, :drop_seed, :top_rate, :other_rate, :min_data_per_group, :max_cat_threshold, :cat_l2, :cat_smooth, :sigmoid, :objective, :categorical_feature, :data_random_seed, :is_enable_sparse, :is_unbalance, :boost_from_average, :scale_pos_weight, :use_missing, :linear_tree, :feature_pre_filter, :metric, :metric_freq, :is_provide_training_metric, :eval_at, :num_machines, :num_threads, :local_listen_port, :time_out, :machine_list_filename, :save_binary, :device_type, :gpu_use_dp, :gpu_platform_id, :gpu_device_id, :num_gpu, :force_col_wise, :force_row_wise, :truncate_booster)`" -":hyperparameter_types" = "`(\"String\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"String\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Vector{Int64}\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", 
\"Bool\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"String\", \"Bool\", \"String\", \"Bool\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:objective, :boosting, :num_iterations, :learning_rate, :num_leaves, :tree_learner, :num_threads, :device_type, :seed, :deterministic, :force_col_wise, :force_row_wise, :histogram_pool_size, :max_depth, :min_data_in_leaf, :min_sum_hessian_in_leaf, :bagging_fraction, :pos_bagging_fraction, :neg_bagging_fraction, :bagging_freq, :bagging_seed, :feature_fraction, :feature_fraction_bynode, :feature_fraction_seed, :extra_trees, :extra_seed, :early_stopping_round, :first_metric_only, :max_delta_step, :lambda_l1, :lambda_l2, :linear_lambda, :min_gain_to_split, :drop_rate, :max_drop, :skip_drop, :xgboost_dart_mode, :uniform_drop, :drop_seed, :top_rate, :other_rate, :min_data_per_group, :max_cat_threshold, :cat_l2, :cat_smooth, :max_cat_to_onehot, :top_k, :monotone_constraints, :monotone_constraints_method, :monotone_penalty, :feature_contri, :forcedsplits_filename, :refit_decay_rate, :cegb_tradeoff, :cegb_penalty_split, :cegb_penalty_feature_lazy, :cegb_penalty_feature_coupled, :path_smooth, :interaction_constraints, :verbosity, :linear_tree, :max_bin, :max_bin_by_feature, :min_data_in_bin, :bin_construct_sample_cnt, :data_random_seed, :is_enable_sparse, :enable_bundle, :use_missing, :zero_as_missing, :feature_pre_filter, :pre_partition, :two_round, :header, :label_column, :weight_column, :ignore_column, :categorical_feature, :forcedbins_filename, :precise_float_parser, :start_iteration_predict, :num_iteration_predict, :predict_raw_score, :predict_leaf_index, :predict_contrib, :predict_disable_shape_check, :pred_early_stop, :pred_early_stop_freq, :pred_early_stop_margin, :is_unbalance, :scale_pos_weight, :sigmoid, :boost_from_average, :metric, :metric_freq, :is_provide_training_metric, :eval_at, :multi_error_top_k, :auc_mu_weights, :num_machines, :local_listen_port, :time_out, :machine_list_filename, :machines, :gpu_platform_id, :gpu_device_id, :gpu_use_dp, :num_gpu, :truncate_booster)`" +":hyperparameter_types" = "`(\"String\", \"String\", \"Int64\", \"Float64\", \"Int64\", \"String\", \"Int64\", \"String\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Vector{Int64}\", \"String\", \"Float64\", \"Vector{Float64}\", \"String\", \"Float64\", \"Float64\", \"Float64\", \"Vector{Float64}\", \"Vector{Float64}\", \"Float64\", 
\"Vector{Vector{Int64}}\", \"Int64\", \"Bool\", \"Int64\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"String\", \"String\", \"String\", \"Vector{Int64}\", \"String\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"Float64\", \"Bool\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Vector{Float64}\", \"Int64\", \"Int64\", \"Int64\", \"String\", \"String\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" @@ -6318,16 +6534,16 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nLGBMRegressor\n```\n\nA model type for constructing a LightGBM regressor, based on [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLGBMRegressor = @load LGBMRegressor pkg=LightGBM\n```\n\nDo `model = LGBMRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LGBMRegressor(boosting=...)`.\n\nLightGBM, short for light gradient-boosting machine, is a framework for gradient boosting based on decision tree algorithms and used for classification, regression and other machine learning tasks, with a focus on performance and scalability. This model in particular is used for various types of regression tasks.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with \n\nmach = machine(model, X, y) \n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`; alternatively, `X` is any `AbstractMatrix` with `Continuous` elements; check the scitype with `scitype(X)`.\n * y is a vector of targets whose items are of scitype `Continuous`. 
Check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Hyper-parameters\n\nSee https://lightgbm.readthedocs.io/en/v3.3.5/Parameters.html.\n\nCurrently, the following parameters and their defaults are supported:\n\n * `boosting::String = \"gbdt\"`,\n * `num_iterations::Int = 100::(_ >= 0)`,\n * `learning_rate::Float64 = 0.1::(_ > 0.)`,\n * `num_leaves::Int = 31::(1 < _ <= 131072)`,\n * `max_depth::Int = -1`,\n * `tree_learner::String = \"serial\"`,\n * `histogram_pool_size::Float64 = -1.0`,\n * `min_data_in_leaf::Int = 20::(_ >= 0)`,\n * `min_sum_hessian_in_leaf::Float64 = 1e-3::(_ >= 0.0)`,\n * `max_delta_step::Float64 = 0.0`,\n * `lambda_l1::Float64 = 0.0::(_ >= 0.0)`,\n * `lambda_l2::Float64 = 0.0::(_ >= 0.0)`,\n * `min_gain_to_split::Float64 = 0.0::(_ >= 0.0)`,\n * `feature_fraction::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `feature_fraction_bynode::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `feature_fraction_seed::Int = 2`,\n * `bagging_fraction::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `pos_bagging_fraction::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `neg_bagging_fraction::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `bagging_freq::Int = 0::(_ >= 0)`,\n * `bagging_seed::Int = 3`,\n * `early_stopping_round::Int = 0`,\n * `extra_trees::Bool = false`,\n * `extra_seed::Int = 6`,\n * `max_bin::Int = 255::(_ > 1)`,\n * `bin_construct_sample_cnt = 200000::(_ > 0)`,\n * `drop_rate::Float64 = 0.1::(0.0 <= _ <= 1.0)`,\n * `max_drop::Int = 50`,\n * `skip_drop:: Float64 = 0.5::(0.0 <= _ <= 1)`,\n * `xgboost_dart_mode::Bool = false`,\n * `uniform_drop::Bool = false`,\n * `drop_seed::Int = 4`,\n * `top_rate::Float64 = 0.2::(0.0 <= _ <= 1.0)`,\n * `other_rate::Float64 = 0.1::(0.0 <= _ <= 1.0)`,\n * `min_data_per_group::Int = 100::(_ > 0)`,\n * `max_cat_threshold::Int = 32::(_ > 0)`,\n * `cat_l2::Float64 = 10.0::(_ >= 0)`,\n * `cat_smooth::Float64 = 10.0::(_ >= 0)`,\n * `objective::String = \"regression\"`,\n * `categorical_feature::Vector{Int} = Vector{Int}()`,\n * `data_random_seed::Int = 1`,\n * `is_sparse::Bool = true`,\n * `is_unbalance::Bool = false`,\n * `boost_from_average::Bool = true`,\n * `scale_pos_weight::Float64 = 1.0`,\n * `use_missing::Bool = true`,\n * `linear_tree::Bool = false`,\n * `feature_pre_filter::Bool = true`,\n * `alpha::Float64 = 0.9::(_ > 0.0 )`,\n * `metric::Vector{String} = [\"l2\"]`,\n * `metric_freq::Int = 1::(_ > 0)`,\n * `is_provide_training_metric::Bool = false`,\n * `eval_at::Vector{Int} = Vector{Int}([1, 2, 3, 4, 5])::(all(_ .> 0))`,\n * `num_machines::Int = 1::(_ > 0)`,\n * `num_threads::Int = 0::(_ >= 0)`,\n * `local_listen_port::Int = 12400::(_ > 0)`,\n * `time_out::Int = 120::(_ > 0)`,\n * `machine_list_file::String = \"\"`,\n * `save_binary::Bool = false`,\n * `device_type::String = \"cpu\"`,\n * `gpu_use_dp::Bool = false`,\n * `gpu_platform_id::Int = -1`,\n * `gpu_device_id::Int = -1`,\n * `num_gpu::Int = 1`,\n * `force_col_wise::Bool = false`,\n * `force_row_wise::Bool = false`,\n * `truncate_booster::Bool = true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: Fitted model information, contains a `LGBMRegression` object, an empty vector, and the regressor with all its parameters\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_metrics`: A dictionary containing all training metrics.\n * `importance`: A `namedtuple` containing:\n\n * 
`gain`: The total gain of each split used by the model\n * `split`: The number of times each feature is used by the model.\n\n# Examples\n\n```julia\n\nusing DataFrames\nusing MLJ\n\n# load the model\nLGBMRegressor = @load LGBMRegressor pkg=LightGBM \n\nX, y = @load_boston # a table and a vector \nX = DataFrame(X)\ntrain, test = partition(collect(eachindex(y)), 0.70, shuffle=true)\n\nfirst(X, 3)\nlgb = LGBMRegressor() # initialise a model with default params\nmach = machine(lgb, X[train, :], y[train]) |> fit!\n\npredict(mach, X[test, :])\n\n# access feature importances\nmodel_report = report(mach)\ngain_importance = model_report.importance.gain\nsplit_importance = model_report.importance.split\n```\n\nSee also [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl) and the unwrapped model type [`LightGBM.LGBMRegression`](@ref)\n""" +":docstring" = """```\nLGBMRegressor\n```\n\nA model type for constructing a LightGBM regressor, based on [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLGBMRegressor = @load LGBMRegressor pkg=LightGBM\n```\n\nDo `model = LGBMRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LGBMRegressor(objective=...)`.\n\nLightGBM, short for light gradient-boosting machine, is a framework for gradient boosting based on decision tree algorithms and used for classification, regression and other machine learning tasks, with a focus on performance and scalability. This model in particular is used for various types of regression tasks.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with \n\n`mach = machine(model, X, y)` \n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`; alternatively, `X` is any `AbstractMatrix` with `Continuous` elements; check the scitype with `scitype(X)`.\n * `y` is a vector of targets whose items are of scitype `Continuous`. 
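
Entries like these are what MLJ's model-query machinery serves up at run time, so the trait changes in this hunk are user-visible. A sketch of reading the same traits programmatically rather than from the TOML, assuming MLJ's registry lookup `info(name; pkg=...)` and that traits are exposed as properties of the returned named tuple:

```julia
using MLJ

# Query the model registry for the entry being edited in this hunk.
meta = info("LGBMRegressor", pkg="LightGBM")

meta.hyperparameters   # the ":hyperparameters" tuple recorded in this entry
meta.prediction_type   # :deterministic, matching this entry's ":prediction_type" field
```
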
Check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Hyper-parameters\n\nSee https://lightgbm.readthedocs.io/en/v3.3.5/Parameters.html.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: Fitted model information, contains a `LGBMRegression` object, an empty vector, and the regressor with all its parameters\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_metrics`: A dictionary containing all training metrics.\n * `importance`: A `namedtuple` containing:\n\n * `gain`: The total gain of each split used by the model\n * `split`: The number of times each feature is used by the model.\n\n# Examples\n\n```julia\n\nusing DataFrames\nusing MLJ\n\n# load the model\nLGBMRegressor = @load LGBMRegressor pkg=LightGBM \n\nX, y = @load_boston # a table and a vector \nX = DataFrame(X)\ntrain, test = partition(collect(eachindex(y)), 0.70, shuffle=true)\n\nfirst(X, 3)\nlgb = LGBMRegressor() # initialise a model with default params\nmach = machine(lgb, X[train, :], y[train]) |> fit!\n\npredict(mach, X[test, :])\n\n# access feature importances\nmodel_report = report(mach)\ngain_importance = model_report.importance.gain\nsplit_importance = model_report.importance.split\n```\n\nSee also [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl) and the unwrapped model type [`LightGBM.LGBMRegression`](@ref)\n""" ":name" = "LGBMRegressor" ":human_name" = "LightGBM regressor" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":predict", ":update"] -":hyperparameters" = "`(:boosting, :num_iterations, :learning_rate, :num_leaves, :max_depth, :tree_learner, :histogram_pool_size, :min_data_in_leaf, :min_sum_hessian_in_leaf, :max_delta_step, :lambda_l1, :lambda_l2, :min_gain_to_split, :feature_fraction, :feature_fraction_bynode, :feature_fraction_seed, :bagging_fraction, :bagging_freq, :bagging_seed, :early_stopping_round, :extra_trees, :extra_seed, :max_bin, :bin_construct_sample_cnt, :drop_rate, :max_drop, :skip_drop, :xgboost_dart_mode, :uniform_drop, :drop_seed, :top_rate, :other_rate, :min_data_per_group, :max_cat_threshold, :cat_l2, :cat_smooth, :objective, :categorical_feature, :data_random_seed, :is_enable_sparse, :is_unbalance, :boost_from_average, :use_missing, :linear_tree, :feature_pre_filter, :alpha, :metric, :metric_freq, :is_provide_training_metric, :eval_at, :num_machines, :num_threads, :local_listen_port, :time_out, :machine_list_filename, :save_binary, :device_type, :gpu_use_dp, :gpu_platform_id, :gpu_device_id, :num_gpu, :force_col_wise, :force_row_wise, :truncate_booster)`" -":hyperparameter_types" = "`(\"String\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"String\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"String\", \"Vector{Int64}\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"String\", 
\"Bool\", \"String\", \"Bool\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:objective, :boosting, :num_iterations, :learning_rate, :num_leaves, :tree_learner, :num_threads, :device_type, :seed, :deterministic, :force_col_wise, :force_row_wise, :histogram_pool_size, :max_depth, :min_data_in_leaf, :min_sum_hessian_in_leaf, :bagging_fraction, :bagging_freq, :bagging_seed, :feature_fraction, :feature_fraction_bynode, :feature_fraction_seed, :extra_trees, :extra_seed, :early_stopping_round, :first_metric_only, :max_delta_step, :lambda_l1, :lambda_l2, :linear_lambda, :min_gain_to_split, :drop_rate, :max_drop, :skip_drop, :xgboost_dart_mode, :uniform_drop, :drop_seed, :top_rate, :other_rate, :min_data_per_group, :max_cat_threshold, :cat_l2, :cat_smooth, :max_cat_to_onehot, :top_k, :monotone_constraints, :monotone_constraints_method, :monotone_penalty, :feature_contri, :forcedsplits_filename, :refit_decay_rate, :cegb_tradeoff, :cegb_penalty_split, :cegb_penalty_feature_lazy, :cegb_penalty_feature_coupled, :path_smooth, :interaction_constraints, :verbosity, :linear_tree, :max_bin, :max_bin_by_feature, :min_data_in_bin, :bin_construct_sample_cnt, :data_random_seed, :is_enable_sparse, :enable_bundle, :use_missing, :zero_as_missing, :feature_pre_filter, :pre_partition, :two_round, :header, :label_column, :weight_column, :ignore_column, :categorical_feature, :forcedbins_filename, :precise_float_parser, :start_iteration_predict, :num_iteration_predict, :predict_raw_score, :predict_leaf_index, :predict_contrib, :predict_disable_shape_check, :is_unbalance, :boost_from_average, :reg_sqrt, :alpha, :fair_c, :poisson_max_delta_step, :tweedie_variance_power, :metric, :metric_freq, :is_provide_training_metric, :eval_at, :num_machines, :local_listen_port, :time_out, :machine_list_filename, :machines, :gpu_platform_id, :gpu_device_id, :gpu_use_dp, :num_gpu, :truncate_booster)`" +":hyperparameter_types" = "`(\"String\", \"String\", \"Int64\", \"Float64\", \"Int64\", \"String\", \"Int64\", \"String\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Vector{Int64}\", \"String\", \"Float64\", \"Vector{Float64}\", \"String\", \"Float64\", \"Float64\", \"Float64\", \"Vector{Float64}\", \"Vector{Float64}\", \"Float64\", \"Vector{Vector{Int64}}\", \"Int64\", \"Bool\", \"Int64\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"String\", \"String\", \"String\", \"Vector{Int64}\", \"String\", \"Bool\", \"Int64\", 
\"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"String\", \"String\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" @@ -6541,9 +6757,9 @@ ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] -":hyperparameters" = "`(:loss, :metric, :nrounds, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :tree_type, :rng, :device)`" -":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :tree_type, :rng, :device)`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" @@ -6556,7 +6772,7 @@ ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, 
Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Continuous}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":target_in_fit" = "`true`" @@ -6577,9 +6793,9 @@ ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] -":hyperparameters" = "`(:loss, :metric, :nrounds, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" -":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" @@ -6592,7 +6808,7 @@ ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = 
"`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Continuous}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":target_in_fit" = "`true`" @@ -6613,9 +6829,9 @@ ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] -":hyperparameters" = "`(:loss, :metric, :nrounds, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" -":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" @@ -6649,9 +6865,9 @@ ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] -":hyperparameters" = "`(:loss, :metric, :nrounds, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" -":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":hyperparameter_ranges" = "`(nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" @@ -6664,7 +6880,7 @@ ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Count}`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Count}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Count}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Count}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":target_in_fit" = "`true`" @@ -6685,9 +6901,9 @@ ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] -":hyperparameters" = "`(:loss, :metric, :nrounds, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" -":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" @@ -6714,16 +6930,16 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nSRTestRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary 
Search, based on\n[SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSRTestRegressor = @load SRTestRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRTestRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`SRTestRegressor(defaults=...)`.\n# Hyper-parameters\n\n- `defaults = nothing`\n\n- `binary_operators = nothing`\n\n- `unary_operators = nothing`\n\n- `maxsize = nothing`\n\n- `maxdepth = nothing`\n\n- `expression_spec = nothing`\n\n- `populations = nothing`\n\n- `population_size = nothing`\n\n- `ncycles_per_iteration = nothing`\n\n- `elementwise_loss = nothing`\n\n- `loss_function = nothing`\n\n- `loss_function_expression = nothing`\n\n- `dimensional_constraint_penalty = nothing`\n\n- `parsimony = nothing`\n\n- `constraints = nothing`\n\n- `nested_constraints = nothing`\n\n- `complexity_of_operators = nothing`\n\n- `complexity_of_constants = nothing`\n\n- `complexity_of_variables = nothing`\n\n- `warmup_maxsize_by = nothing`\n\n- `adaptive_parsimony_scaling = nothing`\n\n- `mutation_weights = nothing`\n\n- `crossover_probability = nothing`\n\n- `annealing = nothing`\n\n- `alpha = nothing`\n\n- `tournament_selection_n = nothing`\n\n- `tournament_selection_p = nothing`\n\n- `early_stop_condition = nothing`\n\n- `batching = nothing`\n\n- `batch_size = nothing`\n\n- `dimensionless_constants_only = false`\n\n- `complexity_mapping = nothing`\n\n- `use_frequency = true`\n\n- `use_frequency_in_tournament = true`\n\n- `should_simplify = nothing`\n\n- `perturbation_factor = nothing`\n\n- `probability_negate_constant = nothing`\n\n- `skip_mutation_failures = true`\n\n- `optimizer_algorithm = Optim.BFGS{LineSearches.InitialStatic{Float64}, LineSearches.BackTracking{Float64, Int64}, Nothing, Nothing, Optim.Flat}(LineSearches.InitialStatic{Float64}\n alpha: Float64 1.0\n scaled: Bool false\n, LineSearches.BackTracking{Float64, Int64}\n c_1: Float64 0.0001\n ρ_hi: Float64 0.5\n ρ_lo: Float64 0.1\n iterations: Int64 1000\n order: Int64 3\n maxstep: Float64 Inf\n cache: Nothing nothing\n, nothing, nothing, Optim.Flat())`\n\n- `optimizer_nrestarts = 2`\n\n- `optimizer_probability = 0.14`\n\n- `optimizer_iterations = nothing`\n\n- `optimizer_f_calls_limit = nothing`\n\n- `optimizer_options = nothing`\n\n- `should_optimize_constants = true`\n\n- `migration = true`\n\n- `hof_migration = true`\n\n- `fraction_replaced = nothing`\n\n- `fraction_replaced_hof = nothing`\n\n- `topn = nothing`\n\n- `timeout_in_seconds = nothing`\n\n- `max_evals = nothing`\n\n- `input_stream = Base.TTY(RawFD(9) paused, 0 bytes waiting)`\n\n- `turbo = false`\n\n- `bumper = false`\n\n- `autodiff_backend = nothing`\n\n- `deterministic = false`\n\n- `seed = nothing`\n\n- `verbosity = nothing`\n\n- `print_precision = 5`\n\n- `progress = nothing`\n\n- `output_directory = nothing`\n\n- `save_to_file = true`\n\n- `bin_constraints = nothing`\n\n- `una_constraints = nothing`\n\n- `terminal_width = nothing`\n\n- `use_recorder = false`\n\n- `recorder_file = pysr_recorder.json`\n\n- `define_helper_functions = true`\n\n- `expression_type = nothing`\n\n- `expression_options = nothing`\n\n- `node_type = nothing`\n\n- `output_file = nothing`\n\n- `fast_cycle = false`\n\n- `npopulations = nothing`\n\n- `npop = nothing`\n\n- `niterations = 1`\n\n- `parallelism = multithreading`\n\n- `numprocs = nothing`\n\n- `procs = nothing`\n\n- 
`addprocs_function = nothing`\n\n- `heap_size_hint_in_bytes = nothing`\n\n- `worker_imports = nothing`\n\n- `logger = nothing`\n\n- `runtests = true`\n\n- `run_id = nothing`\n\n- `loss_type = Nothing`\n\n- `selection_method = choose_best`\n\n- `dimensions_type = DynamicQuantities.SymbolicDimensions{DynamicQuantities.FixedRational{Int32, 25200}}`\n\n""" +":docstring" = """```\nSRTestRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on\n[SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSRTestRegressor = @load SRTestRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRTestRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`SRTestRegressor(defaults=...)`.\n# Hyper-parameters\n\n- `defaults = nothing`\n\n- `binary_operators = nothing`\n\n- `unary_operators = nothing`\n\n- `maxsize = nothing`\n\n- `maxdepth = nothing`\n\n- `expression_spec = nothing`\n\n- `populations = nothing`\n\n- `population_size = nothing`\n\n- `ncycles_per_iteration = nothing`\n\n- `elementwise_loss = nothing`\n\n- `loss_function = nothing`\n\n- `loss_function_expression = nothing`\n\n- `dimensional_constraint_penalty = nothing`\n\n- `parsimony = nothing`\n\n- `constraints = nothing`\n\n- `nested_constraints = nothing`\n\n- `complexity_of_operators = nothing`\n\n- `complexity_of_constants = nothing`\n\n- `complexity_of_variables = nothing`\n\n- `warmup_maxsize_by = nothing`\n\n- `adaptive_parsimony_scaling = nothing`\n\n- `operator_enum_constructor = nothing`\n\n- `mutation_weights = nothing`\n\n- `crossover_probability = nothing`\n\n- `annealing = nothing`\n\n- `alpha = nothing`\n\n- `tournament_selection_n = nothing`\n\n- `tournament_selection_p = nothing`\n\n- `early_stop_condition = nothing`\n\n- `batching = nothing`\n\n- `batch_size = nothing`\n\n- `dimensionless_constants_only = false`\n\n- `loss_scale = log`\n\n- `complexity_mapping = nothing`\n\n- `use_frequency = true`\n\n- `use_frequency_in_tournament = true`\n\n- `should_simplify = nothing`\n\n- `perturbation_factor = nothing`\n\n- `probability_negate_constant = nothing`\n\n- `skip_mutation_failures = true`\n\n- `optimizer_algorithm = Optim.BFGS{LineSearches.InitialStatic{Float64}, LineSearches.BackTracking{Float64, Int64}, Nothing, Nothing, Optim.Flat}(LineSearches.InitialStatic{Float64}\n alpha: Float64 1.0\n scaled: Bool false\n, LineSearches.BackTracking{Float64, Int64}\n c_1: Float64 0.0001\n ρ_hi: Float64 0.5\n ρ_lo: Float64 0.1\n iterations: Int64 1000\n order: Int64 3\n maxstep: Float64 Inf\n cache: Nothing nothing\n, nothing, nothing, Optim.Flat())`\n\n- `optimizer_nrestarts = 2`\n\n- `optimizer_probability = 0.14`\n\n- `optimizer_iterations = nothing`\n\n- `optimizer_f_calls_limit = nothing`\n\n- `optimizer_options = nothing`\n\n- `should_optimize_constants = true`\n\n- `migration = true`\n\n- `hof_migration = true`\n\n- `fraction_replaced = nothing`\n\n- `fraction_replaced_hof = nothing`\n\n- `topn = nothing`\n\n- `timeout_in_seconds = nothing`\n\n- `max_evals = nothing`\n\n- `input_stream = Base.TTY(RawFD(11) paused, 0 bytes waiting)`\n\n- `turbo = false`\n\n- `bumper = false`\n\n- `autodiff_backend = nothing`\n\n- `deterministic = false`\n\n- `seed = nothing`\n\n- `verbosity = nothing`\n\n- `print_precision = 5`\n\n- `progress = nothing`\n\n- `output_directory = nothing`\n\n- `save_to_file 
= true`\n\n- `bin_constraints = nothing`\n\n- `una_constraints = nothing`\n\n- `terminal_width = nothing`\n\n- `use_recorder = false`\n\n- `recorder_file = pysr_recorder.json`\n\n- `define_helper_functions = true`\n\n- `expression_type = nothing`\n\n- `expression_options = nothing`\n\n- `node_type = nothing`\n\n- `output_file = nothing`\n\n- `fast_cycle = false`\n\n- `npopulations = nothing`\n\n- `npop = nothing`\n\n- `niterations = 1`\n\n- `parallelism = multithreading`\n\n- `numprocs = nothing`\n\n- `procs = nothing`\n\n- `addprocs_function = nothing`\n\n- `heap_size_hint_in_bytes = nothing`\n\n- `worker_imports = nothing`\n\n- `logger = nothing`\n\n- `runtests = true`\n\n- `run_id = nothing`\n\n- `loss_type = Nothing`\n\n- `selection_method = choose_best`\n\n- `dimensions_type = DynamicQuantities.SymbolicDimensions{DynamicQuantities.FixedRational{Int32, 25200}}`\n\n""" ":name" = "SRTestRegressor" ":human_name" = "Symbolic Regression via Evolutionary Search" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [] -":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" -":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", 
\"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, 
:should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" +":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" @@ -6750,16 +6966,16 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nMultitargetSRTestRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on\n[SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nMultitargetSRTestRegressor = @load MultitargetSRTestRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRTestRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`MultitargetSRTestRegressor(defaults=...)`.\n# Hyper-parameters\n\n- `defaults = nothing`\n\n- `binary_operators = nothing`\n\n- `unary_operators = nothing`\n\n- `maxsize = nothing`\n\n- `maxdepth = nothing`\n\n- `expression_spec = nothing`\n\n- `populations = nothing`\n\n- `population_size = nothing`\n\n- `ncycles_per_iteration = nothing`\n\n- `elementwise_loss = nothing`\n\n- `loss_function = nothing`\n\n- `loss_function_expression = nothing`\n\n- `dimensional_constraint_penalty = nothing`\n\n- `parsimony = nothing`\n\n- `constraints = nothing`\n\n- `nested_constraints = nothing`\n\n- `complexity_of_operators = nothing`\n\n- `complexity_of_constants = nothing`\n\n- `complexity_of_variables = nothing`\n\n- `warmup_maxsize_by = nothing`\n\n- `adaptive_parsimony_scaling = nothing`\n\n- `mutation_weights = nothing`\n\n- `crossover_probability = nothing`\n\n- `annealing = nothing`\n\n- `alpha = nothing`\n\n- `tournament_selection_n = nothing`\n\n- `tournament_selection_p = nothing`\n\n- `early_stop_condition = nothing`\n\n- `batching = nothing`\n\n- `batch_size = nothing`\n\n- `dimensionless_constants_only = false`\n\n- `complexity_mapping = nothing`\n\n- `use_frequency = true`\n\n- `use_frequency_in_tournament = true`\n\n- `should_simplify = nothing`\n\n- `perturbation_factor = nothing`\n\n- `probability_negate_constant = nothing`\n\n- `skip_mutation_failures = true`\n\n- `optimizer_algorithm = Optim.BFGS{LineSearches.InitialStatic{Float64}, LineSearches.BackTracking{Float64, Int64}, Nothing, Nothing, Optim.Flat}(LineSearches.InitialStatic{Float64}\n alpha: Float64 1.0\n scaled: Bool false\n, LineSearches.BackTracking{Float64, Int64}\n c_1: Float64 0.0001\n ρ_hi: Float64 0.5\n ρ_lo: Float64 0.1\n iterations: Int64 1000\n order: Int64 3\n maxstep: Float64 Inf\n cache: Nothing nothing\n, nothing, nothing, Optim.Flat())`\n\n- `optimizer_nrestarts = 2`\n\n- `optimizer_probability = 0.14`\n\n- `optimizer_iterations = nothing`\n\n- `optimizer_f_calls_limit = nothing`\n\n- `optimizer_options = nothing`\n\n- `should_optimize_constants = true`\n\n- 
`migration = true`\n\n- `hof_migration = true`\n\n- `fraction_replaced = nothing`\n\n- `fraction_replaced_hof = nothing`\n\n- `topn = nothing`\n\n- `timeout_in_seconds = nothing`\n\n- `max_evals = nothing`\n\n- `input_stream = Base.TTY(RawFD(9) paused, 0 bytes waiting)`\n\n- `turbo = false`\n\n- `bumper = false`\n\n- `autodiff_backend = nothing`\n\n- `deterministic = false`\n\n- `seed = nothing`\n\n- `verbosity = nothing`\n\n- `print_precision = 5`\n\n- `progress = nothing`\n\n- `output_directory = nothing`\n\n- `save_to_file = true`\n\n- `bin_constraints = nothing`\n\n- `una_constraints = nothing`\n\n- `terminal_width = nothing`\n\n- `use_recorder = false`\n\n- `recorder_file = pysr_recorder.json`\n\n- `define_helper_functions = true`\n\n- `expression_type = nothing`\n\n- `expression_options = nothing`\n\n- `node_type = nothing`\n\n- `output_file = nothing`\n\n- `fast_cycle = false`\n\n- `npopulations = nothing`\n\n- `npop = nothing`\n\n- `niterations = 1`\n\n- `parallelism = multithreading`\n\n- `numprocs = nothing`\n\n- `procs = nothing`\n\n- `addprocs_function = nothing`\n\n- `heap_size_hint_in_bytes = nothing`\n\n- `worker_imports = nothing`\n\n- `logger = nothing`\n\n- `runtests = true`\n\n- `run_id = nothing`\n\n- `loss_type = Nothing`\n\n- `selection_method = choose_best`\n\n- `dimensions_type = DynamicQuantities.SymbolicDimensions{DynamicQuantities.FixedRational{Int32, 25200}}`\n\n""" +":docstring" = """```\nMultitargetSRTestRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on\n[SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nMultitargetSRTestRegressor = @load MultitargetSRTestRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRTestRegressor()` to construct an instance with default hyper-parameters. 
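
Both SymbolicRegression entries gain the same two hyper-parameters: `operator_enum_constructor` (accepting `nothing`, a `Function`, or an `AbstractOperatorEnum` subtype) and `loss_scale` (a `Symbol`, shown with `log` as the default). A minimal sketch exercising the new option on the single-target variant; treating `:linear` as the alternative to the default `:log` scale is an assumption about SymbolicRegression.jl, not something recorded in this diff:

```julia
using MLJ

# Load the single-target model named in the entry above
# (requires SymbolicRegression.jl to be installed).
SRTestRegressor = @load SRTestRegressor pkg=SymbolicRegression

model = SRTestRegressor(
    binary_operators = [+, -, *, /],
    niterations = 5,
    loss_scale = :linear,  # newly registered; `:linear` is an assumed alternative to `log`
)
```
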
Provide keyword arguments to override hyper-parameter defaults, as in\n`MultitargetSRTestRegressor(defaults=...)`.\n# Hyper-parameters\n\n- `defaults = nothing`\n\n- `binary_operators = nothing`\n\n- `unary_operators = nothing`\n\n- `maxsize = nothing`\n\n- `maxdepth = nothing`\n\n- `expression_spec = nothing`\n\n- `populations = nothing`\n\n- `population_size = nothing`\n\n- `ncycles_per_iteration = nothing`\n\n- `elementwise_loss = nothing`\n\n- `loss_function = nothing`\n\n- `loss_function_expression = nothing`\n\n- `dimensional_constraint_penalty = nothing`\n\n- `parsimony = nothing`\n\n- `constraints = nothing`\n\n- `nested_constraints = nothing`\n\n- `complexity_of_operators = nothing`\n\n- `complexity_of_constants = nothing`\n\n- `complexity_of_variables = nothing`\n\n- `warmup_maxsize_by = nothing`\n\n- `adaptive_parsimony_scaling = nothing`\n\n- `operator_enum_constructor = nothing`\n\n- `mutation_weights = nothing`\n\n- `crossover_probability = nothing`\n\n- `annealing = nothing`\n\n- `alpha = nothing`\n\n- `tournament_selection_n = nothing`\n\n- `tournament_selection_p = nothing`\n\n- `early_stop_condition = nothing`\n\n- `batching = nothing`\n\n- `batch_size = nothing`\n\n- `dimensionless_constants_only = false`\n\n- `loss_scale = log`\n\n- `complexity_mapping = nothing`\n\n- `use_frequency = true`\n\n- `use_frequency_in_tournament = true`\n\n- `should_simplify = nothing`\n\n- `perturbation_factor = nothing`\n\n- `probability_negate_constant = nothing`\n\n- `skip_mutation_failures = true`\n\n- `optimizer_algorithm = Optim.BFGS{LineSearches.InitialStatic{Float64}, LineSearches.BackTracking{Float64, Int64}, Nothing, Nothing, Optim.Flat}(LineSearches.InitialStatic{Float64}\n alpha: Float64 1.0\n scaled: Bool false\n, LineSearches.BackTracking{Float64, Int64}\n c_1: Float64 0.0001\n ρ_hi: Float64 0.5\n ρ_lo: Float64 0.1\n iterations: Int64 1000\n order: Int64 3\n maxstep: Float64 Inf\n cache: Nothing nothing\n, nothing, nothing, Optim.Flat())`\n\n- `optimizer_nrestarts = 2`\n\n- `optimizer_probability = 0.14`\n\n- `optimizer_iterations = nothing`\n\n- `optimizer_f_calls_limit = nothing`\n\n- `optimizer_options = nothing`\n\n- `should_optimize_constants = true`\n\n- `migration = true`\n\n- `hof_migration = true`\n\n- `fraction_replaced = nothing`\n\n- `fraction_replaced_hof = nothing`\n\n- `topn = nothing`\n\n- `timeout_in_seconds = nothing`\n\n- `max_evals = nothing`\n\n- `input_stream = Base.TTY(RawFD(11) paused, 0 bytes waiting)`\n\n- `turbo = false`\n\n- `bumper = false`\n\n- `autodiff_backend = nothing`\n\n- `deterministic = false`\n\n- `seed = nothing`\n\n- `verbosity = nothing`\n\n- `print_precision = 5`\n\n- `progress = nothing`\n\n- `output_directory = nothing`\n\n- `save_to_file = true`\n\n- `bin_constraints = nothing`\n\n- `una_constraints = nothing`\n\n- `terminal_width = nothing`\n\n- `use_recorder = false`\n\n- `recorder_file = pysr_recorder.json`\n\n- `define_helper_functions = true`\n\n- `expression_type = nothing`\n\n- `expression_options = nothing`\n\n- `node_type = nothing`\n\n- `output_file = nothing`\n\n- `fast_cycle = false`\n\n- `npopulations = nothing`\n\n- `npop = nothing`\n\n- `niterations = 1`\n\n- `parallelism = multithreading`\n\n- `numprocs = nothing`\n\n- `procs = nothing`\n\n- `addprocs_function = nothing`\n\n- `heap_size_hint_in_bytes = nothing`\n\n- `worker_imports = nothing`\n\n- `logger = nothing`\n\n- `runtests = true`\n\n- `run_id = nothing`\n\n- `loss_type = Nothing`\n\n- `selection_method = choose_best`\n\n- `dimensions_type = 
DynamicQuantities.SymbolicDimensions{DynamicQuantities.FixedRational{Int32, 25200}}`\n\n""" ":name" = "MultitargetSRTestRegressor" ":human_name" = "Multi-Target Symbolic Regression via Evolutionary Search" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [] -":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" -":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", 
\"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" +":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, 
Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" @@ -6786,16 +7002,16 @@ 
":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nMultitargetSRRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetSRRegressor(defaults=...)`.\n\nMulti-target Symbolic Regression regressor (`MultitargetSRRegressor`) conducts several searches for expressions that predict each target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype\n\n`Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n\n * `y` is the target, which can be any table of target variables whose element scitype is `Continuous`; check the scitype with `schema(y)`. Units in columns of `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. The same weights are used for all targets.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of lists of trained models. The models chosen from each of these lists is defined by the function `selection_method` keyword argument, which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `defaults`: What set of defaults to use for `Options`. The default, `nothing`, will simply take the default options from the current version of SymbolicRegression. However, you may also select the defaults from an earlier version, such as `v\"0.24.5\"`.\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. 
The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::AbstractExpressionNode{T}`, `dataset::Dataset{T}`, and `options::AbstractOptions`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `loss_function_expression`: Similar to `loss_function`, but takes `AbstractExpression` instead of `AbstractExpressionNode` as its first argument. Useful for `TemplateExpressionSpec`.\n * `expression_spec::AbstractExpressionSpec`: A specification of what types of expressions to use in the search. For example, `ExpressionSpec()` (default). You can also see `TemplateExpressionSpec` and `ParametricExpressionSpec` for specialized cases.\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. 
Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to use of a variable, which can also be a vector indicating different per-variable complexity. By default, this is 1.\n * `complexity_mapping`: Alternatively, you can pass a function that takes the expression as input and returns the complexity. Make sure that this operates on `AbstractExpression` (and unpacks to `AbstractExpressionNode`), and returns an integer.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `dimensionless_constants_only`: Whether to only allow dimensionless constants.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `bumper`: Whether to use Bumper.jl for faster evaluation. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is `Optim.BFGS(linesearch=LineSearches.BackTracking())`.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_probability`: Probability of performing optimization of constants at the end of a given iteration.\n * `optimizer_iterations`: How many optimization iterations to perform. This gets passed to `Optim.Options` as `iterations`. The default is 8.\n * `optimizer_f_calls_limit`: How many function calls to allow during optimization. This gets passed to `Optim.Options` as `f_calls_limit`. The default is `10_000`.\n * `optimizer_options`: General options for the constant optimization. 
For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `autodiff_backend`: The backend to use for differentiation, which should be an instance of `AbstractADType` (see `ADTypes.jl`). Default is `nothing`, which means `Optim.jl` will estimate gradients (likely with finite differences). You can also pass a symbolic version of the backend type, such as `:Zygote` for Zygote, `:Enzyme`, etc. Most backends will not work, and many will never work due to incompatibilities, though support for some is gradually being added.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` (or any `AbstractMutationWeights`) should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `output_directory`: The base directory to save output files to. Files will be saved in a subdirectory according to the run ID. By default, this is `./outputs`.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `input_stream`: the stream to read user input from. By default, this is `stdin`. If you encounter issues with reading from `stdin`, like a hang, you can simply pass `devnull` to this argument.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. 
When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia are used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multithreading`), and are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `worker_imports::Union{Vector{Symbol},Nothing}=nothing`: If you want to import additional modules on each worker, pass them here as a vector of symbols. By default some of the extensions will automatically be loaded when needed.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `run_id::Union{String,Nothing}=nothing`: A unique identifier for the run. This will be used to store outputs from the run in the `outputs` directory. 
If not specified, a unique ID will be generated.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to selection expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity. `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Vector{Int}}`: The complexity of each expression in each Pareto frontier.\n * `losses::Vector{Vector{L}}`: The loss of each expression in each Pareto frontier, according to the loss function specified in the model. 
The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{Vector{L}}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\nX = (a=rand(100), b=rand(100), c=rand(100))\nY = (y1=(@. cos(X.c) * 2.1 - 0.9), y2=(@. X.a * X.b + X.c))\nmodel = MultitargetSRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, Y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equations used:\nr = report(mach)\nfor (output_index, (eq, i)) in enumerate(zip(r.equation_strings, r.best_idx))\n println(\"Equation used for \", output_index, \": \", eq[i])\nend\n```\n\nSee also [`SRRegressor`](@ref).\n""" +":docstring" = """```\nMultitargetSRRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetSRRegressor(defaults=...)`.\n\nMulti-target Symbolic Regression regressor (`MultitargetSRRegressor`) conducts several searches for expressions that predict each target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `y` is the target, which can be any table of target variables whose element scitype is `Continuous`; check the scitype with `schema(y)`. Units in columns of `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. The same weights are used for all targets.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of lists of trained models. 
The model chosen from each of these lists is defined by the `selection_method` keyword argument (a function), which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `defaults`: What set of defaults to use for `Options`. The default, `nothing`, will simply take the default options from the current version of SymbolicRegression. However, you may also select the defaults from an earlier version, such as `v\"0.24.5\"`.\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `operator_enum_constructor`: Constructor function to use for creating the operators enum. By default, `OperatorEnum` is used, but you can provide a different constructor like `GenericOperatorEnum`. The constructor must accept the keyword arguments `binary_operators` and `unary_operators`.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::AbstractExpressionNode{T}`, `dataset::Dataset{T}`, and `options::AbstractOptions`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. 
If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. For example,\n\n  ```\n  function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n      prediction, flag = eval_tree_array(tree, dataset.X, options)\n      if !flag\n          return L(Inf)\n      end\n      return sum((prediction .- dataset.y) .^ 2) / dataset.n\n  end\n  ```\n * `loss_function_expression`: Similar to `loss_function`, but takes `AbstractExpression` instead of `AbstractExpressionNode` as its first argument. Useful for `TemplateExpressionSpec`.\n * `loss_scale`: Determines how loss values are scaled when computing scores. Options are:\n\n    * `:log` (default): Uses logarithmic scaling of loss ratios. This mode requires non-negative loss values and is ideal for traditional loss functions that are always positive.\n    * `:linear`: Uses direct differences between losses. This mode handles any loss values (including negative) and is useful for custom loss functions, especially those based on likelihoods.\n * `expression_spec::AbstractExpressionSpec`: A specification of what types of expressions to use in the search. For example, `ExpressionSpec()` (default). You can also see `TemplateExpressionSpec` and `ParametricExpressionSpec` for specialized cases.\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., `[(^) => 3, sin => 2]`.\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to use of a variable, which can also be a vector indicating different per-variable complexity. By default, this is 1.\n * `complexity_mapping`: Alternatively, you can pass a function that takes the expression as input and returns the complexity. Make sure that this operates on `AbstractExpression` (and unpacks to `AbstractExpressionNode`), and returns an integer.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. 
Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `dimensionless_constants_only`: Whether to only allow dimensionless constants.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `bumper`: Whether to use Bumper.jl for faster evaluation. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is `Optim.BFGS(linesearch=LineSearches.BackTracking())`.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_probability`: Probability of performing optimization of constants at the end of a given iteration.\n * `optimizer_iterations`: How many optimization iterations to perform. This gets passed to `Optim.Options` as `iterations`. The default is 8.\n * `optimizer_f_calls_limit`: How many function calls to allow during optimization. This gets passed to `Optim.Options` as `f_calls_limit`. The default is `10_000`.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `autodiff_backend`: The backend to use for differentiation, which should be an instance of `AbstractADType` (see `ADTypes.jl`). Default is `nothing`, which means `Optim.jl` will estimate gradients (likely with finite differences). You can also pass a symbolic version of the backend type, such as `:Zygote` for Zygote, `:Enzyme`, etc. 
Most backends will not work, and many will never work due to incompatibilities, though support for some is gradually being added.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` (or any `AbstractMutationWeights`) should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `output_directory`: The base directory to save output files to. Files will be saved in a subdirectory according to the run ID. By default, this is `./outputs`.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `input_stream`: the stream to read user input from. By default, this is `stdin`. If you encounter issues with reading from `stdin`, like a hang, you can simply pass `devnull` to this argument.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. 
However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to Julia is used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multiprocessing`) and not passing `procs` manually, processes will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `worker_imports::Union{Vector{Symbol},Nothing}=nothing`: If you want to import additional modules on each worker, pass them here as a vector of symbols. By default some of the extensions will automatically be loaded when needed.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `run_id::Union{String,Nothing}=nothing`: A unique identifier for the run. This will be used to store outputs from the run in the `outputs` directory. If not specified, a unique ID will be generated.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to select an expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. 
By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity. `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Vector{Int}}`: The complexity of each expression in each Pareto frontier.\n * `losses::Vector{Vector{L}}`: The loss of each expression in each Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{Vector{L}}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\nX = (a=rand(100), b=rand(100), c=rand(100))\nY = (y1=(@. cos(X.c) * 2.1 - 0.9), y2=(@. 
X.a * X.b + X.c))\nmodel = MultitargetSRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, Y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equations used:\nr = report(mach)\nfor (output_index, (eq, i)) in enumerate(zip(r.equation_strings, r.best_idx))\n    println(\"Equation used for \", output_index, \": \", eq[i])\nend\n```\n\nSee also [`SRRegressor`](@ref).\n""" ":name" = "MultitargetSRRegressor" ":human_name" = "Multi-Target Symbolic Regression via Evolutionary Search" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = []
Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, 
:output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" +":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" @@ -6822,16 +7038,16 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nSRRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SRRegressor(defaults=...)`.\n\nSingle-target Symbolic Regression regressor (`SRRegressor`) searches for symbolic expressions that predict a single target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`. Units in `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of trained models. The model chosen from this list is defined by the function `selection_method` keyword argument, which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `defaults`: What set of defaults to use for `Options`. The default, `nothing`, will simply take the default options from the current version of SymbolicRegression. However, you may also select the defaults from an earlier version, such as `v\"0.24.5\"`.\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. 
For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::AbstractExpressionNode{T}`, `dataset::Dataset{T}`, and `options::AbstractOptions`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `loss_function_expression`: Similar to `loss_function`, but takes `AbstractExpression` instead of `AbstractExpressionNode` as its first argument. Useful for `TemplateExpressionSpec`.\n * `expression_spec::AbstractExpressionSpec`: A specification of what types of expressions to use in the search. For example, `ExpressionSpec()` (default). 
You can also see `TemplateExpressionSpec` and `ParametricExpressionSpec` for specialized cases.\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to use of a variable, which can also be a vector indicating different per-variable complexity. By default, this is 1.\n * `complexity_mapping`: Alternatively, you can pass a function that takes the expression as input and returns the complexity. Make sure that this operates on `AbstractExpression` (and unpacks to `AbstractExpressionNode`), and returns an integer.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `dimensionless_constants_only`: Whether to only allow dimensionless constants.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `bumper`: Whether to use Bumper.jl for faster evaluation. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. 
If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is `Optim.BFGS(linesearch=LineSearches.BackTracking())`.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_probability`: Probability of performing optimization of constants at the end of a given iteration.\n * `optimizer_iterations`: How many optimization iterations to perform. This gets passed to `Optim.Options` as `iterations`. The default is 8.\n * `optimizer_f_calls_limit`: How many function calls to allow during optimization. This gets passed to `Optim.Options` as `f_calls_limit`. The default is `10_000`.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `autodiff_backend`: The backend to use for differentiation, which should be an instance of `AbstractADType` (see `ADTypes.jl`). Default is `nothing`, which means `Optim.jl` will estimate gradients (likely with finite differences). You can also pass a symbolic version of the backend type, such as `:Zygote` for Zygote, `:Enzyme`, etc. Most backends will not work, and many will never work due to incompatibilities, though support for some is gradually being added.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` (or any `AbstractMutationWeights`) should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `output_directory`: The base directory to save output files to. Files will be saved in a subdirectory according to the run ID. By default, this is `./outputs`.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. 
Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `input_stream`: the stream to read user input from. By default, this is `stdin`. If you encounter issues with reading from `stdin`, like a hang, you can simply pass `devnull` to this argument.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia are used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multithreading`), and are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. 
For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `worker_imports::Union{Vector{Symbol},Nothing}=nothing`: If you want to import additional modules on each worker, pass them here as a vector of symbols. By default some of the extensions will automatically be loaded when needed.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `run_id::Union{String,Nothing}=nothing`: A unique identifier for the run. This will be used to store outputs from the run in the `outputs` directory. If not specified, a unique ID will be generated.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to selection expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. 
Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity).\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Int}`: The complexity of each expression in the Pareto frontier.\n * `losses::Vector{L}`: The loss of each expression in the Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{L}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\nX, y = @load_boston\nmodel = SRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nWith units and variable names:\n\n```julia\nusing MLJ\nusing DynamicQuantities\nSRegressor = @load SRRegressor pkg=SymbolicRegression\n\nX = (; x1=rand(32) .* us\"km/h\", x2=rand(32) .* us\"km\")\ny = @. X.x2 / X.x1 + 0.5us\"h\"\nmodel = SRRegressor(binary_operators=[+, -, *, /])\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nSee also [`MultitargetSRRegressor`](@ref).\n""" +":docstring" = """```\nSRRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SRRegressor(defaults=...)`.\n\nSingle-target Symbolic Regression regressor (`SRRegressor`) searches for symbolic expressions that predict a single target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. 
Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`. Units in `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights, which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of trained models. The model chosen from this list is determined by the `selection_method` keyword argument (a function), which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `defaults`: What set of defaults to use for `Options`. The default, `nothing`, will simply take the default options from the current version of SymbolicRegression. However, you may also select the defaults from an earlier version, such as `v\"0.24.5\"`.\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `operator_enum_constructor`: Constructor function to use for creating the operators enum. By default, `OperatorEnum` is used, but you can provide a different constructor, such as `GenericOperatorEnum`. The constructor must accept the keyword arguments `binary_operators` and `unary_operators`.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar.
Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::AbstractExpressionNode{T}`, `dataset::Dataset{T}`, and `options::AbstractOptions`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `loss_function_expression`: Similar to `loss_function`, but takes `AbstractExpression` instead of `AbstractExpressionNode` as its first argument. Useful for `TemplateExpressionSpec`.\n * `loss_scale`: Determines how loss values are scaled when computing scores. Options are:\n\n * `:log` (default): Uses logarithmic scaling of loss ratios. This mode requires non-negative loss values and is ideal for traditional loss functions that are always positive.\n * `:linear`: Uses direct differences between losses. This mode handles any loss values (including negative) and is useful for custom loss functions, especially those based on likelihoods.\n * `expression_spec::AbstractExpressionSpec`: A specification of what types of expressions to use in the search. For example, `ExpressionSpec()` (default). You can also see `TemplateExpressionSpec` and `ParametricExpressionSpec` for specialized cases.\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to use of a variable, which can also be a vector indicating different per-variable complexity. By default, this is 1.\n * `complexity_mapping`: Alternatively, you can pass a function that takes the expression as input and returns the complexity. 
Make sure that this operates on `AbstractExpression` (and unpacks to `AbstractExpressionNode`), and returns an integer.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. Thus, an infinite `alpha` is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search; by default, this is set equal to `maxsize`.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `dimensionless_constants_only`: Whether to only allow dimensionless constants.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `bumper`: Whether to use Bumper.jl for faster evaluation. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is `Optim.BFGS(linesearch=LineSearches.BackTracking())`.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_probability`: Probability of performing optimization of constants at the end of a given iteration.\n * `optimizer_iterations`: How many optimization iterations to perform. This gets passed to `Optim.Options` as `iterations`. The default is 8.\n * `optimizer_f_calls_limit`: How many function calls to allow during optimization. This gets passed to `Optim.Options` as `f_calls_limit`. The default is `10_000`.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as a `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. `Dict(:x_tol => 1.0e-32,)`, or as an `Optim.Options` instance.\n * `autodiff_backend`: The backend to use for differentiation, which should be an instance of `AbstractADType` (see `ADTypes.jl`). Default is `nothing`, which means `Optim.jl` will estimate gradients (likely with finite differences). You can also pass a symbolic version of the backend type, such as `:Zygote` for Zygote, `:Enzyme`, etc.
Most backends will not work, and many will never work due to incompatibilities, though support for some is gradually being added.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` (or any `AbstractMutationWeights`) should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `output_directory`: The base directory to save output files to. Files will be saved in a subdirectory according to the run ID. By default, this is `./outputs`.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `input_stream`: the stream to read user input from. By default, this is `stdin`. If you encounter issues with reading from `stdin`, like a hang, you can simply pass `devnull` to this argument.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. 
However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to Julia is used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multiprocessing`) and not passing `procs` manually, then processes will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `worker_imports::Union{Vector{Symbol},Nothing}=nothing`: If you want to import additional modules on each worker, pass them here as a vector of symbols. By default some of the extensions will automatically be loaded when needed.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `run_id::Union{String,Nothing}=nothing`: A unique identifier for the run. This will be used to store outputs from the run in the `outputs` directory. If not specified, a unique ID will be generated.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to select an expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use.
By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity).\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Int}`: The complexity of each expression in the Pareto frontier.\n * `losses::Vector{L}`: The loss of each expression in the Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{L}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier.
A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\nX, y = @load_boston\nmodel = SRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used: \", r.equation_strings[r.best_idx])\n```\n\nWith units and variable names:\n\n```julia\nusing MLJ\nusing DynamicQuantities\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n\nX = (; x1=rand(32) .* us\"km/h\", x2=rand(32) .* us\"km\")\ny = @. X.x2 / X.x1 + 0.5us\"h\"\nmodel = SRRegressor(binary_operators=[+, -, *, /])\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used: \", r.equation_strings[r.best_idx])\n```\n\nSee also [`MultitargetSRRegressor`](@ref).\n""" ":name" = "SRRegressor" ":human_name" = "Symbolic Regression via Evolutionary Search" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [] -":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" -":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", 
\"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, 
:probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" +":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" 
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" @@ -7038,7 +7254,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nContinuousEncoder\n```\n\nA model type for constructing a continuous encoder, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContinuousEncoder = @load ContinuousEncoder pkg=MLJModels\n```\n\nDo `model = ContinuousEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContinuousEncoder(drop_last=...)`.\n\nUse this model to arrange all features (columns) of a table to have `Continuous` element scitype, by applying the following protocol to each feature `ftr`:\n\n * If `ftr` is already `Continuous` retain it.\n * If `ftr` is `Multiclass`, one-hot encode it.\n * If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, Continuous)` (vector of floating point integers), unless `ordered_factors=false` is specified, in which case one-hot encode it.\n * If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.\n * If `ftr` has some other element scitype, or was not observed in fitting the encoder, drop it from the table.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo selectively one-hot-encode categorical features (without dropping columns) use [`OneHotEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `drop_last=true`: whether to drop the column corresponding to the final class of one-hot encoded features. 
For example, a three-class feature is spawned into three new features if `drop_last=false`, but two just features otherwise.\n * `one_hot_ordered_factors=false`: whether to one-hot any feature with `OrderedFactor` element scitype, or to instead coerce it directly to a (single) `Continuous` feature using the order\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: names of features that will not be dropped from the table\n * `one_hot_encoder`: the `OneHotEncoder` model instance for handling the one-hot encoding\n * `one_hot_encoder_fitresult`: the fitted parameters of the `OneHotEncoder` model\n\n# Report\n\n * `features_to_keep`: names of input features that will not be dropped from the table\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3],\n comments=[\"the force\", \"be\", \"with you\", \"too\"])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n│ comments │ Textual │\n└───────────┴──────────────────┘\n\nencoder = ContinuousEncoder(drop_last=true)\nmach = fit!(machine(encoder, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Continuous │\n└──────────────┴────────────┘\n\njulia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features\n1-element Vector{Symbol}:\n :comments\n\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" +":docstring" = """```\nContinuousEncoder\n```\n\nA model type for constructing a continuous encoder, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContinuousEncoder = @load ContinuousEncoder pkg=unknown\n```\n\nDo `model = ContinuousEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContinuousEncoder(drop_last=...)`.\n\nUse this model to arrange all features (columns) of a table to have `Continuous` element scitype, by applying the following protocol to each feature `ftr`:\n\n * If `ftr` is already `Continuous` retain it.\n * If `ftr` is `Multiclass`, one-hot encode it.\n * If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, Continuous)` (vector of floating point integers), unless `ordered_factors=false` is specified, in which case one-hot encode it.\n * If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.\n * If `ftr` has some other element scitype, or was not observed in fitting the encoder, drop it from the table.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo selectively one-hot-encode categorical features (without dropping columns) use [`OneHotEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. 
Features can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `drop_last=true`: whether to drop the column corresponding to the final class of one-hot encoded features. For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n * `one_hot_ordered_factors=false`: whether to one-hot any feature with `OrderedFactor` element scitype, or to instead coerce it directly to a (single) `Continuous` feature using the order\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: names of features that will not be dropped from the table\n * `one_hot_encoder`: the `OneHotEncoder` model instance for handling the one-hot encoding\n * `one_hot_encoder_fitresult`: the fitted parameters of the `OneHotEncoder` model\n\n# Report\n\n * `features_to_keep`: names of input features that will not be dropped from the table\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3],\n comments=[\"the force\", \"be\", \"with you\", \"too\"])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n│ comments │ Textual │\n└───────────┴──────────────────┘\n\nencoder = ContinuousEncoder(drop_last=true)\nmach = fit!(machine(encoder, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Continuous │\n└──────────────┴────────────┘\n\njulia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features\n1-element Vector{Symbol}:\n :comments\n\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" ":name" = "ContinuousEncoder" ":human_name" = "continuous encoder" ":is_supervised" = "`false`" @@ -7110,7 +7326,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nInteractionTransformer\n```\n\nA model type for constructing a interaction transformer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nInteractionTransformer = @load InteractionTransformer pkg=MLJModels\n```\n\nDo `model = InteractionTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `InteractionTransformer(order=...)`.\n\nGenerates all polynomial interaction terms up to the given order for the subset of chosen columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to generate interactions. 
If `features` is not specified, all such columns with scitype `<:Infinite` in the table are used as a basis.\n\nIn MLJ or MLJBase, you can transform features `X` with the single call\n\n```\ntransform(machine(model), X)\n```\n\nSee also the example below.\n\n# Hyper-parameters\n\n * `order`: Maximum order of interactions to be generated.\n * `features`: Restricts interations generation to those columns\n\n# Operations\n\n * `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` using the hyper-parameters specified in `model`.\n\n# Example\n\n```\nusing MLJ\n\nX = (\n A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"]\n)\nit = InteractionTransformer(order=3)\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],\n A_C = [7, 16, 27],\n B_C = [28, 40, 54],\n A_B_C = [28, 80, 162],)\n\nit = InteractionTransformer(order=2, features=[:A, :B])\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],)\n\n```\n""" +":docstring" = """```\nInteractionTransformer\n```\n\nA model type for constructing an interaction transformer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nInteractionTransformer = @load InteractionTransformer pkg=unknown\n```\n\nDo `model = InteractionTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `InteractionTransformer(order=...)`.\n\nGenerates all polynomial interaction terms up to the given order for the subset of chosen columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to generate interactions. 
If `features` is not specified, all such columns with scitype `<:Infinite` in the table are used as a basis.\n\nIn MLJ or MLJBase, you can transform features `X` with the single call\n\n```\ntransform(machine(model), X)\n```\n\nSee also the example below.\n\n# Hyper-parameters\n\n * `order`: Maximum order of interactions to be generated.\n * `features`: Restricts interaction generation to those columns\n\n# Operations\n\n * `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` using the hyper-parameters specified in `model`.\n\n# Example\n\n```\nusing MLJ\n\nX = (\n A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"]\n)\nit = InteractionTransformer(order=3)\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],\n A_C = [7, 16, 27],\n B_C = [28, 40, 54],\n A_B_C = [28, 80, 162],)\n\nit = InteractionTransformer(order=2, features=[:A, :B])\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],)\n\n```\n""" ":name" = "InteractionTransformer" ":human_name" = "interaction transformer" ":is_supervised" = "`false`" @@ -7254,7 +7470,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nFillImputer\n```\n\nA model type for constructing a fill imputer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFillImputer = @load FillImputer pkg=MLJModels\n```\n\nDo `model = FillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FillImputer(features=...)`.\n\nUse this model to impute `missing` values in tabular data. A fixed \"filler\" value is learned from the training data, one for each column of the table.\n\nFor imputing missing values in a vector, use [`UnivariateFillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have element scitypes `Union{Missing, T}`, where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`. 
Check scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of names of features (symbols) for which imputation is to be attempted; default is empty, which is interpreted as \"impute all\".\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_seen_in_fit`: the names of features (columns) encountered during training\n * `univariate_transformer`: the univariate model applied to determine the fillers (it's fields contain the functions defining the filler computations)\n * `filler_given_feature`: dictionary of filler values, keyed on feature (column) names\n\n# Examples\n\n```\nusing MLJ\nimputer = FillImputer()\n\nX = (a = [1.0, 2.0, missing, 3.0, missing],\n b = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass),\n c = [1, 1, 2, missing, 3])\n\nschema(X)\njulia> schema(X)\n┌───────┬───────────────────────────────┐\n│ names │ scitypes │\n├───────┼───────────────────────────────┤\n│ a │ Union{Missing, Continuous} │\n│ b │ Union{Missing, Multiclass{2}} │\n│ c │ Union{Missing, Count} │\n└───────┴───────────────────────────────┘\n\nmach = machine(imputer, X)\nfit!(mach)\n\njulia> fitted_params(mach).filler_given_feature\n(filler = 2.0,)\n\njulia> fitted_params(mach).filler_given_feature\nDict{Symbol, Any} with 3 entries:\n :a => 2.0\n :b => \"y\"\n :c => 2\n\njulia> transform(mach, X)\n(a = [1.0, 2.0, 2.0, 3.0, 2.0],\n b = CategoricalValue{String, UInt32}[\"y\", \"n\", \"y\", \"y\", \"y\"],\n c = [1, 1, 2, 2, 3],)\n```\n\nSee also [`UnivariateFillImputer`](@ref).\n""" +":docstring" = """```\nFillImputer\n```\n\nA model type for constructing a fill imputer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFillImputer = @load FillImputer pkg=unknown\n```\n\nDo `model = FillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FillImputer(features=...)`.\n\nUse this model to impute `missing` values in tabular data. A fixed \"filler\" value is learned from the training data, one for each column of the table.\n\nFor imputing missing values in a vector, use [`UnivariateFillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose features each have element scitypes `Union{Missing, T}`, where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`. 
Check scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of names of features (symbols) for which imputation is to be attempted; default is empty, which is interpreted as \"impute all\".\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_seen_in_fit`: the names of features (columns) encountered during training\n * `univariate_transformer`: the univariate model applied to determine the fillers (its fields contain the functions defining the filler computations)\n * `filler_given_feature`: dictionary of filler values, keyed on feature (column) names\n\n# Examples\n\n```\nusing MLJ\nimputer = FillImputer()\n\nX = (a = [1.0, 2.0, missing, 3.0, missing],\n b = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass),\n c = [1, 1, 2, missing, 3])\n\njulia> schema(X)\n┌───────┬───────────────────────────────┐\n│ names │ scitypes │\n├───────┼───────────────────────────────┤\n│ a │ Union{Missing, Continuous} │\n│ b │ Union{Missing, Multiclass{2}} │\n│ c │ Union{Missing, Count} │\n└───────┴───────────────────────────────┘\n\nmach = machine(imputer, X)\nfit!(mach)\n\njulia> fitted_params(mach).filler_given_feature\nDict{Symbol, Any} with 3 entries:\n :a => 2.0\n :b => \"y\"\n :c => 2\n\njulia> transform(mach, X)\n(a = [1.0, 2.0, 2.0, 3.0, 2.0],\n b = CategoricalValue{String, UInt32}[\"y\", \"n\", \"y\", \"y\", \"y\"],\n c = [1, 1, 2, 2, 3],)\n```\n\nSee also [`UnivariateFillImputer`](@ref).\n""" ":name" = "FillImputer" ":human_name" = "fill imputer" ":is_supervised" = "`false`" @@ -8788,7 +9004,7 @@ ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Multiclass}`" ":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Image}, AbstractVector{<:ScientificTypesBase.Multiclass}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Multiclass}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Multiclass}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":target_in_fit" = "`true`" diff --git a/src/registry/Models.toml b/src/registry/Models.toml index 76ddb7e..d139334 100644 --- a/src/registry/Models.toml +++ b/src/registry/Models.toml @@ -1,4 +1,5 @@ BetaML = ["RandomForestRegressor", "GaussianMixtureImputer", "RandomForestClassifier", "RandomForestImputer", "PerceptronClassifier", "AutoEncoder", "DecisionTreeRegressor", "PegasosClassifier", "KMeansClusterer", "NeuralNetworkRegressor", "MultitargetGaussianMixtureRegressor", "GaussianMixtureRegressor", 
"MultitargetNeuralNetworkRegressor", "DecisionTreeClassifier", "GeneralImputer", "NeuralNetworkClassifier", "SimpleImputer", "GaussianMixtureClusterer", "KernelPerceptronClassifier", "KMedoidsClusterer"] +MLJEnsembles = ["EnsembleModel"] CatBoost = ["CatBoostRegressor", "CatBoostClassifier"] NearestNeighborModels = ["KNNClassifier", "MultitargetKNNClassifier", "MultitargetKNNRegressor", "KNNRegressor"] MLJScikitLearnInterface = ["ProbabilisticSGDClassifier", "RidgeCVClassifier", "LogisticClassifier", "RandomForestRegressor", "ElasticNetCVRegressor", "PerceptronClassifier", "MultiTaskLassoRegressor", "LinearRegressor", "HDBSCAN", "DBSCAN", "RidgeRegressor", "LassoLarsICRegressor", "ARDRegressor", "SVMNuRegressor", "RidgeClassifier", "SGDRegressor", "ComplementNBClassifier", "HuberRegressor", "SVMNuClassifier", "GradientBoostingClassifier", "GaussianProcessRegressor", "SVMLinearRegressor", "LarsRegressor", "MeanShift", "HistGradientBoostingClassifier", "AdaBoostRegressor", "AffinityPropagation", "MultiTaskLassoCVRegressor", "OrthogonalMatchingPursuitRegressor", "BernoulliNBClassifier", "PassiveAggressiveClassifier", "RidgeCVRegressor", "SVMRegressor", "GaussianNBClassifier", "ExtraTreesClassifier", "KMeans", "MultiTaskElasticNetCVRegressor", "LassoLarsCVRegressor", "OrthogonalMatchingPursuitCVRegressor", "AdaBoostClassifier", "PassiveAggressiveRegressor", "BayesianRidgeRegressor", "GaussianProcessClassifier", "BaggingClassifier", "OPTICS", "RANSACRegressor", "KNeighborsRegressor", "HistGradientBoostingRegressor", "MiniBatchKMeans", "LassoCVRegressor", "DummyRegressor", "BisectingKMeans", "LassoLarsRegressor", "LarsCVRegressor", "KNeighborsClassifier", "SVMLinearClassifier", "FeatureAgglomeration", "DummyClassifier", "BaggingRegressor", "BayesianQDA", "BayesianLDA", "SGDClassifier", "TheilSenRegressor", "SpectralClustering", "Birch", "AgglomerativeClustering", "ElasticNetRegressor", "RandomForestClassifier", "LogisticCVClassifier", "MultiTaskElasticNetRegressor", "ExtraTreesRegressor", "LassoRegressor", "MultinomialNBClassifier", "GradientBoostingRegressor", "SVMClassifier"] @@ -31,6 +32,6 @@ OutlierDetectionPython = ["MCDDetector", "COPODDetector", "HBOSDetector", "IFore SelfOrganizingMaps = ["SelfOrganizingMap"] LIBSVM = ["SVC", "EpsilonSVR", "LinearSVC", "ProbabilisticSVC", "NuSVR", "NuSVC", "ProbabilisticNuSVC", "OneClassSVM"] TSVD = ["TSVDTransformer"] +MLJTransforms = ["TargetEncoder", "MissingnessEncoder", "ContrastEncoder", "FrequencyEncoder", "CardinalityReducer", "OrdinalEncoder"] GLM = ["LinearBinaryClassifier", "LinearCountRegressor", "LinearRegressor"] MLJFlux = ["EntityEmbedder", "MultitargetNeuralNetworkRegressor", "NeuralNetworkClassifier", "ImageClassifier", "NeuralNetworkBinaryClassifier", "NeuralNetworkRegressor"] -MLJEnsembles = ["EnsembleModel"] diff --git a/src/registry/Project.toml b/src/registry/Project.toml index 61c2aca..df82b28 100644 --- a/src/registry/Project.toml +++ b/src/registry/Project.toml @@ -25,6 +25,7 @@ MLJScikitLearnInterface = "5ae90465-5518-4432-b9d2-8a1def2f0cab" MLJTSVDInterface = "7fa162e1-0e29-41ca-a6fa-c000ca4e7e7e" MLJTestInterface = "72560011-54dd-4dc2-94f3-c5de45b75ecd" MLJText = "5e27fcf9-6bac-46ba-8580-b5712f3d6387" +MLJTransforms = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" MLJTuning = "03970b2e-30c4-11ea-3135-d1576263f10f" MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91" Maxnet = "81f79f80-22f2-4e41-ab86-00c11cf0f26f"