From d21ac462d7c50065fa02c7cee6fe09b7fd51ef78 Mon Sep 17 00:00:00 2001 From: gAldeia Date: Tue, 3 Sep 2024 18:48:38 -0300 Subject: [PATCH 1/8] First principles datasets Data comes from two symbolic regression repos: - Miles Cranmer's PySR: https://github.com/MilesCranmer/PySR - Etienne Russeil et al.'s MvSR: https://github.com/erusseil/MvSR-analysis They are all datasets that have a first-principle equation derived from data and used in their respective papers to show how symbolic regression has the potential of retrieving the original equation when only observational data is available. While some of them have just a few samples and others are synthetically generated, they are challenging for symbolic regression methods and can be used to evaluate these algorithms. The idea of pushing them into PMLB is to help other users to quickly set up experiments with the data. I still need to write proper metadata for them. --- .../first_principles_absorption.tsv.gz | 3 +++ datasets/first_principles_bode/first_principles_bode.tsv.gz | 3 +++ .../first_principles_hubble/first_principles_hubble.tsv.gz | 3 +++ .../first_principles_ideal_gas.tsv.gz | 3 +++ .../first_principles_kepler/first_principles_kepler.tsv.gz | 3 +++ .../first_principles_leavitt/first_principles_leavitt.tsv.gz | 3 +++ .../first_principles_newton/first_principles_newton.tsv.gz | 3 +++ .../first_principles_planck/first_principles_planck.tsv.gz | 3 +++ .../first_principles_rydberg/first_principles_rydberg.tsv.gz | 3 +++ .../first_principles_schechter.tsv.gz | 3 +++ .../first_principles_supernovae_zg.tsv.gz | 3 +++ .../first_principles_supernovae_zr.tsv.gz | 3 +++ .../first_principles_tully_fisher.tsv.gz | 3 +++ 13 files changed, 39 insertions(+) create mode 100644 datasets/first_principles_absorption/first_principles_absorption.tsv.gz create mode 100644 datasets/first_principles_bode/first_principles_bode.tsv.gz create mode 100644 datasets/first_principles_hubble/first_principles_hubble.tsv.gz create mode 100644 
datasets/first_principles_ideal_gas/first_principles_ideal_gas.tsv.gz create mode 100644 datasets/first_principles_kepler/first_principles_kepler.tsv.gz create mode 100644 datasets/first_principles_leavitt/first_principles_leavitt.tsv.gz create mode 100644 datasets/first_principles_newton/first_principles_newton.tsv.gz create mode 100644 datasets/first_principles_planck/first_principles_planck.tsv.gz create mode 100644 datasets/first_principles_rydberg/first_principles_rydberg.tsv.gz create mode 100644 datasets/first_principles_schechter/first_principles_schechter.tsv.gz create mode 100644 datasets/first_principles_supernovae_zg/first_principles_supernovae_zg.tsv.gz create mode 100644 datasets/first_principles_supernovae_zr/first_principles_supernovae_zr.tsv.gz create mode 100644 datasets/first_principles_tully_fisher/first_principles_tully_fisher.tsv.gz diff --git a/datasets/first_principles_absorption/first_principles_absorption.tsv.gz b/datasets/first_principles_absorption/first_principles_absorption.tsv.gz new file mode 100644 index 00000000..92259ecd --- /dev/null +++ b/datasets/first_principles_absorption/first_principles_absorption.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:469d734ef8b6f79d2e38bc487251940bcaa9349050cb455063209e3371dcd439 +size 158 diff --git a/datasets/first_principles_bode/first_principles_bode.tsv.gz b/datasets/first_principles_bode/first_principles_bode.tsv.gz new file mode 100644 index 00000000..6dd18717 --- /dev/null +++ b/datasets/first_principles_bode/first_principles_bode.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a568223ad5181bf644ca9e871fd235d0e614895b995155b0cb2dfec53f8f9328 +size 110 diff --git a/datasets/first_principles_hubble/first_principles_hubble.tsv.gz b/datasets/first_principles_hubble/first_principles_hubble.tsv.gz new file mode 100644 index 00000000..d102450e --- /dev/null +++ b/datasets/first_principles_hubble/first_principles_hubble.tsv.gz @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bef7e5fb62eb611f1d60e8418df7b546c0df0ae28a51b5c3f7501b2128fdbb17 +size 674 diff --git a/datasets/first_principles_ideal_gas/first_principles_ideal_gas.tsv.gz b/datasets/first_principles_ideal_gas/first_principles_ideal_gas.tsv.gz new file mode 100644 index 00000000..35ff739e --- /dev/null +++ b/datasets/first_principles_ideal_gas/first_principles_ideal_gas.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eaf8e6160d9629cc73d8b5fccf5a25fdfae617a5c5e6d6e2c00f73d94bc6ab8 +size 1226 diff --git a/datasets/first_principles_kepler/first_principles_kepler.tsv.gz b/datasets/first_principles_kepler/first_principles_kepler.tsv.gz new file mode 100644 index 00000000..580957d5 --- /dev/null +++ b/datasets/first_principles_kepler/first_principles_kepler.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5704b631ba0afc6bf761196ae757565c6ea8398019094526f16383802b8f6cda +size 118 diff --git a/datasets/first_principles_leavitt/first_principles_leavitt.tsv.gz b/datasets/first_principles_leavitt/first_principles_leavitt.tsv.gz new file mode 100644 index 00000000..30fd5465 --- /dev/null +++ b/datasets/first_principles_leavitt/first_principles_leavitt.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85b0d6a23ccc3a1db5e2a94928c32e35dd2388d2cb13328c9a914fc594e7bab1 +size 526 diff --git a/datasets/first_principles_newton/first_principles_newton.tsv.gz b/datasets/first_principles_newton/first_principles_newton.tsv.gz new file mode 100644 index 00000000..d9e4443f --- /dev/null +++ b/datasets/first_principles_newton/first_principles_newton.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59df1d16cb8aa383900dc43a067ca79437912283dd3b32e4834b20185efc586e +size 1291 diff --git a/datasets/first_principles_planck/first_principles_planck.tsv.gz b/datasets/first_principles_planck/first_principles_planck.tsv.gz new file 
mode 100644 index 00000000..da027313 --- /dev/null +++ b/datasets/first_principles_planck/first_principles_planck.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a70e4196c6288b9132c1fa2515235a18be44b16f6a54fdb04ea043b8027b6ea0 +size 2979 diff --git a/datasets/first_principles_rydberg/first_principles_rydberg.tsv.gz b/datasets/first_principles_rydberg/first_principles_rydberg.tsv.gz new file mode 100644 index 00000000..74c54f98 --- /dev/null +++ b/datasets/first_principles_rydberg/first_principles_rydberg.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4d23f6bf28053df7f91fba6b92e45cd23ce4e37ec96bc0461fd24b38739e9e5 +size 564 diff --git a/datasets/first_principles_schechter/first_principles_schechter.tsv.gz b/datasets/first_principles_schechter/first_principles_schechter.tsv.gz new file mode 100644 index 00000000..17dd615b --- /dev/null +++ b/datasets/first_principles_schechter/first_principles_schechter.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f2719ff31476c7e53e05dd4f85fa2a53d165e5adc8d80a3ab8415474ffe232a +size 599 diff --git a/datasets/first_principles_supernovae_zg/first_principles_supernovae_zg.tsv.gz b/datasets/first_principles_supernovae_zg/first_principles_supernovae_zg.tsv.gz new file mode 100644 index 00000000..7b69fd22 --- /dev/null +++ b/datasets/first_principles_supernovae_zg/first_principles_supernovae_zg.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcaa3bfa39bf7786ccc074e5fc44d2f6e3870562c505d08fec8859cc5d51af05 +size 4109 diff --git a/datasets/first_principles_supernovae_zr/first_principles_supernovae_zr.tsv.gz b/datasets/first_principles_supernovae_zr/first_principles_supernovae_zr.tsv.gz new file mode 100644 index 00000000..51cdd954 --- /dev/null +++ b/datasets/first_principles_supernovae_zr/first_principles_supernovae_zr.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:5910dd1ee08ef353c08e2c25b629f66d42565eb4c990656e06d1b575fe5880a7 +size 3923 diff --git a/datasets/first_principles_tully_fisher/first_principles_tully_fisher.tsv.gz b/datasets/first_principles_tully_fisher/first_principles_tully_fisher.tsv.gz new file mode 100644 index 00000000..e3c4ebc2 --- /dev/null +++ b/datasets/first_principles_tully_fisher/first_principles_tully_fisher.tsv.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a80f850226d7a536fad6f33fdae2d08bee9bf36c3a28c452ed0aaa244c88bf39 +size 423 From f23672c71e2fa52ad6abb768f80390dc90f1efa1 Mon Sep 17 00:00:00 2001 From: gAldeia Date: Thu, 31 Oct 2024 14:24:08 -0300 Subject: [PATCH 2/8] Re-generated broken datasets CI was failing to parse the contents of these specific ones. --- .../first_principles_ideal_gas.tsv.gz | 4 ++-- .../first_principles_rydberg/first_principles_rydberg.tsv.gz | 4 ++-- .../first_principles_schechter.tsv.gz | 4 ++-- .../first_principles_supernovae_zg.tsv.gz | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/datasets/first_principles_ideal_gas/first_principles_ideal_gas.tsv.gz b/datasets/first_principles_ideal_gas/first_principles_ideal_gas.tsv.gz index 35ff739e..1911b619 100644 --- a/datasets/first_principles_ideal_gas/first_principles_ideal_gas.tsv.gz +++ b/datasets/first_principles_ideal_gas/first_principles_ideal_gas.tsv.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2eaf8e6160d9629cc73d8b5fccf5a25fdfae617a5c5e6d6e2c00f73d94bc6ab8 -size 1226 +oid sha256:346b2e3bbc0c631bc00b2d001dfc5791fe729cb472795b23d593895252ce6bb8 +size 1205 diff --git a/datasets/first_principles_rydberg/first_principles_rydberg.tsv.gz b/datasets/first_principles_rydberg/first_principles_rydberg.tsv.gz index 74c54f98..a8c8f5ce 100644 --- a/datasets/first_principles_rydberg/first_principles_rydberg.tsv.gz +++ b/datasets/first_principles_rydberg/first_principles_rydberg.tsv.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 
-oid sha256:a4d23f6bf28053df7f91fba6b92e45cd23ce4e37ec96bc0461fd24b38739e9e5 -size 564 +oid sha256:0c848103ae200b9a969cf5eb9836592b7663aa7553e8a129102d7fc387c2f490 +size 560 diff --git a/datasets/first_principles_schechter/first_principles_schechter.tsv.gz b/datasets/first_principles_schechter/first_principles_schechter.tsv.gz index 17dd615b..b0423e33 100644 --- a/datasets/first_principles_schechter/first_principles_schechter.tsv.gz +++ b/datasets/first_principles_schechter/first_principles_schechter.tsv.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f2719ff31476c7e53e05dd4f85fa2a53d165e5adc8d80a3ab8415474ffe232a -size 599 +oid sha256:21586d500f0961c0d2c8296644e3ec269e2ecf783f3c39c0fdf1dc6159edee0a +size 580 diff --git a/datasets/first_principles_supernovae_zg/first_principles_supernovae_zg.tsv.gz b/datasets/first_principles_supernovae_zg/first_principles_supernovae_zg.tsv.gz index 7b69fd22..2a837979 100644 --- a/datasets/first_principles_supernovae_zg/first_principles_supernovae_zg.tsv.gz +++ b/datasets/first_principles_supernovae_zg/first_principles_supernovae_zg.tsv.gz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fcaa3bfa39bf7786ccc074e5fc44d2f6e3870562c505d08fec8859cc5d51af05 -size 4109 +oid sha256:9f946159fb4fa4d6351952d89148572682de6c062f14b6c0f401b9479ad277de +size 4054 From 42b29f74db6466a662e761f7d1ce7b7fae3a31f8 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 31 Oct 2024 17:25:59 +0000 Subject: [PATCH 3/8] update dataset files Created by https://github.com/gAldeia/pmlb/actions/runs/11616806556\nfrom f23672c on 2024-10-31 --- .lfs-assets-id | 13 +++++++++ datasets/first_principles_ideal_gas/README.md | 6 ++++ .../first_principles_ideal_gas/metadata.yaml | 29 +++++++++++++++++++ .../summary_stats.tsv | 2 ++ datasets/first_principles_rydberg/README.md | 6 ++++ .../first_principles_rydberg/metadata.yaml | 24 +++++++++++++++ .../summary_stats.tsv | 2 ++ 
datasets/first_principles_schechter/README.md | 6 ++++ .../first_principles_schechter/metadata.yaml | 19 ++++++++++++ .../summary_stats.tsv | 2 ++ .../first_principles_supernovae_zg/README.md | 6 ++++ .../metadata.yaml | 19 ++++++++++++ .../summary_stats.tsv | 2 ++ pmlb/all_summary_stats.tsv | 4 +++ 14 files changed, 140 insertions(+) create mode 100644 datasets/first_principles_ideal_gas/README.md create mode 100644 datasets/first_principles_ideal_gas/metadata.yaml create mode 100644 datasets/first_principles_ideal_gas/summary_stats.tsv create mode 100644 datasets/first_principles_rydberg/README.md create mode 100644 datasets/first_principles_rydberg/metadata.yaml create mode 100644 datasets/first_principles_rydberg/summary_stats.tsv create mode 100644 datasets/first_principles_schechter/README.md create mode 100644 datasets/first_principles_schechter/metadata.yaml create mode 100644 datasets/first_principles_schechter/summary_stats.tsv create mode 100644 datasets/first_principles_supernovae_zg/README.md create mode 100644 datasets/first_principles_supernovae_zg/metadata.yaml create mode 100644 datasets/first_principles_supernovae_zg/summary_stats.tsv diff --git a/.lfs-assets-id b/.lfs-assets-id index 7d52a573..a7f8590a 100644 --- a/.lfs-assets-id +++ b/.lfs-assets-id @@ -20,6 +20,7 @@ 0baa7b708956fd05b84d47b18a86f926335db5f42d2cd7e29ca83558c139aff3 0be6203e167cc5e7b038368dbfe0a7790d5dc423c9d7e42887907c5c03f81c27 0c342ef5d61bbcf43180a3b71d407b9d994942ce43e8960052201daf88dd095d +0c848103ae200b9a969cf5eb9836592b7663aa7553e8a129102d7fc387c2f490 0d05767a4c118752a25c4632aeea3b71ffa1bfe122b6a2401f85d20541be19a4 0d39f17afc3a1712bd6c460aa941aed7835b3feb142538adfdd31ddc2451d60d 0d43780ab866e54a2a78d8c86ba231ad0a5d55588450a33ed6fe52bee9638341 @@ -41,6 +42,7 @@ 1d64387aa4bea78bd412ea3892bf17f48b32856074dfe3b5105f0bbc1b15603f 1ee8cf9693351db7afe68f6fc32942845caae3e1030c688efa6c5d0b24229f46 1f5cf829d2e58032e5d9067f1e7bf3fe7644cd5fe2825c81ef7fbaa445f496a7 
+21586d500f0961c0d2c8296644e3ec269e2ecf783f3c39c0fdf1dc6159edee0a 21d506c397dfeb3edbbbc253b923f59be6edb516689677ebef535296c6c62242 22baa768886091d61e13e894610dfea3435dfd201f8300fbebe46cd6cf814c0b 22c0fae7298efcb4566c4d4cc19a53c505476cccf6518328303e2b7334110781 @@ -74,6 +76,7 @@ 32abe7e2579387f0439d1595cb72f1f0fe79f41822ec99d06f0e219a65dda362 32bdeb725bc79d00349bcc66c41d306f463005cf6ac623cf8c15b5f3f7bbbb83 32d94576f8622f22279a02099e5269511a8b14fee6a441e880730966763b79c9 +346b2e3bbc0c631bc00b2d001dfc5791fe729cb472795b23d593895252ce6bb8 34fd665457403f66db49a4d012c59d7c387f99a35597c8f0f0e31d40ada255f8 35724de77dbb2d325f81905aa01f639cae3f29a3d24ae7b24ec84acbc9e08a8f 35aefd558529484575b142f122c5e2af2eb337025a9607e3df5ab60a57783e09 @@ -102,6 +105,7 @@ 45b06a3b07f45e5aa49f13f030860b245507ba94185219571ceb314fbdd87c2f 45b6fb5d5c4bb09f2f21b53b069b3994e4f6fa69a5a932cf01c1ebb335bf8645 46578097c3f1477b9f4f2eb2dc74421162fa9a14e139b0e3e791e41679e459d9 +469d734ef8b6f79d2e38bc487251940bcaa9349050cb455063209e3371dcd439 46e26ab2e17e1e92728b1a27ebcad5fc8319b4195414746064da34d95f27280e 47478e3af60f8a6cc09dcfba8495af699c8187edc279a0e226e4e63b410d64b7 4757ff95609abb91a98577cf6023c804ba2b0b749a9dfcc48597ac49d4bf72a0 @@ -135,10 +139,13 @@ 56f25ef2fcadcd25cc5ccdc721663194ead75bcd90c4d5c1b806d72ea193948f 56f5ba3f3ba78f6e522a13e5a97e03ff28cf9fa4107021b22b932f6d4064c145 56f81f2a4cdd1968cc83d45bfdaa049c90cf7cdf3141b2db9979d31c000e3937 +5704b631ba0afc6bf761196ae757565c6ea8398019094526f16383802b8f6cda 57b36b18d3ed1b78d6ca647f701fdef974b27979cda7dda2fe91a81eb7e329d6 588ef519ce346285e4cb9cdd5780abbbac32cab9661625c6e517a9b70c87495d +5910dd1ee08ef353c08e2c25b629f66d42565eb4c990656e06d1b575fe5880a7 5940ac21f3c7e93dd1dca45266304d160c45256f0628419f001c8b54e1c98360 596303e877cb91ec3f96cab8c0eac3205f8c50d2e56debdde6a6b4bcffe57ccd +59df1d16cb8aa383900dc43a067ca79437912283dd3b32e4834b20185efc586e 5adc7d62cd741fde41554ad50ba608973a19f87f795787156e7f16855a227a49 
5b1d8788d9512819fd46d1acf1e86e70cb5f4418c8e8bfe299ebd3abf2188217 5b20048751e68b6dd76a7b66ada790fb90f9dd41bd8c82d448fe439f8d038969 @@ -224,6 +231,7 @@ 840eff365b01eacb770f1e78f97bc889611362bd3bbf835f344a2792973c985a 849976657dc371578819f225ce81eb9a76bd084ce743b5ff6753e415567c68dd 854aedabba36d89fb2a79246592f2858d6905e7e75a1031128f67ed0c8a446a6 +85b0d6a23ccc3a1db5e2a94928c32e35dd2388d2cb13328c9a914fc594e7bab1 86e18c1aa7247f824f7097219d75d29f8dcba3034bc89e1ee92c7778aa31bd9c 8712952be1bc739d221729dd94705dc67e189981a728ecc9cfe08c8df5d8b125 87df1bbfb83b8204093ccd84ed18bb3695220c03aa4096f63a84cc5e136dbc0b @@ -273,16 +281,20 @@ 9def90abb62c1b9872ae2dbed3e425850ae44f98d3664e8b29e2444548e9fec9 9e1ec477e8af8356c3b731f8815f19b57bb404bd7c1629a2020bc9d90b0c028a 9e69e5aa34b36b4f528c42711c7fbaf88abcda9f0b7ba4181e4f57525b6d1527 +9f946159fb4fa4d6351952d89148572682de6c062f14b6c0f401b9479ad277de a15ad0797f0d445cafcf5afb14f26df0aee2417181a2081ad26b1c10e0aaf79c a2b0fbfc6f24cb86e3c612be4d59f5dc48b4e3e73620b480dd9f54dccf4d90da a39fc5f054db506e83a4a4ec47eba1f7f9bf9bfdc983174e699312f32f42f1f5 a4e3dfadeb34bba861ef56046d2fc99c4d50d8475882d610a0b652d39c510f6b a551e2941365201552a3a819f035c64f297467464f7c7c349d00711316ce3c57 a55638ba902c8afb52d5b006f2ac438c72dfcdd6325efe7488b125f7a9662989 +a568223ad5181bf644ca9e871fd235d0e614895b995155b0cb2dfec53f8f9328 a5a5d15adf74702c323b62b595cdefcdd157f68ebd86af5e504bc16c9890c2ff a5c0e0103dc8caf7a9b18ab4546a046ba2e01425259c639069fa02f0824ae0f2 a5fff4a8241312d53818c146ca0b132dc760a6848e7b89ec2e3271fc6454e7da +a70e4196c6288b9132c1fa2515235a18be44b16f6a54fdb04ea043b8027b6ea0 a71c05581e59bc83a6e500c66ea94ff9e355d5e457f0b094e10a61e7f13fdecc +a80f850226d7a536fad6f33fdae2d08bee9bf36c3a28c452ed0aaa244c88bf39 a857a458b0621f46b60f326d5760f7d3d39d6ba1fe87db1a7f1114bdfeb99862 a8a1931c568c9637e3671aa4919cc6a1a3acfa26358fd3f81ca89e4b01f96a72 a96ca8012634e20d924624747b939c1b99f7b4ec36a7819c80c962d7a1fafe49 @@ -317,6 +329,7 @@ 
bd0e747cb0a16d9f68843ccd6fa0b0d382bb21f2c83ccbc222712426ee42274b bd9d5214451c3b72e8a5ba4ae75a565d373d90a804659d0a5f4617fad3ac4cfd be6942e13096c21f10496a24056b01ff791e24d6172f5b7a09013c3307d38f28 beedc054b8e7d974a98326db8c834843eb188cffba0f07029a8370b193ce020e +bef7e5fb62eb611f1d60e8418df7b546c0df0ae28a51b5c3f7501b2128fdbb17 bf066d8b8431c89d3c8afd58b0bfe56f53e0aaedd8d4ec05c132268115af3f36 bfc6131af9d009576a82d25e0590955980535eb61c67d1553434da993e79af92 bfd1b9c6e5f6314f6ddfc285ace11381570ad688c6c857562c3155a4e478530d diff --git a/datasets/first_principles_ideal_gas/README.md b/datasets/first_principles_ideal_gas/README.md new file mode 100644 index 00000000..6d2ad97b --- /dev/null +++ b/datasets/first_principles_ideal_gas/README.md @@ -0,0 +1,6 @@ +# first_principles_ideal_gas + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/first_principles_ideal_gas.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/first_principles_ideal_gas/metadata.yaml b/datasets/first_principles_ideal_gas/metadata.yaml new file mode 100644 index 00000000..adc5b7ac --- /dev/null +++ b/datasets/first_principles_ideal_gas/metadata.yaml @@ -0,0 +1,29 @@ +# Reviewed by [your name here] +dataset: first_principles_ideal_gas +description: None yet. See our contributing guide to help us add one. +source: None yet. See our contributing guide to help us add one. +publication: None yet. See our contributing guide to help us add one. +task: regression +keywords: + - + - +target: + type: continuous + description: None yet. See our contributing guide to help us add one. + code: None yet. See our contributing guide to help us add one. 
+features: + - name: "n" + type: continuous + description: # optional but recommended, what the feature measures/indicates, unit + code: # optional, coding information, e.g., Control = 0, Case = 1 + transform: # optional, any transformation performed on the feature, e.g., log scaled + - name: T + type: continuous + description: + code: + transform: + - name: V + type: continuous + description: + code: + transform: diff --git a/datasets/first_principles_ideal_gas/summary_stats.tsv b/datasets/first_principles_ideal_gas/summary_stats.tsv new file mode 100644 index 00000000..9ebf1e14 --- /dev/null +++ b/datasets/first_principles_ideal_gas/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +first_principles_ideal_gas 30 3 0 0 3 continuous 30.0 0.0 regression diff --git a/datasets/first_principles_rydberg/README.md b/datasets/first_principles_rydberg/README.md new file mode 100644 index 00000000..3b14b427 --- /dev/null +++ b/datasets/first_principles_rydberg/README.md @@ -0,0 +1,6 @@ +# first_principles_rydberg + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/first_principles_rydberg.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/first_principles_rydberg/metadata.yaml b/datasets/first_principles_rydberg/metadata.yaml new file mode 100644 index 00000000..63ed2532 --- /dev/null +++ b/datasets/first_principles_rydberg/metadata.yaml @@ -0,0 +1,24 @@ +# Reviewed by [your name here] +dataset: first_principles_rydberg +description: None yet. See our contributing guide to help us add one. +source: None yet. See our contributing guide to help us add one. +publication: None yet. See our contributing guide to help us add one. +task: regression +keywords: + - + - +target: + type: continuous + description: None yet. See our contributing guide to help us add one. + code: None yet. 
See our contributing guide to help us add one. +features: + - name: n_1 + type: categorical + description: # optional but recommended, what the feature measures/indicates, unit + code: # optional, coding information, e.g., Control = 0, Case = 1 + transform: # optional, any transformation performed on the feature, e.g., log scaled + - name: n_2 + type: categorical + description: + code: + transform: diff --git a/datasets/first_principles_rydberg/summary_stats.tsv b/datasets/first_principles_rydberg/summary_stats.tsv new file mode 100644 index 00000000..a052c301 --- /dev/null +++ b/datasets/first_principles_rydberg/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +first_principles_rydberg 50 2 0 2 0 continuous 50.0 0.0 regression diff --git a/datasets/first_principles_schechter/README.md b/datasets/first_principles_schechter/README.md new file mode 100644 index 00000000..13740441 --- /dev/null +++ b/datasets/first_principles_schechter/README.md @@ -0,0 +1,6 @@ +# first_principles_schechter + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/first_principles_schechter.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/first_principles_schechter/metadata.yaml b/datasets/first_principles_schechter/metadata.yaml new file mode 100644 index 00000000..ec0c0d52 --- /dev/null +++ b/datasets/first_principles_schechter/metadata.yaml @@ -0,0 +1,19 @@ +# Reviewed by [your name here] +dataset: first_principles_schechter +description: None yet. See our contributing guide to help us add one. +source: None yet. See our contributing guide to help us add one. +publication: None yet. See our contributing guide to help us add one. +task: regression +keywords: + - + - +target: + type: continuous + description: None yet. See our contributing guide to help us add one. + code: None yet. 
See our contributing guide to help us add one. +features: + - name: L + type: continuous + description: # optional but recommended, what the feature measures/indicates, unit + code: # optional, coding information, e.g., Control = 0, Case = 1 + transform: # optional, any transformation performed on the feature, e.g., log scaled diff --git a/datasets/first_principles_schechter/summary_stats.tsv b/datasets/first_principles_schechter/summary_stats.tsv new file mode 100644 index 00000000..973ba127 --- /dev/null +++ b/datasets/first_principles_schechter/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +first_principles_schechter 27 1 0 0 1 continuous 27.0 0.0 regression diff --git a/datasets/first_principles_supernovae_zg/README.md b/datasets/first_principles_supernovae_zg/README.md new file mode 100644 index 00000000..032ed7ca --- /dev/null +++ b/datasets/first_principles_supernovae_zg/README.md @@ -0,0 +1,6 @@ +# first_principles_supernovae_zg + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/first_principles_supernovae_zg.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/first_principles_supernovae_zg/metadata.yaml b/datasets/first_principles_supernovae_zg/metadata.yaml new file mode 100644 index 00000000..a1d08ead --- /dev/null +++ b/datasets/first_principles_supernovae_zg/metadata.yaml @@ -0,0 +1,19 @@ +# Reviewed by [your name here] +dataset: first_principles_supernovae_zg +description: None yet. See our contributing guide to help us add one. +source: None yet. See our contributing guide to help us add one. +publication: None yet. See our contributing guide to help us add one. +task: regression +keywords: + - + - +target: + type: continuous + description: None yet. See our contributing guide to help us add one. + code: None yet. 
See our contributing guide to help us add one. +features: + - name: Xaxis0 + type: continuous + description: # optional but recommended, what the feature measures/indicates, unit + code: # optional, coding information, e.g., Control = 0, Case = 1 + transform: # optional, any transformation performed on the feature, e.g., log scaled diff --git a/datasets/first_principles_supernovae_zg/summary_stats.tsv b/datasets/first_principles_supernovae_zg/summary_stats.tsv new file mode 100644 index 00000000..5de6f7ec --- /dev/null +++ b/datasets/first_principles_supernovae_zg/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +first_principles_supernovae_zg 243 1 0 0 1 continuous 243.0 0.0 regression diff --git a/pmlb/all_summary_stats.tsv b/pmlb/all_summary_stats.tsv index 9011bd80..1f5e17a5 100644 --- a/pmlb/all_summary_stats.tsv +++ b/pmlb/all_summary_stats.tsv @@ -310,6 +310,10 @@ feynman_test_6 100000 7 0 0 7 continuous 100000.0 0.0 regression feynman_test_7 100000 5 0 0 5 continuous 100000.0 0.0 regression feynman_test_8 100000 4 0 0 4 continuous 100000.0 0.0 regression feynman_test_9 100000 5 0 0 5 continuous 100000.0 0.0 regression +first_principles_ideal_gas 30 3 0 0 3 continuous 30.0 0.0 regression +first_principles_rydberg 50 2 0 2 0 continuous 50.0 0.0 regression +first_principles_schechter 27 1 0 0 1 continuous 27.0 0.0 regression +first_principles_supernovae_zg 243 1 0 0 1 continuous 243.0 0.0 regression flags 178 43 36 5 2 categorical 5.0 0.04391806590077 classification flare 1066 10 4 6 0 categorical 2.0 0.4336704342653181 classification german 1000 20 3 14 3 categorical 2.0 0.1599999999999999 classification From 4742074bfcbae544e2bf2e0dad895a23a9367f48 Mon Sep 17 00:00:00 2001 From: gAldeia Date: Thu, 20 Feb 2025 09:04:51 -0300 Subject: [PATCH 4/8] New metadata --- .../first_principles_absorption/README.md | 6 +++++ 
.../first_principles_absorption/metadata.yaml | 18 +++++++++++++ .../summary_stats.tsv | 2 ++ datasets/first_principles_bode/README.md | 6 +++++ datasets/first_principles_bode/metadata.yaml | 25 +++++++++++++++++ .../first_principles_bode/summary_stats.tsv | 2 ++ datasets/first_principles_hubble/README.md | 6 +++++ .../first_principles_hubble/metadata.yaml | 19 +++++++++++++ .../first_principles_hubble/summary_stats.tsv | 2 ++ datasets/first_principles_ideal_gas/README.md | 6 +++++ .../first_principles_ideal_gas/metadata.yaml | 27 +++++++++++++++++++ .../summary_stats.tsv | 2 ++ datasets/first_principles_kepler/README.md | 6 +++++ .../first_principles_kepler/metadata.yaml | 23 ++++++++++++++++ .../first_principles_kepler/summary_stats.tsv | 2 ++ datasets/first_principles_leavitt/README.md | 6 +++++ .../first_principles_leavitt/metadata.yaml | 21 +++++++++++++++ .../summary_stats.tsv | 2 ++ datasets/first_principles_newton/README.md | 6 +++++ .../first_principles_newton/metadata.yaml | 27 +++++++++++++++++++ .../first_principles_newton/summary_stats.tsv | 2 ++ datasets/first_principles_planck/README.md | 6 +++++ .../first_principles_planck/metadata.yaml | 24 +++++++++++++++++ .../first_principles_planck/summary_stats.tsv | 2 ++ datasets/first_principles_rydberg/README.md | 6 +++++ .../first_principles_rydberg/metadata.yaml | 24 +++++++++++++++++ .../summary_stats.tsv | 2 ++ datasets/first_principles_schechter/README.md | 6 +++++ .../first_principles_schechter/metadata.yaml | 21 +++++++++++++++ .../summary_stats.tsv | 2 ++ .../first_principles_supernovae_zg/README.md | 6 +++++ .../metadata.yaml | 18 +++++++++++++ .../summary_stats.tsv | 2 ++ .../first_principles_supernovae_zr/README.md | 6 +++++ .../metadata.yaml | 18 +++++++++++++ .../summary_stats.tsv | 2 ++ .../first_principles_tully_fisher/README.md | 6 +++++ .../metadata.yaml | 21 +++++++++++++++ .../summary_stats.tsv | 2 ++ 39 files changed, 390 insertions(+) create mode 100644 
datasets/first_principles_absorption/README.md create mode 100644 datasets/first_principles_absorption/metadata.yaml create mode 100644 datasets/first_principles_absorption/summary_stats.tsv create mode 100644 datasets/first_principles_bode/README.md create mode 100644 datasets/first_principles_bode/metadata.yaml create mode 100644 datasets/first_principles_bode/summary_stats.tsv create mode 100644 datasets/first_principles_hubble/README.md create mode 100644 datasets/first_principles_hubble/metadata.yaml create mode 100644 datasets/first_principles_hubble/summary_stats.tsv create mode 100644 datasets/first_principles_ideal_gas/README.md create mode 100644 datasets/first_principles_ideal_gas/metadata.yaml create mode 100644 datasets/first_principles_ideal_gas/summary_stats.tsv create mode 100644 datasets/first_principles_kepler/README.md create mode 100644 datasets/first_principles_kepler/metadata.yaml create mode 100644 datasets/first_principles_kepler/summary_stats.tsv create mode 100644 datasets/first_principles_leavitt/README.md create mode 100644 datasets/first_principles_leavitt/metadata.yaml create mode 100644 datasets/first_principles_leavitt/summary_stats.tsv create mode 100644 datasets/first_principles_newton/README.md create mode 100644 datasets/first_principles_newton/metadata.yaml create mode 100644 datasets/first_principles_newton/summary_stats.tsv create mode 100644 datasets/first_principles_planck/README.md create mode 100644 datasets/first_principles_planck/metadata.yaml create mode 100644 datasets/first_principles_planck/summary_stats.tsv create mode 100644 datasets/first_principles_rydberg/README.md create mode 100644 datasets/first_principles_rydberg/metadata.yaml create mode 100644 datasets/first_principles_rydberg/summary_stats.tsv create mode 100644 datasets/first_principles_schechter/README.md create mode 100644 datasets/first_principles_schechter/metadata.yaml create mode 100644 datasets/first_principles_schechter/summary_stats.tsv create 
mode 100644 datasets/first_principles_supernovae_zg/README.md create mode 100644 datasets/first_principles_supernovae_zg/metadata.yaml create mode 100644 datasets/first_principles_supernovae_zg/summary_stats.tsv create mode 100644 datasets/first_principles_supernovae_zr/README.md create mode 100644 datasets/first_principles_supernovae_zr/metadata.yaml create mode 100644 datasets/first_principles_supernovae_zr/summary_stats.tsv create mode 100644 datasets/first_principles_tully_fisher/README.md create mode 100644 datasets/first_principles_tully_fisher/metadata.yaml create mode 100644 datasets/first_principles_tully_fisher/summary_stats.tsv diff --git a/datasets/first_principles_absorption/README.md b/datasets/first_principles_absorption/README.md new file mode 100644 index 00000000..1e02fd29 --- /dev/null +++ b/datasets/first_principles_absorption/README.md @@ -0,0 +1,6 @@ +# first_principles_absorption + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/first_principles_absorption.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/first_principles_absorption/metadata.yaml b/datasets/first_principles_absorption/metadata.yaml new file mode 100644 index 00000000..2bc3e5a5 --- /dev/null +++ b/datasets/first_principles_absorption/metadata.yaml @@ -0,0 +1,18 @@ +# Created by Guilherme Aldeia (@galdeia) +dataset: first_principles_absorption +description: | + A real-world dataset containing the absorption of light for a solution containing a specific molecule at different levels of concentration. The original publication has data for 4 different molecules, and here we include data from only one of them (`real_data/absorption/examples/example0.csv`), the one with highest number of samples. 
+source: publication repository https://github.com/erusseil/MvSR-analysis +publication: Etienne Russeil, Fabricio Olivetti de Franca, Konstantin Malanchev, Bogdan Burlacu, Emille Ishida, Marion Leroux, Clément Michelin, Guillaume Moinard, and Emmanuel Gangler. 2024. Multiview Symbolic Regression. In Proceedings of the Genetic and Evolutionary Computation Conference (GECCO '24). Association for Computing Machinery, New York, NY, USA, 961-970. https://doi.org/10.1145/3638529.3654087 +task: regression +keywords: + - symbolic regression + - physics + - first principles +target: + type: continuous + description: Absorption, the amount of light absorbed by the solution +features: + - name: Xaxis0 + type: continuous + description: Concentration (mol/L) \ No newline at end of file diff --git a/datasets/first_principles_absorption/summary_stats.tsv b/datasets/first_principles_absorption/summary_stats.tsv new file mode 100644 index 00000000..d25c9522 --- /dev/null +++ b/datasets/first_principles_absorption/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +first_principles_absorption 14 1 0 0 1 continuous 14.0 0.0 regression diff --git a/datasets/first_principles_bode/README.md b/datasets/first_principles_bode/README.md new file mode 100644 index 00000000..4be0639d --- /dev/null +++ b/datasets/first_principles_bode/README.md @@ -0,0 +1,6 @@ +# first_principles_bode + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/first_principles_bode.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/first_principles_bode/metadata.yaml b/datasets/first_principles_bode/metadata.yaml new file mode 100644 index 00000000..b8442c86 --- /dev/null +++ b/datasets/first_principles_bode/metadata.yaml @@ -0,0 +1,25 @@ +# Created by Guilherme Aldeia (@galdeia) +dataset: first_principles_bode +description: | + 
Bode's law is a model for the distance of planets from the Sun, given their order in the solar system. + + The governing equation is given by: + + a = 0.4 + 0.3 (2^n) + + Data was taken from Bonnet, Contemplation de la Nature (1764), and the planets Neptune and Pluto are skipped. + +source: publication repository https://github.com/MilesCranmer/PySR +publication: Interpretable machine learning for science with PySR and SymbolicRegression, CRANMER, Miles, arXiv preprint arXiv:2305.01582, 2023. +task: regression +keywords: + - symbolic regression + - physics + - first principles +target: + type: continuous + description: semi-major axis, AU +features: + - name: "n" + type: categorical + description: planet index \ No newline at end of file diff --git a/datasets/first_principles_bode/summary_stats.tsv b/datasets/first_principles_bode/summary_stats.tsv new file mode 100644 index 00000000..7bc055b9 --- /dev/null +++ b/datasets/first_principles_bode/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +first_principles_bode 8 1 0 1 0 continuous 8.0 0.0 regression diff --git a/datasets/first_principles_hubble/README.md b/datasets/first_principles_hubble/README.md new file mode 100644 index 00000000..cf8fee97 --- /dev/null +++ b/datasets/first_principles_hubble/README.md @@ -0,0 +1,6 @@ +# first_principles_hubble + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/first_principles_hubble.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/first_principles_hubble/metadata.yaml b/datasets/first_principles_hubble/metadata.yaml new file mode 100644 index 00000000..f5b6fd2c --- /dev/null +++ b/datasets/first_principles_hubble/metadata.yaml @@ -0,0 +1,19 @@ +# Created by Guilherme Aldeia (@galdeia) +dataset: first_principles_hubble +description: | + The Hubble constant is a measure of the rate of
expansion of the universe, measured as v = H_0 D, where v is the velocity of a galaxy, D is its distance from us, and H_0 is the Hubble constant (73.3 (km/s)/Mpc). + +source: publication repository https://github.com/MilesCranmer/PySR +publication: Interpretable machine learning for science with PySR and SymbolicRegression, CRANMER, Miles, arXiv preprint arXiv:2305.01582, 2023. +task: regression +keywords: + - symbolic regression + - physics + - first principles +target: + type: continuous + description: velocity, in km/s +features: + - name: D + type: continuous + description: distance of a galaxy from us (in million parsecs, Mpc) \ No newline at end of file diff --git a/datasets/first_principles_hubble/summary_stats.tsv b/datasets/first_principles_hubble/summary_stats.tsv new file mode 100644 index 00000000..8763ea56 --- /dev/null +++ b/datasets/first_principles_hubble/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +first_principles_hubble 32 1 0 0 1 continuous 32.0 0.0 regression diff --git a/datasets/first_principles_ideal_gas/README.md b/datasets/first_principles_ideal_gas/README.md new file mode 100644 index 00000000..6d2ad97b --- /dev/null +++ b/datasets/first_principles_ideal_gas/README.md @@ -0,0 +1,6 @@ +# first_principles_ideal_gas + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/first_principles_ideal_gas.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/first_principles_ideal_gas/metadata.yaml b/datasets/first_principles_ideal_gas/metadata.yaml new file mode 100644 index 00000000..367aa297 --- /dev/null +++ b/datasets/first_principles_ideal_gas/metadata.yaml @@ -0,0 +1,27 @@ +# Created by Guilherme Aldeia (@galdeia) +dataset: first_principles_ideal_gas +description: | + The ideal gas law is a model for the pressure of an ideal gas, given its temperature
and volume. + + Data was generated using the ideal gas law, with a range of parameters using the scripts in the publication repository. + +source: publication repository https://github.com/MilesCranmer/PySR +publication: Interpretable machine learning for science with PySR and SymbolicRegression, CRANMER, Miles, arXiv preprint arXiv:2305.01582, 2023. +task: regression +keywords: + - symbolic regression + - physics + - first principles +target: + type: continuous + description: pressure +features: + - name: "n" + type: continuous + description: number density + - name: T + type: continuous + description: temperature + - name: V + type: continuous + description: volume diff --git a/datasets/first_principles_ideal_gas/summary_stats.tsv b/datasets/first_principles_ideal_gas/summary_stats.tsv new file mode 100644 index 00000000..9ebf1e14 --- /dev/null +++ b/datasets/first_principles_ideal_gas/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +first_principles_ideal_gas 30 3 0 0 3 continuous 30.0 0.0 regression diff --git a/datasets/first_principles_kepler/README.md b/datasets/first_principles_kepler/README.md new file mode 100644 index 00000000..02924920 --- /dev/null +++ b/datasets/first_principles_kepler/README.md @@ -0,0 +1,6 @@ +# first_principles_kepler + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/first_principles_kepler.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/first_principles_kepler/metadata.yaml b/datasets/first_principles_kepler/metadata.yaml new file mode 100644 index 00000000..5341678d --- /dev/null +++ b/datasets/first_principles_kepler/metadata.yaml @@ -0,0 +1,23 @@ +# Created by Guilherme Aldeia (@galdeia) +dataset: first_principles_kepler +description: | + The Kepler dataset is based on Kepler's third law of planetary motion.
The features are the semi-major axis (a) and the period (P) of a planet's orbit, given by the equation: + + P^2 = k a^3 + + Each row corresponds to a planet in the following order: Mercury, Venus, Earth, Mars, Jupiter, Saturn, and it is based on the data used by Kepler in 1618. + +source: publication repository https://github.com/MilesCranmer/PySR +publication: Interpretable machine learning for science with PySR and SymbolicRegression, CRANMER, Miles, arXiv preprint arXiv:2305.01582, 2023. +task: regression +keywords: + - symbolic regression + - physics + - first principles +target: + type: continuous + description: Period in days +features: + - name: a + type: continuous + description: semi-major axis, AU \ No newline at end of file diff --git a/datasets/first_principles_kepler/summary_stats.tsv b/datasets/first_principles_kepler/summary_stats.tsv new file mode 100644 index 00000000..8817da3c --- /dev/null +++ b/datasets/first_principles_kepler/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +first_principles_kepler 6 1 0 0 1 continuous 6.0 0.0 regression diff --git a/datasets/first_principles_leavitt/README.md b/datasets/first_principles_leavitt/README.md new file mode 100644 index 00000000..f3b09005 --- /dev/null +++ b/datasets/first_principles_leavitt/README.md @@ -0,0 +1,6 @@ +# first_principles_leavitt + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/first_principles_leavitt.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/first_principles_leavitt/metadata.yaml b/datasets/first_principles_leavitt/metadata.yaml new file mode 100644 index 00000000..262fe212 --- /dev/null +++ b/datasets/first_principles_leavitt/metadata.yaml @@ -0,0 +1,21 @@ +# Created by Guilherme Aldeia (@galdeia) +dataset: first_principles_leavitt +description: | + The Leavitt dataset models the 
relationship between the luminosity of a star and its period. The dataset is based on the Leavitt Law. + + The data was taken from 1912 paper by Leavitt. y-axis is magnitude at maxima and minima. x-axis is logarithm of period in days. + +source: publication repository https://github.com/MilesCranmer/PySR +publication: Interpretable machine learning for science with PySR and SymbolicRegression, CRANMER, Miles, arXiv preprint arXiv:2305.01582, 2023. +task: regression +keywords: + - symbolic regression + - physics + - first principles +target: + type: continuous + description: Magnitude +features: + - name: logP + type: continuous + description: Logarithm of period in days \ No newline at end of file diff --git a/datasets/first_principles_leavitt/summary_stats.tsv b/datasets/first_principles_leavitt/summary_stats.tsv new file mode 100644 index 00000000..fffab31e --- /dev/null +++ b/datasets/first_principles_leavitt/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +first_principles_leavitt 26 1 0 0 1 continuous 26.0 0.0 regression diff --git a/datasets/first_principles_newton/README.md b/datasets/first_principles_newton/README.md new file mode 100644 index 00000000..7c572889 --- /dev/null +++ b/datasets/first_principles_newton/README.md @@ -0,0 +1,6 @@ +# first_principles_newton + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/first_principles_newton.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/first_principles_newton/metadata.yaml b/datasets/first_principles_newton/metadata.yaml new file mode 100644 index 00000000..41a417ea --- /dev/null +++ b/datasets/first_principles_newton/metadata.yaml @@ -0,0 +1,27 @@ +# Created by Guilherme Aldeia (@galdeia) +dataset: first_principles_newton +description: | + The Newton's law of gravitation is a model for the gravitational force 
between two bodies, given their masses and distance. + + Data was synthetically generated using the Newton's law of gravitation, with a range of parameters using the scripts in the publication repository. + +source: publication repository https://github.com/MilesCranmer/PySR +publication: Interpretable machine learning for science with PySR and SymbolicRegression, CRANMER, Miles, arXiv preprint arXiv:2305.01582, 2023. +task: regression +keywords: + - symbolic regression + - physics + - first principles +target: + type: continuous + description: x-component of force +features: + - name: r + type: continuous + description: distance in m + - name: m1 + type: continuous + description: mass of first object, kg + - name: m2 + type: continuous + description: mass of second object, kg diff --git a/datasets/first_principles_newton/summary_stats.tsv b/datasets/first_principles_newton/summary_stats.tsv new file mode 100644 index 00000000..97de34db --- /dev/null +++ b/datasets/first_principles_newton/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +first_principles_newton 30 3 0 0 3 continuous 30.0 0.0 regression diff --git a/datasets/first_principles_planck/README.md b/datasets/first_principles_planck/README.md new file mode 100644 index 00000000..5fe7fa5c --- /dev/null +++ b/datasets/first_principles_planck/README.md @@ -0,0 +1,6 @@ +# first_principles_planck + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/first_principles_planck.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/first_principles_planck/metadata.yaml b/datasets/first_principles_planck/metadata.yaml new file mode 100644 index 00000000..25c74aac --- /dev/null +++ b/datasets/first_principles_planck/metadata.yaml @@ -0,0 +1,24 @@ +# Created by Guilherme Aldeia (@galdeia) +dataset: first_principles_planck 
+description: | + Planck's law is a model for the spectral radiance of a black body, given its temperature and frequency. + + Data was synthetically generated using Planck's law, with a range of parameters using the scripts in the publication repository. + +source: publication repository https://github.com/MilesCranmer/PySR +publication: Interpretable machine learning for science with PySR and SymbolicRegression, CRANMER, Miles, arXiv preprint arXiv:2305.01582, 2023. +task: regression +keywords: + - symbolic regression + - physics + - first principles +target: + type: continuous + description: spectral radiance +features: + - name: nu + type: continuous + description: frequency in Hz + - name: T + type: continuous + description: temperature \ No newline at end of file diff --git a/datasets/first_principles_planck/summary_stats.tsv b/datasets/first_principles_planck/summary_stats.tsv new file mode 100644 index 00000000..2ddacbac --- /dev/null +++ b/datasets/first_principles_planck/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +first_principles_planck 100 2 0 0 2 continuous 100.0 0.0 regression diff --git a/datasets/first_principles_rydberg/README.md b/datasets/first_principles_rydberg/README.md new file mode 100644 index 00000000..3b14b427 --- /dev/null +++ b/datasets/first_principles_rydberg/README.md @@ -0,0 +1,6 @@ +# first_principles_rydberg + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/first_principles_rydberg.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/first_principles_rydberg/metadata.yaml b/datasets/first_principles_rydberg/metadata.yaml new file mode 100644 index 00000000..b39bc11f --- /dev/null +++ b/datasets/first_principles_rydberg/metadata.yaml @@ -0,0 +1,24 @@ +# Created by Guilherme Aldeia (@galdeia) +dataset: first_principles_rydberg
+description: | + The Rydberg formula is a model for the energy levels of an electron in a hydrogen atom. + + Data was synthetically generated using the Rydberg formula, with a range of parameters using the scripts in the publication repository. + +source: publication repository https://github.com/MilesCranmer/PySR +publication: Interpretable machine learning for science with PySR and SymbolicRegression, CRANMER, Miles, arXiv preprint arXiv:2305.01582, 2023. +task: regression +keywords: + - symbolic regression + - physics + - first principles +target: + type: continuous + description: wavelength, m +features: + - name: n_1 + type: categorical + description: principal quantum number + - name: n_2 + type: categorical + description: principal quantum number after the transition \ No newline at end of file diff --git a/datasets/first_principles_rydberg/summary_stats.tsv b/datasets/first_principles_rydberg/summary_stats.tsv new file mode 100644 index 00000000..a052c301 --- /dev/null +++ b/datasets/first_principles_rydberg/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +first_principles_rydberg 50 2 0 2 0 continuous 50.0 0.0 regression diff --git a/datasets/first_principles_schechter/README.md b/datasets/first_principles_schechter/README.md new file mode 100644 index 00000000..13740441 --- /dev/null +++ b/datasets/first_principles_schechter/README.md @@ -0,0 +1,6 @@ +# first_principles_schechter + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/first_principles_schechter.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/first_principles_schechter/metadata.yaml b/datasets/first_principles_schechter/metadata.yaml new file mode 100644 index 00000000..1549ba0e --- /dev/null +++ b/datasets/first_principles_schechter/metadata.yaml @@ -0,0 +1,21 @@ +# Created by Guilherme Aldeia
(@galdeia) +dataset: first_principles_schechter +description: | + The Schechter function is a model for the density of galaxies, given their luminosity. + + Data was synthetically generated using the Schechter function, with a range of parameters using the scripts in the publication repository. + +source: publication repository https://github.com/MilesCranmer/PySR +publication: Interpretable machine learning for science with PySR and SymbolicRegression, CRANMER, Miles, arXiv preprint arXiv:2305.01582, 2023. +task: regression +keywords: + - symbolic regression + - physics + - first principles +target: + type: continuous + description: number density +features: + - name: L + type: continuous + description: luminosity diff --git a/datasets/first_principles_schechter/summary_stats.tsv b/datasets/first_principles_schechter/summary_stats.tsv new file mode 100644 index 00000000..973ba127 --- /dev/null +++ b/datasets/first_principles_schechter/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +first_principles_schechter 27 1 0 0 1 continuous 27.0 0.0 regression diff --git a/datasets/first_principles_supernovae_zg/README.md b/datasets/first_principles_supernovae_zg/README.md new file mode 100644 index 00000000..032ed7ca --- /dev/null +++ b/datasets/first_principles_supernovae_zg/README.md @@ -0,0 +1,6 @@ +# first_principles_supernovae_zg + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/first_principles_supernovae_zg.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/first_principles_supernovae_zg/metadata.yaml b/datasets/first_principles_supernovae_zg/metadata.yaml new file mode 100644 index 00000000..0a0da2e6 --- /dev/null +++ b/datasets/first_principles_supernovae_zg/metadata.yaml @@ -0,0 +1,18 @@ +# Created by Guilherme Aldeia (@galdeia) +dataset: first_principles_supernovae_zr
+description: | + A real-world dataset containing the normalized flux of a supernova explosion from ZTF Data Release 17. The original publication has data for 6 different events, and here we include data from only one of them (`real_data/supernovae/examples/example1.csv`). +source: publication repository https://github.com/erusseil/MvSR-analysis +publication: Etienne Russeil, Fabricio Olivetti de Franca, Konstantin Malanchev, Bogdan Burlacu, Emille Ishida, Marion Leroux, Clément Michelin, Guillaume Moinard, and Emmanuel Gangler. 2024. Multiview Symbolic Regression. In Proceedings of the Genetic and Evolutionary Computation Conference (GECCO '24). Association for Computing Machinery, New York, NY, USA, 961-970. https://doi.org/10.1145/3638529.3654087 +task: regression +keywords: + - symbolic regression + - physics + - first principles +target: + type: continuous + description: Normalized radiation flux over time +features: + - name: Xaxis0 + type: continuous + description: Days after observed peak \ No newline at end of file diff --git a/datasets/first_principles_supernovae_zg/summary_stats.tsv b/datasets/first_principles_supernovae_zg/summary_stats.tsv new file mode 100644 index 00000000..5de6f7ec --- /dev/null +++ b/datasets/first_principles_supernovae_zg/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +first_principles_supernovae_zg 243 1 0 0 1 continuous 243.0 0.0 regression diff --git a/datasets/first_principles_supernovae_zr/README.md b/datasets/first_principles_supernovae_zr/README.md new file mode 100644 index 00000000..419245e7 --- /dev/null +++ b/datasets/first_principles_supernovae_zr/README.md @@ -0,0 +1,6 @@ +# first_principles_supernovae_zr + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/first_principles_supernovae_zr.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git 
a/datasets/first_principles_supernovae_zr/metadata.yaml b/datasets/first_principles_supernovae_zr/metadata.yaml new file mode 100644 index 00000000..2b101132 --- /dev/null +++ b/datasets/first_principles_supernovae_zr/metadata.yaml @@ -0,0 +1,18 @@ +# Created by Guilherme Aldeia (@galdeia) +dataset: first_principles_supernovae_zr +description: | + A real-world dataset containing the normalized flux of a supernova explosion from ZTF Data Release 17. The original publication has data for 6 different events, and here we include data from only one of them (`real_data/supernovae/examples/example2.csv`). +source: publication repository https://github.com/erusseil/MvSR-analysis +publication: Etienne Russeil, Fabricio Olivetti de Franca, Konstantin Malanchev, Bogdan Burlacu, Emille Ishida, Marion Leroux, Clément Michelin, Guillaume Moinard, and Emmanuel Gangler. 2024. Multiview Symbolic Regression. In Proceedings of the Genetic and Evolutionary Computation Conference (GECCO '24). Association for Computing Machinery, New York, NY, USA, 961-970. 
https://doi.org/10.1145/3638529.3654087 +task: regression +keywords: + - symbolic regression + - physics + - first principles +target: + type: continuous + description: Normalized radiation flux over time +features: + - name: Xaxis0 + type: continuous + description: Days after observed peak \ No newline at end of file diff --git a/datasets/first_principles_supernovae_zr/summary_stats.tsv b/datasets/first_principles_supernovae_zr/summary_stats.tsv new file mode 100644 index 00000000..16d45243 --- /dev/null +++ b/datasets/first_principles_supernovae_zr/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +first_principles_supernovae_zr 236 1 0 0 1 continuous 236.0 0.0 regression diff --git a/datasets/first_principles_tully_fisher/README.md b/datasets/first_principles_tully_fisher/README.md new file mode 100644 index 00000000..11aafa73 --- /dev/null +++ b/datasets/first_principles_tully_fisher/README.md @@ -0,0 +1,6 @@ +# first_principles_tully_fisher + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/first_principles_tully_fisher.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/first_principles_tully_fisher/metadata.yaml b/datasets/first_principles_tully_fisher/metadata.yaml new file mode 100644 index 00000000..0ac0cb5f --- /dev/null +++ b/datasets/first_principles_tully_fisher/metadata.yaml @@ -0,0 +1,21 @@ +# Created by Guilherme Aldeia (@galdeia) +dataset: first_principles_tully_fisher +description: | + The Tully-Fisher relation is an empirical correlation between the luminosity of a spiral galaxy and its rotational velocity, named after astronomers Richard Tully and J. Richard Fisher, who first proposed it in 1977, based on physical principles. + The data was extracted from Fig. 5(a) of Tully and Fisher publication. 
+ + The original relation is: L \propto \Delta V(0)^{2.5} +source: publication repository https://github.com/MilesCranmer/PySR +publication: Interpretable machine learning for science with PySR and SymbolicRegression, CRANMER, Miles, arXiv preprint arXiv:2305.01582, 2023. +task: regression +keywords: + - symbolic regression + - physics + - first principles +target: + type: continuous + description: luminosity +features: + - name: DV + type: continuous + description: rotational velocity \ No newline at end of file diff --git a/datasets/first_principles_tully_fisher/summary_stats.tsv b/datasets/first_principles_tully_fisher/summary_stats.tsv new file mode 100644 index 00000000..4cf58f46 --- /dev/null +++ b/datasets/first_principles_tully_fisher/summary_stats.tsv @@ -0,0 +1,2 @@ +dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task +first_principles_tully_fisher 18 1 0 0 1 continuous 18.0 0.0 regression From 253f0dd01c5d72ca7e847ab0a57e51901a2b6132 Mon Sep 17 00:00:00 2001 From: gAldeia Date: Thu, 20 Feb 2025 09:05:33 -0300 Subject: [PATCH 5/8] Updated summary --- pmlb/all_summary_stats.tsv | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pmlb/all_summary_stats.tsv b/pmlb/all_summary_stats.tsv index 9011bd80..8f8e6445 100644 --- a/pmlb/all_summary_stats.tsv +++ b/pmlb/all_summary_stats.tsv @@ -310,6 +310,19 @@ feynman_test_6 100000 7 0 0 7 continuous 100000.0 0.0 regression feynman_test_7 100000 5 0 0 5 continuous 100000.0 0.0 regression feynman_test_8 100000 4 0 0 4 continuous 100000.0 0.0 regression feynman_test_9 100000 5 0 0 5 continuous 100000.0 0.0 regression +first_principles_absorption 14 1 0 0 1 continuous 14.0 0.0 regression +first_principles_bode 8 1 0 1 0 continuous 8.0 0.0 regression +first_principles_hubble 32 1 0 0 1 continuous 32.0 0.0 regression +first_principles_ideal_gas 30 3 0 0 3 continuous 30.0 0.0 regression +first_principles_kepler 6 1 0 
0 1 continuous 6.0 0.0 regression +first_principles_leavitt 26 1 0 0 1 continuous 26.0 0.0 regression +first_principles_newton 30 3 0 0 3 continuous 30.0 0.0 regression +first_principles_planck 100 2 0 0 2 continuous 100.0 0.0 regression +first_principles_rydberg 50 2 0 2 0 continuous 50.0 0.0 regression +first_principles_schechter 27 1 0 0 1 continuous 27.0 0.0 regression +first_principles_supernovae_zg 243 1 0 0 1 continuous 243.0 0.0 regression +first_principles_supernovae_zr 236 1 0 0 1 continuous 236.0 0.0 regression +first_principles_tully_fisher 18 1 0 0 1 continuous 18.0 0.0 regression flags 178 43 36 5 2 categorical 5.0 0.04391806590077 classification flare 1066 10 4 6 0 categorical 2.0 0.4336704342653181 classification german 1000 20 3 14 3 categorical 2.0 0.1599999999999999 classification @@ -371,6 +384,18 @@ prnn_crabs 200 7 1 0 6 categorical 2.0 0.0 classification prnn_fglass 205 9 0 0 9 categorical 5.0 0.1061867935752528 classification prnn_synth 250 2 0 0 2 categorical 2.0 0.0 classification profb 672 9 1 2 6 categorical 2.0 0.1111111111111111 classification +rethinking_feynman-i_30_3 10000 3 0 0 3 continuous 10000.0 0.0 regression +rethinking_feynman-i_32_17 10000 4 0 0 4 continuous 10000.0 0.0 regression +rethinking_feynman-i_40_1 10000 4 0 0 4 continuous 10000.0 0.0 regression +rethinking_feynman-i_44_4 10000 4 0 0 4 continuous 10000.0 0.0 regression +rethinking_feynman-ii_11_20 10000 4 0 0 4 continuous 10000.0 0.0 regression +rethinking_feynman-ii_11_27 10000 3 0 0 3 continuous 10000.0 0.0 regression +rethinking_feynman-ii_35_21 10000 4 0 0 4 continuous 10000.0 0.0 regression +rethinking_feynman-ii_36_38 10000 5 0 0 5 continuous 10000.0 0.0 regression +rethinking_feynman-ii_6_15b 10000 3 0 0 3 continuous 10000.0 0.0 regression +rethinking_feynman-iii_10_19 10000 4 0 0 4 continuous 10000.0 0.0 regression +rethinking_feynman-iii_21_20 10000 4 0 0 4 continuous 10000.0 0.0 regression +rethinking_feynman-iii_9_52 10000 5 0 0 5 continuous 10000.0 0.0 
regression ring 7400 20 0 0 20 categorical 2.0 9.46676406135857e-05 classification saheart 462 9 1 0 8 categorical 2.0 0.0944697438203931 classification satimage 6435 36 0 0 36 categorical 6.0 0.0276087321122286 classification From 641950a49d3783d93f0fa4434b0e4f6fca93a5e3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 20 Feb 2025 12:08:41 +0000 Subject: [PATCH 6/8] update dataset files Created by https://github.com/gAldeia/pmlb/actions/runs/13434894123\nfrom bdc87c8 on 2025-02-20 --- pmlb/all_summary_stats.tsv | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/pmlb/all_summary_stats.tsv b/pmlb/all_summary_stats.tsv index 8f8e6445..8fe13be2 100644 --- a/pmlb/all_summary_stats.tsv +++ b/pmlb/all_summary_stats.tsv @@ -384,18 +384,6 @@ prnn_crabs 200 7 1 0 6 categorical 2.0 0.0 classification prnn_fglass 205 9 0 0 9 categorical 5.0 0.1061867935752528 classification prnn_synth 250 2 0 0 2 categorical 2.0 0.0 classification profb 672 9 1 2 6 categorical 2.0 0.1111111111111111 classification -rethinking_feynman-i_30_3 10000 3 0 0 3 continuous 10000.0 0.0 regression -rethinking_feynman-i_32_17 10000 4 0 0 4 continuous 10000.0 0.0 regression -rethinking_feynman-i_40_1 10000 4 0 0 4 continuous 10000.0 0.0 regression -rethinking_feynman-i_44_4 10000 4 0 0 4 continuous 10000.0 0.0 regression -rethinking_feynman-ii_11_20 10000 4 0 0 4 continuous 10000.0 0.0 regression -rethinking_feynman-ii_11_27 10000 3 0 0 3 continuous 10000.0 0.0 regression -rethinking_feynman-ii_35_21 10000 4 0 0 4 continuous 10000.0 0.0 regression -rethinking_feynman-ii_36_38 10000 5 0 0 5 continuous 10000.0 0.0 regression -rethinking_feynman-ii_6_15b 10000 3 0 0 3 continuous 10000.0 0.0 regression -rethinking_feynman-iii_10_19 10000 4 0 0 4 continuous 10000.0 0.0 regression -rethinking_feynman-iii_21_20 10000 4 0 0 4 continuous 10000.0 0.0 regression -rethinking_feynman-iii_9_52 10000 5 0 0 5 continuous 10000.0 0.0 regression ring 7400 20 0 0 20 categorical 2.0 
9.46676406135857e-05 classification saheart 462 9 1 0 8 categorical 2.0 0.0944697438203931 classification satimage 6435 36 0 0 36 categorical 6.0 0.0276087321122286 classification From f02e9f9ce04bfc12b6154f61acbf8d995c2bec83 Mon Sep 17 00:00:00 2001 From: gAldeia Date: Thu, 20 Feb 2025 13:38:35 -0300 Subject: [PATCH 7/8] Fix typo --- datasets/first_principles_supernovae_zg/metadata.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/first_principles_supernovae_zg/metadata.yaml b/datasets/first_principles_supernovae_zg/metadata.yaml index 71153bf1..95a21526 100644 --- a/datasets/first_principles_supernovae_zg/metadata.yaml +++ b/datasets/first_principles_supernovae_zg/metadata.yaml @@ -1,5 +1,5 @@ # Created by Guilherme Aldeia (@galdeia) -dataset: first_principles_supernovae_zr +dataset: first_principles_supernovae_zg description: | A real-world dataset containing the normalized flux of a supernova explosion from ZTF Data Release 17. The original publication has data for 6 different events, and here we include data from only one of them (`real_data/supernovae/examples/example1.csv`). 
source: publication repository https://github.com/erusseil/MvSR-analysis From f7105105c8eaf6a225fceccac92c15224654048b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 21 Feb 2025 21:44:32 +0000 Subject: [PATCH 8/8] update dataset files Created by https://github.com/gAldeia/pmlb/actions/runs/13465733857\nfrom a226e6b on 2025-02-21 --- .lfs-assets-id | 1 + datasets/auto_insurance_losses/README.md | 6 ++++ .../auto_insurance_losses/summary_stats.tsv | 2 +- datasets/auto_insurance_price/README.md | 6 ++++ .../auto_insurance_price/summary_stats.tsv | 2 +- datasets/auto_insurance_symboling/README.md | 6 ++++ .../summary_stats.tsv | 2 +- .../README.md | 6 ++++ .../summary_stats.tsv | 2 +- .../README.md | 6 ++++ .../summary_stats.tsv | 2 +- datasets/car_evaluation/README.md | 6 ++++ datasets/car_evaluation/summary_stats.tsv | 2 +- .../congressional_voting_records/README.md | 6 ++++ .../summary_stats.tsv | 2 +- datasets/contraceptive_method/README.md | 6 ++++ .../contraceptive_method/summary_stats.tsv | 2 +- datasets/credit_approval_australia/README.md | 6 ++++ .../summary_stats.tsv | 2 +- datasets/credit_approval_germany/README.md | 6 ++++ .../credit_approval_germany/summary_stats.tsv | 2 +- datasets/heart_disease_cleveland/README.md | 6 ++++ .../heart_disease_cleveland/summary_stats.tsv | 2 +- datasets/heart_disease_hungarian/README.md | 6 ++++ .../heart_disease_hungarian/summary_stats.tsv | 2 +- .../heart_disease_va_long_beach/README.md | 6 ++++ .../summary_stats.tsv | 2 +- datasets/heart_disease_zurich/README.md | 6 ++++ .../heart_disease_zurich/summary_stats.tsv | 2 +- datasets/horse_colic_lesion_type/README.md | 6 ++++ .../horse_colic_lesion_type/summary_stats.tsv | 2 +- datasets/horse_colic_outcome/README.md | 6 ++++ .../horse_colic_outcome/summary_stats.tsv | 2 +- datasets/horse_colic_surgery/README.md | 6 ++++ .../horse_colic_surgery/summary_stats.tsv | 2 +- datasets/solar_flare/README.md | 6 ++++ datasets/solar_flare/summary_stats.tsv | 2 +- 
pmlb/all_summary_stats.tsv | 34 +++++++++---------- 38 files changed, 144 insertions(+), 35 deletions(-) create mode 100644 datasets/auto_insurance_losses/README.md create mode 100644 datasets/auto_insurance_price/README.md create mode 100644 datasets/auto_insurance_symboling/README.md create mode 100644 datasets/breast_cancer_wisconsin_diagnostic/README.md create mode 100644 datasets/breast_cancer_wisconsin_original/README.md create mode 100644 datasets/car_evaluation/README.md create mode 100644 datasets/congressional_voting_records/README.md create mode 100644 datasets/contraceptive_method/README.md create mode 100644 datasets/credit_approval_australia/README.md create mode 100644 datasets/credit_approval_germany/README.md create mode 100644 datasets/heart_disease_cleveland/README.md create mode 100644 datasets/heart_disease_hungarian/README.md create mode 100644 datasets/heart_disease_va_long_beach/README.md create mode 100644 datasets/heart_disease_zurich/README.md create mode 100644 datasets/horse_colic_lesion_type/README.md create mode 100644 datasets/horse_colic_outcome/README.md create mode 100644 datasets/horse_colic_surgery/README.md create mode 100644 datasets/solar_flare/README.md diff --git a/.lfs-assets-id b/.lfs-assets-id index 41b5131b..a2d2a5f0 100644 --- a/.lfs-assets-id +++ b/.lfs-assets-id @@ -44,6 +44,7 @@ 1ee8cf9693351db7afe68f6fc32942845caae3e1030c688efa6c5d0b24229f46 1f5cf829d2e58032e5d9067f1e7bf3fe7644cd5fe2825c81ef7fbaa445f496a7 1f69f25b0168c39018c214ae39f2bb8fe6da97e1df389c2cb88cf9bde2f08ace +21586d500f0961c0d2c8296644e3ec269e2ecf783f3c39c0fdf1dc6159edee0a 21d506c397dfeb3edbbbc253b923f59be6edb516689677ebef535296c6c62242 22053c4cdaaaf6169d90e4ee03f8b994cb682281ab6787bf14c617301db5663b 22baa768886091d61e13e894610dfea3435dfd201f8300fbebe46cd6cf814c0b diff --git a/datasets/auto_insurance_losses/README.md b/datasets/auto_insurance_losses/README.md new file mode 100644 index 00000000..30753b15 --- /dev/null +++ 
b/datasets/auto_insurance_losses/README.md @@ -0,0 +1,6 @@ +# auto_insurance_losses + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/auto_insurance_losses.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/auto_insurance_losses/summary_stats.tsv b/datasets/auto_insurance_losses/summary_stats.tsv index 4ec2ff83..a0379006 100644 --- a/datasets/auto_insurance_losses/summary_stats.tsv +++ b/datasets/auto_insurance_losses/summary_stats.tsv @@ -1,2 +1,2 @@ dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task -auto_insurance_losses 164 24 3 7 14 continuous 51 0.00844289113622844 regression +auto_insurance_losses 164 24 3 6 15 categorical 51.0 0.008442891136228436 regression diff --git a/datasets/auto_insurance_price/README.md b/datasets/auto_insurance_price/README.md new file mode 100644 index 00000000..790e2c1a --- /dev/null +++ b/datasets/auto_insurance_price/README.md @@ -0,0 +1,6 @@ +# auto_insurance_price + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/auto_insurance_price.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/auto_insurance_price/summary_stats.tsv b/datasets/auto_insurance_price/summary_stats.tsv index 8ddf9f1f..bc1e27f5 100644 --- a/datasets/auto_insurance_price/summary_stats.tsv +++ b/datasets/auto_insurance_price/summary_stats.tsv @@ -1,2 +1,2 @@ dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task -auto_insurance_price 201 23 4 6 13 continuous 186 0.000343181229792947 regression +auto_insurance_price 201 23 4 5 14 categorical 186.0 0.0003431812297929476 regression diff --git a/datasets/auto_insurance_symboling/README.md b/datasets/auto_insurance_symboling/README.md new file mode 100644 index 00000000..c7ad853a --- /dev/null +++ 
b/datasets/auto_insurance_symboling/README.md @@ -0,0 +1,6 @@ +# auto_insurance_symboling + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/auto_insurance_symboling.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/auto_insurance_symboling/summary_stats.tsv b/datasets/auto_insurance_symboling/summary_stats.tsv index 11c1699a..2f34847d 100644 --- a/datasets/auto_insurance_symboling/summary_stats.tsv +++ b/datasets/auto_insurance_symboling/summary_stats.tsv @@ -1,2 +1,2 @@ dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task -auto_insurance_symboling 205 24 4 6 14 ordinal 6 0.0755788221296847 classification +auto_insurance_symboling 205 24 4 5 15 categorical 6.0 0.07557882212968471 classification diff --git a/datasets/breast_cancer_wisconsin_diagnostic/README.md b/datasets/breast_cancer_wisconsin_diagnostic/README.md new file mode 100644 index 00000000..d234bbe8 --- /dev/null +++ b/datasets/breast_cancer_wisconsin_diagnostic/README.md @@ -0,0 +1,6 @@ +# breast_cancer_wisconsin_diagnostic + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/breast_cancer_wisconsin_diagnostic.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/breast_cancer_wisconsin_diagnostic/summary_stats.tsv b/datasets/breast_cancer_wisconsin_diagnostic/summary_stats.tsv index 7cdefa1b..2f7487e9 100644 --- a/datasets/breast_cancer_wisconsin_diagnostic/summary_stats.tsv +++ b/datasets/breast_cancer_wisconsin_diagnostic/summary_stats.tsv @@ -1,2 +1,2 @@ dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task -breast_cancer_wisconsin_diagnostic 569 30 0 0 30 binary 2 0.064939878490615 classification +breast_cancer_wisconsin_diagnostic 569 30 0 0 30 categorical 2.0 0.06493987849061501 classification 
diff --git a/datasets/breast_cancer_wisconsin_original/README.md b/datasets/breast_cancer_wisconsin_original/README.md new file mode 100644 index 00000000..f48708a6 --- /dev/null +++ b/datasets/breast_cancer_wisconsin_original/README.md @@ -0,0 +1,6 @@ +# breast_cancer_wisconsin_original + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/breast_cancer_wisconsin_original.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/breast_cancer_wisconsin_original/summary_stats.tsv b/datasets/breast_cancer_wisconsin_original/summary_stats.tsv index a8ce8825..dad51799 100644 --- a/datasets/breast_cancer_wisconsin_original/summary_stats.tsv +++ b/datasets/breast_cancer_wisconsin_original/summary_stats.tsv @@ -1,2 +1,2 @@ dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task -breast_cancer_wisconsin_original 699 9 0 0 9 binary 2 0.0963751609186227 classification +breast_cancer_wisconsin_original 699 9 0 9 0 categorical 2.0 0.09637516091862275 classification diff --git a/datasets/car_evaluation/README.md b/datasets/car_evaluation/README.md new file mode 100644 index 00000000..e2eafe3a --- /dev/null +++ b/datasets/car_evaluation/README.md @@ -0,0 +1,6 @@ +# car_evaluation + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/car_evaluation.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/car_evaluation/summary_stats.tsv b/datasets/car_evaluation/summary_stats.tsv index cc39d15e..e4c9766a 100644 --- a/datasets/car_evaluation/summary_stats.tsv +++ b/datasets/car_evaluation/summary_stats.tsv @@ -1,2 +1,2 @@ dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task -car_evaluation 1728 6 0 6 0 categorical 4 0.390288315900777 classification +car_evaluation 1728 6 0 6 0 categorical 4.0 
0.39028831590077734 classification diff --git a/datasets/congressional_voting_records/README.md b/datasets/congressional_voting_records/README.md new file mode 100644 index 00000000..18aa8288 --- /dev/null +++ b/datasets/congressional_voting_records/README.md @@ -0,0 +1,6 @@ +# congressional_voting_records + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/congressional_voting_records.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/congressional_voting_records/summary_stats.tsv b/datasets/congressional_voting_records/summary_stats.tsv index 1600265f..95be8a81 100644 --- a/datasets/congressional_voting_records/summary_stats.tsv +++ b/datasets/congressional_voting_records/summary_stats.tsv @@ -1,2 +1,2 @@ dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task -congressional_voting_records 435 16 0 16 0 binary 2 0.0517954815695601 classification +congressional_voting_records 435 16 0 16 0 categorical 2.0 0.05179548156956005 classification diff --git a/datasets/contraceptive_method/README.md b/datasets/contraceptive_method/README.md new file mode 100644 index 00000000..b38d9ba3 --- /dev/null +++ b/datasets/contraceptive_method/README.md @@ -0,0 +1,6 @@ +# contraceptive_method + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/contraceptive_method.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/contraceptive_method/summary_stats.tsv b/datasets/contraceptive_method/summary_stats.tsv index 295e962e..8eb5382e 100644 --- a/datasets/contraceptive_method/summary_stats.tsv +++ b/datasets/contraceptive_method/summary_stats.tsv @@ -1,2 +1,2 @@ dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task -contraceptive_method 1473 9 3 4 2 categorical 3 0.030700608232641 
classification +contraceptive_method 1473 9 3 4 2 categorical 3.0 0.03070060823264104 classification diff --git a/datasets/credit_approval_australia/README.md b/datasets/credit_approval_australia/README.md new file mode 100644 index 00000000..9188ead2 --- /dev/null +++ b/datasets/credit_approval_australia/README.md @@ -0,0 +1,6 @@ +# credit_approval_australia + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/credit_approval_australia.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/credit_approval_australia/summary_stats.tsv b/datasets/credit_approval_australia/summary_stats.tsv index 62aac613..fc62984f 100644 --- a/datasets/credit_approval_australia/summary_stats.tsv +++ b/datasets/credit_approval_australia/summary_stats.tsv @@ -1,2 +1,2 @@ dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task -credit_approval_australia 690 15 4 5 6 binary 2 0.0121319050619618 classification +credit_approval_australia 690 15 4 4 7 categorical 2.0 0.012131905061961762 classification diff --git a/datasets/credit_approval_germany/README.md b/datasets/credit_approval_germany/README.md new file mode 100644 index 00000000..fb8b9682 --- /dev/null +++ b/datasets/credit_approval_germany/README.md @@ -0,0 +1,6 @@ +# credit_approval_germany + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/credit_approval_germany.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/credit_approval_germany/summary_stats.tsv b/datasets/credit_approval_germany/summary_stats.tsv index 5ad62c43..75fde931 100644 --- a/datasets/credit_approval_germany/summary_stats.tsv +++ b/datasets/credit_approval_germany/summary_stats.tsv @@ -1,2 +1,2 @@ dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task 
-credit_approval_germany 1000 20 3 14 3 binary 2 0.16 classification +credit_approval_germany 1000 20 3 14 3 categorical 2.0 0.15999999999999998 classification diff --git a/datasets/heart_disease_cleveland/README.md b/datasets/heart_disease_cleveland/README.md new file mode 100644 index 00000000..97340127 --- /dev/null +++ b/datasets/heart_disease_cleveland/README.md @@ -0,0 +1,6 @@ +# heart_disease_cleveland + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/heart_disease_cleveland.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/heart_disease_cleveland/summary_stats.tsv b/datasets/heart_disease_cleveland/summary_stats.tsv index dbf7b22c..145b3973 100644 --- a/datasets/heart_disease_cleveland/summary_stats.tsv +++ b/datasets/heart_disease_cleveland/summary_stats.tsv @@ -1,2 +1,2 @@ dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task -heart_disease_cleveland 303 13 3 4 6 binary 2 0.00680761145421473 classification +heart_disease_cleveland 303 13 3 5 5 categorical 2.0 0.006807611454214728 classification diff --git a/datasets/heart_disease_hungarian/README.md b/datasets/heart_disease_hungarian/README.md new file mode 100644 index 00000000..706232dc --- /dev/null +++ b/datasets/heart_disease_hungarian/README.md @@ -0,0 +1,6 @@ +# heart_disease_hungarian + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/heart_disease_hungarian.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/heart_disease_hungarian/summary_stats.tsv b/datasets/heart_disease_hungarian/summary_stats.tsv index d4a97619..0dcc1b6e 100644 --- a/datasets/heart_disease_hungarian/summary_stats.tsv +++ b/datasets/heart_disease_hungarian/summary_stats.tsv @@ -1,2 +1,2 @@ dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type 
n_classes imbalance task -heart_disease_hungarian 294 13 3 4 6 binary 2 0.0777916608820399 classification +heart_disease_hungarian 294 13 4 4 5 categorical 2.0 0.07779166088203991 classification diff --git a/datasets/heart_disease_va_long_beach/README.md b/datasets/heart_disease_va_long_beach/README.md new file mode 100644 index 00000000..5bd43389 --- /dev/null +++ b/datasets/heart_disease_va_long_beach/README.md @@ -0,0 +1,6 @@ +# heart_disease_va_long_beach + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/heart_disease_va_long_beach.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/heart_disease_va_long_beach/summary_stats.tsv b/datasets/heart_disease_va_long_beach/summary_stats.tsv index fb8c5f36..36f0d3f8 100644 --- a/datasets/heart_disease_va_long_beach/summary_stats.tsv +++ b/datasets/heart_disease_va_long_beach/summary_stats.tsv @@ -1,2 +1,2 @@ dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task -heart_disease_va_long_beach 200 13 3 4 6 binary 2 0.2401 classification +heart_disease_va_long_beach 200 13 3 5 5 categorical 2.0 0.24009999999999998 classification diff --git a/datasets/heart_disease_zurich/README.md b/datasets/heart_disease_zurich/README.md new file mode 100644 index 00000000..ae5e8d5c --- /dev/null +++ b/datasets/heart_disease_zurich/README.md @@ -0,0 +1,6 @@ +# heart_disease_zurich + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/heart_disease_zurich.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/heart_disease_zurich/summary_stats.tsv b/datasets/heart_disease_zurich/summary_stats.tsv index 94d5ce6b..661fdd2b 100644 --- a/datasets/heart_disease_zurich/summary_stats.tsv +++ b/datasets/heart_disease_zurich/summary_stats.tsv @@ -1,2 +1,2 @@ dataset n_instances n_features n_binary_features n_categorical_features 
n_continuous_features endpoint_type n_classes imbalance task -heart_disease_zurich 123 12 3 4 5 binary 2 0.756758543195188 classification +heart_disease_zurich 123 12 4 4 4 categorical 2.0 0.756758543195188 classification diff --git a/datasets/horse_colic_lesion_type/README.md b/datasets/horse_colic_lesion_type/README.md new file mode 100644 index 00000000..acf0700d --- /dev/null +++ b/datasets/horse_colic_lesion_type/README.md @@ -0,0 +1,6 @@ +# horse_colic_lesion_type + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/horse_colic_lesion_type.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/horse_colic_lesion_type/summary_stats.tsv b/datasets/horse_colic_lesion_type/summary_stats.tsv index 503f4ed3..b90436e4 100644 --- a/datasets/horse_colic_lesion_type/summary_stats.tsv +++ b/datasets/horse_colic_lesion_type/summary_stats.tsv @@ -1,2 +1,2 @@ dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task -horse_colic_lesion_type 368 20 2 11 7 categorical 5 0.113506468572779 classification +horse_colic_lesion_type 368 20 1 12 7 categorical 5.0 0.1135064685727788 classification diff --git a/datasets/horse_colic_outcome/README.md b/datasets/horse_colic_outcome/README.md new file mode 100644 index 00000000..6cf35c7a --- /dev/null +++ b/datasets/horse_colic_outcome/README.md @@ -0,0 +1,6 @@ +# horse_colic_outcome + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/horse_colic_outcome.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/horse_colic_outcome/summary_stats.tsv b/datasets/horse_colic_outcome/summary_stats.tsv index e2af62bb..1329b0c3 100644 --- a/datasets/horse_colic_outcome/summary_stats.tsv +++ b/datasets/horse_colic_outcome/summary_stats.tsv @@ -1,2 +1,2 @@ dataset n_instances n_features n_binary_features n_categorical_features 
n_continuous_features endpoint_type n_classes imbalance task -horse_colic_outcome 366 21 3 11 7 categorical 3 0.185859535967034 classification +horse_colic_outcome 366 21 2 12 7 categorical 3.0 0.18585953596703395 classification diff --git a/datasets/horse_colic_surgery/README.md b/datasets/horse_colic_surgery/README.md new file mode 100644 index 00000000..b834533f --- /dev/null +++ b/datasets/horse_colic_surgery/README.md @@ -0,0 +1,6 @@ +# horse_colic_surgery + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/horse_colic_surgery.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/horse_colic_surgery/summary_stats.tsv b/datasets/horse_colic_surgery/summary_stats.tsv index fc1add79..319d42f7 100644 --- a/datasets/horse_colic_surgery/summary_stats.tsv +++ b/datasets/horse_colic_surgery/summary_stats.tsv @@ -1,2 +1,2 @@ dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task -horse_colic_surgery 368 20 2 11 7 binary 2 0.0680529300567108 classification +horse_colic_surgery 368 20 1 12 7 categorical 2.0 0.0680529300567108 classification diff --git a/datasets/solar_flare/README.md b/datasets/solar_flare/README.md new file mode 100644 index 00000000..2958bbf6 --- /dev/null +++ b/datasets/solar_flare/README.md @@ -0,0 +1,6 @@ +# solar_flare + +[**Pandas Profiling Report**](https://epistasislab.github.io/pmlb/profile/solar_flare.html) + +[Metadata](metadata.yaml) | [Summary Statistics](summary_stats.tsv) + diff --git a/datasets/solar_flare/summary_stats.tsv b/datasets/solar_flare/summary_stats.tsv index 5ed8e9f4..05edb52d 100644 --- a/datasets/solar_flare/summary_stats.tsv +++ b/datasets/solar_flare/summary_stats.tsv @@ -1,2 +1,2 @@ dataset n_instances n_features n_binary_features n_categorical_features n_continuous_features endpoint_type n_classes imbalance task -solar_flare 1066 10 5 5 0 continuous 8 0.65729049699214 
regression +solar_flare 1066 10 4 6 0 categorical 8.0 0.6572904969921398 regression diff --git a/pmlb/all_summary_stats.tsv b/pmlb/all_summary_stats.tsv index 8235952e..88c39350 100644 --- a/pmlb/all_summary_stats.tsv +++ b/pmlb/all_summary_stats.tsv @@ -183,19 +183,19 @@ analcatdata_japansolvent 52 9 0 0 9 categorical 2.0 0.0014792899408284 classific analcatdata_lawsuit 264 4 1 0 3 categorical 2.0 0.7328397612488521 classification ann_thyroid 7200 21 15 0 6 categorical 3.0 0.7904668981481481 classification appendicitis 106 7 0 0 7 categorical 2.0 0.3645425418298327 classification -auto_insurance_losses 164 24 3 7 14 continuous 51.0 0.0084428911362284 regression -auto_insurance_price 201 23 4 6 13 continuous 186.0 0.0003431812297929 regression -auto_insurance_symboling 205 24 4 6 14 ordinal 6.0 0.0755788221296847 classification +auto_insurance_losses 164 24 3 6 15 categorical 51.0 0.0084428911362284 regression +auto_insurance_price 201 23 4 5 14 categorical 186.0 0.0003431812297929 regression +auto_insurance_symboling 205 24 4 5 15 categorical 6.0 0.0755788221296847 classification backache 180 32 22 4 6 categorical 2.0 0.521604938271605 classification balance_scale 625 4 0 4 0 categorical 3.0 0.14622976 classification banana 5300 2 0 0 2 categorical 2.0 0.0106907796368814 classification biomed 209 8 0 1 7 categorical 2.0 0.079691399006433 classification breast_cancer 286 9 2 6 1 categorical 2.0 0.1645068218494792 classification -breast_cancer_wisconsin_diagnostic 569 30 0 0 30 binary 2.0 0.064939878490615 classification -breast_cancer_wisconsin_original 699 9 0 0 9 binary 2.0 0.0963751609186227 classification +breast_cancer_wisconsin_diagnostic 569 30 0 0 30 categorical 2.0 0.064939878490615 classification +breast_cancer_wisconsin_original 699 9 0 9 0 categorical 2.0 0.0963751609186227 classification bupa 345 5 0 0 5 categorical 2.0 0.0004116782188615 classification calendarDOW 399 32 2 18 12 categorical 5.0 0.0140294344884768 classification -car_evaluation 1728 6 
0 6 0 categorical 4.0 0.390288315900777 classification +car_evaluation 1728 6 0 6 0 categorical 4.0 0.3902883159007773 classification cars 392 8 0 1 7 categorical 3.0 0.1919968242399 classification chess 3196 36 35 1 0 categorical 2.0 0.0019740727223171 classification churn 5000 20 2 2 16 categorical 2.0 0.5143758400000001 classification @@ -205,12 +205,12 @@ cloud 108 7 1 0 6 categorical 4.0 0.0050297210791037 classification coil2000 9822 85 5 79 1 categorical 2.0 0.7755902757953309 classification collins 485 23 0 1 22 categorical 13.0 0.0219810819428207 classification confidence 72 3 0 0 3 categorical 6.0 0.0 classification -congressional_voting_records 435 16 0 16 0 binary 2.0 0.0517954815695601 classification +congressional_voting_records 435 16 0 16 0 categorical 2.0 0.05179548156956 classification connect_4 67557 42 0 42 0 categorical 3.0 0.2546625519372754 classification contraceptive_method 1473 9 3 4 2 categorical 3.0 0.030700608232641 classification corral 160 6 6 0 0 categorical 2.0 0.015625 classification -credit_approval_australia 690 15 4 5 6 binary 2.0 0.0121319050619618 classification -credit_approval_germany 1000 20 3 14 3 binary 2.0 0.16 classification +credit_approval_australia 690 15 4 4 7 categorical 2.0 0.0121319050619617 classification +credit_approval_germany 1000 20 3 14 3 categorical 2.0 0.1599999999999999 classification dermatology 366 34 1 32 1 categorical 6.0 0.0414583893218668 classification dis 3772 29 19 4 6 categorical 2.0 0.9394399094067016 classification dna 3186 180 180 0 0 categorical 3.0 0.0776847980158012 classification @@ -352,14 +352,14 @@ flags 178 43 36 5 2 categorical 5.0 0.04391806590077 classification glass2 163 9 0 0 9 categorical 2.0 0.0045541796830893 classification haberman 306 3 0 0 3 categorical 2.0 0.2214532871972319 classification hayes_roth 160 4 0 4 0 categorical 3.0 0.0438671874999999 classification -heart_disease_cleveland 303 13 3 4 6 binary 2.0 0.0068076114542147 classification -heart_disease_hungarian 294 
13 3 4 6 binary 2.0 0.0777916608820399 classification -heart_disease_va_long_beach 200 13 3 4 6 binary 2.0 0.2401 classification -heart_disease_zurich 123 12 3 4 5 binary 2.0 0.756758543195188 classification +heart_disease_cleveland 303 13 3 5 5 categorical 2.0 0.0068076114542147 classification +heart_disease_hungarian 294 13 4 4 5 categorical 2.0 0.0777916608820399 classification +heart_disease_va_long_beach 200 13 3 5 5 categorical 2.0 0.2400999999999999 classification +heart_disease_zurich 123 12 4 4 4 categorical 2.0 0.756758543195188 classification hepatitis 155 19 3 10 6 categorical 2.0 0.3446826222684704 classification -horse_colic_lesion_type 368 20 2 11 7 categorical 5.0 0.113506468572779 classification -horse_colic_outcome 366 21 3 11 7 categorical 3.0 0.185859535967034 classification -horse_colic_surgery 368 20 2 11 7 binary 2.0 0.0680529300567108 classification +horse_colic_lesion_type 368 20 1 12 7 categorical 5.0 0.1135064685727788 classification +horse_colic_outcome 366 21 2 12 7 categorical 3.0 0.1858595359670339 classification +horse_colic_surgery 368 20 1 12 7 categorical 2.0 0.0680529300567108 classification hypothyroid 3163 25 17 1 7 categorical 2.0 0.8181582834802777 classification ionosphere 351 34 1 1 32 categorical 2.0 0.0795529257067718 classification iris 150 4 0 0 4 categorical 3.0 0.0 classification @@ -412,7 +412,7 @@ schizo 340 14 2 0 12 categorical 3.0 0.1478546712802768 classification segmentation 2310 19 0 1 18 categorical 7.0 0.0 classification shuttle 58000 9 0 0 9 categorical 7.0 0.5852340828378914 classification sleep 105908 13 0 1 12 categorical 5.0 0.1484938125639567 classification -solar_flare 1066 10 5 5 0 continuous 8.0 0.65729049699214 regression +solar_flare 1066 10 4 6 0 categorical 8.0 0.6572904969921398 regression sonar 208 60 0 0 60 categorical 2.0 0.0045303254437869 classification soybean 675 35 1 34 0 categorical 18.0 0.0362678286129266 classification spambase 4601 57 0 0 57 categorical 2.0 0.0449060406200498 
classification