|
154 | 154 | "outputs": [], |
155 | 155 | "source": [ |
156 | 156 | "%%capture\n", |
157 | | - "execute_sql('''SET query_band='DEMO=EE_Telco_Customer_Churn_AutoML_Approach.ipynb;' UPDATE FOR SESSION; ''')" |
| 157 | + "execute_sql('''SET query_band='DEMO=Telco_Customer_Churn_AutoML_Approach.ipynb;' UPDATE FOR SESSION; ''')" |
158 | 158 | ] |
159 | 159 | }, |
160 | 160 | { |
|
700 | 700 | "<p style='font-size:16px;font-family:Arial;'>A feature store repository serves as the foundational environment for storing and managing your data features. The owner of the FeatureStore can grant/revoke read only, write only or read and write authorization to other user(s)</p>" |
701 | 701 | ] |
702 | 702 | }, |
| 703 | + { |
| 704 | + "cell_type": "markdown", |
| 705 | + "id": "cc24f86a-a5c5-47d7-86c2-62c18aed4e46", |
| 706 | + "metadata": {}, |
| 707 | + "source": [ |
| 708 | + "<p style='font-size:16px;font-family:Arial'> Create the Feature Store</p>" |
| 709 | + ] |
| 710 | + }, |
703 | 711 | { |
704 | 712 | "cell_type": "code", |
705 | 713 | "execution_count": null, |
706 | 714 | "id": "21a4a190-ec1e-4096-a971-8aaf0c88e1d2", |
707 | 715 | "metadata": {}, |
708 | 716 | "outputs": [], |
709 | 717 | "source": [ |
710 | | - "telco_fs = FeatureStore(repo='TelcoFS')\n", |
711 | | - "telco_fs.setup(perm_size='10e8')" |
| 718 | + "fs = FeatureStore(repo=\"TelcoFS\", data_domain=\"customer_profile\")" |
712 | 719 | ] |
713 | 720 | }, |
714 | 721 | { |
|
719 | 726 | "outputs": [], |
720 | 727 | "source": [ |
721 | 728 | "# Set up the FeatureStore repository.\n", |
722 | | - "telco_fs.list_repos()" |
| 729 | + "fs.setup()" |
723 | 730 | ] |
724 | 731 | }, |
725 | 732 | { |
726 | | - "cell_type": "markdown", |
727 | | - "id": "44bfdbc2-bc48-4656-a219-45375b66984b", |
| 733 | + "cell_type": "code", |
| 734 | + "execution_count": null, |
| 735 | + "id": "35bf969c-e438-47df-abc4-1c075182dffe", |
728 | 736 | "metadata": {}, |
| 737 | + "outputs": [], |
729 | 738 | "source": [ |
730 | | - "<hr style=\"height:1px;border:none;\">\n", |
731 | | - "<p style = 'font-size:18px;font-family:Arial;'><b>5.2 Create and Register Entity </b></p>" |
| 739 | + "fs = FeatureStore(repo=\"TelcoFS\", data_domain=\"customer_profile\")" |
| 740 | + ] |
| 741 | + }, |
| 742 | + { |
| 743 | + "cell_type": "code", |
| 744 | + "execution_count": null, |
| 745 | + "id": "83dcbdbe-4086-4723-bfa5-1f3563ead275", |
| 746 | + "metadata": {}, |
| 747 | + "outputs": [], |
| 748 | + "source": [ |
| 749 | + "fs.list_repos()" |
732 | 750 | ] |
733 | 751 | }, |
734 | 752 | { |
735 | 753 | "cell_type": "markdown", |
736 | 754 | "id": "7a3f98cc-084f-4366-8e1d-24f58481172d", |
737 | 755 | "metadata": {}, |
738 | 756 | "source": [ |
739 | | - "<p style = 'font-size:16px;font-family:Arial;'>Let us now start with feature engineering, for which we will create the required columns in the dataframe and than use those columns to register as features in the feature group of feature store created in the step above.</p>" |
| 757 | + "<p style = 'font-size:16px;font-family:Arial'>Let us now start with feature engineering, for which we will create the required columns in the dataframe and then use those columns to register as features in the feature group of feature store created in the step above.</p>" |
740 | 758 | ] |
741 | 759 | }, |
742 | 760 | { |
|
852 | 870 | "df = DataFrame('transformed_data_automl')" |
853 | 871 | ] |
854 | 872 | }, |
| 873 | + { |
| 874 | + "cell_type": "markdown", |
| 875 | + "id": "27771e3d-5c63-4349-b718-0455042ecffa", |
| 876 | + "metadata": {}, |
| 877 | + "source": [ |
| 878 | + "<hr style=\"height:1px;border:none;\">\n", |
| 879 | + "<b style='font-size:18px;font-family:Arial'>5.2 Creating FeatureProcess for Telco Customer Churn</b>\n", |
| 880 | + "\n", |
| 881 | + "<p style='font-size:16px;font-family:Arial'>\n", |
| 882 | + "The <code>FeatureProcess</code> captures the transformation logic and metadata of features created from raw data. \n", |
| 883 | + "These transformations are version-controlled so that future updates to the same data source can reuse identical logic for consistency.\n", |
| 884 | + "</p>\n", |
| 885 | + "\n", |
| 886 | + "<p style='font-size:16px;font-family:Arial'>\n", |
| 887 | + "Here, we create a view named <b>telco_view</b>.\n", |
| 888 | + "</p>" |
| 889 | + ] |
| 890 | + }, |
855 | 891 | { |
856 | 892 | "cell_type": "code", |
857 | 893 | "execution_count": null, |
858 | 894 | "id": "e81de7ec-bdf5-46c0-b9c3-6e82e4d17e9a", |
859 | 895 | "metadata": {}, |
860 | 896 | "outputs": [], |
861 | 897 | "source": [ |
862 | | - "# Create entity for DataFrame 'patient_profile_df'\n", |
863 | | - "entity=Entity(name='CustId', columns=df.CustomerID)" |
| 898 | + "telco_view = df.create_view(view_name=\"telco_view\")" |
864 | 899 | ] |
865 | 900 | }, |
866 | 901 | { |
|
870 | 905 | "metadata": {}, |
871 | 906 | "outputs": [], |
872 | 907 | "source": [ |
873 | | - "# Register the Entity.\n", |
874 | | - "telco_fs.apply(entity)" |
| 908 | + "fs_customer = FeatureStore(repo=\"TelcoFS\", data_domain=\"customer_profile\")" |
875 | 909 | ] |
876 | 910 | }, |
877 | 911 | { |
|
881 | 915 | "metadata": {}, |
882 | 916 | "outputs": [], |
883 | 917 | "source": [ |
884 | | - "# Look at existing Entities after registering the Entity.\n", |
885 | | - "telco_fs.list_entities()" |
886 | | - ] |
887 | | - }, |
888 | | - { |
889 | | - "cell_type": "markdown", |
890 | | - "id": "f5672211-fdb8-40e8-b26d-e0c076232b89", |
891 | | - "metadata": {}, |
892 | | - "source": [ |
893 | | - "<hr style=\"height:1px;border:none;\">\n", |
894 | | - "<p style = 'font-size:18px;font-family:Arial;'><b>5.3 Create and Register FeatureGroup </b></p>\n", |
895 | | - "<li style = 'font-size:16px;font-family:Arial;'>FeatureGroup can be created using Teradata DataFrame.</li>\n", |
896 | | - "<li style = 'font-size:16px;font-family:Arial;'>FeatureGroup can be created using SQL Query. </li>\n", |
897 | | - "<li style = 'font-size:16px;font-family:Arial;'>FeatureGroup can be created using objects of Feature, Entity, DataSource. </li>\n" |
898 | | - ] |
899 | | - }, |
900 | | - { |
901 | | - "cell_type": "markdown", |
902 | | - "id": "eb3ad9ab-22b7-4c49-b2fa-50edd93ec6cd", |
903 | | - "metadata": {}, |
904 | | - "source": [ |
905 | | - "<p style = 'font-size:16px;font-family:Arial;'><b>Creating a FeatureGroup from Teradata DataFrame\n", |
906 | | - "</b></p>" |
| 918 | + "telco_fp = FeatureProcess(repo=\"TelcoFS\",\n", |
| 919 | + " data_domain=\"customer_profile\",\n", |
| 920 | + " object=telco_view, \n", |
| 921 | + " entity='CustomerID',\n", |
| 922 | + " features=df.columns,\n", |
| 923 | + " description='Ingesting features in profile DD')\n", |
| 924 | + "\n", |
| 925 | + "telco_fp.run()" |
907 | 926 | ] |
908 | 927 | }, |
909 | 928 | { |
|
913 | 932 | "metadata": {}, |
914 | 933 | "outputs": [], |
915 | 934 | "source": [ |
916 | | - "telco_fg = FeatureGroup.from_DataFrame(\n", |
917 | | - " name='TelcoFG', \n", |
918 | | - " entity_columns='CustomerID', \n", |
919 | | - " df=df\n", |
920 | | - ")" |
| 935 | + "# List the feature process.\n", |
| 936 | + "fs_customer.list_feature_processes()" |
921 | 937 | ] |
922 | 938 | }, |
923 | 939 | { |
924 | | - "cell_type": "code", |
925 | | - "execution_count": null, |
926 | | - "id": "57bdc2a5-b441-4ca3-937b-6604020508b0", |
| 940 | + "cell_type": "markdown", |
| 941 | + "id": "aafb502e-306c-4ee9-8010-293dcbd30a50", |
927 | 942 | "metadata": {}, |
928 | | - "outputs": [], |
929 | 943 | "source": [ |
930 | | - "# Let's look at Properties.\n", |
931 | | - "telco_fg.features, telco_fg.entity, telco_fg.data_source, telco_fg.description" |
| 944 | + "<hr style=\"height:1px;border:none;\">\n", |
| 945 | + "<b style='font-size:18px;font-family:Arial'>5.3 Building a Curated Dataset</b>\n", |
| 946 | + "\n", |
| 947 | + "<p style='font-size:16px;font-family:Arial'>\n", |
| 948 | + "The <code>DatasetCatalog</code> allows us to build curated datasets by combining selected features from one or multiple feature processes. \n", |
| 949 | + "This dataset serves as the primary input for AutoML training.\n", |
| 950 | + "</p>\n", |
| 951 | + "\n", |
| 952 | + "<p style='font-size:16px;font-family:Arial'>\n", |
| 953 | + "We select relevant features from <b>telco_fp</b> and construct a unified dataset for downstream modeling.\n", |
| 954 | + "</p>" |
932 | 955 | ] |
933 | 956 | }, |
934 | 957 | { |
935 | 958 | "cell_type": "code", |
936 | 959 | "execution_count": null, |
937 | | - "id": "9069c2fc-6e47-4d44-b5bf-f1c3efa9ab00", |
| 960 | + "id": "57bdc2a5-b441-4ca3-937b-6604020508b0", |
938 | 961 | "metadata": {}, |
939 | 962 | "outputs": [], |
940 | 963 | "source": [ |
941 | | - "telco_fs.apply(telco_fg)" |
| 964 | + "dc_customer = fs_customer.get_dataset_catalog()\n", |
| 965 | + "\n", |
| 966 | + "selected_profile_features = {\n", |
| 967 | + " 'Gender': telco_fp.process_id,\n", |
| 968 | + " 'SeniorCitizen': telco_fp.process_id,\n", |
| 969 | + " 'Partner': telco_fp.process_id,\n", |
| 970 | + " 'Dependents': telco_fp.process_id,\n", |
| 971 | + " 'Tenure': telco_fp.process_id,\n", |
| 972 | + " 'PhoneService': telco_fp.process_id,\n", |
| 973 | + " 'MultipleLines': telco_fp.process_id,\n", |
| 974 | + " 'InternetService': telco_fp.process_id,\n", |
| 975 | + " 'OnlineSecurity': telco_fp.process_id,\n", |
| 976 | + " 'OnlineBackup': telco_fp.process_id,\n", |
| 977 | + " 'DeviceProtection': telco_fp.process_id,\n", |
| 978 | + " 'TechSupport': telco_fp.process_id,\n", |
| 979 | + " 'StreamingTV': telco_fp.process_id,\n", |
| 980 | + " 'StreamingMovies': telco_fp.process_id,\n", |
| 981 | + " 'Contract': telco_fp.process_id,\n", |
| 982 | + " 'PaperlessBilling': telco_fp.process_id,\n", |
| 983 | + " 'PaymentMethod': telco_fp.process_id,\n", |
| 984 | + " 'MonthlyCharges': telco_fp.process_id,\n", |
| 985 | + " 'TotalCharges': telco_fp.process_id,\n", |
| 986 | + " 'Churn': telco_fp.process_id\n", |
| 987 | + " }\n", |
| 988 | + "\n", |
| 989 | + "tdf = dc_customer.build_dataset(entity='CustomerID',\n", |
| 990 | + " selected_features=selected_profile_features,\n", |
| 991 | + " view_name=\"telco_churn_profile_dataset\",\n", |
| 992 | + " description=\"Curated dataset for telco customer profile\")" |
942 | 993 | ] |
943 | 994 | }, |
944 | 995 | { |
945 | 996 | "cell_type": "code", |
946 | 997 | "execution_count": null, |
947 | | - "id": "b8867efb-13ab-4cf7-9924-60b31f911d64", |
| 998 | + "id": "9069c2fc-6e47-4d44-b5bf-f1c3efa9ab00", |
948 | 999 | "metadata": {}, |
949 | 1000 | "outputs": [], |
950 | 1001 | "source": [ |
951 | | - "telco_fs.list_features()" |
| 1002 | + "fs_customer.list_dataset_catalogs()" |
952 | 1003 | ] |
953 | 1004 | }, |
954 | 1005 | { |
955 | 1006 | "cell_type": "markdown", |
956 | | - "id": "29bf5904-1dcc-46c1-bd3d-5be5f1001ec0", |
| 1007 | + "id": "27b0d98e-b4dc-4250-bb2f-d7c14e079079", |
957 | 1008 | "metadata": {}, |
958 | 1009 | "source": [ |
959 | | - "<hr style=\"height:1px;border:none;\">\n", |
960 | | - "<b style = 'font-size:18px;font-family:Arial;'>5.4 Reuse features from Enterprise Feature Store with teradataml analytic functions for AutoML processing.</b>\n" |
961 | | - ] |
962 | | - }, |
963 | | - { |
964 | | - "cell_type": "markdown", |
965 | | - "id": "3be7cff1-4b1b-466c-8836-5e35d61763cd", |
966 | | - "metadata": {}, |
967 | | - "source": [ |
968 | | - "<p style = 'font-size:16px;font-family:Arial'>Since FeatureStore stores DataSource also, you can retrive Teradata DataFrame from FeatureStore. <br> `FeatureStore.get_dataset()` get's Teradata DataFrame from FeatureGroup.</p>" |
| 1010 | + "<p style='font-size:16px;font-family:Arial'>\n", |
| 1011 | + "The <b>mind map</b> below visually represents the relationship between the raw data, the feature process, and the final dataset.\n", |
| 1012 | + "It provides complete lineage for auditability and reproducibility, which is a key advantage of EFS integration.\n", |
| 1013 | + "</p>" |
969 | 1014 | ] |
970 | 1015 | }, |
971 | 1016 | { |
972 | 1017 | "cell_type": "code", |
973 | 1018 | "execution_count": null, |
974 | | - "id": "6a13015d-d3d1-4bc5-9d13-cf813e03dda2", |
975 | | - "metadata": {}, |
| 1019 | + "id": "b8867efb-13ab-4cf7-9924-60b31f911d64", |
| 1020 | + "metadata": { |
| 1021 | + "tags": [] |
| 1022 | + }, |
976 | 1023 | "outputs": [], |
977 | 1024 | "source": [ |
978 | | - "# Get DataSet for FeatureGroup TelcoFG. \n", |
979 | | - "df = telco_fs.get_dataset('TelcoFG')\n", |
980 | | - "df" |
| 1025 | + "fs_customer.mind_map()" |
981 | 1026 | ] |
982 | 1027 | }, |
983 | 1028 | { |
|
1007 | 1052 | "outputs": [], |
1008 | 1053 | "source": [ |
1009 | 1054 | "# Performing sampling to get 80% for training and 20% for testing\n", |
1010 | | - "tdf_sample = df.sample(frac = [0.8, 0.2])\n", |
| 1055 | + "tdf_sample = tdf.sample(frac = [0.8, 0.2])\n", |
1011 | 1056 | "\n", |
1012 | 1057 | "# Fetching train and test data\n", |
1013 | 1058 | "tdf_train= tdf_sample[tdf_sample['sampleid'] == 1].drop('sampleid', axis=1)\n", |
|
1084 | 1129 | "\n", |
1085 | 1130 | "aml = AutoClassifier(\n", |
1086 | 1131 | " exclude = ['knn','svm'],\n", |
1087 | | - " verbose = 2,\n", |
| 1132 | + " verbose = 1,\n", |
1088 | 1133 | " max_runtime_secs = 600\n", |
1089 | 1134 | ")" |
1090 | 1135 | ] |
|
1212 | 1257 | "cell_type": "code", |
1213 | 1258 | "execution_count": null, |
1214 | 1259 | "id": "d31562da-23fa-468c-97da-c8993fb70b0f", |
1215 | | - "metadata": {}, |
| 1260 | + "metadata": { |
| 1261 | + "tags": [] |
| 1262 | + }, |
1216 | 1263 | "outputs": [], |
1217 | 1264 | "source": [ |
1218 | 1265 | "# Fetching prediction and metrics on test data\n", |
|
1392 | 1439 | "source": [ |
1393 | 1440 | "<hr style=\"height:2px;border:none;\">\n", |
1394 | 1441 | "<b style = 'font-size:20px;font-family:Arial'>8. Cleanup</b>\n", |
1395 | | - "<p style = 'font-size:18px;font-family:Arial'> <b>Work Tables </b></p>" |
| 1442 | + "<p style = 'font-size:18px;font-family:Arial'> <b>Work Tables and Views</b></p>" |
1396 | 1443 | ] |
1397 | 1444 | }, |
1398 | 1445 | { |
1399 | 1446 | "cell_type": "code", |
1400 | 1447 | "execution_count": null, |
1401 | | - "id": "9ff3fb9b-4d13-4628-988d-f82463d96537", |
| 1448 | + "id": "abecc15b-269d-47d3-b727-c09cf3e4d8af", |
1402 | 1449 | "metadata": {}, |
1403 | 1450 | "outputs": [], |
1404 | 1451 | "source": [ |
1405 | | - "tables = ['transformed_data']\n", |
1406 | | - "\n", |
1407 | | - "# Loop through the list of tables and execute the drop table command for each table\n", |
1408 | | - "for table in tables:\n", |
| 1452 | + "views = ['telco_view']\n", |
| 1453 | + "# Loop through the list of views and execute the drop view command for each view\n", |
| 1454 | + "for view in views:\n", |
1409 | 1455 | " try:\n", |
1410 | | - " db_drop_table(table_name=table)\n", |
| 1456 | + " db_drop_view(view_name=view)\n", |
1411 | 1457 | " except:\n", |
1412 | 1458 | " pass" |
1413 | 1459 | ] |
1414 | 1460 | }, |
1415 | 1461 | { |
1416 | 1462 | "cell_type": "code", |
1417 | 1463 | "execution_count": null, |
1418 | | - "id": "1a6f56a5-e38b-45d7-b532-001de783705a", |
| 1464 | + "id": "9ff3fb9b-4d13-4628-988d-f82463d96537", |
1419 | 1465 | "metadata": {}, |
1420 | 1466 | "outputs": [], |
1421 | 1467 | "source": [ |
1422 | | - "telco_fs.archive_feature_group(feature_group='TelcoFG')" |
| 1468 | + "tables = ['transformed_data_automl']\n", |
| 1469 | + "\n", |
| 1470 | + "# Loop through the list of tables and execute the drop table command for each table\n", |
| 1471 | + "for table in tables:\n", |
| 1472 | + " try:\n", |
| 1473 | + " db_drop_table(table_name=table)\n", |
| 1474 | + " except:\n", |
| 1475 | + " pass" |
1423 | 1476 | ] |
1424 | 1477 | }, |
1425 | 1478 | { |
|
1429 | 1482 | "metadata": {}, |
1430 | 1483 | "outputs": [], |
1431 | 1484 | "source": [ |
1432 | | - "telco_fs.delete_feature_group(feature_group='TelcoFG')" |
| 1485 | + "fs = FeatureStore(repo=\"TelcoFS\")\n", |
| 1486 | + "fs.delete()" |
1433 | 1487 | ] |
1434 | 1488 | }, |
1435 | 1489 | { |
|
1573 | 1627 | "name": "python", |
1574 | 1628 | "nbconvert_exporter": "python", |
1575 | 1629 | "pygments_lexer": "ipython3", |
1576 | | - "version": "3.9.10" |
| 1630 | + "version": "3.11.14" |
1577 | 1631 | } |
1578 | 1632 | }, |
1579 | 1633 | "nbformat": 4, |
|
0 commit comments