Skip to content

Commit 420b86d

Browse files
committed
Added the data validation
1 parent 1093df7 commit 420b86d

File tree

14 files changed

+11511
-115
lines changed

14 files changed

+11511
-115
lines changed

app.py

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,54 @@
11
import sys
2+
import os
3+
4+
# --- Debug Info ---
5+
print("--- Debug Info ---")
6+
print("Python Executable:", sys.executable)
7+
print("Python Path (sys.path):")
8+
for p in sys.path:
9+
print(f" - {p}")
10+
print("Current Working Directory (os.getcwd()):", os.getcwd())
11+
print("------------------")
12+
# --- End Debug Info ---
13+
214
from networksecurity.components.data_ingestion import DataIngestion
15+
from networksecurity.components.data_validation import DataValidation
316
from networksecurity.logger.customlogger import Custom_Logger
417
from networksecurity.exception.exception import CustomException
5-
from networksecurity.entity.config_entity import DataIngestionConfigEntity
6-
from networksecurity.entity.config_entity import TrainingPipelineConfigEntity
18+
from networksecurity.entity.config_entity import (
19+
DataIngestionConfigEntity,
20+
DataValidationConfigEntity,
21+
TrainingPipelineConfigEntity,
22+
)
23+
from networksecurity.entity.artifact_entity import DataIngestionArtifact
24+
25+
logger = Custom_Logger().get_logger()
726

827
if __name__ == "__main__":
928
try:
10-
# Initialize configuration
29+
# Configurations
1130
training_pipeline_config = TrainingPipelineConfigEntity()
1231
data_ingestion_config = DataIngestionConfigEntity(training_pipeline_config=training_pipeline_config)
32+
data_validation_config = DataValidationConfigEntity(training_pipeline_config=training_pipeline_config)
1333

14-
# Create Data Ingestion instance
34+
# Data Ingestion
1535
data_ingestion = DataIngestion(data_ingestion_config)
36+
data_ingestion_artifact: DataIngestionArtifact = data_ingestion.initiate_data_ingestion()
37+
logger.info("✅ Data ingestion completed successfully.")
38+
logger.info(f"📦 Data Ingestion Artifact: {data_ingestion_artifact}")
1639

17-
# Start data ingestion process
18-
data_ingestion_artifact = data_ingestion.initiate_data_ingestion()
19-
20-
# Log the artifact paths
21-
logger = Custom_Logger().get_logger()
22-
logger.info(f"Data Ingestion Artifact: {data_ingestion_artifact}")
40+
# Data Validation
41+
data_validation = DataValidation(
42+
data_validation_config=data_validation_config,
43+
data_ingestion_artifact=data_ingestion_artifact,
44+
)
45+
data_validation_artifact = data_validation.initiate_data_validation()
46+
logger.info("✅ Data validation completed successfully.")
47+
print(f"📄 Data Validation Artifact: {data_validation_artifact}")
2348

2449
except CustomException as e:
25-
raise CustomException(e, sys)
50+
logger.error(f"❌ Custom error in main pipeline: {e}")
51+
raise e
52+
except Exception as e:
53+
logger.error(f"❌ Unexpected error in main pipeline: {e}")
54+
raise CustomException(e, sys)
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
Abnormal_URL:
2+
drift_status: &id001 !!python/object/apply:numpy.core.multiarray.scalar
3+
- !!python/object/apply:numpy.dtype
4+
args:
5+
- b1
6+
- false
7+
- true
8+
state: !!python/tuple
9+
- 3
10+
- '|'
11+
- null
12+
- null
13+
- null
14+
- -1
15+
- -1
16+
- 0
17+
- !!binary |
18+
AA==
19+
p_value: 1.0
20+
DNSRecord:
21+
drift_status: *id001
22+
p_value: 1.0
23+
Domain_registeration_length:
24+
drift_status: *id001
25+
p_value: 0.9958579353974752
26+
Favicon:
27+
drift_status: *id001
28+
p_value: 1.0
29+
Google_Index:
30+
drift_status: *id001
31+
p_value: 0.9999953843756324
32+
HTTPS_token:
33+
drift_status: *id001
34+
p_value: 1.0
35+
Iframe:
36+
drift_status: *id001
37+
p_value: 1.0
38+
Links_in_tags:
39+
drift_status: *id001
40+
p_value: 0.9995152509457711
41+
Links_pointing_to_page:
42+
drift_status: *id001
43+
p_value: 0.9996208033369954
44+
Page_Rank:
45+
drift_status: *id001
46+
p_value: 0.9876237011526984
47+
Prefix_Suffix:
48+
drift_status: *id001
49+
p_value: 0.9995152509457711
50+
Redirect:
51+
drift_status: *id001
52+
p_value: 1.0
53+
Request_URL:
54+
drift_status: *id001
55+
p_value: 0.2689286131324479
56+
Result:
57+
drift_status: *id001
58+
p_value: 0.9086653866244705
59+
RightClick:
60+
drift_status: *id001
61+
p_value: 1.0
62+
SFH:
63+
drift_status: *id001
64+
p_value: 1.0
65+
SSLfinal_State:
66+
drift_status: *id001
67+
p_value: 0.45991477918984003
68+
Shortining_Service:
69+
drift_status: *id001
70+
p_value: 1.0
71+
Statistical_report:
72+
drift_status: *id001
73+
p_value: 1.0
74+
Submitting_to_email:
75+
drift_status: *id001
76+
p_value: 0.9999953843756324
77+
URL_Length:
78+
drift_status: *id001
79+
p_value: 0.9999971010402764
80+
URL_of_Anchor:
81+
drift_status: *id001
82+
p_value: 0.9997752665277037
83+
age_of_domain:
84+
drift_status: *id001
85+
p_value: 0.9914952304622426
86+
double_slash_redirecting:
87+
drift_status: *id001
88+
p_value: 0.9999999998789996
89+
having_At_Symbol:
90+
drift_status: *id001
91+
p_value: 1.0
92+
having_IP_Address:
93+
drift_status: *id001
94+
p_value: 1.0
95+
having_Sub_Domain:
96+
drift_status: *id001
97+
p_value: 0.9999999999598222
98+
on_mouseover:
99+
drift_status: *id001
100+
p_value: 0.9999999999598222
101+
popUpWidnow:
102+
drift_status: *id001
103+
p_value: 0.9999999991050752
104+
port:
105+
drift_status: *id001
106+
p_value: 1.0
107+
web_traffic:
108+
drift_status: *id001
109+
p_value: 0.5733052694150307
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
test:
2+
Abnormal_URL: 330
3+
DNSRecord: 0
4+
Domain_registeration_length: 0
5+
Favicon: 418
6+
Google_Index: 320
7+
HTTPS_token: 366
8+
Iframe: 206
9+
Links_in_tags: 0
10+
Links_pointing_to_page: 0
11+
Page_Rank: 552
12+
Prefix_Suffix: 278
13+
Redirect: 258
14+
Request_URL: 0
15+
Result: 0
16+
RightClick: 95
17+
SFH: 527
18+
SSLfinal_State: 0
19+
Shortining_Service: 286
20+
Statistical_report: 307
21+
Submitting_to_email: 415
22+
URL_Length: 407
23+
URL_of_Anchor: 0
24+
age_of_domain: 0
25+
double_slash_redirecting: 295
26+
having_At_Symbol: 325
27+
having_IP_Address: 0
28+
having_Sub_Domain: 0
29+
on_mouseover: 254
30+
popUpWidnow: 437
31+
port: 298
32+
web_traffic: 0
33+
train:
34+
Abnormal_URL: 1299
35+
DNSRecord: 0
36+
Domain_registeration_length: 0
37+
Favicon: 1635
38+
Google_Index: 1219
39+
HTTPS_token: 1430
40+
Iframe: 806
41+
Links_in_tags: 0
42+
Links_pointing_to_page: 0
43+
Page_Rank: 0
44+
Prefix_Suffix: 1187
45+
Redirect: 1021
46+
Request_URL: 0
47+
Result: 0
48+
RightClick: 381
49+
SFH: 2088
50+
SSLfinal_State: 0
51+
Shortining_Service: 1158
52+
Statistical_report: 1243
53+
Submitting_to_email: 1599
54+
URL_Length: 1688
55+
URL_of_Anchor: 0
56+
age_of_domain: 0
57+
double_slash_redirecting: 1134
58+
having_At_Symbol: 1330
59+
having_IP_Address: 0
60+
having_Sub_Domain: 0
61+
on_mouseover: 1061
62+
popUpWidnow: 1700
63+
port: 1204
64+
web_traffic: 0
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
test:
2+
Abnormal_URL: 330
3+
DNSRecord: 0
4+
Domain_registeration_length: 0
5+
Favicon: 418
6+
Google_Index: 320
7+
HTTPS_token: 366
8+
Iframe: 206
9+
Links_in_tags: 0
10+
Links_pointing_to_page: 0
11+
Page_Rank: 552
12+
Prefix_Suffix: 278
13+
Redirect: 258
14+
Request_URL: 0
15+
Result: 0
16+
RightClick: 95
17+
SFH: 527
18+
SSLfinal_State: 0
19+
Shortining_Service: 286
20+
Statistical_report: 307
21+
Submitting_to_email: 415
22+
URL_Length: 407
23+
URL_of_Anchor: 0
24+
age_of_domain: 0
25+
double_slash_redirecting: 295
26+
having_At_Symbol: 325
27+
having_IP_Address: 0
28+
having_Sub_Domain: 0
29+
on_mouseover: 254
30+
popUpWidnow: 437
31+
port: 298
32+
web_traffic: 0
33+
train:
34+
Abnormal_URL: 1299
35+
DNSRecord: 0
36+
Domain_registeration_length: 0
37+
Favicon: 1635
38+
Google_Index: 1219
39+
HTTPS_token: 1430
40+
Iframe: 806
41+
Links_in_tags: 0
42+
Links_pointing_to_page: 0
43+
Page_Rank: 0
44+
Prefix_Suffix: 1187
45+
Redirect: 1021
46+
Request_URL: 0
47+
Result: 0
48+
RightClick: 381
49+
SFH: 2088
50+
SSLfinal_State: 0
51+
Shortining_Service: 1158
52+
Statistical_report: 1243
53+
Submitting_to_email: 1599
54+
URL_Length: 1688
55+
URL_of_Anchor: 0
56+
age_of_domain: 0
57+
double_slash_redirecting: 1134
58+
having_At_Symbol: 1330
59+
having_IP_Address: 0
60+
having_Sub_Domain: 0
61+
on_mouseover: 1061
62+
popUpWidnow: 1700
63+
port: 1204
64+
web_traffic: 0

0 commit comments

Comments
 (0)