Skip to content

Commit 447a211

Browse files
committed
Added the data transformation pipeline
1 parent c771647 commit 447a211

File tree

3 files changed

+43
-1
lines changed

3 files changed

+43
-1
lines changed

networksecurity/constants/training_pipeline/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,16 @@
4040
DATA_VALIDATION_DRIFT_REPORT_DIR = "drift_report"
4141
DATA_VALIDATION_DRIFT_REPORT_FILE_NAME = "drift_report.html"
4242
DATA_VALIDATION_DRIFT_SUMMARY_FILE_NAME = "drift_summary.yaml"
43+
44+
"""
45+
Data transformation related constants
46+
"""
47+
# Directory (under the pipeline artifact directory) that holds every output
# of the data transformation stage.
DATA_TRANSFORMATION_DIR_NAME: str = "data_transformation"
# Sub-directory names for the transformed datasets and for the fitted
# transformation object, respectively.
DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR: str = "transformed"
DATA_TRANSFORMATION_TRANSFORMED_OBJECT_DIR: str = "transformed_object"

# Keyword arguments for the imputer used during transformation.
# NOTE(review): presumably unpacked as ``KNNImputer(**params)`` -- confirm
# against the transformation component. Every key must therefore be a valid
# parameter name: the neighbour count is spelled ``n_neighbors`` (underscore);
# a hyphenated key cannot be passed as a keyword argument.
DATA_TRANSFORMATION_IMPUTER_PARAMS: dict = {
    "missing_values": np.nan,
    "n_neighbors": 3,
    "weights": "uniform",
}

networksecurity/entity/artifact_entity.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,10 @@ class DataValidationArtifact:
1414
invalid_test_file_path: str
1515
drift_report_file_path: str
1616
drift_summary_file_path: str
17+
18+
@dataclass
class DataTransformationArtifact:
    """Record of the file paths reported by the data transformation stage.

    The field order is part of the interface: instances may be built
    positionally as (object path, train path, test path).
    """

    # Path of the persisted transformation object.
    transformed_object_file_path: str
    # Path of the transformed training data.
    transformed_train_file_path: str
    # Path of the transformed test data.
    transformed_test_file_path: str
23+

networksecurity/entity/config_entity.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,4 +79,26 @@ def __post_init__(self):
7979
self.data_validation_dir,
8080
training_pipeline.DATA_VALIDATION_DRIFT_REPORT_DIR,
8181
training_pipeline.DATA_VALIDATION_DRIFT_SUMMARY_FILE_NAME
82-
)
82+
)
83+
84+
@dataclass
class DataTransformationConfigEntity:
    """Filesystem layout for the data transformation stage.

    Every path is derived in ``__post_init__`` from the ``artifact_dir`` of
    the supplied training pipeline configuration.

    Attributes set after initialisation:
        data_transformation_dir: Root directory of this stage.
        transformed_train_file_path: Training file inside the transformed
            data directory.
        transformed_test_file_path: Test file inside the transformed data
            directory.
        transformed_object_file_path: Preprocessing object file inside the
            transformed object directory.
    """

    training_pipeline_config: TrainingPipelineConfigEntity

    def __post_init__(self):
        """Derive the stage directory and the output file paths."""
        self.data_transformation_dir: str = os.path.join(
            self.training_pipeline_config.artifact_dir,
            training_pipeline.DATA_TRANSFORMATION_DIR_NAME,
        )

        # The constants module names this DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR;
        # the shorter "..._TRANSFORMED_DIR" spelling is not defined there and
        # would raise AttributeError.
        transformed_data_dir = os.path.join(
            self.data_transformation_dir,
            training_pipeline.DATA_TRANSFORMATION_TRANSFORMED_DATA_DIR,
        )
        self.transformed_train_file_path: str = os.path.join(
            transformed_data_dir,
            training_pipeline.TRAIN_FILE_NAME,
        )
        self.transformed_test_file_path: str = os.path.join(
            transformed_data_dir,
            training_pipeline.TEST_FILE_NAME,
        )

        # The preprocessing object belongs in its own directory rather than
        # alongside the transformed datasets.
        self.transformed_object_file_path: str = os.path.join(
            self.data_transformation_dir,
            training_pipeline.DATA_TRANSFORMATION_TRANSFORMED_OBJECT_DIR,
            training_pipeline.PREPROCESSING_OBJECT_FILE_NAME,
        )

0 commit comments

Comments
 (0)