12
12
from sklearn .externals import six
13
13
from sklearn .tree import _tree
14
14
15
- from .rule import Rule
15
+ from .rule import Rule , replace_feature_name
16
16
17
17
INTEGER_TYPES = (numbers .Integral , np .integer )
18
-
18
+ BASE_FEATURE_NAME = "__C__"
19
19
20
20
class SkopeRules (BaseEstimator ):
21
21
""" An easy-interpretable classifier optimizing simple logical rules.
@@ -249,11 +249,17 @@ def fit(self, X, y, sample_weight=None):
249
249
self .estimators_samples_ = []
250
250
self .estimators_features_ = []
251
251
252
- # default columns names of the form ['c0', 'c1', ...]:
253
- feature_names_ = (self .feature_names if self .feature_names is not None
254
- else ['c' + x for x in
255
- np .arange (X .shape [1 ]).astype (str )])
252
+ # default column names of the form ['__C__0', '__C__1', ...]:
253
+ feature_names_ = [BASE_FEATURE_NAME + x for x in
254
+ np .arange (X .shape [1 ]).astype (str )]
255
+ if self .feature_names is not None :
256
+ self .feature_dict_ = {BASE_FEATURE_NAME + str (i ): feat
257
+ for i , feat in enumerate (self .feature_names )}
258
+ else :
259
+ self .feature_dict_ = {BASE_FEATURE_NAME + str (i ): feat
260
+ for i , feat in enumerate (feature_names_ )}
256
261
self .feature_names_ = feature_names_
262
+
257
263
clfs = []
258
264
regs = []
259
265
@@ -356,6 +362,10 @@ def fit(self, X, y, sample_weight=None):
356
362
for rule in
357
363
[Rule (r , args = args ) for r , args in rules_ ]]
358
364
365
+
366
+
367
+
368
+
359
369
# keep only rules verifying precision_min and recall_min:
360
370
for rule , score in rules_ :
361
371
if score [0 ] >= self .precision_min and score [1 ] >= self .recall_min :
@@ -377,7 +387,14 @@ def fit(self, X, y, sample_weight=None):
377
387
# Deduplicate the rule using semantic tree
378
388
if self .max_depth_duplication is not None :
379
389
self .rules_ = self .deduplicate (self .rules_ )
390
+
380
391
self .rules_ = sorted (self .rules_ , key = lambda x : - self .f1_score (x ))
392
+ self .rules_without_feature_names_ = self .rules_
393
+
394
+ # Replace the generic feature names with the real feature names
395
+ self .rules_ = [(replace_feature_name (rule , self .feature_dict_ ), perf )
396
+ for rule , perf in self .rules_ ]
397
+
381
398
return self
382
399
383
400
def predict (self , X ):
@@ -432,7 +449,7 @@ def decision_function(self, X):
432
449
% (X .shape [1 ], self .n_features_ ))
433
450
434
451
df = pandas .DataFrame (X , columns = self .feature_names_ )
435
- selected_rules = self .rules_
452
+ selected_rules = self .rules_without_feature_names_
436
453
437
454
scores = np .zeros (X .shape [0 ])
438
455
for (r , w ) in selected_rules :
0 commit comments