Skip to content

Commit 68a201c

Browse files
committed
Fix ruff linting issues: imports and line lengths
1 parent 81d6719 commit 68a201c

File tree

1 file changed

+73
-43
lines changed

1 file changed

+73
-43
lines changed

machine_learning/apriori_algorithm.py

Lines changed: 73 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -5,74 +5,95 @@
55
- Frequent itemsets
66
- Association rules with minimum confidence and lift
77
8-
WIKI:https://en.wikipedia.org/wiki/Apriori_algorithm
8+
WIKI: https://en.wikipedia.org/wiki/Apriori_algorithm
99
"""
1010

1111
from collections import defaultdict
1212
from itertools import combinations
13+
from typing import List, Dict, Tuple, Set
1314

14-
def load_data() -> list[list[str]]:
    """
    Return a small sample transaction dataset.

    Each inner list is one transaction (a basket of item names). A new list
    is built on every call, so callers may mutate the result without
    affecting later calls.

    >>> data = load_data()
    >>> len(data)
    4
    >>> 'milk' in data[0]
    True
    >>> data[-1]
    ['milk', 'bread', 'chips']
    """
    return [
        ["milk"],
        ["milk", "butter"],
        ["milk", "bread"],
        ["milk", "bread", "chips"],
    ]
2227

2328

2429
class Apriori:
2530
"""Apriori algorithm class with support, confidence, and lift filtering."""
2631

27-
def __init__(
    self,
    transactions: List[List[str]],
    min_support: float = 0.25,
    min_confidence: float = 0.5,
    min_lift: float = 1.0,
) -> None:
    """
    Store the inputs and thresholds, then mine itemsets and rules.

    Args:
        transactions: Baskets of item names; each one is converted to a set.
        min_support: Threshold stored for the frequent-itemset search.
        min_confidence: Threshold stored for rule filtering.
        min_lift: Threshold stored for rule filtering.
    """
    # Sets make the subset tests used for support counting straightforward.
    self.transactions: List[Set[str]] = list(map(set, transactions))

    self.min_support: float = min_support
    self.min_confidence: float = min_confidence
    self.min_lift: float = min_lift

    # Result containers, populated by the two calls below.
    self.itemsets: List[Dict[frozenset, float]] = []
    self.rules: List[Tuple[frozenset, frozenset, float, float]] = []

    # Rules are derived from the itemsets, so the order of these calls matters.
    self.find_frequent_itemsets()
    self.generate_association_rules()
3748

3849
def _get_support(self, itemset: frozenset) -> float:
3950
"""Return support of an itemset."""
40-
return sum(1 for t in self.transactions if itemset.issubset(t)) / len(self.transactions)
51+
return sum(1 for t in self.transactions if itemset.issubset(t)) / len(
52+
self.transactions
53+
)
4154

4255
def confidence(self, antecedent: frozenset, consequent: frozenset) -> float:
    """
    Return the confidence of the rule ``antecedent -> consequent``.

    Confidence is the support of both sides together divided by the support
    of the antecedent. It is 0.0 when the antecedent has no support.
    """
    base = self._get_support(antecedent)
    joint = self._get_support(antecedent | consequent)
    if base > 0:
        return joint / base
    return 0.0
4760

4861
def lift(self, antecedent: frozenset, consequent: frozenset) -> float:
    """
    Return the lift of the rule ``antecedent -> consequent``.

    Lift is the rule's confidence divided by the support of the consequent.
    It is 0.0 when the consequent has no support.
    """
    consequent_support = self._get_support(consequent)
    rule_confidence = self.confidence(antecedent, consequent)
    if consequent_support > 0:
        return rule_confidence / consequent_support
    return 0.0
5366

54-
def find_frequent_itemsets(self):
67+
def find_frequent_itemsets(self) -> List[Dict[frozenset, float]]:
5568
"""Generate all frequent itemsets."""
56-
item_counts = defaultdict(int)
69+
item_counts: Dict[frozenset, int] = defaultdict(int)
5770
for t in self.transactions:
5871
for item in t:
5972
item_counts[frozenset([item])] += 1
6073

61-
total = len(self.transactions)
62-
current_itemsets = {k: v / total for k, v in item_counts.items() if v / total >= self.min_support}
63-
self.itemsets.append(current_itemsets)
74+
total: int = len(self.transactions)
75+
current_itemsets: Dict[frozenset, float] = {
76+
k: v / total for k, v in item_counts.items() if v / total >= self.min_support
77+
}
78+
if current_itemsets:
79+
self.itemsets.append(current_itemsets)
6480

65-
k = 2
81+
k: int = 2
6682
while current_itemsets:
67-
candidates = set()
68-
keys = list(current_itemsets.keys())
83+
candidates: Set[frozenset] = set()
84+
keys: List[frozenset] = list(current_itemsets.keys())
6985
for i in range(len(keys)):
7086
for j in range(i + 1, len(keys)):
7187
union = keys[i] | keys[j]
72-
if len(union) == k and all(frozenset(sub) in current_itemsets for sub in combinations(union, k - 1)):
73-
candidates.add(union)
74-
75-
freq_candidates = {c: self._get_support(c) for c in candidates if self._get_support(c) >= self.min_support}
88+
if len(union) == k and all(
89+
frozenset(sub) in current_itemsets
90+
for sub in combinations(union, k - 1)
91+
):
92+
candidates.add(union)
93+
94+
freq_candidates: Dict[frozenset, float] = {
95+
c: self._get_support(c) for c in candidates if self._get_support(c) >= self.min_support
96+
}
7697
if not freq_candidates:
7798
break
7899

@@ -82,20 +103,26 @@ def find_frequent_itemsets(self):
82103

83104
return self.itemsets
84105

85-
def generate_association_rules(
    self,
) -> list[tuple[frozenset, frozenset, float, float]]:
    """
    Generate association rules that meet the confidence and lift thresholds.

    Every itemset in ``self.itemsets`` with at least two items is split into
    each non-empty proper subset (the antecedent) and the remaining items
    (the consequent). A rule is kept when its confidence is at least
    ``min_confidence`` and its lift is at least ``min_lift``.

    Kept rules are appended to ``self.rules`` as
    ``(antecedent, consequent, confidence, lift)`` tuples. A rule that is
    already present is not added again, so repeated calls do not create
    duplicates.

    Returns:
        The accumulated ``self.rules`` list.
    """
    # A set gives O(1) duplicate checks instead of rescanning the whole
    # rules list for every candidate.
    seen = set(self.rules)
    for level in self.itemsets:
        for itemset in level:
            if len(itemset) < 2:
                continue  # a rule needs a non-empty item set on both sides
            for size in range(1, len(itemset)):
                for items in combinations(itemset, size):
                    antecedent_set = frozenset(items)
                    consequent_set = itemset - antecedent_set

                    conf = self.confidence(antecedent_set, consequent_set)
                    if conf < self.min_confidence:
                        continue  # skip the lift computation entirely
                    lft = self.lift(antecedent_set, consequent_set)
                    if lft < self.min_lift:
                        continue

                    rule = (antecedent_set, consequent_set, conf, lft)
                    if rule not in seen:
                        seen.add(rule)
                        self.rules.append(rule)
    return self.rules
100127

101128

@@ -104,8 +131,10 @@ def generate_association_rules(self):
104131

105132
doctest.testmod()
106133

107-
transactions = load_data()
108-
model = Apriori(transactions, min_support=0.25, min_confidence=0.1, min_lift=0.0)
134+
transactions: List[List[str]] = load_data()
135+
model: Apriori = Apriori(
136+
transactions, min_support=0.25, min_confidence=0.1, min_lift=0.0
137+
)
109138

110139
print("Frequent itemsets:")
111140
for level in model.itemsets:
@@ -114,7 +143,8 @@ def generate_association_rules(self):
114143

115144
print("\nAssociation Rules:")
116145
for rule in model.rules:
117-
antecedent, consequent, conf, lift = rule
118-
print(f"{set(antecedent)} -> {set(consequent)}, conf={conf:.2f}, lift={lift:.2f}")
119-
120-
146+
antecedent, consequent, conf, lift_value = rule
147+
print(
148+
f"{set(antecedent)} -> {set(consequent)}, "
149+
f"conf={conf:.2f}, lift={lift_value:.2f}"
150+
)

0 commit comments

Comments
 (0)