Fix: wrap over-long lines and switch to built-in generic type hints in the Apriori algorithm to satisfy ruff

simoderyouch · simoderyouch · commit eca4fdb4bb7d · 2025-09-06T19:56:48.000+01:00
diff --git a/machine_learning/apriori_algorithm.py b/machine_learning/apriori_algorithm.py
@@ -10,18 +10,14 @@
 
 from collections import defaultdict
 from itertools import combinations
-from typing import List, Dict, Tuple, Set
 
 
-def load_data() -> List[List[str]]:
+def load_data() -> list[list[str]]:
     """
     Returns a sample transaction dataset.
 
-    >>> data = load_data()
-    >>> len(data)
-    4
-    >>> 'milk' in data[0]
-    True
+    >>> load_data()
+    [['milk'], ['milk', 'butter'], ['milk', 'bread'], ['milk', 'bread', 'chips']]
     """
     return [["milk"], ["milk", "butter"], ["milk", "bread"], ["milk", "bread", "chips"]]
 
@@ -31,17 +27,17 @@ class Apriori:
 
     def __init__(
         self,
-        transactions: List[List[str]],
+        transactions: list[list[str]],
         min_support: float = 0.25,
         min_confidence: float = 0.5,
         min_lift: float = 1.0,
     ) -> None:
-        self.transactions: List[Set[str]] = [set(t) for t in transactions]
+        self.transactions: list[set[str]] = [set(t) for t in transactions]
         self.min_support: float = min_support
         self.min_confidence: float = min_confidence
         self.min_lift: float = min_lift
-        self.itemsets: List[Dict[frozenset, float]] = []
-        self.rules: List[Tuple[frozenset, frozenset, float, float]] = []
+        self.itemsets: list[dict[frozenset, float]] = []
+        self.rules: list[tuple[frozenset, frozenset, float, float]] = []
 
         self.find_frequent_itemsets()
         self.generate_association_rules()
@@ -54,34 +50,36 @@ def _get_support(self, itemset: frozenset) -> float:
 
     def confidence(self, antecedent: frozenset, consequent: frozenset) -> float:
         """Calculate confidence of a rule A -> B."""
-        support_antecedent: float = self._get_support(antecedent)
-        support_both: float = self._get_support(antecedent | consequent)
+        support_antecedent = self._get_support(antecedent)
+        support_both = self._get_support(antecedent | consequent)
         return support_both / support_antecedent if support_antecedent > 0 else 0.0
 
     def lift(self, antecedent: frozenset, consequent: frozenset) -> float:
         """Calculate lift of a rule A -> B."""
-        support_consequent: float = self._get_support(consequent)
-        conf: float = self.confidence(antecedent, consequent)
+        support_consequent = self._get_support(consequent)
+        conf = self.confidence(antecedent, consequent)
         return conf / support_consequent if support_consequent > 0 else 0.0
 
-    def find_frequent_itemsets(self) -> List[Dict[frozenset, float]]:
+    def find_frequent_itemsets(self) -> list[dict[frozenset, float]]:
         """Generate all frequent itemsets."""
-        item_counts: Dict[frozenset, int] = defaultdict(int)
+        item_counts: dict[frozenset, int] = defaultdict(int)
         for t in self.transactions:
             for item in t:
                 item_counts[frozenset([item])] += 1
 
         total: int = len(self.transactions)
-        current_itemsets: Dict[frozenset, float] = {
-            k: v / total for k, v in item_counts.items() if v / total >= self.min_support
+        current_itemsets: dict[frozenset, float] = {
+            k: v / total
+            for k, v in item_counts.items()
+            if v / total >= self.min_support
         }
         if current_itemsets:
             self.itemsets.append(current_itemsets)
 
         k: int = 2
         while current_itemsets:
-            candidates: Set[frozenset] = set()
-            keys: List[frozenset] = list(current_itemsets.keys())
+            candidates: set[frozenset] = set()
+            keys: list[frozenset] = list(current_itemsets.keys())
             for i in range(len(keys)):
                 for j in range(i + 1, len(keys)):
                     union = keys[i] | keys[j]
@@ -91,8 +89,10 @@ def find_frequent_itemsets(self) -> List[Dict[frozenset, float]]:
                     ):
                         candidates.add(union)
 
-            freq_candidates: Dict[frozenset, float] = {
-                c: self._get_support(c) for c in candidates if self._get_support(c) >= self.min_support
+            freq_candidates: dict[frozenset, float] = {
+                c: self._get_support(c)
+                for c in candidates
+                if self._get_support(c) >= self.min_support
             }
             if not freq_candidates:
                 break
@@ -103,26 +103,24 @@ def find_frequent_itemsets(self) -> List[Dict[frozenset, float]]:
 
         return self.itemsets
 
-    def generate_association_rules(self) -> List[Tuple[frozenset, frozenset, float, float]]:
+    def generate_association_rules(
+        self,
+    ) -> list[tuple[frozenset, frozenset, float, float]]:
         """Generate association rules with min confidence and lift."""
         for level in self.itemsets:
             for itemset in level:
                 if len(itemset) < 2:
                     continue
                 for i in range(1, len(itemset)):
                     for antecedent in combinations(itemset, i):
-                        antecedent_set: frozenset = frozenset(antecedent)
-                        consequent_set: frozenset = itemset - antecedent_set
-                        conf: float = self.confidence(antecedent_set, consequent_set)
-                        lft: float = self.lift(antecedent_set, consequent_set)
-                        rule: Tuple[frozenset, frozenset, float, float] = (
-                            antecedent_set,
-                            consequent_set,
-                            conf,
-                            lft,
-                        )
-                        if rule not in self.rules and conf >= self.min_confidence and lft >= self.min_lift:
-                            self.rules.append(rule)
+                        antecedent_set = frozenset(antecedent)
+                        consequent_set = itemset - antecedent_set
+                        conf = self.confidence(antecedent_set, consequent_set)
+                        lft = self.lift(antecedent_set, consequent_set)
+                        if conf >= self.min_confidence and lft >= self.min_lift:
+                            self.rules.append(
+                                (antecedent_set, consequent_set, conf, lft)
+                            )
         return self.rules
 
 
@@ -131,10 +129,8 @@ def generate_association_rules(self) -> List[Tuple[frozenset, frozenset, float,
 
     doctest.testmod()
 
-    transactions: List[List[str]] = load_data()
-    model: Apriori = Apriori(
-        transactions, min_support=0.25, min_confidence=0.1, min_lift=0.0
-    )
+    transactions = load_data()
+    model = Apriori(transactions, min_support=0.25, min_confidence=0.1, min_lift=0.0)
 
     print("Frequent itemsets:")
     for level in model.itemsets:
@@ -143,8 +139,7 @@ def generate_association_rules(self) -> List[Tuple[frozenset, frozenset, float,
 
     print("\nAssociation Rules:")
     for rule in model.rules:
-        antecedent, consequent, conf, lift_value = rule
+        antecedent, consequent, conf, lift = rule
         print(
-            f"{set(antecedent)} -> {set(consequent)}, "
-            f"conf={conf:.2f}, lift={lift_value:.2f}"
+            f"{set(antecedent)} -> {set(consequent)}, conf={conf:.2f}, lift={lift:.2f}"
         )