from itertools import combinations
from io import BytesIO # 在Python 2.7中使用BytesIO代替StringIO
import csv
class Apriori:
def __init__(self, min_support=0.01, min_confidence=0.5):
self.min_support = min_support
self.min_confidence = min_confidence
self.frequent_itemsets = None
self.association_rules = None
def fit(self, transactions):
"""执行Apriori算法"""
items = self._get_unique_items(transactions)
frequent_itemsets_1 = self._get_frequent_itemsets(transactions, items, 1)
frequent_itemsets = [frequent_itemsets_1]
k = 2
while True:
candidates = self._generate_candidates(frequent_itemsets[k-2], k)
if not candidates:
break
freq_itemsets_k = self._get_frequent_itemsets(transactions, candidates, k)
if not freq_itemsets_k:
break
frequent_itemsets.append(freq_itemsets_k)
k += 1
self.frequent_itemsets = frequent_itemsets
return self
def generate_rules(self):
"""生成关联规则"""
if self.frequent_itemsets is None:
raise ValueError("请先执行fit方法")
rules = []
for itemset_level in self.frequent_itemsets[1:]:
for itemset, support in itemset_level.items():
itemset_list = list(itemset)
for i in range(1, len(itemset_list)):
for antecedent in combinations(itemset_list, i):
antecedent = frozenset(antecedent)
consequent = itemset - antecedent
confidence = support / self.frequent_itemsets[len(antecedent)-1][antecedent]
if confidence >= self.min_confidence:
rules.append((antecedent, consequent, support, confidence))
self.association_rules = rules
return rules
def _get_unique_items(self, transactions):
"""获取所有唯一的项"""
items = set()
for transaction in transactions:
for item in transaction:
items.add(item)
return [frozenset([item]) for item in items]
def _get_frequent_itemsets(self, transactions, candidates, k):
"""计算候选项集的支持度并筛选频繁项集"""
itemset_counts = {}
for transaction in transactions:
transaction_set = set(transaction)
for candidate in candidates:
if candidate.issubset(transaction_set):
itemset_counts[candidate] = itemset_counts.get(candidate, 0) + 1
num_transactions = len(transactions)
frequent_itemsets = {}
for itemset, count in itemset_counts.items():
support = float(count) / num_transactions
if support >= self.min_support:
frequent_itemsets[itemset] = support
return frequent_itemsets
def _generate_candidates(self, frequent_itemsets_prev, k):
"""生成候选k项集"""
candidates = set()
items = list(frequent_itemsets_prev.keys())
for i in range(len(items)):
for j in range(i+1, len(items)):
union_set = items[i].union(items[j])
if len(union_set) == k:
subsets = combinations(union_set, k-1)
if all(frozenset(subset) in frequent_itemsets_prev for subset in subsets):
candidates.add(union_set)
return candidates
def main():
# 模拟CSV数据(使用普通字符串)
csv_data = """milk,bread,butter
bread,eggs
milk,bread,eggs
milk,eggs
bread,butter
milk,bread,butter,eggs
milk,bread
bread,eggs,butter
milk,eggs,butter
bread,butter"""
# Python 2.7兼容方案:使用BytesIO + 手动解码
transactions = []
for line in csv_data.split('\n'):
transactions.append(line.strip().split(','))
# 初始化Apriori算法
apriori = Apriori(min_support=0.3, min_confidence=0.7)
# 执行算法
apriori.fit(transactions)
rules = apriori.generate_rules()
# 打印结果(Python 2.7使用print语句)
print "频繁项集:"
for level, itemsets in enumerate(apriori.frequent_itemsets, 1):
print "\n%d-项集 (数量: %d)" % (level, len(itemsets))
for itemset, support in itemsets.items():
print "%s: 支持度=%.4f" % (tuple(itemset), support)
print "\n关联规则:"
for rule in rules:
antecedent, consequent, support, confidence = rule
print "%s => %s: 支持度=%.4f, 置信度=%.4f" % (
tuple(antecedent), tuple(consequent), support, confidence)
if __name__ == "__main__":
main()
ZnJvbSBpdGVydG9vbHMgaW1wb3J0IGNvbWJpbmF0aW9ucwpmcm9tIGlvIGltcG9ydCBCeXRlc0lPICAjIOWcqFB5dGhvbiAyLjfkuK3kvb/nlKhCeXRlc0lP5Luj5pu/U3RyaW5nSU8KaW1wb3J0IGNzdgoKY2xhc3MgQXByaW9yaToKICAgIGRlZiBfX2luaXRfXyhzZWxmLCBtaW5fc3VwcG9ydD0wLjAxLCBtaW5fY29uZmlkZW5jZT0wLjUpOgogICAgICAgIHNlbGYubWluX3N1cHBvcnQgPSBtaW5fc3VwcG9ydAogICAgICAgIHNlbGYubWluX2NvbmZpZGVuY2UgPSBtaW5fY29uZmlkZW5jZQogICAgICAgIHNlbGYuZnJlcXVlbnRfaXRlbXNldHMgPSBOb25lCiAgICAgICAgc2VsZi5hc3NvY2lhdGlvbl9ydWxlcyA9IE5vbmUKICAgIAogICAgZGVmIGZpdChzZWxmLCB0cmFuc2FjdGlvbnMpOgogICAgICAgICIiIuaJp+ihjEFwcmlvcmnnrpfms5UiIiIKICAgICAgICBpdGVtcyA9IHNlbGYuX2dldF91bmlxdWVfaXRlbXModHJhbnNhY3Rpb25zKQogICAgICAgIGZyZXF1ZW50X2l0ZW1zZXRzXzEgPSBzZWxmLl9nZXRfZnJlcXVlbnRfaXRlbXNldHModHJhbnNhY3Rpb25zLCBpdGVtcywgMSkKICAgICAgICBmcmVxdWVudF9pdGVtc2V0cyA9IFtmcmVxdWVudF9pdGVtc2V0c18xXQogICAgICAgIAogICAgICAgIGsgPSAyCiAgICAgICAgd2hpbGUgVHJ1ZToKICAgICAgICAgICAgY2FuZGlkYXRlcyA9IHNlbGYuX2dlbmVyYXRlX2NhbmRpZGF0ZXMoZnJlcXVlbnRfaXRlbXNldHNbay0yXSwgaykKICAgICAgICAgICAgaWYgbm90IGNhbmRpZGF0ZXM6CiAgICAgICAgICAgICAgICBicmVhawogICAgICAgICAgICAgICAgCiAgICAgICAgICAgIGZyZXFfaXRlbXNldHNfayA9IHNlbGYuX2dldF9mcmVxdWVudF9pdGVtc2V0cyh0cmFuc2FjdGlvbnMsIGNhbmRpZGF0ZXMsIGspCiAgICAgICAgICAgIGlmIG5vdCBmcmVxX2l0ZW1zZXRzX2s6CiAgICAgICAgICAgICAgICBicmVhawogICAgICAgICAgICAgICAgCiAgICAgICAgICAgIGZyZXF1ZW50X2l0ZW1zZXRzLmFwcGVuZChmcmVxX2l0ZW1zZXRzX2spCiAgICAgICAgICAgIGsgKz0gMQogICAgICAgICAgICAKICAgICAgICBzZWxmLmZyZXF1ZW50X2l0ZW1zZXRzID0gZnJlcXVlbnRfaXRlbXNldHMKICAgICAgICByZXR1cm4gc2VsZgogICAgCiAgICBkZWYgZ2VuZXJhdGVfcnVsZXMoc2VsZik6CiAgICAgICAgIiIi55Sf5oiQ5YWz6IGU6KeE5YiZIiIiCiAgICAgICAgaWYgc2VsZi5mcmVxdWVudF9pdGVtc2V0cyBpcyBOb25lOgogICAgICAgICAgICByYWlzZSBWYWx1ZUVycm9yKCLor7flhYjmiafooYxmaXTmlrnms5UiKQogICAgICAgICAgICAKICAgICAgICBydWxlcyA9IFtdCiAgICAgICAgZm9yIGl0ZW1zZXRfbGV2ZWwgaW4gc2VsZi5mcmVxdWVudF9pdGVtc2V0c1sxOl06CiAgICAgICAgICAgIGZvciBpdGVtc2V0LCBzdXBwb3J0IGluIGl0ZW1zZXRfbGV2ZWwuaXRlbXMoKToKICAgICAgICAgICAgICAgIGl0ZW1zZXRfbGlzdCA9IGxpc3QoaXRlbXNldCkKICAgICAgICAgICAgICAgIGZvciBpIGluIHJhbmdlKDEsIGxlbihpdGVtc2V0X2xpc3QpKToKICAgICAgICAgICAgICAgICAgICBmb3IgYW50ZWNlZGVudCBpbiBjb21iaW5hdGlvbnMoaXRlbXNldF9saXN0LCBpKToKICAgICAgICAgICAgICAgICAgICAgICAgYW50ZWNlZGVudCA9IGZyb3plbnNldChhbnRlY2VkZW50KQogICAgICAgICAgICAgICAgICAgICAgICBjb25zZXF1ZW50ID0gaXRlbXNldCAtIGFudGVjZWRlbnQKICAgICAgICAgICAgICAgICAgICAgICAgY29uZmlkZW5jZSA9IHN1cHBvcnQgLyBzZWxmLmZyZXF1ZW50X2l0ZW1zZXRzW2xlbihhbnRlY2VkZW50KS0xXVthbnRlY2VkZW50XQogICAgICAgICAgICAgICAgICAgICAgICBpZiBjb25maWRlbmNlID49IHNlbGYubWluX2NvbmZpZGVuY2U6CiAgICAgICAgICAgICAgICAgICAgICAgICAgICBydWxlcy5hcHBlbmQoKGFudGVjZWRlbnQsIGNvbnNlcXVlbnQsIHN1cHBvcnQsIGNvbmZpZGVuY2UpKQogICAgICAgIAogICAgICAgIHNlbGYuYXNzb2NpYXRpb25fcnVsZXMgPSBydWxlcwogICAgICAgIHJldHVybiBydWxlcwogICAgCiAgICBkZWYgX2dldF91bmlxdWVfaXRlbXMoc2VsZiwgdHJhbnNhY3Rpb25zKToKICAgICAgICAiIiLojrflj5bmiYDmnInllK/kuIDnmoTpobkiIiIKICAgICAgICBpdGVtcyA9IHNldCgpCiAgICAgICAgZm9yIHRyYW5zYWN0aW9uIGluIHRyYW5zYWN0aW9uczoKICAgICAgICAgICAgZm9yIGl0ZW0gaW4gdHJhbnNhY3Rpb246CiAgICAgICAgICAgICAgICBpdGVtcy5hZGQoaXRlbSkKICAgICAgICByZXR1cm4gW2Zyb3plbnNldChbaXRlbV0pIGZvciBpdGVtIGluIGl0ZW1zXQogICAgCiAgICBkZWYgX2dldF9mcmVxdWVudF9pdGVtc2V0cyhzZWxmLCB0cmFuc2FjdGlvbnMsIGNhbmRpZGF0ZXMsIGspOgogICAgICAgICIiIuiuoeeul+WAmemAiemhuembhueahOaUr+aMgeW6puW5tuetm+mAiemikee5gemhuembhiIiIgogICAgICAgIGl0ZW1zZXRfY291bnRzID0ge30KICAgICAgICBmb3IgdHJhbnNhY3Rpb24gaW4gdHJhbnNhY3Rpb25zOgogICAgICAgICAgICB0cmFuc2FjdGlvbl9zZXQgPSBzZXQodHJhbnNhY3Rpb24pCiAgICAgICAgICAgIGZvciBjYW5kaWRhdGUgaW4gY2FuZGlkYXRlczoKICAgICAgICAgICAgICAgIGlmIGNhbmRpZGF0ZS5pc3N1YnNldCh0cmFuc2FjdGlvbl9zZXQpOgogICAgICAgICAgICAgICAgICAgIGl0ZW1zZXRfY291bnRzW2NhbmRpZGF0ZV0gPSBpdGVtc2V0X2NvdW50cy5nZXQoY2FuZGlkYXRlLCAwKSArIDEKICAgICAgICAKICAgICAgICBudW1fdHJhbnNhY3Rpb25zID0gbGVuKHRyYW5zYWN0aW9ucykKICAgICAgICBmcmVxdWVudF9pdGVtc2V0cyA9IHt9CiAgICAgICAgZm9yIGl0ZW1zZXQsIGNvdW50IGluIGl0ZW1zZXRfY291bnRzLml0ZW1zKCk6CiAgICAgICAgICAgIHN1cHBvcnQgPSBmbG9hdChjb3VudCkgLyBudW1fdHJhbnNhY3Rpb25zCiAgICAgICAgICAgIGlmIHN1cHBvcnQgPj0gc2VsZi5taW5fc3VwcG9ydDoKICAgICAgICAgICAgICAgIGZyZXF1ZW50X2l0ZW1zZXRzW2l0ZW1zZXRdID0gc3VwcG9ydAogICAgICAgICAgICAgICAgCiAgICAgICAgcmV0dXJuIGZyZXF1ZW50X2l0ZW1zZXRzCiAgICAKICAgIGRlZiBfZ2VuZXJhdGVfY2FuZGlkYXRlcyhzZWxmLCBmcmVxdWVudF9pdGVtc2V0c19wcmV2LCBrKToKICAgICAgICAiIiLnlJ/miJDlgJnpgIlr6aG56ZuGIiIiCiAgICAgICAgY2FuZGlkYXRlcyA9IHNldCgpCiAgICAgICAgaXRlbXMgPSBsaXN0KGZyZXF1ZW50X2l0ZW1zZXRzX3ByZXYua2V5cygpKQogICAgICAgIAogICAgICAgIGZvciBpIGluIHJhbmdlKGxlbihpdGVtcykpOgogICAgICAgICAgICBmb3IgaiBpbiByYW5nZShpKzEsIGxlbihpdGVtcykpOgogICAgICAgICAgICAgICAgdW5pb25fc2V0ID0gaXRlbXNbaV0udW5pb24oaXRlbXNbal0pCiAgICAgICAgICAgICAgICBpZiBsZW4odW5pb25fc2V0KSA9PSBrOgogICAgICAgICAgICAgICAgICAgIHN1YnNldHMgPSBjb21iaW5hdGlvbnModW5pb25fc2V0LCBrLTEpCiAgICAgICAgICAgICAgICAgICAgaWYgYWxsKGZyb3plbnNldChzdWJzZXQpIGluIGZyZXF1ZW50X2l0ZW1zZXRzX3ByZXYgZm9yIHN1YnNldCBpbiBzdWJzZXRzKToKICAgICAgICAgICAgICAgICAgICAgICAgY2FuZGlkYXRlcy5hZGQodW5pb25fc2V0KQogICAgICAgICAgICAgICAgICAgICAgICAKICAgICAgICByZXR1cm4gY2FuZGlkYXRlcwoKZGVmIG1haW4oKToKICAgICMg5qih5oufQ1NW5pWw5o2u77yI5L2/55So5pmu6YCa5a2X56ym5Liy77yJCiAgICBjc3ZfZGF0YSA9ICIiIm1pbGssYnJlYWQsYnV0dGVyCmJyZWFkLGVnZ3MKbWlsayxicmVhZCxlZ2dzCm1pbGssZWdncwpicmVhZCxidXR0ZXIKbWlsayxicmVhZCxidXR0ZXIsZWdncwptaWxrLGJyZWFkCmJyZWFkLGVnZ3MsYnV0dGVyCm1pbGssZWdncyxidXR0ZXIKYnJlYWQsYnV0dGVyIiIiCgogICAgIyBQeXRob24gMi435YW85a655pa55qGI77ya5L2/55SoQnl0ZXNJTyArIOaJi+WKqOino+eggQogICAgdHJhbnNhY3Rpb25zID0gW10KICAgIGZvciBsaW5lIGluIGNzdl9kYXRhLnNwbGl0KCdcbicpOgogICAgICAgIHRyYW5zYWN0aW9ucy5hcHBlbmQobGluZS5zdHJpcCgpLnNwbGl0KCcsJykpCiAgICAKICAgICMg5Yid5aeL5YyWQXByaW9yaeeul+azlQogICAgYXByaW9yaSA9IEFwcmlvcmkobWluX3N1cHBvcnQ9MC4zLCBtaW5fY29uZmlkZW5jZT0wLjcpCiAgICAKICAgICMg5omn6KGM566X5rOVCiAgICBhcHJpb3JpLmZpdCh0cmFuc2FjdGlvbnMpCiAgICBydWxlcyA9IGFwcmlvcmkuZ2VuZXJhdGVfcnVsZXMoKQogICAgCiAgICAjIOaJk+WNsOe7k+aenO+8iFB5dGhvbiAyLjfkvb/nlKhwcmludOivreWPpe+8iQogICAgcHJpbnQgIumikee5gemhuembhjoiCiAgICBmb3IgbGV2ZWwsIGl0ZW1zZXRzIGluIGVudW1lcmF0ZShhcHJpb3JpLmZyZXF1ZW50X2l0ZW1zZXRzLCAxKToKICAgICAgICBwcmludCAiXG4lZC3pobnpm4YgKOaVsOmHjzogJWQpIiAlIChsZXZlbCwgbGVuKGl0ZW1zZXRzKSkKICAgICAgICBmb3IgaXRlbXNldCwgc3VwcG9ydCBpbiBpdGVtc2V0cy5pdGVtcygpOgogICAgICAgICAgICBwcmludCAiJXM6IOaUr+aMgeW6pj0lLjRmIiAlICh0dXBsZShpdGVtc2V0KSwgc3VwcG9ydCkKICAgIAogICAgcHJpbnQgIlxu5YWz6IGU6KeE5YiZOiIKICAgIGZvciBydWxlIGluIHJ1bGVzOgogICAgICAgIGFudGVjZWRlbnQsIGNvbnNlcXVlbnQsIHN1cHBvcnQsIGNvbmZpZGVuY2UgPSBydWxlCiAgICAgICAgcHJpbnQgIiVzID0+ICVzOiDmlK/mjIHluqY9JS40Ziwg572u5L+h5bqmPSUuNGYiICUgKAogICAgICAgICAgICB0dXBsZShhbnRlY2VkZW50KSwgdHVwbGUoY29uc2VxdWVudCksIHN1cHBvcnQsIGNvbmZpZGVuY2UpCgppZiBfX25hbWVfXyA9PSAiX19tYWluX18iOgogICAgbWFpbigp