001/*-------------------------------------------------------------------------+ 002 | | 003 | Copyright 2005-2011 The ConQAT Project | 004 | | 005 | Licensed under the Apache License, Version 2.0 (the "License"); | 006 | you may not use this file except in compliance with the License. | 007 | You may obtain a copy of the License at | 008 | | 009 | http://www.apache.org/licenses/LICENSE-2.0 | 010 | | 011 | Unless required by applicable law or agreed to in writing, software | 012 | distributed under the License is distributed on an "AS IS" BASIS, | 013 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 014 | See the License for the specific language governing permissions and | 015 | limitations under the License. | 016 +-------------------------------------------------------------------------*/ 017package org.conqat.lib.commons.datamining; 018 019import java.util.HashMap; 020import java.util.HashSet; 021import java.util.Map; 022import java.util.Set; 023 024import org.conqat.lib.commons.collections.IdentityHashSet; 025 026/** 027 * Mines association rules from a set of shopping baskets. Uses Apriori 028 * algorithm. See http://en.wikipedia.org/wiki/Apriori_algorithm. 029 * 030 * @param <T> 031 * the item type; must support hashing. 032 */ 033public class AssociationRuleMiner<T> { 034 035 /** Threshold for confidence */ 036 private final float confidenceThreshold; 037 038 /** Miner for frequent item sets */ 039 private final FrequentItemSetMiner<T> itemSetMiner; 040 041 /** 042 * Constructor. 043 * 044 * @param supportThreshold 045 * the support threshold [0..1], i.e. the fraction of the baskets 046 * in which a frequent item set must be present in order to be 047 * considered. 048 * @param confidenceThreshold 049 * the minimal confidence of the mined rules [0..1]. 050 */ 051 public AssociationRuleMiner(float supportThreshold, float confidenceThreshold) { 052 this.confidenceThreshold = confidenceThreshold; 053 itemSetMiner = new FrequentItemSetMiner<T>(supportThreshold); 054 } 055 056 /** Mines frequent item sets from the given shopping baskets. */ 057 public Set<AssociationRule<T>> mineAssociationRules(Set<Set<T>> baskets) { 058 059 Set<AssociationRule<T>> result = new IdentityHashSet<AssociationRule<T>>(); 060 061 Set<FrequentItemSet<T>> frequentItemSets = itemSetMiner.mineFrequentItemSets(baskets); 062 063 Map<Set<T>, Double> supportMap = new HashMap<Set<T>, Double>(); 064 065 for (FrequentItemSet<T> frequentItemset : frequentItemSets) { 066 supportMap.put(frequentItemset.getItems(), frequentItemset.getSupport()); 067 } 068 069 for (FrequentItemSet<T> frequentItemSet : frequentItemSets) { 070 Set<T> items = frequentItemSet.getItems(); 071 if (items.size() > 1) { 072 for (T item : items) { 073 Set<T> reducedItemSet = new HashSet<T>(items); 074 reducedItemSet.remove(item); 075 076 double confidence = frequentItemSet.getSupport() / supportMap.get(reducedItemSet); 077 if (confidence >= confidenceThreshold) { 078 result.add(new AssociationRule<T>(reducedItemSet, item, confidence)); 079 } 080 } 081 } 082 } 083 return result; 084 } 085 086}