001/*-------------------------------------------------------------------------+
002 |                                                                          |
003 | Copyright 2005-2011 The ConQAT Project                                   |
004 |                                                                          |
005 | Licensed under the Apache License, Version 2.0 (the "License");          |
006 | you may not use this file except in compliance with the License.         |
007 | You may obtain a copy of the License at                                  |
008 |                                                                          |
009 |    http://www.apache.org/licenses/LICENSE-2.0                            |
010 |                                                                          |
011 | Unless required by applicable law or agreed to in writing, software      |
012 | distributed under the License is distributed on an "AS IS" BASIS,        |
013 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
014 | See the License for the specific language governing permissions and      |
015 | limitations under the License.                                           |
016 +-------------------------------------------------------------------------*/
017package org.conqat.lib.commons.datamining;
018
019import java.util.HashMap;
020import java.util.HashSet;
021import java.util.Map;
022import java.util.Set;
023
024import org.conqat.lib.commons.collections.IdentityHashSet;
025
026/**
027 * Mines association rules from a set of shopping baskets. Uses Apriori
028 * algorithm. See http://en.wikipedia.org/wiki/Apriori_algorithm.
029 * 
030 * @param <T>
031 *            the item type; must support hashing.
032 */
033public class AssociationRuleMiner<T> {
034
035        /** Threshold for confidence */
036        private final float confidenceThreshold;
037
038        /** Miner for frequent item sets */
039        private final FrequentItemSetMiner<T> itemSetMiner;
040
041        /**
042         * Constructor.
043         * 
044         * @param supportThreshold
045         *            the support threshold [0..1], i.e. the fraction of the baskets
046         *            in which a frequent item set must be present in order to be
047         *            considered.
048         * @param confidenceThreshold
049         *            the minimal confidence of the mined rules [0..1].
050         */
051        public AssociationRuleMiner(float supportThreshold, float confidenceThreshold) {
052                this.confidenceThreshold = confidenceThreshold;
053                itemSetMiner = new FrequentItemSetMiner<T>(supportThreshold);
054        }
055
056        /** Mines frequent item sets from the given shopping baskets. */
057        public Set<AssociationRule<T>> mineAssociationRules(Set<Set<T>> baskets) {
058
059                Set<AssociationRule<T>> result = new IdentityHashSet<AssociationRule<T>>();
060
061                Set<FrequentItemSet<T>> frequentItemSets = itemSetMiner.mineFrequentItemSets(baskets);
062
063                Map<Set<T>, Double> supportMap = new HashMap<Set<T>, Double>();
064
065                for (FrequentItemSet<T> frequentItemset : frequentItemSets) {
066                        supportMap.put(frequentItemset.getItems(), frequentItemset.getSupport());
067                }
068
069                for (FrequentItemSet<T> frequentItemSet : frequentItemSets) {
070                        Set<T> items = frequentItemSet.getItems();
071                        if (items.size() > 1) {
072                                for (T item : items) {
073                                        Set<T> reducedItemSet = new HashSet<T>(items);
074                                        reducedItemSet.remove(item);
075
076                                        double confidence = frequentItemSet.getSupport() / supportMap.get(reducedItemSet);
077                                        if (confidence >= confidenceThreshold) {
078                                                result.add(new AssociationRule<T>(reducedItemSet, item, confidence));
079                                        }
080                                }
081                        }
082                }
083                return result;
084        }
085
086}