From 3aaa88cb9cdc2dcaec81c62f5e662f5f0d83f1da Mon Sep 17 00:00:00 2001 From: Fabian Schmengler Date: Tue, 5 Sep 2023 15:52:41 +0200 Subject: [PATCH 1/2] Add configuration to limit length of rules --- src/FPGrowth.php | 15 +++++++++++++-- src/FPTree.php | 25 ++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/FPGrowth.php b/src/FPGrowth.php index c832aa2..9461032 100644 --- a/src/FPGrowth.php +++ b/src/FPGrowth.php @@ -10,6 +10,7 @@ class FPGrowth { protected int $support = 3; protected float $confidence = 0.7; + private int $maxLength = 0; private $patterns; private $rules; @@ -50,6 +51,15 @@ public function setConfidence(float $confidence): self return $this; } + public function getMaxLength(): int + { + return $this->maxLength; + } + + public function setMaxLength(int $maxLength): void + { + $this->maxLength = $maxLength; + } /** * @return mixed */ @@ -71,10 +81,11 @@ public function getRules() * @param int $support 1, 2, 3 ... * @param float $confidence 0 ... 1 */ - public function __construct(int $support, float $confidence) + public function __construct(int $support, float $confidence, int $maxLength = 0) { $this->setSupport($support); $this->setConfidence($confidence); + $this->setMaxLength($maxLength); } /** @@ -93,7 +104,7 @@ public function run(array $transactions) */ protected function findFrequentPatterns(array $transactions): array { - $tree = new FPTree($transactions, $this->support, null, 0); + $tree = new FPTree($transactions, $this->support, null, 0, $this->maxLength); return $tree->minePatterns($this->support); } diff --git a/src/FPTree.php b/src/FPTree.php index dfa04f5..fd56efd 100644 --- a/src/FPTree.php +++ b/src/FPTree.php @@ -16,6 +16,10 @@ class FPTree private FPNode $root; + private int $maxLength = 0; + + private int $depth = 0; + /** * Initialize the tree. * @param array $transactions @@ -23,11 +27,12 @@ class FPTree * @param $rootValue * @param int $rootCount */ - public function __construct(array $transactions, int $threshold, $rootValue, int $rootCount) + public function __construct(array $transactions, int $threshold, $rootValue, int $rootCount, $maxLength = 0) { $this->frequent = $this->findFrequentItems($transactions, $threshold); $this->headers = $this->buildHeaderTable(); $this->root = $this->buildFPTree($transactions, $rootValue, $rootCount, $this->frequent); + $this->maxLength = $maxLength; } /** @@ -168,6 +173,8 @@ public function minePatterns(int $threshold): array { if ($this->treeHasSinglePath($this->root)) { return $this->generatePatternList(); + } elseif ($this->maxLength && $this->maxLength <= $this->getDepth()) { + return []; } return $this->zipPatterns($this->mineSubTrees($threshold)); @@ -211,7 +218,13 @@ protected function generatePatternList(): array $patterns[$this->root->value] = $this->root->count; } - for ($i = 1; $i <= count($items); $i++) { + // limit length of combinations to remaining length + $count = count($items); + if ($this->maxLength) { + $count = min($count, $this->maxLength - $this->getDepth()); + } + + for ($i = 1; $i <= $count; $i++) { $combinations = new Combinations($items,$i); foreach ($combinations->generator() as $subset) { $pattern = $this->root->value !== null ? array_merge($subset, [$this->root->value]) : $subset; @@ -270,7 +283,8 @@ protected function mineSubTrees(int $threshold): array } // Now we have the input for a subtree, so construct it and grab the patterns. - $subtree = new FPTree($conditionalTreeInput, $threshold, $item, $this->frequent[$item]); + $subtree = new FPTree($conditionalTreeInput, $threshold, $item, $this->frequent[$item], $this->maxLength); + $subtree->depth = $this->depth + 1; $subtreePatterns = $subtree->minePatterns($threshold); // Insert subtree patterns into main patterns dictionary. @@ -285,4 +299,9 @@ protected function mineSubTrees(int $threshold): array return $patterns; } + + private function getDepth(): int + { + return $this->depth; + } } From bf7a505125bbd35e3f25a0a52123641274e81e89 Mon Sep 17 00:00:00 2001 From: Fabian Schmengler Date: Wed, 10 Jan 2024 19:33:07 +0100 Subject: [PATCH 2/2] Make separator for itemsets configurable The separator is only used internally, but a character must be used that is not part of any item itself. The default is changed from comma to null-byte. --- src/FPGrowth.php | 14 ++++++++------ src/FPTree.php | 13 ++++++++----- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/FPGrowth.php b/src/FPGrowth.php index 9461032..5446acf 100644 --- a/src/FPGrowth.php +++ b/src/FPGrowth.php @@ -11,8 +11,9 @@ class FPGrowth protected int $support = 3; protected float $confidence = 0.7; private int $maxLength = 0; - + private $itemsetSeparator; private $patterns; + private $rules; /** @@ -81,11 +82,12 @@ public function getRules() * @param int $support 1, 2, 3 ... * @param float $confidence 0 ... 1 */ - public function __construct(int $support, float $confidence, int $maxLength = 0) + public function __construct(int $support, float $confidence, int $maxLength = 0, string $itemsetSeparator = "\0") { $this->setSupport($support); $this->setConfidence($confidence); $this->setMaxLength($maxLength); + $this->itemsetSeparator = $itemsetSeparator; } /** @@ -104,7 +106,7 @@ public function run(array $transactions) */ protected function findFrequentPatterns(array $transactions): array { - $tree = new FPTree($transactions, $this->support, null, 0, $this->maxLength); + $tree = new FPTree($transactions, $this->support, null, 0, $this->maxLength, $this->itemsetSeparator); return $tree->minePatterns($this->support); } @@ -116,16 +118,16 @@ protected function generateAssociationRules(array $patterns): array { $rules = []; foreach (array_keys($patterns) as $pattern) { - $itemSet = explode(',', $pattern); + $itemSet = explode($this->itemsetSeparator, $pattern); $upperSupport = $patterns[$pattern]; for ($i = 1; $i < count($itemSet); $i++) { $combinations = new Combinations($itemSet, $i); foreach ($combinations->generator() as $antecedent) { sort($antecedent); - $antecedentStr = implode(',', $antecedent); + $antecedentStr = implode($this->itemsetSeparator, $antecedent); $consequent = array_diff($itemSet, $antecedent); sort($consequent); - $consequentStr = implode(',', $consequent); + $consequentStr = implode($this->itemsetSeparator, $consequent); if (isset($patterns[$antecedentStr])) { $lowerSupport = $patterns[$antecedentStr]; $confidence = floatval($upperSupport) / $lowerSupport; diff --git a/src/FPTree.php b/src/FPTree.php index fd56efd..3763818 100644 --- a/src/FPTree.php +++ b/src/FPTree.php @@ -18,7 +18,9 @@ class FPTree private int $maxLength = 0; - private int $depth = 0; + private string $itemsetSeparator; + + private int $depth = 0; /** * Initialize the tree. @@ -27,12 +29,13 @@ class FPTree * @param $rootValue * @param int $rootCount */ - public function __construct(array $transactions, int $threshold, $rootValue, int $rootCount, $maxLength = 0) + public function __construct(array $transactions, int $threshold, $rootValue, int $rootCount, $maxLength = 0, string $itemsetSeparator = "\0") { $this->frequent = $this->findFrequentItems($transactions, $threshold); $this->headers = $this->buildHeaderTable(); $this->root = $this->buildFPTree($transactions, $rootValue, $rootCount, $this->frequent); $this->maxLength = $maxLength; + $this->itemsetSeparator = $itemsetSeparator; } /** @@ -195,10 +198,10 @@ protected function zipPatterns(array $patterns): array // We are in a conditional tree. $newPatterns = []; foreach (array_keys($patterns) as $strKey) { - $key = explode(',', $strKey); + $key = explode($this->itemsetSeparator, $strKey); $key[] = $this->root->value; sort($key); - $newPatterns[implode(',', $key)] = $patterns[$strKey]; + $newPatterns[implode($this->itemsetSeparator, $key)] = $patterns[$strKey]; } return $newPatterns; @@ -236,7 +239,7 @@ protected function generatePatternList(): array $min = $this->frequent[$x]; } } - $patterns[implode(',', $pattern)] = $min; + $patterns[implode($this->itemsetSeparator, $pattern)] = $min; } }