Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix periodic df handling #450

Merged
merged 4 commits into from
Jun 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions PAMI/db.txt

This file was deleted.

23 changes: 12 additions & 11 deletions PAMI/extras/syntheticDataGenerator/generateTemporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,15 +193,17 @@ def tuning(self, array, sumRes) -> list:
"""

while np.sum(array) != sumRes:
# print(sum(array), sumRes)
# get index of largest value
randIndex = np.random.randint(0, len(array))
# randIndex = np.random.randint(0, len(array))
# if sum is too large, decrease the largest value
if np.sum(array) > sumRes:
array[randIndex] -= 1
maxIndex = np.argmax(array)
array[maxIndex] -= 1
# if sum is too small, increase the smallest value
else:
minIndex = np.argmin(array)
array[randIndex] += 1
array[minIndex] += 1
return array


Expand Down Expand Up @@ -229,28 +231,27 @@ def generateArray(self, nums, avg, maxItems, sumRes) -> list:
# generate n random values
values = np.random.randint(1, maxItems, nums)

# sumRes = nums * avg
if maxItems * len(values) < sumRes:
print(maxItems * len(values), sumRes)
raise ValueError('Try modifiying the values of avgLenOfTransactions and numOfTransactions')

self.tuning(values, sumRes)

# if any value is less than 1, increase it and tune the array again
while np.any(values < 1):
for i in range(nums):
if values[i] < 1:
values[i] += 1
values[i] = 1
self.tuning(values, sumRes)

# if any value is greater than maxItems, decrease it and tune the array again
while np.any(values > maxItems):
for i in range(nums):
if values[i] > maxItems:
values[i] -= 1
values[i] -= maxItems
self.tuning(values, sumRes)



# if all values are same then randomly increase one value and decrease another
while np.all(values == values[0]):
values[np.random.randint(0, nums)] += 1
self.tuning(values, sumRes)

return values

Expand Down
20 changes: 17 additions & 3 deletions PAMI/periodicFrequentPattern/basic/PFECLAT.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,11 @@ def _creatingItemSets(self) -> None:
if 'Transactions' in i:
data = self._iFile['Transactions'].tolist()
for i in range(len(data)):
tr = [ts[i][0]]
tr = tr + data[i]
self._Database.append(tr)
if data[i]:
tr = [str(ts[i])] + [x for x in data[i].split(self._sep)]
self._Database.append(tr)
else:
self._Database.append([str(ts[i])])

if isinstance(self._iFile, str):
if _ab._validators.url(self._iFile):
Expand Down Expand Up @@ -400,6 +402,9 @@ def printResults(self) -> None:


if __name__ == "__main__":



_ap = str()
if len(_ab._sys.argv) == 5 or len(_ab._sys.argv) == 6:
if len(_ab._sys.argv) == 6:
Expand All @@ -415,3 +420,12 @@ def printResults(self) -> None:
print("Total ExecutionTime in ms:", _ap.getRuntime())
else:
print("Error! The number of input parameters do not match the total number of parameters provided")

# NOTE(review): hard-coded invocation with a fixed input path ("idk.txt") and
# fixed minSup/maxPer values. This looks like leftover debugging code rather
# than part of the command-line handling above — confirm it is meant to ship.
file = "idk.txt"
minSup = 0.01
maxPer = 0.01
obj = PFECLAT(file, minSup, maxPer)
obj.mine()
obj.printResults()
# for k,v in obj.getPatterns().items():
# print(k, v)
18 changes: 15 additions & 3 deletions PAMI/periodicFrequentPattern/basic/PFPGrowth.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,9 +251,11 @@ def _creatingItemSets(self) -> None:
if 'Transactions' in i:
data = self._iFile['Transactions'].tolist()
for i in range(len(data)):
tr = [ts[i][0]]
tr = tr + data[i]
self._Database.append(tr)
if data[i]:
tr = [str(ts[i])] + [x for x in data[i].split(self._sep)]
self._Database.append(tr)
else:
self._Database.append([str(ts[i])])

if isinstance(self._iFile, str):
if _ab._validators.url(self._iFile):
Expand Down Expand Up @@ -594,3 +596,13 @@ def printResults(self) -> None:
print("Total ExecutionTime in ms:", _ap.getRuntime())
else:
print("Error! The number of input parameters do not match the total number of parameters provided")


# NOTE(review): hard-coded invocation with a fixed input path ("idk.txt") and
# fixed minSup/maxPer values. This looks like leftover debugging code rather
# than part of the command-line handling above — confirm it is meant to ship.
file = "idk.txt"
minSup = 0.01
maxPer = 0.01
obj = PFPGrowth(file, minSup, maxPer)
obj.mine()
obj.printResults()
# Dump every mined pattern together with the value stored for it.
for k,v in obj.getPatterns().items():
    print(k, v)
51 changes: 46 additions & 5 deletions PAMI/periodicFrequentPattern/basic/PFPGrowthPlus.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,9 +453,12 @@ def _creatingItemSets(self) -> None:
if 'Transactions' in i:
data = self._iFile['Transactions'].tolist()
for i in range(len(data)):
tr = [ts[i][0]]
tr = tr + data[i]
self._Database.append(tr)
if data[i]:
tr = [str(ts[i])] + [x for x in data[i].split(self._sep)]
self._Database.append(tr)
else:
self._Database.append([str(ts[i])])

if isinstance(self._iFile, str):
if _ab._validators.url(self._iFile):
data = _ab._urlopen(self._iFile)
Expand All @@ -477,6 +480,9 @@ def _creatingItemSets(self) -> None:
print("File Not Found")
quit()

maxNos = [int(x[0]) for x in self._Database]
self._lno = max(maxNos)

def _periodicFrequentOneItem(self) -> Tuple[Dict, List]:
"""
calculates the support of each item in the dataset and assign the ranks to the items
Expand Down Expand Up @@ -578,8 +584,8 @@ def _convert(self, value) -> Union[int, float]:
else:
value = int(value)
return value

def startMine(self) -> None:
def mine(self) -> None:
"""
Main method where the patterns are mined by constructing tree.
:return: None
Expand Down Expand Up @@ -613,6 +619,41 @@ def startMine(self) -> None:
self._memoryRSS = process.memory_info().rss
print("periodic-frequent patterns were generated successfully using PFPGrowth++ algorithm ")

def startMine(self) -> None:
    """
    Start the mining process by delegating to ``mine()``.

    This method holds no mining logic of its own; it only forwards the call
    to ``self.mine()``, so both method names run the same process.

    :return: None
    """
    self.mine()

def getMemoryUSS(self) -> float:
"""
Total amount of USS memory consumed by the mining process will be retrieved from this function
Expand Down
33 changes: 29 additions & 4 deletions PAMI/periodicFrequentPattern/basic/PFPMC.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,9 +311,12 @@ def _creatingOneItemSets(self) -> list:
if 'Transactions' in i:
data = self._iFile['Transactions'].tolist()
for i in range(len(data)):
tr = [ts[i][0]]
tr = tr + data[i]
Database.append(tr)
if data[i]:
tr = [str(ts[i])] + [x for x in data[i].split(self._sep)]
Database.append(tr)
else:
Database.append([str(ts[i])])

if isinstance(self._iFile, str):
if _ab._validators.url(self._iFile):
data = _ab._urlopen(self._iFile)
Expand Down Expand Up @@ -346,6 +349,9 @@ def _creatingOneItemSets(self) -> list:
else:
itemsets[item] = {tid}

maxNos = [int(x[0]) for x in Database]
self._lno = max(maxNos)

self._dbSize = len(Database)
self._lastTid = max(self._tidSet)
self._minSup = self._convert(self._minSup)
Expand Down Expand Up @@ -384,7 +390,7 @@ def _generateDiffsetEclat(self, candidates: list) -> None:
if len(new_freqList) > 0:
self._generateDiffsetEclat(new_freqList)

def startMine(self) -> None:
def mine(self) -> None:
"""
Mining process will start from this function
:return: None
Expand All @@ -402,6 +408,25 @@ def startMine(self) -> None:
self._memoryRSS = process.memory_info().rss
print("Periodic-Frequent patterns were generated successfully using PFPDiffset ECLAT algorithm ")

def startMine(self) -> None:
    """
    Start the mining process by delegating to ``mine()``.

    This method holds no mining logic of its own; it only forwards the call
    to ``self.mine()``, so both method names run the same process.

    :return: None
    """
    self.mine()

def getMemoryUSS(self) -> float:
"""
Total amount of USS memory consumed by the mining process will be retrieved from this function
Expand Down
12 changes: 7 additions & 5 deletions PAMI/periodicFrequentPattern/basic/PSGrowth.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ def check(self) -> int:
k = v
return -1

def generatePatterns(self, prefix) -> None:
def generatePatterns(self, prefix):
"""
Generating the patterns from the tree

Expand Down Expand Up @@ -751,9 +751,11 @@ def _creatingItemSets(self) -> None:
if 'Transactions' in i:
data = self._iFile['Transactions'].tolist()
for i in range(len(data)):
tr = [ts[i][0]]
tr = tr + data[i]
self._Database.append(tr)
if data[i]:
tr = [str(ts[i])] + [x for x in data[i].split(self._sep)]
self._Database.append(tr)
else:
self._Database.append([str(ts[i])])

if isinstance(self._iFile, str):
if _ab._validators.url(self._iFile):
Expand Down Expand Up @@ -859,7 +861,7 @@ def startMine(self) -> None:
self._memoryRSS = process.memory_info().rss
print("Periodic-Frequent patterns were generated successfully using PS-Growth algorithm ")

def Mine(self) -> None:
def mine(self) -> None:
"""
Mining process will start from this function
:return: None
Expand Down