diff --git a/PAMI/db.txt b/PAMI/db.txt new file mode 100644 index 00000000..06fc9629 --- /dev/null +++ b/PAMI/db.txt @@ -0,0 +1,10 @@ +(3, 7),(1, 6),(1, 9),(3, 5),(2, 8),(2, 6) +(1, 9),(1, 9),(3, 6),(2, 6),(2, 8),(1, 6),(3, 5),(1, 8),(3, 7) +(3, 7),(2, 8),(2, 6) +(1, 9),(1, 9),(1, 6),(3, 7) +(2, 6),(1, 9),(3, 5),(3, 7),(1, 8),(3, 6) +(3, 5),(1, 6),(1, 8),(3, 7),(1, 9) +(1, 8),(1, 9),(1, 6),(3, 7) +(1, 9),(2, 6),(2, 6),(3, 6),(1, 8),(3, 7) +(1, 6),(1, 9),(2, 8) +(2, 6),(1, 9),(3, 5),(1, 6) diff --git a/PAMI/extras/generateDatabase/_generateSpatioTemporalDatabase.py b/PAMI/extras/generateDatabase/_generateSpatioTemporalDatabase.py new file mode 100644 index 00000000..96578d04 --- /dev/null +++ b/PAMI/extras/generateDatabase/_generateSpatioTemporalDatabase.py @@ -0,0 +1,159 @@ +# generateSpatioTemporalDatabase is a code used to convert the database into SpatioTemporal database. +# +# **Importing this algorithm into a python program** +# -------------------------------------------------------- +# +# from PAMI.extras.generateDatabase import generateSpatioTemporalDatabase as db +# +# obj = db.generateSpatioTemporalDatabase(0, 100, 0, 100, 10, 10, 0.5, 0.9, 0.5, 0.9) +# +# obj.save() +# +# obj.createPoint(0,100,0,100) # values can be according to the size of data +# +# obj.saveAsFile("outputFileName") # To create a file +# + + + + +__copyright__ = """ +Copyright (C) 2021 Rage Uday Kiran + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. 
import random as rand
from typing import List, Dict, Tuple, Set, Union, Any, Generator
import pandas
import sys


class spatioTemporalDatabaseGenerator():
    """
    :Description: Generate a synthetic spatiotemporal database whose items are random integer grid points.

    The database is built in the constructor. Every transaction is a timestamp
    together with a non-empty list of ``[x, y]`` points; a point is emitted at
    most once per generator instance.

    :param xmin: int :
        smallest x coordinate (inclusive)
    :param xmax: int :
        largest x coordinate (inclusive)
    :param ymin: int :
        smallest y coordinate (inclusive)
    :param ymax: int :
        largest y coordinate (inclusive)
    :param maxTimeStamp: int :
        last timestamp to generate; timestamps start at 1
    :param numberOfItems: int :
        number of item draws attempted per transaction
    :param itemChanceLow: int or float :
        relative weight of emitting an item on a draw
    :param itemChanceHigh: int or float :
        relative weight of skipping an item on a draw
    :param timeStampChanceLow: int or float :
        relative weight of advancing to the next timestamp after a transaction
    :param timeStampChanceHigh: int or float :
        relative weight of staying on the current timestamp

    **Importing this algorithm into a python program**
    --------------------------------------------------------
    .. code-block:: python

            from PAMI.extras.generateDatabase import generateSpatioTemporalDatabase as db

            obj = db.spatioTemporalDatabaseGenerator(0, 100, 0, 100, 10, 10, 0.5, 0.9, 0.5, 0.9)

            obj.saveAsFile("outputFileName") # To create a file

    """

    coinFlip = [True, False]
    timestamp = list()
    items = list()
    # Class-level placeholder kept for backward compatibility only; every
    # instance replaces it with its own set in __init__.
    alreadyAdded = set()
    outFileName = ""

    def createPoint(self, xmin: int, xmax: int, ymin: int, ymax: int) -> Tuple[int, int]:
        """
        Draw one random grid point.

        :param xmin: smallest x coordinate (inclusive)
        :param xmax: largest x coordinate (inclusive)
        :param ymin: smallest y coordinate (inclusive)
        :param ymax: largest y coordinate (inclusive)
        :return: the point as an ``(x, y)`` tuple
        :rtype: Tuple[int, int]
        """
        return (rand.randint(xmin, xmax), rand.randint(ymin, ymax))

    def __init__(self, xmin: int, xmax: int, ymin: int, ymax: int, maxTimeStamp: int, numberOfItems: int,
                 itemChanceLow: float, itemChanceHigh: float, timeStampChanceLow: float,
                 timeStampChanceHigh: float) -> None:
        """
        Build the database in memory (see the class docstring for the parameters).

        :raises ValueError: if timestamps are requested but the weight of
            advancing the timestamp is not positive (generation could never end)
        """
        if maxTimeStamp >= 1 and timeStampChanceLow <= 0:
            raise ValueError("timeStampChanceLow must be positive, otherwise the timestamp never advances")

        # Per-instance state: the original mutated the class-level set, so a
        # second generator silently skipped every point the first one had used.
        self.alreadyAdded = set()
        self.timeStampList = list()
        self.itemList = list()

        itemWeights = [itemChanceLow, itemChanceHigh]
        # The original reused the item weights here and never read the
        # timestamp weights.
        timeStampWeights = [timeStampChanceLow, timeStampChanceHigh]

        timeStamp = 1
        # '<=' instead of '!=' so a non-positive maxTimeStamp yields an empty
        # database rather than an endless loop.
        while timeStamp <= maxTimeStamp:
            itemSet = list()
            for _ in range(numberOfItems):
                if not rand.choices(self.coinFlip, weights=itemWeights, k=1)[0]:
                    continue
                coordinate = self.createPoint(xmin, xmax, ymin, ymax)
                if coordinate in self.alreadyAdded:
                    continue
                self.alreadyAdded.add(coordinate)
                itemSet.append(list(coordinate))
            if itemSet:
                self.timeStampList.append(timeStamp)
                self.itemList.append(itemSet)
            if rand.choices(self.coinFlip, weights=timeStampWeights, k=1)[0]:
                timeStamp += 1

        self.outFileName = "temporal_" + str(maxTimeStamp // 1000) + \
            "KI" + str(numberOfItems) + "C" + str(itemChanceLow) + "T" + str(timeStampChanceLow) + ".csv"

    def saveAsFile(self, outFileName="", sep="\t") -> None:
        """
        Write the database to a text file, one transaction per line.

        :param outFileName: target path; when empty the name derived in the
            constructor (or the one given on a previous call) is used
        :type outFileName: str
        :param sep: separator placed between the timestamp and each point
        :type sep: str
        :return: None
        """
        if outFileName != "":
            self.outFileName = outFileName

        # 'with' guarantees the handle is closed even if a write fails.
        with open(self.outFileName, "w") as file:
            for timeStamp, itemSet in zip(self.timeStampList, self.itemList):
                fields = [str(timeStamp)]
                fields.extend(str(item) for item in itemSet)
                file.write(sep.join(fields) + '\n')


if __name__ == "__main__":
    xmin = 0
    xmax = 100
    ymin = 0
    ymax = 100
    maxTimeStamp = 10
    numberOfItems = 10
    itemChanceLow = 0.5
    itemChanceHigh = 0.9
    timeStampChanceLow = 0.5
    timeStampChanceHigh = 0.9
    outFile = ""
    # Usage: script.py xmin xmax ymin ymax outputFile
    if len(sys.argv) >= 6:
        xmin, xmax, ymin, ymax = (int(value) for value in sys.argv[1:5])
        outFile = sys.argv[5]
    obj = spatioTemporalDatabaseGenerator(xmin, xmax, ymin, ymax, maxTimeStamp, numberOfItems,
                                          itemChanceLow, itemChanceHigh,
                                          timeStampChanceLow, timeStampChanceHigh)
    obj.saveAsFile(outFile)
from typing import Tuple, List, Union
import pandas as pd
import numpy as np
import random
import sys
import os


class generateTemporalDatabase:
    """
    :Description: generateTemporalDatabase creates a random temporal database and
        exposes it as a text file and/or a pandas DataFrame.

    :Attributes:
        :param numOfTransactions: int
            number of transactions (timestamps ``0 .. numOfTransactions - 1``)
        :param avgLenOfTransactions: int
            average length of transactions
        :param numItems: int
            number of distinct items (items are numbered ``1 .. numItems``)
        :param outputFile: str
            output file name
        :param percentage: int
            chance that a timestamp receives items; values above 1 are read as percent
        :param sep: str
            separator for the database output file
        :param typeOfFile: str
            "database" to write ``outputFile``, "dataframe" to only build the frame

    :Methods:
        getFileName():
            returns filename
        getDatabaseAsDataFrame():
            returns dataframe
        performCoinFlip():
            Perform a coin flip with the given probability
        tuning():
            Assign line lengths whose sum matches the requested total
        createTemporalFile():
            create Temporal database or dataframe depending on input

    **Importing this algorithm into a python program**
    --------------------------------------------------------
    .. code-block:: python

            from PAMI.extras.generateDatabase import generateTemporalDatabase as db

            temporalDB = db.generateTemporalDatabase(100, 6, 15, 'temporal_ot.txt', 75, '\\t', "dataframe")
            temporalDB.createTemporalFile()
            print(temporalDB.getDatabaseAsDataFrame())

    """

    def __init__(self, numOfTransactions: int, avgLenOfTransactions: int,
                 numItems: int, outputFile: str, percentage: int = 50,
                 sep: str = '\t', typeOfFile: str = "Database") -> None:
        """
        Store the generation parameters; nothing is generated until
        createTemporalFile() is called. See the class docstring for the parameters.
        """
        self.numOfTransactions = numOfTransactions
        self.avgLenOfTransactions = avgLenOfTransactions
        self.numItems = numItems
        self.outputFile = outputFile
        # Values above 1 are interpreted as a percentage, otherwise as a probability.
        if percentage > 1:
            self.percentage = percentage / 100
        else:
            self.percentage = percentage
        self.sep = sep
        self.typeOfFile = typeOfFile.lower()
        # Defined up front so getDatabaseAsDataFrame() never hits AttributeError.
        self.df = None

    def getFileName(self) -> str:
        """
        return filename
        :return: filename
        :rtype: str
        """
        return self.outputFile

    def getDatabaseAsDataFrame(self) -> pd.DataFrame:
        """
        return dataframe
        :return: the generated dataframe, or None before createTemporalFile() has run
        :rtype: pd.DataFrame
        """
        return self.df

    def performCoinFlip(self, probability: float) -> bool:
        """
        Perform a coin flip with the given probability.
        :param probability: probability of returning True
        :type probability: float
        :return: True with the given probability, False otherwise
        :rtype: bool
        """
        result = np.random.choice([0, 1], p=[1 - probability, probability])
        return bool(result == 1)

    def tuning(self, array, sumRes) -> list:
        """
        Fill in the length slot of every ``[lineIndex, length]`` pair so the lengths add up to sumRes.

        :param array: list of ``[lineIndex, length]`` pairs; the length slots are overwritten in place
        :type array: list
        :param sumRes: target sum
        :type sumRes: int
        :return: the same list with its length slots filled in
        :rtype: list
        :raises ValueError: if the list is empty but a non-zero sum is requested
        """
        if len(array) == 0:
            # The original failed inside np.argmin with an unrelated message.
            if sumRes != 0:
                raise ValueError("Error: no transaction was selected, increase percentage or numOfTransactions")
            return array

        target = int(round(sumRes))
        # max(..., 2) keeps the exclusive upper bound valid when numItems <= 1.
        values = np.random.randint(1, max(self.numItems, 2), len(array))
        total = int(values.sum())

        # Move one unit at a time, always levelling the extremes, until the
        # running total matches the target.
        while total != target:
            if total > target:
                values[np.argmax(values)] -= 1
                total -= 1
            else:
                values[np.argmin(values)] += 1
                total += 1

        for entry, value in zip(array, values):
            entry[1] = int(value)

        return array

    def createTemporalFile(self) -> None:
        """
        create Temporal database or dataframe depending on input

        The DataFrame is always built; the text file is written only when
        typeOfFile is "database".

        :return: None
        :raises ValueError: if the requested average cannot be met with numItems distinct items
        """
        db = [[i] for i in range(self.numOfTransactions)]
        lineSize = [[i, 0] for i in range(self.numOfTransactions)
                    if self.performCoinFlip(self.percentage)]

        # The total number of items is fixed by the requested average over ALL transactions.
        sumRes = self.numOfTransactions * self.avgLenOfTransactions
        self.tuning(lineSize, sumRes)

        for lineIndex, length in lineSize:
            if length > self.numItems:
                raise ValueError("Error: Either increase numItems or decrease avgLenOfTransactions or modify percentage")
            line = np.random.choice(range(1, self.numItems + 1), length, replace=False)
            # tolist() stores plain ints instead of numpy scalars.
            db[lineIndex].extend(line.tolist())

        if self.typeOfFile == "database":
            with open(self.outputFile, "w") as outFile:
                for line in db:
                    outFile.write(self.sep.join(map(str, line)) + '\n')

        data = {
            'timestamp': [line[0] for line in db],
            'transactions': pd.Series([line[1:] for line in db])
        }
        self.df = pd.DataFrame(data)

        print("Temporal database created successfully")


if __name__ == '__main__':
    numOfTransactions = 100
    numItems = 20
    avgTransactionLength = 6
    outFileName = 'temporal_out.txt'
    sep = '\t'
    frameOrBase = "database"

    temporalDB = generateTemporalDatabase(numOfTransactions, avgTransactionLength, numItems, outFileName)

    temporalDB.createTemporalFile()

    numOfTransactions = 100
    numItems = 15
    avgTransactionLength = 6
    outFileName = 'temporal_ot.txt'
    sep = '\t'
    percent = 75
    frameOrBase = "dataframe"

    temporalDB = generateTemporalDatabase(numOfTransactions, avgTransactionLength, numItems, outFileName, percent, sep, frameOrBase)
    temporalDB.createTemporalFile()
    print(temporalDB.getDatabaseAsDataFrame())

    # Usage: script.py numOfTransactions avgLenOfTransactions numItems outputFile
    if len(sys.argv) >= 5:
        obj = generateTemporalDatabase(int(sys.argv[1]), int(sys.argv[2]), int(sys.argv[3]), sys.argv[4])
        obj.createTemporalFile()
from typing import Tuple, List, Union
import pandas as pd
import numpy as np
import random
import sys
import os


class generateTemporalDatabase:
    """
    :Description: generateTemporalDatabase creates a random temporal database whose
        items are distinct integer grid points, exposed as a pandas DataFrame.

    :Attributes:
        :param numOfTransactions: int
            number of transactions (timestamps ``1 .. numOfTransactions``)
        :param avgLenOfTransactions: int
            average length of transactions
        :param numItems: int
            number of distinct items; each item is mapped to its own grid point
        :param outputFile: str
            output file name (stored and returned by getFileName(); not written by this version)
        :param x1: int
            smallest x coordinate (inclusive)
        :param y1: int
            smallest y coordinate (inclusive)
        :param x2: int
            largest x coordinate (exclusive)
        :param y2: int
            largest y coordinate (exclusive)
        :param percentage: int
            chance that a timestamp receives items; values above 1 are read as percent
        :param sep: str
            separator placed between the points of one transaction
        :param typeOfFile: str
            stored in lower case; not otherwise used by this version

    :Methods:
        getPoint():
            draw one random grid point
        getFileName():
            returns filename
        getDatabaseAsDataFrame():
            returns dataframe
        performCoinFlip():
            Perform a coin flip with the given probability
        tuning():
            Nudge an array until its values add up to a target sum
        generateArray():
            Draw transaction lengths with a given total
        createTemporalFile():
            build the temporal database as a dataframe

    **Importing this algorithm into a python program**
    --------------------------------------------------------
    .. code-block:: python

            from PAMI.extras.generateDatabase import generateSpatioTemporalDatabase as db

            temporalDB = db.generateTemporalDatabase(100, 6, 20, 'temporal_out.txt', 1, 1, 10, 10)
            temporalDB.createTemporalFile()
            print(temporalDB.getDatabaseAsDataFrame())

    """

    def getPoint(self, x1, y1, x2, y2):
        """
        Draw one random grid point with ``x1 <= x < x2`` and ``y1 <= y < y2``.

        :return: the point as a tuple of plain Python ints
        :rtype: tuple
        """
        # int(): under NumPy 2 str() of a tuple of numpy scalars reads
        # "(np.int64(3), np.int64(7))", which would end up in the output.
        return (int(np.random.randint(x1, x2)), int(np.random.randint(y1, y2)))

    def __init__(self, numOfTransactions: int, avgLenOfTransactions: int,
                 numItems: int, outputFile: str, x1, y1, x2, y2, percentage: int = 50,
                 sep: str = '\t', typeOfFile: str = "Database") -> None:
        """
        Store the parameters and assign one distinct grid point to every item.
        See the class docstring for the parameters.

        :raises ValueError: if the grid holds fewer points than numItems
        """
        self.numOfTransactions = numOfTransactions
        self.avgLenOfTransactions = avgLenOfTransactions
        self.numItems = numItems
        self.outputFile = outputFile
        # Values above 1 are interpreted as a percentage, otherwise as a probability.
        if percentage > 1:
            self.percentage = percentage / 100
        else:
            self.percentage = percentage
        self.sep = sep
        self.typeOfFile = typeOfFile.lower()
        self.x1 = x1
        self.y1 = y1
        self.x2 = x2
        self.y2 = y2
        self.df = None

        numPoints = (x2 - x1) * (y2 - y1)
        if numItems > numPoints:
            raise ValueError("Number of points in the grid is less than the number of items")

        self.itemPoint = {}
        usedPoints = set()

        for i in range(1, numItems + 1):
            point = self.getPoint(x1, y1, x2, y2)
            while point in usedPoints:
                point = self.getPoint(x1, y1, x2, y2)
            # Record the point; the original never did, so two items could
            # share the same coordinates.
            usedPoints.add(point)
            self.itemPoint[i] = point

    def getFileName(self) -> str:
        """
        return filename
        :return: filename
        :rtype: str
        """
        return self.outputFile

    def getDatabaseAsDataFrame(self) -> pd.DataFrame:
        """
        return dataframe
        :return: the generated dataframe, or None before createTemporalFile() has run
        :rtype: pd.DataFrame
        """
        return self.df

    def performCoinFlip(self, probability: float) -> bool:
        """
        Perform a coin flip with the given probability.
        :param probability: probability of returning True
        :type probability: float
        :return: True with the given probability, False otherwise
        :rtype: bool
        """
        result = np.random.choice([0, 1], p=[1 - probability, probability])
        return bool(result == 1)

    def tuning(self, array, sumRes, low=None, high=None) -> list:
        """
        Tune the array in place so that the sum of the values is equal to sumRes

        One randomly chosen element is moved by one unit per step. When a
        bound is given, only elements that can still move without crossing it
        are eligible.

        :param array: values to adjust (modified in place)
        :type array: list
        :param sumRes: the sum the values must reach
        :type sumRes: int
        :param low: optional smallest value an element may be decreased to
        :type low: int
        :param high: optional largest value an element may be increased to
        :type high: int
        :return: the same array, now summing to sumRes
        :rtype: list
        :raises ValueError: if the array is empty but sumRes is not 0, or if
            the bounds make sumRes unreachable
        """
        if len(array) == 0:
            if sumRes != 0:
                raise ValueError("Cannot tune an empty array to a non-zero sum")
            return array

        target = int(round(sumRes))
        total = int(np.sum(array))
        while total != target:
            step = -1 if total > target else 1
            if step < 0 and low is not None:
                candidates = np.flatnonzero(np.asarray(array) > low)
            elif step > 0 and high is not None:
                candidates = np.flatnonzero(np.asarray(array) < high)
            else:
                candidates = None

            if candidates is None:
                index = np.random.randint(0, len(array))
            elif len(candidates) == 0:
                raise ValueError("Target sum cannot be reached within the given bounds")
            else:
                index = int(np.random.choice(candidates))
            array[index] += step
            total += step
        return array

    def generateArray(self, nums, avg, maxItems, sumRes) -> list:
        """
        Generate nums random transaction lengths in ``1 .. maxItems`` that add up to sumRes

        :param nums: number of values
        :type nums: int
        :param avg: average value (kept for interface compatibility; sumRes decides the total)
        :type avg: float
        :param maxItems: maximum value
        :type maxItems: int
        :param sumRes: required sum of the values
        :type sumRes: int
        :return: random array
        :rtype: numpy.ndarray
        :raises ValueError: if no such array exists
        """
        sumRes = int(round(sumRes))
        if nums <= 0:
            if sumRes != 0:
                raise ValueError("No transaction was selected, increase percentage or numOfTransactions")
            return np.zeros(0, dtype=int)
        # The original retried forever when the target was out of reach.
        if not nums <= sumRes <= nums * maxItems:
            raise ValueError("Either increase numItems or decrease avgLenOfTransactions or modify percentage")

        values = np.random.randint(1, maxItems + 1, nums)
        self.tuning(values, sumRes, 1, maxItems)

        # Avoid a perfectly uniform result where there is room to break it:
        # shift one unit between two different positions (sum and bounds hold).
        if nums > 1 and 1 < values[0] < maxItems and np.all(values == values[0]):
            up, down = np.random.choice(nums, 2, replace=False)
            values[up] += 1
            values[down] -= 1

        return values

    def createTemporalFile(self) -> None:
        """
        Build the temporal database and store it in ``self.df``.

        Columns are 'Timestamp' (1-based) and 'Transactions' (points joined by
        ``sep``; timestamps that received no items hold an empty list). The
        frame is printed; ``outputFile`` is not written by this version.

        :return: None
        :raises ValueError: if the requested average cannot be met
        """
        lines = [i for i in range(self.numOfTransactions) if self.performCoinFlip(self.percentage)]
        values = self.generateArray(len(lines), self.avgLenOfTransactions, self.numItems,
                                    self.avgLenOfTransactions * self.numOfTransactions)

        database = [[] for _ in range(self.numOfTransactions)]

        for lineIndex, length in zip(lines, values):
            items = np.random.choice(range(1, self.numItems + 1), int(length), replace=False).tolist()
            database[lineIndex] = self.sep.join(str(self.itemPoint[item]) for item in items)

        self.df = pd.DataFrame({'Timestamp': [i + 1 for i in range(self.numOfTransactions)],
                                'Transactions': database})
        print(self.df)


if __name__ == '__main__':
    numOfTransactions = 100
    numItems = 20
    avgTransactionLength = 6
    outFileName = 'temporal_out.txt'
    sep = '\t'
    frameOrBase = "database"

    temporalDB = generateTemporalDatabase(numOfTransactions, avgTransactionLength, numItems, outFileName, 1, 1, 10, 10)

    temporalDB.createTemporalFile()
= rand.randint(ymin, ymax) - coordinate = tuple([x, y]) - return coordinate - - def __init__(self,xmin: int,xmax: int,ymin: int,ymax: int,maxTimeStamp: int,numberOfItems: int, itemChanceLow: float, - itemChanceHigh: float, timeStampChanceLow: float, - timeStampChanceHigh: float) -> None: - coinFlip = [True, False] - timeStamp = 1 - self.timeStampList = list() - self.itemList = list() - - while timeStamp != maxTimeStamp + 1: - itemSet=list() - for i in range(1, numberOfItems+1): - #rand1=rand.rand(itemChanceLow,itemChanceHigh) - #rand2 = rand.rand(timeStampChanceLow, timeStampChanceHigh) - if rand.choices(coinFlip, weights=[itemChanceLow,itemChanceHigh], k=1)[0]: - coordinate=self.createPoint(xmin, xmax, ymin, ymax) - coordinate=tuple(coordinate) - if coordinate not in self.alreadyAdded: - coordinate=list(coordinate) - itemSet.append(coordinate) - coordinate=tuple(coordinate) - self.alreadyAdded.add(coordinate) - if itemSet != []: - self.timeStampList.append( - timeStamp) - self.itemList.append( - itemSet) - if rand.choices(coinFlip, weights=[itemChanceLow,itemChanceHigh], k=1)[0]: - timeStamp += 1 - self.outFileName = "temporal_" + str(maxTimeStamp // 1000) + \ - "KI" + str(numberOfItems) + "C" + str(itemChanceLow) + "T" + str(timeStampChanceLow) + ".csv" - - - - - def saveAsFile(self, outFileName="", sep="\t") -> None: - if outFileName != "": - self.outFileName = outFileName - - file = open( - self.outFileName, "w") - - for i in range(len(self.timeStampList)): - file.write( - str(self.timeStampList[i])) - for j in range(len(self.itemList[i])): - file.write( - sep + str(self.itemList[i][j])) - file.write('\n') - - file.close() - - -if __name__ == "__main__": - xmin=0 - xmax=100 - ymin=0 - ymax=100 - maxTimeStamp = 10 - numberOfItems = 10 - itemChanceLow = 0.5 - itemChanceHigh = 0.9 - timeStampChanceLow = 0.5 - timeStampChanceHigh = 0.9 - obj = spatioTemporalDatabaseGenerator(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]) - obj.saveAsFile(sys.argv[5]) + # obj = 
generateTemporalDatabase(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]) + # obj.createTemporalFile(sys.argv[5]) diff --git a/PAMI/extras/generateDatabase/generateSpatioTransactionalDatabase.py b/PAMI/extras/generateDatabase/generateSpatioTransactionalDatabase.py new file mode 100644 index 00000000..e6c01117 --- /dev/null +++ b/PAMI/extras/generateDatabase/generateSpatioTransactionalDatabase.py @@ -0,0 +1,242 @@ +# generateTransactionalDatabase is a code used to convert the database into Temporal database. +# +# **Importing this algorithm into a python program** +# -------------------------------------------------------- +# from PAMI.extras.generateDatabase import generateTransactionalDatabase as db +# obj = db(10, 5, 10) +# obj.create() +# obj.save('db.txt') +# print(obj.getTransactions()) to get the transactional database as a pandas dataframe + +# **Running the code from the command line** +# -------------------------------------------------------- +# python generateDatabase.py 10 5 10 db.txt +# cat db.txt +# + + + + +__copyright__ = """ +Copyright (C) 2021 Rage Uday Kiran + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
import numpy as np
import pandas as pd
import sys


class generateSpatioTransactionalDatabase:
    """
    :Description: Generate a transactional database whose items are distinct integer
        grid points, with a given number of lines, average number of items per line,
        and total number of items.

    :Attributes:
        numLines: int
            - number of lines
        avgItemsPerLine: int
            - average number of items per line
        numItems: int
            - total number of items; each item is mapped to its own grid point
        x1, y1: int
            - smallest x / y coordinate (inclusive)
        x2, y2: int
            - largest x / y coordinate (exclusive)

    :Methods:
        getPoint:
            Draw one random grid point
        tuning:
            Nudge an array until its values add up to a target sum
        generateArray:
            Draw line lengths with the requested average
        create:
            Generate the transactional database
        save:
            Save the transactional database to a file
        getTransactions:
            Get the transactional database as a dataframe

    **Importing this algorithm into a python program**
    --------------------------------------------------------
    .. code-block:: python

            from PAMI.extras.generateDatabase import generateSpatioTransactionalDatabase as db

            obj = db.generateSpatioTransactionalDatabase(10, 5, 10, 1, 5, 5, 10)
            obj.create()
            obj.save('db.txt')
            print(obj.getTransactions())

    """

    def getPoint(self, x1, y1, x2, y2):
        """
        Draw one random grid point with ``x1 <= x < x2`` and ``y1 <= y < y2``.

        :return: the point as a tuple of plain Python ints
        :rtype: tuple
        """
        # int(): under NumPy 2 str() of a tuple of numpy scalars reads
        # "(np.int64(3), np.int64(7))", which would end up in the saved file.
        return (int(np.random.randint(x1, x2)), int(np.random.randint(y1, y2)))

    def __init__(self, numLines, avgItemsPerLine, numItems, x1, y1, x2, y2) -> None:
        """
        Store the parameters and assign one distinct grid point to every item.

        Parameters:
            numLines: int - number of lines
            avgItemsPerLine: int - average number of items per line
            numItems: int - total number of items
            x1, y1, x2, y2: int - grid bounds (upper bounds exclusive)

        :raises ValueError: if the grid holds fewer points than numItems
        """
        self.numLines = numLines
        self.avgItemsPerLine = avgItemsPerLine
        self.numItems = numItems
        self.db = []
        self.x1 = x1
        self.y1 = y1
        self.x2 = x2
        self.y2 = y2

        numPoints = (x2 - x1) * (y2 - y1)
        if numItems > numPoints:
            raise ValueError("Number of points in the grid is less than the number of items")

        self.itemPoint = {}
        usedPoints = set()

        for i in range(1, numItems + 1):
            point = self.getPoint(x1, y1, x2, y2)
            while point in usedPoints:
                point = self.getPoint(x1, y1, x2, y2)
            # Record the point; the original never did, so two items could
            # share the same coordinates.
            usedPoints.add(point)
            self.itemPoint[i] = point

    def tuning(self, array, sumRes, low=None, high=None) -> list:
        """
        Tune the array in place so that the sum of the values is equal to sumRes

        One randomly chosen element is moved by one unit per step. When a
        bound is given, only elements that can still move without crossing it
        are eligible.

        :param array: values to adjust (modified in place)
        :type array: list
        :param sumRes: the sum the values must reach
        :type sumRes: int
        :param low: optional smallest value an element may be decreased to
        :type low: int
        :param high: optional largest value an element may be increased to
        :type high: int
        :return: the same array, now summing to sumRes
        :rtype: list
        :raises ValueError: if the array is empty but sumRes is not 0, or if
            the bounds make sumRes unreachable
        """
        if len(array) == 0:
            if sumRes != 0:
                raise ValueError("Cannot tune an empty array to a non-zero sum")
            return array

        target = int(round(sumRes))
        total = int(np.sum(array))
        while total != target:
            step = -1 if total > target else 1
            if step < 0 and low is not None:
                candidates = np.flatnonzero(np.asarray(array) > low)
            elif step > 0 and high is not None:
                candidates = np.flatnonzero(np.asarray(array) < high)
            else:
                candidates = None

            if candidates is None:
                index = np.random.randint(0, len(array))
            elif len(candidates) == 0:
                raise ValueError("Target sum cannot be reached within the given bounds")
            else:
                index = int(np.random.choice(candidates))
            array[index] += step
            total += step
        return array

    def generateArray(self, nums, avg, maxItems) -> list:
        """
        Generate nums random line lengths in ``1 .. maxItems`` whose average is avg

        :param nums: number of values
        :type nums: int
        :param avg: average value
        :type avg: float
        :param maxItems: maximum value
        :type maxItems: int
        :return: random array
        :rtype: numpy.ndarray
        :raises ValueError: if no such array exists (avg outside ``1 .. maxItems``)
        """
        sumRes = int(round(nums * avg))
        if nums <= 0:
            return np.zeros(0, dtype=int)
        # The original retried forever when the target was out of reach.
        if not nums <= sumRes <= nums * maxItems:
            raise ValueError("avgItemsPerLine must lie between 1 and numItems")

        values = np.random.randint(1, maxItems + 1, nums)
        self.tuning(values, sumRes, 1, maxItems)

        # Avoid a perfectly uniform result where there is room to break it:
        # shift one unit between two different positions (sum and bounds hold).
        if nums > 1 and 1 < values[0] < maxItems and np.all(values == values[0]):
            up, down = np.random.choice(nums, 2, replace=False)
            values[up] += 1
            values[down] -= 1

        return values

    def create(self) -> None:
        """
        Generate the transactional database into ``self.db``.

        Any previously generated content is discarded, so calling create()
        again yields a fresh database of numLines lines.

        :return: None
        :raises ValueError: if avgItemsPerLine cannot be met with numItems items
        """
        values = self.generateArray(self.numLines, self.avgItemsPerLine, self.numItems)

        self.db = []
        for value in values:
            line = np.random.choice(range(1, self.numItems + 1), int(value), replace=False).tolist()
            self.db.append([self.itemPoint[item] for item in line])

    def save(self, filename) -> None:
        """
        Save the transactional database to a file, one comma-separated line per transaction

        :param filename: name of the file
        :type filename: str
        :return: None
        """
        with open(filename, 'w') as f:
            for line in self.db:
                f.write(','.join(map(str, line)) + '\n')

    def getTransactions(self) -> pd.DataFrame:
        """
        Get the transactional database

        :return: single-column dataframe, one tab-joined transaction per row
        :rtype: pd.DataFrame
        """
        df = pd.DataFrame(['\t'.join(map(str, line)) for line in self.db])
        return df


if __name__ == "__main__":
    # test the class
    db = generateSpatioTransactionalDatabase(10, 5, 10, 1, 5, 5, 10)
    db.create()
    db.save('db.txt')
    print(db.getTransactions())

    # Usage: script.py numLines avgItemsPerLine numItems outputFile [x1 y1 x2 y2]
    if len(sys.argv) >= 5:
        numLines, avgItemsPerLine, numItems = (int(value) for value in sys.argv[1:4])
        if len(sys.argv) >= 9:
            x1, y1, x2, y2 = (int(value) for value in sys.argv[5:9])
        else:
            # Default grid: a square large enough to hold every item.
            x1, y1, x2, y2 = 0, 0, max(numItems, 1), max(numItems, 1)
        obj = generateSpatioTransactionalDatabase(numLines, avgItemsPerLine, numItems, x1, y1, x2, y2)
        obj.create()
        obj.save(sys.argv[4])
def generateArray(self, nums, avg, maxItems, sumRes) -> np.ndarray:
    """
    Generate nums random integers in [1, maxItems] whose sum is sumRes.

    :param nums: number of values

    :type nums: int

    :param avg: average value (not used by the computation; kept for interface compatibility)

    :type avg: float

    :param maxItems: maximum value

    :type maxItems: int

    :param sumRes: target sum of the values; rounded to the nearest integer

    :type sumRes: int or float

    :return: random array of length nums

    :rtype: numpy.ndarray

    :raises ValueError: if sumRes cannot be split into nums values between 1 and maxItems
    """
    target = int(round(sumRes))

    if nums == 0 and target == 0:
        return np.zeros(0, dtype=int)
    if nums < 1 or maxItems < 1 or not nums <= target <= nums * maxItems:
        raise ValueError("cannot split a total of %d into %d values between 1 and %d"
                         % (target, nums, maxItems))

    # upper bound of randint is exclusive; max(..., 2) keeps the call valid when maxItems == 1
    values = np.random.randint(1, max(maxItems, 2), nums)

    # move the sum to the target one unit at a time without leaving [1, maxItems];
    # the feasibility check above guarantees some position always has room
    diff = target - int(values.sum())
    step = 1 if diff > 0 else -1
    while diff != 0:
        idx = np.random.randint(0, nums)
        if 1 <= values[idx] + step <= maxItems:
            values[idx] += step
            diff -= step

    # avoid a perfectly uniform result when the bounds leave room for variation
    if nums > 1 and np.all(values == values[0]) and 1 < values[0] < maxItems:
        up, down = np.random.choice(nums, 2, replace=False)
        values[up] += 1
        values[down] -= 1

    return values


def createTemporalFile(self) -> None:
    """
    Build the temporal database as a data frame stored in self.df.

    Each timestamp is kept with the probability decided by
    self.performCoinFlip(self.percentage); the kept timestamps share a total of
    avgLenOfTransactions * numOfTransactions items. Timestamps that are not
    kept hold an empty list.

    :return: None

    :raises ValueError: if the items cannot be distributed over the kept timestamps
    """
    lines = [i for i in range(self.numOfTransactions) if self.performCoinFlip(self.percentage)]
    total = self.avgLenOfTransactions * self.numOfTransactions
    try:
        values = self.generateArray(len(lines), self.avgLenOfTransactions, self.numItems, total)
    except ValueError as err:
        raise ValueError("Error: Either increase numItems or decrease avgLenOfTransactions "
                         "or modify percentage") from err

    database = [[] for _ in range(self.numOfTransactions)]
    items = np.arange(1, self.numItems + 1)

    for index, size in zip(lines, values):
        # sorted() returns the ordered list; list.sort() sorts in place and returns None
        transaction = sorted(np.random.choice(items, int(size), replace=False).tolist())
        database[index] = self.sep.join(str(item) for item in transaction)

    self.df = pd.DataFrame({'Timestamp': [i + 1 for i in range(self.numOfTransactions)],
                            'Transactions': database})
    print(self.df)
print(temporalDB.getDatabaseAsDataFrame()) + # temporalDB = generateTemporalDatabase(numOfTransactions, avgTransactionLength, numItems, outFileName, percent, sep, frameOrBase ) + # temporalDB.createTemporalFile() + # print(temporalDB.getDatabaseAsDataFrame()) - obj = generateTemporalDatabase(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]) - obj.createTemporalFile(sys.argv[5]) + # obj = generateTemporalDatabase(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]) + # obj.createTemporalFile(sys.argv[5]) diff --git a/PAMI/extras/generateDatabase/generateTransactionalDatabase.py b/PAMI/extras/generateDatabase/generateTransactionalDatabase.py index 956e0302..fc648d2a 100644 --- a/PAMI/extras/generateDatabase/generateTransactionalDatabase.py +++ b/PAMI/extras/generateDatabase/generateTransactionalDatabase.py @@ -172,7 +172,7 @@ def create(self) -> None: line = np.random.choice(range(1, self.numItems + 1), value, replace=False) self.db.append(line) - def save(self, filename) -> None: + def save(self, sep, filename) -> None: """ Save the transactional database to a file @@ -185,7 +185,7 @@ def save(self, filename) -> None: with open(filename, 'w') as f: for line in self.db: - f.write(','.join(map(str, line)) + '\n') + f.write(sep.join(map(str, line)) + '\n') def getTransactions(self) -> pd.DataFrame: """