-
Notifications
You must be signed in to change notification settings - Fork 0
/
randomizeText10.py
executable file
·65 lines (56 loc) · 1.6 KB
/
randomizeText10.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/python3 -W all
"""
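randomizeText10.py: randomize the line order of a labeled text (first token of each line is its label), spreading the lines of each label over up to 10 consecutive output parts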
usage: randomizeText10.py < file
20171120 erikt(at)xs4all.nl
"""
import random
import sys
# Remove the program name from sys.argv and keep it. This runs at import time
# and mutates sys.argv, so main(sys.argv) only receives the remaining arguments.
COMMAND = sys.argv.pop(0)
# Number of buckets over which divideData() distributes the lines.
N = 10
def getLabel(line):
    """
    Return the label of a line: its first whitespace-separated token.

    Raises IndexError when the line contains no tokens at all.
    """
    tokens = line.split()
    return tokens[0]
def readData():
    """
    Read labeled lines from standard input.

    The label of a line is its first whitespace-separated token (the same
    rule getLabel() applies). Blank and whitespace-only lines carry no label;
    they are skipped instead of aborting the run with an IndexError.

    Returns a tuple (data, labelCount):
      data       -- list of the stripped, non-blank lines in input order
      labelCount -- dict mapping each label to the number of lines having it
    """
    data = []
    labelCount = {}
    for line in sys.stdin:
        tokens = line.split()
        # No tokens means no label: such a line cannot be assigned to a
        # bucket later on, so leave it out of both data and the counts.
        if not tokens:
            continue
        label = tokens[0]
        labelCount[label] = labelCount.get(label, 0) + 1
        data.append(line.strip())
    return(data,labelCount)
def randomizeList(listIn):
    """
    Return a new list holding the elements of listIn in random order.

    listIn itself is left unchanged: the shuffle is done on a copy. The
    shared generator of the random module is used without reseeding it, so a
    caller may call random.seed(x) beforehand to get a reproducible order.
    """
    listOut = list(listIn)
    # random.shuffle performs an in-place uniform shuffle of the copy
    random.shuffle(listOut)
    return(listOut)
def divideData(data,labelCount):
    """
    Distribute the lines in data over N buckets, label by label.

    Every label starts in bucket 0. After a line has been stored, the label
    moves on to the next bucket whenever the integer part of
    placed*N/labelCount[label] has grown, where placed is the number of lines
    of that label stored so far.

    data       -- list of lines; getLabel() must work on each of them
    labelCount -- dict mapping each label to its total number of lines
    Returns the list of N buckets, each a list of lines.
    """
    buckets = [[] for _ in range(N)]
    placedPerLabel = dict.fromkeys(labelCount, 0)
    bucketPerLabel = dict.fromkeys(labelCount, 0)
    for item in data:
        label = getLabel(item)
        buckets[bucketPerLabel[label]].append(item)
        placedPerLabel[label] += 1
        placed = placedPerLabel[label]
        total = labelCount[label]
        quotaBefore = int((placed - 1) * N / total)
        quotaAfter = int(placed * N / total)
        # advance by one bucket each time the quota crosses an integer
        if quotaBefore < quotaAfter:
            bucketPerLabel[label] += 1
    return buckets
def printData(data):
    """Print every element of data on a line of its own, in order."""
    for item in data:
        print(item)
def main(argv):
    """
    Read the lines from standard input, shuffle them, divide them over the
    buckets and print each bucket in a fresh random order.

    argv is accepted but not used. The function ends the program itself with
    exit status 0.
    """
    lines, counts = readData()
    shuffled = randomizeList(lines)
    for bucket in divideData(shuffled, counts):
        printData(randomizeList(bucket))
    sys.exit(0)
# Run main() only when the file is executed as a script, and hand its result
# to sys.exit().
if __name__ == "__main__":
    exitStatus = main(sys.argv)
    sys.exit(exitStatus)