forked from damlaren/jade
-
Notifications
You must be signed in to change notification settings - Fork 3
/
randomsplit.py
27 lines (21 loc) · 861 Bytes
/
randomsplit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Randomly split each per-label data file into train/dev/test files.
#
# For every label in `labels`, the file "pic5_cleaned/all_<label>.txt" is read,
# its lines are shuffled, and the shuffled lines are written out in three
# consecutive slices: the first 70% to the training directory, the next 15% to
# the dev directory and the remainder to the test directory.
#
# NOTE(review): every label file is shuffled independently and without a fixed
# seed. If row i of each label file describes the same example, the resulting
# splits will not line up across labels -- confirm this is intended.
import os
import random

labels = ["ope", "con", "ext", "agr", "neu", "SWL", "gender", "age"]
filenames = ["pic5_cleaned/all_" + label + ".txt" for label in labels]
numfiles = len(filenames)
TRAINING_DIR = "pic5_cleaned/train/"
DEV_DIR = "pic5_cleaned/dev/"
TEST_DIR = "pic5_cleaned/test/"

# Cumulative slice boundaries, as fractions of the number of lines.
TRAIN_END = 0.7
DEV_END = 0.85


def partition_lines(lines, train_end=TRAIN_END, dev_end=DEV_END):
    """Cut *lines* into consecutive (train, dev, test) slices.

    The boundaries are ``int(train_end * len(lines))`` and
    ``int(dev_end * len(lines))``; truncation means any leftover lines end up
    in the last (test) slice. The input sequence is not modified.

    Args:
        lines: Sequence of lines, already in the desired order.
        train_end: Fraction of the lines at which the train slice stops.
        dev_end: Fraction of the lines at which the dev slice stops.

    Returns:
        A 3-tuple ``(train, dev, test)`` of slices of *lines*.
    """
    total = len(lines)
    train_stop = int(train_end * total)
    dev_stop = int(dev_end * total)
    return lines[:train_stop], lines[train_stop:dev_stop], lines[dev_stop:]


def split_file(source_path, label, training_dir=TRAINING_DIR,
               dev_dir=DEV_DIR, test_dir=TEST_DIR):
    """Shuffle the lines of *source_path* and write the three split files.

    Writes ``train_<label>.txt``, ``dev_<label>.txt`` and ``test_<label>.txt``
    into *training_dir*, *dev_dir* and *test_dir* respectively, overwriting
    existing files. Missing output directories are created.

    Args:
        source_path: Path of the text file to split.
        label: Label name used to build the output file names.
        training_dir: Directory receiving the training file.
        dev_dir: Directory receiving the dev file.
        test_dir: Directory receiving the test file.

    Raises:
        IOError/OSError: If the source cannot be read or an output cannot be
            created.
    """
    # Read everything and close the input right away instead of holding a
    # handle per label open for the lifetime of the script.
    with open(source_path, "r") as source:
        lines = source.readlines()
    random.shuffle(lines)

    targets = ((training_dir, "train_"), (dev_dir, "dev_"), (test_dir, "test_"))
    for (directory, prefix), chunk in zip(targets, partition_lines(lines)):
        if not os.path.isdir(directory):
            os.makedirs(directory)
        # `with` guarantees the output is flushed and closed before moving on.
        with open(os.path.join(directory, prefix + label + ".txt"), "w") as out:
            out.writelines(chunk)


def main():
    """Split the data file of every label in ``labels``."""
    for label, filename in zip(labels, filenames):
        split_file(filename, label)


if __name__ == "__main__":
    main()