'''This Python script splits the .arff file produced by Marsyas into .csv files, which are easier to read into MATLAB for further processing.
Created on 2012-7-13
@author: mainred
'''
import xlrd, xlwt
import time
import re
import os
# Interactive script: ask for the base name of an ".arff" file in the current
# working directory, read the first column of its first sheet with xlrd, and
# write one xlwt workbook per "% filename" section found in it.

# Hard-coded 0-based sheet rows. Row 68 supplies the first output name, row 69
# is never read, and processing starts at row 70.
# NOTE(review): presumably row 68 is the first "% filename" comment and row 69
# the comment line that follows it; these offsets depend on how many header
# lines the input has -- TODO confirm against the Marsyas configuration used.
FIRST_NAME_ROW = 68
FIRST_DATA_ROW = 70


def _clip_name(comment_line):
    """Return the output base name encoded in a '% filename <path>' line.

    Keeps the text after the last backslash and cuts it at the first dot.
    """
    return re.search(r'[^\\]*$', comment_line).group(0).split('.')[0]


# Works on Python 2 (raw_input) and Python 3 (input) alike.
try:
    read_line = raw_input
except NameError:
    read_line = input

file_name = read_line('please input name of the arff file:\n')
file_abs_path = os.path.join(os.getcwd(), file_name + '.arff')
if not os.path.exists(file_abs_path):
    # The message names the extension that was actually looked for.
    print("There is no %s.arff in such directory" % file_name)
    # Pause before exiting, presumably so the message stays readable when the
    # console window closes on exit.
    time.sleep(2)
    # Non-zero status: the script could not do its job.
    raise SystemExit(1)

# NOTE(review): xlrd parses Excel workbooks, so the ".arff" input has to be in
# a format xlrd accepts -- confirm how the input file is produced.
data = xlrd.open_workbook(file_abs_path)
table = data.sheets()[0]
nrows = table.nrows

book = xlwt.Workbook()
name = _clip_name(table.row_values(FIRST_NAME_ROW)[0])
sheet = book.add_sheet(name)

redundancy = ''    # trailing field of the section's first data row
j = 0              # next output row in the current sheet
skip_next = False  # True right after a '% filename' line

for i in range(FIRST_DATA_ROW, nrows):
    if skip_next:
        # The row directly after a '% filename' line is dropped unread.
        skip_next = False
        continue

    row = table.row_values(i)[0]

    if row.startswith('% filename'):
        # A new section begins: flush the current workbook, start a fresh one.
        # NOTE(review): xlwt saves Excel workbook data, so the ".csv" name does
        # not describe the content -- confirm what the consumer expects.
        book.save(name + '.csv')
        book = xlwt.Workbook()
        name = _clip_name(row)
        print(name)
        sheet = book.add_sheet(name)
        skip_next = True
        j = 0
        continue

    if row == '':
        continue

    if j == 0:
        # The last comma-separated field of the first data row is treated as
        # redundant for every row of this section.
        match = re.search(r'[^,]+?$', row)
        # A row ending in ',' has no trailing field; strip nothing then.
        redundancy = match.group(0) if match else ''
        print(redundancy)

    # Remove the field only where it sits at the end of the row; replacing it
    # everywhere would also eat matching text inside the other fields.
    if redundancy and row.endswith(redundancy):
        row = row[:-len(redundancy)]
    sheet.write(j, 0, row)
    j = j + 1

# Flush the last section.
book.save(name + '.csv')