""" functions for create single tasks from a distance matrix and a cast matrix
(0 = not present; 1 = positive train; 2 = negative train; 3 = positive test; 4 = negative test).
            PTr   NTr                   PTe   NTe
          -------------               -------------
    PTr   |  1  |  2  |   PTr   PTr   |  1  |  2  |   PTr
          -------------               -------------
    NTr   |  3  |  4  |   NTr   NTr   |  3  |  4  |   NTr
          -------------               -------------
            PTr   NTr                   PTe   NTe
"""
from numpy import *
from scipy import *
def gettaskID(cm, nfam=0):
    """Select the row IDs belonging to column (task) ``nfam`` of the cast matrix.

    Every row of ``cm`` is inspected (the row count is taken from ``cm``
    itself rather than being fixed), and its index is filed under the code
    found in column ``nfam``.

    Parameters:
        cm: sequence of rows; ``cm[j][nfam]`` is the code of row ``j``.
        nfam: index of the column (task) to read. Defaults to 0.

    Returns:
        A tuple ``(ids1, ids2, ids3, ids4)`` of lists holding, in increasing
        order, the row indices whose code equals 1, 2, 3 and 4 respectively.
        Rows with any other code (e.g. 0) are left out.
    """
    # Read the task column once so each row is indexed a single time.
    column = [row[nfam] for row in cm]
    return tuple(
        [j for j, code in enumerate(column) if code == wanted]
        for wanted in (1, 2, 3, 4)
    )
def get_labels_old(pos_threshold=11, lnum=74):
    """Build a list of ``lnum`` binary labels with the positives first.

    Parameters:
        pos_threshold: last index (inclusive) that receives label 1, so the
            list starts with ``pos_threshold + 1`` ones (capped at ``lnum``).
        lnum: total number of labels to produce. Defaults to 74.

    Returns:
        A list of ``lnum`` ints: 1 for indices ``0..pos_threshold``,
        0 for every later index.
    """
    # The comparison is kept inclusive, as in the historical behaviour.
    return [1 if i <= pos_threshold else 0 for i in range(lnum)]
def get_labels_train(cm, nfam=0):
    """Build the label vector for the training set of task ``nfam``.

    Rows of ``cm`` whose code in column ``nfam`` is 1 are counted as positive
    training samples and rows with code 2 as negative training samples.
    The labels are ordered positives first, then negatives, which is the
    row order used by ``train_builder``.

    Parameters:
        cm: sequence of rows; ``cm[j][nfam]`` is the code of row ``j``.
        nfam: index of the column (task) to read. Defaults to 0.

    Returns:
        A list with exactly one 1 per positive training row followed by
        exactly one 0 per negative training row.
    """
    column = [row[nfam] for row in cm]
    ptrain = sum(1 for code in column if code == 1)
    ntrain = sum(1 for code in column if code == 2)
    # Exactly ptrain ones: the label count must match the number of
    # positive rows, otherwise the first negative sample is mislabelled.
    return [1] * ptrain + [0] * ntrain
def get_labels_test(cm, nfam=0):
    """Build the label vector for the test set of task ``nfam``.

    Rows of ``cm`` whose code in column ``nfam`` is 3 are counted as positive
    test samples and rows with code 4 as negative test samples. The labels
    are ordered positives first, then negatives, which is the column order
    used by ``test_builder``.

    Parameters:
        cm: sequence of rows; ``cm[j][nfam]`` is the code of row ``j``.
        nfam: index of the column (task) to read. Defaults to 0.

    Returns:
        A list with exactly one 1 per positive test row followed by exactly
        one 0 per negative test row.
    """
    column = [row[nfam] for row in cm]
    ptest = sum(1 for code in column if code == 3)
    ntest = sum(1 for code in column if code == 4)
    # Exactly ptest ones: the label count must match the number of
    # positive rows, otherwise the first negative sample is mislabelled.
    return [1] * ptest + [0] * ntest
def train_builder(dm, ids):
    """Build the training matrix from the IDs selected in the cast matrix.

    The result is assembled from four blocks of ``dm`` entries:

        [ pos x pos | pos x neg ]
        [ neg x pos | neg x neg ]

    where ``pos`` is ``ids[0]`` and ``neg`` is ``ids[1]``.

    Parameters:
        dm: 2-D indexable matrix, read as ``dm[i][j]``.
        ids: sequence whose first two items are the positive and negative
            training row indices.

    Returns:
        A 2-D array with ``len(ids[0]) + len(ids[1])`` rows and columns.
    """
    pos, neg = ids[0], ids[1]

    def block(rows, cols):
        # Gather dm[r][c] row-major, then shape it as len(rows) x len(cols).
        flat = [dm[r][c] for r in rows for c in cols]
        return array(flat).reshape((len(rows), len(cols)))

    upper = hstack((block(pos, pos), block(pos, neg)))
    lower = hstack((block(neg, pos), block(neg, neg)))
    return vstack((upper, lower))
def test_builder(dm, ids):
''' Creo il test set a partire dagli IDs nella cm '''
test1 = []
test2 = []
test3 = []
test4 = []
for i in ids[0]:
for j in ids[2]:
test1.extend([dm[i][j]])
atest1 = array(test1)
atest1 = atest1.reshape((len(ids[0]),len(ids[2])))
for i in ids[0]:
for j in ids[3]:
test2.extend([dm[i][j]])
atest2 = array(test2)
atest2 = atest2.reshape((len(ids[0]),len(ids[3])))
for i in ids[1]:
for j in ids[2]:
test3.extend([dm[i][j]])
atest3 = array(test3)
atest3 = atest3.reshape((len(ids[1]),len(ids[2])))
for i in ids[1]:
for j in ids[3]:
test4.extend([dm[i][j]])
atest4 = array(test4)
atest4 = atest4.reshape((len(ids[1]),len(ids[3])))
atest12 = hstack((atest1,atest2))
atest34 = hstack((atest3,atest4))
atest = vstack((atest12,atest34))
return atest
# Read the input files in as arrays. Two loading approaches appear in this
# file: the SciPy reader used here and the plain-Python read_array() helper.
dm_filename = '../data/3PGK_DNA_BLAST_nolabels.txt'
cm_filename = "../data/3PGK_30_nolabels.txt"
# `io` is expected to come from the star imports at the top of the file
# (presumably scipy.io -- verify against the installed SciPy version).
scipydati = io.array_import.read_array(dm_filename)
def read_array(filename):
    """Read a whitespace-separated table of integers from a file.

    Each line of the file becomes one row; the tokens of the line are
    converted with ``int``. A blank line yields an empty row.

    Parameters:
        filename: path of the file to read.

    Returns:
        A list of rows, each row being a list of ints.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if a token cannot be converted to ``int``.
    """
    # The context manager closes the file even when a token fails to parse.
    with open(filename, 'rb') as f:
        # Real lists (not lazy iterators) so rows can be indexed as cm[j][k].
        return [[int(token) for token in line.split()] for line in f]
# Task creation for task = NFAM (the cast-matrix column to work on).
NFAM = 0
cm = read_array(cm_filename)
# Four index lists: positive/negative train, positive/negative test.
IDs = gettaskID(cm, NFAM)
# dm is parsed here, but the builders below are fed the SciPy-loaded
# matrix (scipydati) instead.
dm = read_array(dm_filename)
trainset = train_builder(scipydati, IDs)
testset = test_builder(scipydati, IDs)
trainlabels = get_labels_train(cm, NFAM)
testlabels = get_labels_test(cm, NFAM)
# Optionally write the train set and the test set to CSV files.
import csv
flag = 0  # the export below only runs when this is 1
if flag != 1:
    print("non salvo nulla")
else:
    # Append the label vector as an extra row, then transpose so that the
    # labels end up as the last column of the written table.
    trainsetplus = vstack((trainset, trainlabels))
    testsetplus = vstack((testset, testlabels))
    trainsetplus_tr = transpose(trainsetplus)
    testsetplus_tr = transpose(testsetplus)
    # One output file per set, named after the task number.
    writer = csv.writer(open("trainset_" + str(NFAM) + ".csv", "wb"))
    writer.writerows(trainsetplus_tr)
    writer = csv.writer(open("testset_" + str(NFAM) + ".csv", "wb"))
    writer.writerows(testsetplus_tr)
# Train SVM classifiers on the training set and report, for each kernel,
# how many training samples are misclassified.
# NOTE(review): the loop nesting below was reconstructed from a copy of the
# file without indentation -- confirm it against the original script.
from svm import *
# Problem definition: labels first, then the samples (rows of trainset).
problem = svm_problem(trainlabels,trainset)
# A stand-alone RBF model with C = 10; m_rbf is not used again in this file.
param_rbf_10 = svm_parameter(kernel_type = RBF, C = 10, svm_type = C_SVC)
m_rbf = svm_model(problem, param_rbf_10)
size = len(trainset)
kernels = [LINEAR, POLY, RBF]
kname = ['linear','polynomial','rbf']
# Shared parameter object; only kernel_type is changed between runs.
param = svm_parameter(C = 10,svm_type = C_SVC)
for k in kernels:
    param.kernel_type = k;
    model = svm_model(problem,param)
    errors = 0
    # Predictions are made on the training samples themselves, so the
    # count below is a training error; testset/testlabels are not used here.
    for i in range(size):
        prediction = model.predict(trainset[i])
        # The attribute is only read (not called); the value is unused.
        probability = model.predict_probability
        if (trainlabels[i] != prediction):
            errors = errors + 1
    print "##########################################"
    # kname is indexed by the kernel constant itself.
    # NOTE(review): assumes LINEAR, POLY, RBF equal 0, 1, 2 -- TODO confirm.
    print " kernel %s: error rate = %d / %d" % (kname[param.kernel_type], errors, size)
    print "##########################################"