Hi all, I'm a newbie in python and I'm running this cpu intensive loops for 1. create a sample on n numbers with fixed proportions (up to seven) 2. extract from each permutations, a sample of size m
From support said me this code has to be optimized, but I dont really know how....
Any suggestions would be really appreciated
Thanks in advance.
my code here:
import itertools;
v=[0,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100];
testmode=0 # testmode =1 significa VERO
target=100 #somma da raggiungere - test
if testmode==1:
n_rep=3
tot= 1000 #numero casi da generare - test
size=30 #dimensione campione estratto - test
else:
n_rep=7
tot= 100000 # numero casi da generare
size=3000 #dimensione campione estratto
combos = [a for a in itertools.product(v,repeat=n_rep) if sum(a)==target]
#combos = itertools.product(v,repeat=3)
usable_combos = []
lista = []
results= []
#log = open("filename.txt","w")
#import scipy
from scipy.stats import mannwhitneyu
import random
import collections
tuple_tot = 0
acc=0
high = 0
low = 0
med = 0
for e in combos:
print e #indica la tupla
#log = open("filename.txt","w")
#log = open("filename.txt","a")
#print >> log, "1m\n"*perc1
#print "--->tupla:", e[0],e[1],e[2] # stampa della tupla
tuple_tot += 1 # per sapere numero totale tuple
if testmode==1:
a1=[1]*(e[0]*tot/100)+[2]*(e[1]*tot/100)+[3]*(e[2]*tot/100) #3 repl
else:
a1=[1]*(e[0]*tot/100)+[2]*(e[1]*tot/100)+[3]*(e[2]*tot/100)+[4]*(e[3]*tot/100)+[5]*(e[4]*tot/100)+[6]*(e[5]*tot/100)+[7]*(e[6]*tot/100) # 7 rep
#print a1
#print "lung=",len(a1)
#print "sum=",sum(a1)
#print "mean=",numpy.mean(a1)
random.shuffle(a1)
a2=random.sample(a1, size) #estrazione campione
#print a1
#print a2
#print "lung_sample=",len(a2)
#print "sum_sample=",sum(a2)
#print "mean_sample=",numpy.mean(a2)
counter_orig=collections.Counter(a1)
counter_sample=collections.Counter(a2)
#print "pop:",counter_orig[1],counter_orig[2],counter_orig[3]
#print "sample",counter_sample[1],counter_sample[2],counter_sample[3]
#print "pop%:",float(float(counter_orig[1])/tot*100),float(float(counter_orig[2])/tot*100),float(float(counter_orig[3])/tot*100)
#print "sample%",float(float(counter_sample[1])/size*100),float(float(counter_sample[2])/size*100),float(float(counter_sample[3])/size*100)
if testmode==1:
p1 = round(float(float(counter_orig[1])/tot*100),2)
p2 = round(float(float(counter_orig[2])/tot*100),2)
p3 = round(float(float(counter_orig[3])/tot*100),2)
else:
p1 = round(float(float(counter_orig[1])/tot*100),2)
p2 = round(float(float(counter_orig[2])/tot*100),2)
p3 = round(float(float(counter_orig[3])/tot*100),2)
p4 = round(float(float(counter_orig[4])/tot*100),2)
p5 = round(float(float(counter_orig[5])/tot*100),2)
p6 = round(float(float(counter_orig[6])/tot*100),2)
p7 = round(float(float(counter_orig[7])/tot*100),2)
if testmode==1:
s1 = round(float(float(counter_sample[1])/size*100),2)
s2 = round(float(float(counter_sample[2])/size*100),2)
s3 = round(float(float(counter_sample[3])/size*100),2)
else:
s1 = round(float(float(counter_sample[1])/size*100),2)
s2 = round(float(float(counter_sample[2])/size*100),2)
s3 = round(float(float(counter_sample[3])/size*100),2)
s4 = round(float(float(counter_sample[4])/size*100),2)
s5 = round(float(float(counter_sample[5])/size*100),2)
s6 = round(float(float(counter_sample[6])/size*100),2)
s7 = round(float(float(counter_sample[7])/size*100),2)
#TEST Mann-Whitney ######################
#p_value = 1
try:
u, p_value = mannwhitneyu(a1, a2)
#u, p_value = mannwhitneyu(a1, a2)
#print "two-sample wilcoxon-test", p_value
if p_value > 0.40:
acc += 1
test = "HIGH precision"
high += 1
else:
if p_value < 0.30:
test = "Low precision"
low += 1
else:
test = "Medium precision"
med += 1
###################STANMPA RISULTATI
#results = p1,p2,p3,tot,s1,s2,s3,len(a2),round(float(p_value),2),test
#results = p1,p2,p3,p4,p5,p6,p7,tot,s1,s2,s3,s4,s5,s6,s7,len(a2),round(float(p_value),2),test
# print results
#####################################
#print acc
#print test
#c1 = counter[0]/size*target
#c2 = counter[-1]/size*target
# print c1,c2
#final = e[0],e[1],e[2],tot,round(numpy.mean(a1),2),len(a2),round(numpy.mean(a2),2)
#final = e[0],e[1],e[2],e[3],e[4],e[5],e[6],tot,round(numpy.mean(a1),2),len(a2),round(numpy.mean(a2),2)
#log = open("filename.txt","a")
#print >> log,final
#log.close()
#print "mean=",sum(a1)/tot*1.0
#lista.append(perc1)
#print >> log, ("1m\n"*(e[0]*tot/100))[:-1]
#print >> log, ("2m\n"*(e[1]*tot/100))[:-1]
#print >> log, ("3m\n"*(e[2]*tot/100))[:-1]
#print >> log, ("4m\n"*(e[3]*tot/100))[:-1]
# print >> log, ("5m\n"*(e[4]*tot/100))[:-1]
#print >> log, ("6m\n"*(e[5]*tot/100))[:-1]
#print >> log, ("7m\n"*(e[6]*tot/100))[:-1]
#log.close()
# perc1 = ("1m\n"*(e[1]*tot/100))[:-1]
#lista.append(perc1)
#print lista
#print perc1
#usable_combos.append(e)
#print usable_combos
#log = open("out_1_test.txt","a")
#print >> log, usable_combos
#log.close()
except:
pass
#log = open("SAMPLERESULTS.txt","w")
log = open("SAMPLERESULTS.txt","a")
final = "<<<Tuple cha passano il test in high>>>", round(float(float(acc)/tuple_tot*100),2),"%",'pop=', tot, "; campione=", size , "High:",high," Med:",med," Low:",low
print final
print >> log,final
log.close()