import string
import sys
import random

import tools
#import sw ## removed by Reina
def test_gibbs():
    #random.seed(0) ##Reina is fixing the seed for the pset
    
    seqs = ['ACGGAGATTATATATATATATATATAGCGGTAATTAGTAC',
            'ACCGGGATTAGCGGTAATTATATATATATATATAGTATGAC',
            'ACGCGAATTAGCGGTGACGTACTGGTATATATATATATTTAG',
            'GATAAGATTAGCGGTAATTAGTATATATATATATATACGGA',
            'ACGATTATTAGCGGTAGCCTGTATATATATATATATAGGAC',
            'ACGAGTATTATACTTAATTATATATATATATATATGGCCAT']


    return iterate_gibbs(seqs, 6)
    

def test2_gibbs(file):
	random.seed(0)
	mti = open(file,'r')

	# Need to specify file
	c    = mti.read()
    	seqs = c.split('#')
        for i in range(seqs.count('')):
		seqs.remove('')
	# need to remove empty strings
	# this is a printing subroutine
	#for i in range(len(seqs)):
	#	print(seqs[i])

    	#return simple_align(seqs, 6)
    	return iterate_gibbs(seqs, 6)

def iterate_gibbs(seqs, k):

    origseqs = seqs[:]
    all_motifs = []

    # for long sequences	
    while min(map(len, seqs)) >= k:
	test=min(map(len,seqs))
	print test

        # Find the best motif
        motif, indices = simple_align(seqs, k)
        all_motifs.append((motif,indices))

        # remove that motif from the sequences
        for i in range(0,len(seqs)):
            seqs[i] = seqs[i][:indices[i]]+seqs[i][indices[i]+k:]

        print_motif(motif)
        print "Indices are %s"%indices

    return all_motifs

def simple_align(seqs, k):

    # the basic algorithm simply tries to find an optimal
    # alignment of the sequences so that k consecutive columns
    # form an optimal motif

    # initialize alignment indices to random values
    indices = map(lambda len,k=k: random.randint(0,len-k),
                  map(len, seqs))
    
    
    num_iterations = 10
    last_indices = None
    for iter in range(0,num_iterations):

        #
        # Part I - re-compute motif for current positions
        #

        motif = gather_motif(seqs, indices, k)

        #print "Current motif is: "
        #print_motif(motif)

        #
        # Part II - re-align each of the sequences
        #

        #print "Current indices are: [%s]"%tools.display_list(indices)

        for i in range(0,len(seqs)):\


            # first subtract local instance of the motif before realigning
            # otherwise, you're biased to align to the same spot
	    #print "here motif"
	    #print_motif(motif)
            submotif = map(tools.copy_dic,motif)
	    
            submotif = subtract_seq(submotif, seqs[i][indices[i]:][:len(submotif)])
	    #print "here submotif"
	    #print_motif(submotif)
            
            indices[i],score = best_fit(seqs[i], submotif)

        # no index changed, break
        if last_indices == indices and last_motif == motif:
            print "Found local maximum, breaking"
            break

        last_indices = indices
        last_motif   = motif

    return motif, indices
        
def gather_motif(seqs, indices, k):
    # makes a motif from a set of aligned sequences
    # the alignment indices are indices,
    # the sequences are seqs
    # the length of the motif to build is k
    # simply adds the different character instances
    # for each position. 

    motif = create_empty_motif(k)

    for seq, index in map(None, seqs, indices): 

        add_seq(motif, seq[index:][:k])
            
    return motif

def create_empty_motif(k):
    return map(lambda i: {'A': 0, 'C': 0, 'G': 0, 'T': 0}, range(0,k))

def add_seq(motif, seq): 
    # add a sequence to a motif
    
    assert(len(motif) == len(seq))

    for char,c in map(None, motif, seq):
        char[c] = char[c] + 1

    return motif

def subtract_seq(motif, seq):
    # subtract a sequence from a motif
    # assumes the sequence is already part of the motif
    # otherwise we'll get negative numbers
    
    assert(len(motif) == len(seq))

    for char,c in map(None, motif, seq):
        char[c] = char[c] - 1

        assert(char[c] >= 0)

    return motif

def print_motif(motif, f=sys.stdout):
    # a motif is a list of chardictionaries
    f.write(' %2s '%'')
    for c in 'ACGT':
        f.write('| %2s '%c)
    f.write('\n----')
    for c in 'ACGT':
        f.write('+----')
    for i in range(0,len(motif)):
        char = motif[i]
        f.write('\n %2s '%i)
        for c in 'ACGT':
            f.write('| %2s '%char[c])

        f.write('| %2s'%tools.IUPAC_from_chardic(char))
        
    f.write('\n')

def best_fit(seq, motif):
    # find the best fit of a motif in a sequence.
    # returns the index at which the best score was found,
    # and the corresponding score

    assert(len(seq) >= len(motif))

    k = len(motif)

    max_score, max_i = score_motif(seq[:k],motif), 0
    
    for i in range(1, len(seq) - k):

        score = score_motif(seq[i:][:k], motif)

        if score > max_score:

            max_score = score
            max_i = i


    # print "Seq: %s, Motif: %s -> Score: %s for i=%s, %s"%(
    #     seq, string.join(map(tools.IUPAC_from_chardic,motif),''),
    #     max_score,max_i,seq[max_i:][:len(motif)])

    return max_i, max_score

def score_motif(seq, motif):
    # motif and seq are the same length
    # simply multiply out the motif numbers

    assert(len(seq) == len(motif))

    score = 0
    for i in range(0,len(motif)):

        score = score + motif[i][seq[i]]

    return score