import numpy as np
def read_fasta(filename):
sequences = []
current_sequence = ""
with open(filename, 'r') as file:
for line in file:
line = line.strip()
if line.startswith(">"):
if current_sequence:
sequences.append(current_sequence)
current_sequence = ""
else:
current_sequence += line
if current_sequence:
sequences.append(current_sequence)
return sequences
def read_blosum(filename):
blosum = {}
with open(filename, 'r') as file:
amino_acids = None
for line in file:
line = line.strip()
if line.startswith("#"):
continue
if not amino_acids:
amino_acids = line.split()
else:
values = line.split()
amino_acid = values[0]
scores = [int(val) for val in values[1:]]
blosum[amino_acid] = {aa: score for aa, score in zip(amino_acids, scores)}
return blosum
def needleman_wunsch(seq1, seq2, blosum_matrix, gap_penalty):
len_seq1 = len(seq1)
len_seq2 = len(seq2)
scoring_matrix = np.zeros((len_seq1 + 1, len_seq2 + 1), dtype=int)
traceback_matrix = np.zeros((len_seq1 + 1, len_seq2 + 1), dtype=int)
for i in range(1, len_seq1 + 1):
for j in range(1, len_seq2 + 1):
match = scoring_matrix[i-1][j-1] + blosum_matrix[seq1[i-1]][seq2[j-1]]
delete = scoring_matrix[i-1][j] + gap_penalty
insert = scoring_matrix[i][j-1] + gap_penalty
scoring_matrix[i][j] = max(match, delete, insert)
if scoring_matrix[i][j] == match:
traceback_matrix[i][j] = 0
elif scoring_matrix[i][j] == delete:
traceback_matrix[i][j] = 1
else:
traceback_matrix[i][j] = 2
align1 = []
align2 = []
i, j = len_seq1, len_seq2
while i > 0 or j > 0:
if traceback_matrix[i][j] == 0:
align1.append(seq1[i-1])
align2.append(seq2[j-1])
i -= 1
j -= 1
elif traceback_matrix[i][j] == 1:
align1.append(seq1[i-1])
align2.append('-')
i -= 1
else:
align1.append('-')
align2.append(seq2[j-1])
j -= 1
aligned_seq1 = ''.join(align1[::-1])
aligned_seq2 = ''.join(align2[::-1])
return aligned_seq1, aligned_seq2, scoring_matrix[len_seq1][len_seq2]
for i in range(len(sequences)):
for j in range(i + 1, len(sequences)):
seq1, seq2 = sequences[i], sequences[j]
aligned_seq1, aligned_seq2, score = needleman_wunsch(seq1, seq2, blosum_matrix, gap_penalty)
print(f"Alignment between Sequence {i + 1} and Sequence {j + 1} (Score: {score}):")
print("Sequence 1:", aligned_seq1)
print("Sequence 2:", aligned_seq2)
print()