def build_scoring_matrix(matrix_file):
with open(matrix_file, 'r') as file:
lines = file.readlines()
header = None
scoring_matrix = {}
for line in lines:
if line.startswith('#'):
continue
if not header:
header = line.split()
else:
elements = line.split()
amino_acid = elements[0]
try:
scores = [int(score) if score != '*' else 0 for score in elements[1:]]
except ValueError as e:
print(f"Error processing line: {line.strip()}. Skipping this line.")
continue
scoring_matrix[amino_acid] = dict(zip(header, scores))
return scoring_matrix
def perform_local_alignment(sequence, reference_sequence, scoring_matrix, gap_penalty):
m, n = len(sequence), len(reference_sequence)
for amino_acid in set(sequence + reference_sequence):
if amino_acid not in scoring_matrix:
print(f"Error: Amino acid '{amino_acid}' not found in the scoring matrix.")
return "", "", 0
dp_matrix = [[0] * (n + 1) for _ in range(m + 1)]
traceback_matrix = [[''] * (n + 1) for _ in range(m + 1)]
max_score = 0
max_i, max_j = 0, 0
for i in range(1, m + 1):
for j in range(1, n + 1):
match = dp_matrix[i-1][j-1] + scoring_matrix[sequence[i-1]][reference_sequence[j-1]]
delete = dp_matrix[i-1][j] + gap_penalty
insert = dp_matrix[i][j-1] + gap_penalty
dp_matrix[i][j] = max(0, match, delete, insert)
if dp_matrix[i][j] == 0:
traceback_matrix[i][j] = ''
elif dp_matrix[i][j] == match:
traceback_matrix[i][j] = 'diagonal'
elif dp_matrix[i][j] == delete:
traceback_matrix[i][j] = 'up'
elif dp_matrix[i][j] == insert:
traceback_matrix[i][j] = 'left'
if dp_matrix[i][j] > max_score:
max_score = dp_matrix[i][j]
max_i, max_j = i, j
aligned_sequence = []
aligned_reference = []
i, j = max_i, max_j
while i > 0 and j > 0 and dp_matrix[i][j] > 0:
if traceback_matrix[i][j] == 'diagonal':
aligned_sequence.append(sequence[i-1])
aligned_reference.append(reference_sequence[j-1])
i -= 1
j -= 1
elif traceback_matrix[i][j] == 'up':
aligned_sequence.append(sequence[i-1])
aligned_reference.append('-')
i -= 1
elif traceback_matrix[i][j] == 'left':
aligned_sequence.append('-')
aligned_reference.append(reference_sequence[j-1])
j -= 1
aligned_sequence.reverse()
aligned_reference.reverse()
return "".join(aligned_sequence), "".join(aligned_reference), max_score
def main():
sequence = "SPEAR"
reference_sequence = "SPHERE"
blosum62_file = "/home/users/blosum62.txt"
gap_penalty = -10
scoring_matrix = build_scoring_matrix(blosum62_file)
aligned_seq, aligned_ref, score = perform_local_alignment(sequence, reference_sequence, scoring_matrix, gap_penalty)
print(f"Sequence: {aligned_seq}")
print(f"Reference: {aligned_ref}")
print(f"Score: {score}")
if __name__ == "__main__":
main()