CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

| Download

Local alignment code

Views: 17
Image: ubuntu2204
Kernel: Python 3 (system-wide)
import os import os current_directory = os.getcwd() print("Current Working Directory:", current_directory) # Replace 'blosum62.txt' with the actual filename filename = 'blosum62.txt' file_path = os.path.join(current_directory, filename) # Get the directory of the file directory_path = os.path.dirname(file_path) print("Directory of the file:", directory_path)
Current Working Directory: /home/user Directory of the file: /home/user
def build_scoring_matrix(matrix_file): with open(matrix_file, 'r') as file: lines = file.readlines() header = None scoring_matrix = {} for line in lines: if line.startswith('#'): continue # Skip comment lines if not header: # Parse header line header = line.split() else: # Parse data lines elements = line.split() amino_acid = elements[0] try: scores = [int(score) if score != '*' else 0 for score in elements[1:]] except ValueError as e: print(f"Error processing line: {line.strip()}. Skipping this line.") continue scoring_matrix[amino_acid] = dict(zip(header, scores)) return scoring_matrix def perform_local_alignment(sequence, reference_sequence, scoring_matrix, gap_penalty): m, n = len(sequence), len(reference_sequence) # Check if all amino acids in sequences are present in the scoring matrix for amino_acid in set(sequence + reference_sequence): if amino_acid not in scoring_matrix: print(f"Error: Amino acid '{amino_acid}' not found in the scoring matrix.") return "", "", 0 # Initialize the scoring matrix dp_matrix = [[0] * (n + 1) for _ in range(m + 1)] # Traceback matrix to record the direction of the alignment traceback_matrix = [[''] * (n + 1) for _ in range(m + 1)] # Fill in the scoring matrix max_score = 0 max_i, max_j = 0, 0 for i in range(1, m + 1): for j in range(1, n + 1): match = dp_matrix[i-1][j-1] + scoring_matrix[sequence[i-1]][reference_sequence[j-1]] delete = dp_matrix[i-1][j] + gap_penalty insert = dp_matrix[i][j-1] + gap_penalty dp_matrix[i][j] = max(0, match, delete, insert) # Update traceback matrix if dp_matrix[i][j] == 0: traceback_matrix[i][j] = '' elif dp_matrix[i][j] == match: traceback_matrix[i][j] = 'diagonal' elif dp_matrix[i][j] == delete: traceback_matrix[i][j] = 'up' elif dp_matrix[i][j] == insert: traceback_matrix[i][j] = 'left' # Update maximum score and position if dp_matrix[i][j] > max_score: max_score = dp_matrix[i][j] max_i, max_j = i, j # Traceback to find the alignment aligned_sequence = [] aligned_reference = [] i, j = max_i, max_j while i > 0 and j > 0 and dp_matrix[i][j] > 0: if traceback_matrix[i][j] == 'diagonal': aligned_sequence.append(sequence[i-1]) aligned_reference.append(reference_sequence[j-1]) i -= 1 j -= 1 elif traceback_matrix[i][j] == 'up': aligned_sequence.append(sequence[i-1]) aligned_reference.append('-') i -= 1 elif traceback_matrix[i][j] == 'left': aligned_sequence.append('-') aligned_reference.append(reference_sequence[j-1]) j -= 1 aligned_sequence.reverse() aligned_reference.reverse() return "".join(aligned_sequence), "".join(aligned_reference), max_score def main(): # Replace these with your actual sequences sequence = "SPEAR" reference_sequence = "SPHERE" # Specify the path to the Blosum62 matrix file blosum62_file = "/home/users/blosum62.txt" # Set gap penalty gap_penalty = -10 # Build the scoring matrix scoring_matrix = build_scoring_matrix(blosum62_file) # Perform local alignment aligned_seq, aligned_ref, score = perform_local_alignment(sequence, reference_sequence, scoring_matrix, gap_penalty) # Print the result print(f"Sequence: {aligned_seq}") print(f"Reference: {aligned_ref}") print(f"Score: {score}") if __name__ == "__main__": main()
--------------------------------------------------------------------------- FileNotFoundError Traceback (most recent call last) /tmp/ipykernel_378/2177259126.py in <cell line: 127>() 126 127 if __name__ == "__main__": --> 128 main() /tmp/ipykernel_378/2177259126.py in main() 114 115 # Build the scoring matrix --> 116 scoring_matrix = build_scoring_matrix(blosum62_file) 117 118 # Perform local alignment /tmp/ipykernel_378/2177259126.py in build_scoring_matrix(matrix_file) 1 def build_scoring_matrix(matrix_file): ----> 2 with open(matrix_file, 'r') as file: 3 lines = file.readlines() 4 5 header = None FileNotFoundError: [Errno 2] No such file or directory: '/home/users/blosum62.txt'