CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

| Download
Views: 18
Image: ubuntu2204
Kernel: Python 3 (system-wide)
import os import os current_directory = os.getcwd() print("Current Working Directory:", current_directory) # Replace 'blosum62.txt' with the actual filename filename = 'blosum62.txt' file_path = os.path.join(current_directory, filename) # Get the directory of the file directory_path = os.path.dirname(file_path) print("Directory of the file:", directory_path)
Current Working Directory: /home/user Directory of the file: /home/user
def build_scoring_matrix(matrix_file): with open(matrix_file, 'r') as file: lines = file.readlines() header = None scoring_matrix = {} for line in lines: if line.startswith('#'): continue # Skip comment lines if not header: # Parse header line header = line.split() else: # Parse data lines elements = line.split() amino_acid = elements[0] try: scores = [int(score) if score != '*' else 0 for score in elements[1:]] except ValueError as e: print(f"Error processing line: {line.strip()}. Skipping this line.") continue scoring_matrix[amino_acid] = dict(zip(header, scores)) return scoring_matrix def perform_local_alignment(sequence, reference_sequence, scoring_matrix, gap_penalty): m, n = len(sequence), len(reference_sequence) # Check if all amino acids in sequences are present in the scoring matrix for amino_acid in set(sequence + reference_sequence): if amino_acid not in scoring_matrix: print(f"Error: Amino acid '{amino_acid}' not found in the scoring matrix.") return "", "", 0 # Initialize the scoring matrix dp_matrix = [[0] * (n + 1) for _ in range(m + 1)] # Traceback matrix to record the direction of the alignment traceback_matrix = [[''] * (n + 1) for _ in range(m + 1)] # Fill in the scoring matrix max_score = 0 max_i, max_j = 0, 0 for i in range(1, m + 1): for j in range(1, n + 1): match = dp_matrix[i-1][j-1] + scoring_matrix[sequence[i-1]][reference_sequence[j-1]] delete = dp_matrix[i-1][j] + gap_penalty insert = dp_matrix[i][j-1] + gap_penalty dp_matrix[i][j] = max(0, match, delete, insert) # Update traceback matrix if dp_matrix[i][j] == 0: traceback_matrix[i][j] = '' elif dp_matrix[i][j] == match: traceback_matrix[i][j] = 'diagonal' elif dp_matrix[i][j] == delete: traceback_matrix[i][j] = 'up' elif dp_matrix[i][j] == insert: traceback_matrix[i][j] = 'left' # Update maximum score and position if dp_matrix[i][j] > max_score: max_score = dp_matrix[i][j] max_i, max_j = i, j # Traceback to find the alignment aligned_sequence = [] aligned_reference = [] i, j = max_i, max_j while i > 0 and j > 0 and dp_matrix[i][j] > 0: if traceback_matrix[i][j] == 'diagonal': aligned_sequence.append(sequence[i-1]) aligned_reference.append(reference_sequence[j-1]) i -= 1 j -= 1 elif traceback_matrix[i][j] == 'up': aligned_sequence.append(sequence[i-1]) aligned_reference.append('-') i -= 1 elif traceback_matrix[i][j] == 'left': aligned_sequence.append('-') aligned_reference.append(reference_sequence[j-1]) j -= 1 aligned_sequence.reverse() aligned_reference.reverse() return "".join(aligned_sequence), "".join(aligned_reference), max_score def main(): # Paths to file and just sequence = "SPEAR" reference_sequence = "SPHERE" # Specify the path to the Blosum62 matrix file blosum62_file = "/home/user/blosum62.txt" # Set gap penalty gap_penalty = -10 # Build the scoring matrix scoring_matrix = build_scoring_matrix(blosum62_file) # Perform local alignment aligned_seq, aligned_ref, score = perform_local_alignment(sequence, reference_sequence, scoring_matrix, gap_penalty) # Print the result print(f"Sequence: {aligned_seq}") print(f"Reference: {aligned_ref}") print(f"Score: {score}") if __name__ == "__main__": main()
Sequence: SPEAR Reference: SPHER Score: 15
#This is how to define the function of local alignnment read fasta file and output the fasta filr #Defining the function for reading fasta fle def read_fasta(file_path): with open(file_path, 'r') as file: lines = file.readlines() header = lines[0].strip() sequence = ''.join(line.strip() for line in lines[1:]) return header, sequence #Defining function how tpo write fasta file def write_fasta(file_path, header, sequence): with open(file_path, 'w') as file: file.write(f">{header}\n") file.write(sequence + "\n") #Defining the scoring matrix def build_scoring_matrix(matrix_file): with open(matrix_file, 'r') as file: lines = file.readlines() header = None scoring_matrix = {} for line in lines: if line.startswith('#'): continue # Skip comment lines if not header: # Parse header line header = line.split() else: # Parse data lines elements = line.split() amino_acid = elements[0] try: scores = [int(score) if score != '*' else 0 for score in elements[1:]] except ValueError as e: print(f"Error processing line: {line.strip()}. Skipping this line.") continue scoring_matrix[amino_acid] = dict(zip(header, scores)) return scoring_matrix #Defining to perform local alignmnet def perform_local_alignment(sequence, reference_sequence, scoring_matrix, gap_penalty): m, n = len(sequence), len(reference_sequence) # Check if all amino acids in sequences are present in the scoring matrix for amino_acid in set(sequence + reference_sequence): if amino_acid not in scoring_matrix: print(f"Error: Amino acid '{amino_acid}' not found in the scoring matrix.") return "", "", 0 # Initialize the scoring matrix dp_matrix = [[0] * (n + 1) for _ in range(m + 1)] # Traceback matrix to record the direction of the alignment traceback_matrix = [[''] * (n + 1) for _ in range(m + 1)] # Fill in the scoring matrix max_score = 0 max_i, max_j = 0, 0 for i in range(1, m + 1): for j in range(1, n + 1): match = dp_matrix[i-1][j-1] + scoring_matrix[sequence[i-1]][reference_sequence[j-1]] delete = dp_matrix[i-1][j] + gap_penalty insert = dp_matrix[i][j-1] + gap_penalty dp_matrix[i][j] = max(0, match, delete, insert) # Update traceback matrix if dp_matrix[i][j] == 0: traceback_matrix[i][j] = '' elif dp_matrix[i][j] == match: traceback_matrix[i][j] = 'diagonal' elif dp_matrix[i][j] == delete: traceback_matrix[i][j] = 'up' elif dp_matrix[i][j] == insert: traceback_matrix[i][j] = 'left' # Update maximum score and position if dp_matrix[i][j] > max_score: max_score = dp_matrix[i][j] max_i, max_j = i, j # Traceback to find the alignment aligned_sequence = [] aligned_reference = [] i, j = max_i, max_j while i > 0 and j > 0 and dp_matrix[i][j] > 0: if traceback_matrix[i][j] == 'diagonal': aligned_sequence.append(sequence[i-1]) aligned_reference.append(reference_sequence[j-1]) i -= 1 j -= 1 elif traceback_matrix[i][j] == 'up': aligned_sequence.append(sequence[i-1]) aligned_reference.append('-') i -= 1 elif traceback_matrix[i][j] == 'left': aligned_sequence.append('-') aligned_reference.append(reference_sequence[j-1]) j -= 1 aligned_sequence.reverse() aligned_reference.reverse() return "".join(aligned_sequence), "".join(aligned_reference), max_score def main(): #Paths to file scoring_matrix_file = "/home/user/blosum62.txt" sequence_file = "/home/user/.fasta" reference_sequence_file = "/home/user/.fasta" output_file = "/home/user/.fasta" # Set gap penalty gap_penalty = -10 # Build the scoring matrix scoring_matrix = build_scoring_matrix(scoring_matrix_file) # Read sequences from files _, sequence = read_fasta(sequence_file) _, reference_sequence = read_fasta(reference)