Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.
| Download
Project: Frany sanghavi - BINF 5004
Views: 7Image: ubuntu2204
{ "cells": [ { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "ename": "NameError", "evalue": "name 'sequences' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m/tmp/ipykernel_375/2648947585.py\u001b[0m in \u001b[0;36m<cell line: 93>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msequences\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 94\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msequences\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0mseq1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mseq2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msequences\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msequences\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mNameError\u001b[0m: name 'sequences' is not defined" ] } ], "source": [ "\n", "\n", "import numpy as np\n", "\n", "# Defining function for reading fasta file\n", "def read_fasta(filename):\n", " sequences = []\n", " current_sequence = \"\"\n", " with open(filename, 'r') as file:\n", " for line in file:\n", " line = line.strip()\n", " if line.startswith(\">\"):\n", " if current_sequence:\n", " sequences.append(current_sequence)\n", " current_sequence = \"\"\n", " else:\n", " current_sequence += line\n", " if current_sequence:\n", " sequences.append(current_sequence)\n", " return sequences # This line should be indented correctly inside the function\n", "\n", "\n", "\n", "#the above function can be used for reading fasta file\n", "# Defining function for reading blosum matrix \n", "def read_blosum(filename):\n", " blosum = {}\n", " with open(filename, 'r') as file:\n", " amino_acids = None\n", " for line in file:\n", " line = line.strip()\n", " if line.startswith(\"#\"):\n", " # Skip comments (lines starting with '#')\n", " continue\n", " if not amino_acids:\n", " # The first line after comments is expected to be the header line.\n", " amino_acids = line.split()\n", " else:\n", " values = line.split()\n", " amino_acid = values[0]\n", " scores = [int(val) for val in values[1:]]\n", " blosum[amino_acid] = {aa: score for aa, score in zip(amino_acids, scores)}\n", " return blosum\n", "\n", "def needleman_wunsch(seq1, seq2, blosum_matrix, gap_penalty):\n", " len_seq1 = len(seq1)\n", " len_seq2 = len(seq2)\n", " \n", " # Initialize the scoring matrix\n", " scoring_matrix = np.zeros((len_seq1 + 1, len_seq2 + 1), dtype=int)\n", " \n", " # Initialize the traceback matrix\n", " traceback_matrix = np.zeros((len_seq1 + 1, len_seq2 + 1), dtype=int)\n", "\n", " # Fill in the scoring matrix\n", " for i in range(1, len_seq1 + 1):\n", " for j in range(1, len_seq2 + 1):\n", " match = scoring_matrix[i-1][j-1] + blosum_matrix[seq1[i-1]][seq2[j-1]]\n", " delete = scoring_matrix[i-1][j] + gap_penalty\n", " insert = scoring_matrix[i][j-1] + gap_penalty\n", " scoring_matrix[i][j] = max(match, delete, insert)\n", " \n", " if scoring_matrix[i][j] == match:\n", " traceback_matrix[i][j] = 0 # Match\n", " elif scoring_matrix[i][j] == delete:\n", " traceback_matrix[i][j] = 1 # Delete\n", " else:\n", " traceback_matrix[i][j] = 2 # Insert\n", "\n", " # Traceback to find the aligned sequences\n", " align1 = []\n", " align2 = []\n", " i, j = len_seq1, len_seq2\n", " while i > 0 or j > 0:\n", " if traceback_matrix[i][j] == 0: # Match\n", " align1.append(seq1[i-1])\n", " align2.append(seq2[j-1])\n", " i -= 1\n", " j -= 1\n", " elif traceback_matrix[i][j] == 1: # Delete\n", " align1.append(seq1[i-1])\n", " align2.append('-')\n", " i -= 1\n", " else: # Insert\n", " align1.append('-')\n", " align2.append(seq2[j-1])\n", " j -= 1\n", "\n", " aligned_seq1 = ''.join(align1[::-1])\n", " aligned_seq2 = ''.join(align2[::-1])\n", " \n", " return aligned_seq1, aligned_seq2, scoring_matrix[len_seq1][len_seq2]\n", "\n", "\n", "for i in range(len(sequences)):\n", " for j in range(i + 1, len(sequences)):\n", " seq1, seq2 = sequences[i], sequences[j]\n", " aligned_seq1, aligned_seq2, score = needleman_wunsch(seq1, seq2, blosum_matrix, gap_penalty)\n", " print(f\"Alignment between Sequence {i + 1} and Sequence {j + 1} (Score: {score}):\")\n", " print(\"Sequence 1:\", aligned_seq1)\n", " print(\"Sequence 2:\", aligned_seq2)\n", " print()\n", "\n" ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ ] }, { "cell_type": "code", "execution_count": 0, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ ] } ], "metadata": { "kernelspec": { "argv": [ "/usr/bin/python3", "-m", "ipykernel", "--HistoryManager.enabled=False", "--matplotlib=inline", "-c", "%config InlineBackend.figure_formats = set(['retina'])\nimport matplotlib; matplotlib.rcParams['figure.figsize'] = (12, 7)", "-f", "{connection_file}" ], "display_name": "Python 3 (system-wide)", "env": { }, "language": "python", "metadata": { "cocalc": { "description": "Python 3 programming language", "priority": 100, "url": "https://www.python.org/" } }, "name": "python3", "resource_dir": "/ext/jupyter/kernels/python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 4 }