from Bio import SeqIO
from collections import Counter
def calculate_complementary_ratios(sequence: str) -> dict:
    """Return, for each base, the fraction of ``sequence`` made of its complement.

    The result maps each of 'A', 'T', 'C', 'G' to the proportion of
    ``sequence`` occupied by that base's Watson-Crick partner; for example
    the value under 'A' is ``count('T') / len(sequence)``.

    The denominator is the full sequence length, so any character other than
    an upper-case A/T/C/G (e.g. 'N' or lower-case letters) counts towards the
    length but not towards any base.

    Args:
        sequence: Nucleotide sequence as a string.

    Returns:
        A dict keyed 'A', 'T', 'C', 'G' (in that order) with float ratios.
        An empty ``sequence`` yields 0.0 for every base.
    """
    complementary_base = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    total = len(sequence)
    if total == 0:
        # An empty read has no composition; report zeros instead of dividing
        # by zero.
        return {base: 0.0 for base in complementary_base}
    counts = Counter(sequence)
    # Counter returns 0 for bases that never occur, so no KeyError is possible.
    return {
        base: counts[partner] / total
        for base, partner in complementary_base.items()
    }
def fastq_summary(input_fastq, output_summary):
    """Write a per-read text summary of a FASTQ file.

    For every record parsed from ``input_fastq`` a block of five lines
    followed by a blank line is written to ``output_summary``:

    * ``Sequence ID``: the record id.
    * ``Total Counts``: a ``Counter`` of every character in the read.
    * ``Frequencies``: the raw counts of A, T, C and G only (counts, not
      proportions, despite the label).
    * ``Complementary Ratios``: the result of
      ``calculate_complementary_ratios`` for the read.
    * ``Mean Quality Score``: the arithmetic mean of the read's
      ``phred_quality`` values, to two decimals.

    Zero-length reads are reported with all-zero complementary ratios and a
    mean quality of 0.00 rather than aborting the run.

    Args:
        input_fastq: Path of the FASTQ file to read.
        output_summary: Path of the text report to create; opened in "w" mode.
    """
    with open(input_fastq, "r") as handle, open(output_summary, "w") as output:
        for record in SeqIO.parse(handle, "fastq"):
            sequence = str(record.seq)
            quality_scores = record.letter_annotations["phred_quality"]
            counts = Counter(sequence)
            frequencies = {base: counts[base] for base in 'ATCG'}

            if sequence:
                complementary_ratios = calculate_complementary_ratios(sequence)
                mean_quality_score = sum(quality_scores) / len(quality_scores)
            else:
                # An empty read has no bases and no quality values; both
                # computations would otherwise divide by zero.
                complementary_ratios = {base: 0.0 for base in 'ATCG'}
                mean_quality_score = 0.0

            # Emit the whole record in one write to cut per-call overhead.
            output.write(
                f"Sequence ID: {record.id}\n"
                f"Total Counts: {counts}\n"
                f"Frequencies: {frequencies}\n"
                f"Complementary Ratios: {complementary_ratios}\n"
                f"Mean Quality Score: {mean_quality_score:.2f}\n\n"
            )
# Script entry point: summarise one fixed FASTQ file into a text report
# written to the current working directory.
if __name__ == "__main__":
    fastq_summary(
        input_fastq="8_Swamp_S1B_MATK_2019_minq7.fastq",
        output_summary="fastq_summary.txt",
    )