Perl Master Program - Printable Version

Perl Master Program - Printable Version

+- Biotechnology Forums (https://www.biotechnologyforums.com)
+-- Forum: Biotechnology Discussion (https://www.biotechnologyforums.com/forum-1.html)
+--- Forum: Bioinformatics (https://www.biotechnologyforums.com/forum-5.html)
+--- Thread: Perl Master Program (/thread-7908.html)

Perl Master Program - binu - 04-24-2017

#!/usr/bin/perl -w
# Perl program to find the reverse complement, perform transcription, convert the DNA to binary form,
# count the nucleotides, mutate the DNA, and find the percent identity.
use strict;
# Working variables shared by the steps below (filename, current line, the
# sequence and its derived forms, loop/position helpers).
my ($file,$line,$dna,$revcom,$rna,$binary,$pos,$newbase,$i,$mutant,$position);
# Counters and percentages all start at zero. Assigning a single 0 to the
# list would initialise only the first variable and leave the rest undefined,
# so the 0 is repeated once per variable.
# NOTE: $a is also the name Perl's sort() uses for its comparison variable;
# it is safe here only because this script never calls sort.
my ($a,$g,$t,$c,$basecount,$nonbase,$percent_ag,$percent_identity,$count) = (0) x 9;
# The four bases a mutation may introduce.
my @nucs = ('A', 'C', 'G', 'T');

# Ask the user for the fasta filename
print "Enter the fasta filename: ";
$file = <STDIN>;
# <STDIN> returns undef at end of input (e.g. an empty pipe); stop with a
# clear message instead of trying to open an undefined name
die "No fasta filename given \n\n" unless defined $file;

# Remove the newline from the fasta filename
chomp $file;

# Open the file read-only or exit.
# The three-argument form treats $file purely as a filename, so characters
# such as '>' or '|' typed by the user cannot change the open mode or run a
# command. $! reports why the open failed.
open (my $fasta, '<', $file) or die "Cannot open file : $file ($!) \n\n";

# Start from an empty string so $dna is defined even if no sequence line is found
$dna = '';

# Each line in the file is processed exactly once by the while loop;
# 'next' skips straight to the following line
while ($line = <$fasta>)
{
    # Discard blank lines
    next if $line =~ /^\s*$/;
    # Discard comment lines
    next if $line =~ /^#/;
    # Discard fasta header lines
    next if $line =~ /^>/;
    # Anything else is sequence data: keep concatenating to $dna
    $dna .= $line;
}
close $fasta;

# Remove whitespaces (including the newline kept from every line)
$dna =~ s/\s//g;

# Without any sequence the later percentage calculations would divide by
# zero, so stop here with an explicit message
die "No sequence data found in file : $file \n\n" if $dna eq '';

# Print the extracted nucleotide sequence
print "\nHere is the original DNA : $dna \n\n";

# Reverse complement: swap every base for its pairing partner (A<->T, C<->G,
# upper or lower case), then read the strand backwards. Characters outside
# ACGT/acgt are left unchanged by the transliteration.
($revcom = $dna) =~ tr/ACGTacgt/TGCAtgca/ ;
# Assigning to a scalar puts reverse in scalar context, so it reverses the string
$revcom = reverse $revcom;
print "The reverse complement is : ", $revcom, " \n\n";

# To perform transcription
$rna = $dna;
# Transliterate T->U and t->u. Handling both cases keeps this step in line
# with the reverse-complement, binary and counting steps, which all accept
# lower-case bases; a plain s/T/U/g would leave every 't' untranscribed.
$rna =~ tr/Tt/Uu/;
print "The transcribed RNA is : $rna \n\n";

# Encode the sequence as a purine/pyrimidine bit string:
# purines (A, G) become 1 and pyrimidines (T, C) become 0, in either case.
# Any other character is copied through unchanged.
($binary = $dna) =~ tr/ATGCatgc/10101010/ ;
print "The converted binary format is : ", $binary, " \n\n" ;

# To count the nucleotides
# tr with an empty replacement list changes nothing and returns the number
# of characters that matched, so it doubles as a fast counter
$a = ($dna =~ tr/Aa// ) ;
$t = ($dna =~ tr/Tt// ) ;
$g = ($dna =~ tr/Gg// ) ;
$c = ($dna =~ tr/Cc// ) ;
$basecount = ($dna =~ tr/ATGCatgc// ) ;
# Everything that is not A/T/G/C (e.g. N or stray symbols) is reported as an error
$nonbase = length ($dna) - $basecount ;
# Guard the division: a sequence without a single A/T/G/C has no meaningful
# purine percentage, so report 0 instead of dying with "division by zero"
$percent_ag = $basecount ? (($a+$g)/($basecount)*100) : 0;
print "Nucleotide Count : \n" ;
print " A=$a \n T=$t \n G=$g \n C=$c \n Total Bases=$basecount \n Errors=$nonbase \n Purine Percentage=$percent_ag \n\n";

# Mutate the DNA 100 times
$mutant = $dna;
# An empty sequence has no position to mutate. Without this check substr
# would insert a base at offset 0 and the "mutant" would grow to one base.
my $mutation_count = length($mutant) ? 100 : 0;
for my $round (1 .. $mutation_count)
{
    # $pos stores an integer random number between 0 and [(length of $mutant)-1]
    $pos = int rand length $mutant;
    # Base currently at that position, upper-cased so that replacing 'a'
    # with 'A' is not mistaken for a mutation. It cannot change while we
    # retry, so it is read once outside the do/until loop.
    my $oldbase = uc(substr($mutant,$pos,1));
    do
    {
        # Select one of the four nucleotides at random.
        # rand @nucs returns a fraction in [0,4); as an array index it is
        # truncated to 0, 1, 2 or 3.
        $newbase = $nucs[rand @nucs];
    }
    # Make sure it's different from the nucleotide we're mutating
    until ($newbase ne $oldbase);
    # In the string $mutant at position $pos replace 1 character with $newbase
    substr($mutant,$pos,1,$newbase);
}
print "The mutant DNA is : $mutant \n\n";

# Finding the percent identity between input DNA & mutant DNA
# Start the tally at zero so it is a real number even when no position matches
$count = 0;
for ($position=0; $position < length($mutant) ; $position++)
{
    # Compare the input DNA with the mutant DNA one position at a time
    $count++ if substr($dna,$position,1) eq substr($mutant,$position,1);
}
# An empty sequence has nothing to compare; report 0 instead of dividing by zero
$percent_identity = length($mutant) ? ($count / (length($mutant)))*100 : 0;
print "There are $count identical nucleotides \n";
print "Percent Identity between input DNA & mutant DNA is : $percent_identity \n\n";