Bioinformatics 生物信息学理论和实践 唐继军 jtang@cse.sc.edu 13928761660. #!/usr/bin/perl \$DNA = 'ACGT'; # Next, we print the DNA onto the screen print \$DNA, "\n"; print '\$DNA\n'; print "\$DNA\n"; exit;. Do the Math (your 2nd Perl program). #!/usr/bin/perl print " 4+5\n " ; print 4+5 , " \n " ;

Bioinformatics 生物信息学理论和实践 唐继军 jtang@cse.sc.edu 13928761660

# Store a short DNA sequence in the scalar variable $DNA.
$DNA = 'ACGT';

# Next, we print the DNA onto the screen

# A comma-separated list: the value of $DNA followed by a newline.
print $DNA, "\n";

# Single quotes do not interpolate, so this prints the literal text $DNA\n.
print '$DNA\n';

# Double quotes interpolate both the variable and the \n escape.
print "$DNA\n";

exit;

Do the Math (your 2nd Perl program)

#!/usr/bin/perl

# Inside double quotes nothing is evaluated: prints the text 4+5.
print "4+5\n";

# Outside quotes the expression is evaluated: prints 9.
print 4+5 , "\n";

# Both forms in one statement: prints 4+5=9.
print "4+5=" , 4+5 , "\n";

[Note: use commas to separate multiple items in a print statement; whitespace between the items is ignored.]

• Strings (text) in variables can be used for some math-like operations

• To concatenate (join) strings, use the dot (.) operator

# Two sequence fragments to be joined.
$seq1 = "ACTG";
$seq2 = "GGCTA";

# The dot operator concatenates its two string operands.
$seq3 = $seq1 . $seq2;

# Prints the joined sequence; no trailing newline is added.
print $seq3;

ACTGGGCTA

# The DNA sequence to transcribe.
$DNA = 'ACGGGAGGACGGGAAAATTACTACGGCATTAGC';

print "Here is the starting DNA:\n\n";
print "$DNA\n\n";

# Transcribe the DNA to RNA by substituting all T's with U's.
# The substitution is applied to a copy, so $DNA itself stays unchanged;
# the /g modifier replaces every T rather than only the first one.
$RNA = $DNA;
$RNA =~ s/T/U/g;

# Print the RNA onto the screen
print "Here is the result of transcribing the DNA to RNA:\n\n";
print "$RNA\n";

# Exit the program.
exit;

# The DNA sequence whose reverse complement is wanted.
$DNA = 'ACGGGAGGACGGGAAAATTACTACGGCATTAGC';

print "$DNA\n\n";

# Assigning to a scalar puts reverse in scalar context, so it reverses the
# characters of the string instead of the order of a list.
$revcom = reverse $DNA;

# See the text for a discussion of tr///
# Each base is mapped to its complement in a single pass, upper and lower case.
$revcom =~ tr/ACGTacgt/TGCAtgca/;

# Print the reverse complement DNA onto the screen
print "Here is the reverse complement DNA:\n\n";
print "$revcom\n";

exit;

# The filename of the file containing the protein sequence data
$proteinfilename = 'NM_021964fragment.pep';

# Open the file for reading and stop with a message if that fails;
# otherwise every read below would silently return nothing.
unless ( open(PROTEINFILE, '<', $proteinfilename) ) {
    print "Could not open file $proteinfilename!\n";
    exit;
}

# Each read of <PROTEINFILE> in scalar context returns the next line.

# First line
$protein = <PROTEINFILE>;
print "\nHere is the first line of the protein file:\n\n";
print $protein;

# Second line
$protein = <PROTEINFILE>;
print "\nHere is the second line of the protein file:\n\n";
print $protein;

# Third line
$protein = <PROTEINFILE>;
print "\nHere is the third line of the protein file:\n\n";
print $protein;

close PROTEINFILE;

exit;

# The filename of the file containing the protein sequence data
$proteinfilename = 'NM_021964fragment.pep';

# First we have to "open" the file.
# Stop with a message if the file cannot be opened for reading.
unless ( open(PROTEINFILE, '<', $proteinfilename) ) {
    print "Could not open file $proteinfilename!\n";
    exit;
}

# Read the protein sequence data from the file, and store it
# into the array variable @protein.
# In list context <PROTEINFILE> returns all remaining lines, one per element.
@protein = <PROTEINFILE>;

# Print the protein onto the screen
print @protein;

# Close the file.
close PROTEINFILE;

exit;

# "scalar context" and "list context"
@bases = ('A', 'C', 'G', 'T');

# An array interpolated in double quotes is joined with spaces: A C G T
print "@bases\n";

# Scalar context: assigning an array to a scalar yields its element count (4).
$a = @bases;
print $a, "\n";

# List context: the parentheses make this a list assignment, so $a
# receives the first element of the array (A).
($a) = @bases;
print $a, "\n";

exit;

# array indexing
@bases = ('A', 'C', 'G', 'T');

print "@bases\n";

# Indices start at 0. A single element is a scalar, so it is written
# with the $ sigil even though the array itself is @bases.
print $bases[0], "\n";
print $bases[1], "\n";
print $bases[2], "\n";
print $bases[3], "\n";

exit;

• Chomp

• Length of a string

• Substring

# The filename of the file containing the protein sequence data
$proteinfilename = 'NM_021964fragment.pep';

# open returns false on failure; report the problem and stop.
unless ( open(PROTEINFILE, '<', $proteinfilename) ) {
    print "Could not open file $proteinfilename!\n";
    exit;
}

# Read one line per iteration; the loop ends when the file is exhausted.
while ( $protein = <PROTEINFILE> ) {
    print " #####Here is the next line of the file:\n";
    print $protein;
}

# Close the file.
close PROTEINFILE;

exit;

• String comparison (are they the same, > or <)

• eq (equal)

• ne (not equal)

• ge (greater or equal)

• gt (greater than)

• lt (less than)

• le (less or equal)

• if () {}

• elsif() {}

• else {}

# The word to test against a few known peptides.
$word = 'MNIDDKL';

# Conditions are tried top to bottom with the string operator eq;
# only the first branch whose test is true runs.
if ( $word eq 'QSTVSGE' ) {
    print "QSTVSGE\n";
}
elsif ( $word eq 'MRQQDMISHDEL' ) {
    print "MRQQDMISHDEL\n";
}
elsif ( $word eq 'MNIDDKL' ) {
    print "MNIDDKL-the magic word!\n";
}
else {
    # \" places a literal double quote inside the double-quoted string.
    print "Is \"$word\" a peptide?\n";
}

exit;

# NOTE(review): the snippet as transcribed never assigned $x, so every test
# ran against an undefined value. 10 is an assumed starting value -- confirm
# against the original slide.
$x = 10;
$y = -20;

# Numeric comparison operators, combined with || (or) and && (and).
if ($x <= 10) { print "1st true\n"; }

if ($x > 10) { print "2nd true\n"; }

if ($x <= 10 || $y > -21) { print "3rd true\n"; }

if ($x > 5 && $y < 0) { print "4th true\n"; }

# Parentheses make the grouping explicit.
if (($x > 5 && $y < 0) || $y > 5) { print "5th true\n"; }

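• Use ==, <, <=, >, >=, != to compare numbers

• Use eq, lt, le, gt, ge, ne to compare strings

• The logical operators ||, && and or, and can combine either kind of test; or and and simply have lower precedence than || and &&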

# NOTE(review): the snippet as transcribed never assigned $x, so every test
# ran against an undefined value. 10 is an assumed starting value -- confirm
# against the original slide.
$x = 10;
$y = -20;

# The same tests written with the string operators. Strings are compared
# character by character, so the results can differ from the numeric
# versions (for example, "10" sorts before "5").
if ($x le 10) { print "1st true\n"; }

if ($x gt 5) { print "2nd true\n"; }

if ($x le 10 || $y gt -21) { print "3rd true\n"; }

if ($x gt 5 && $y lt 0) { print "4th true\n"; }

if (($x gt 5 && $y lt 0) || $y gt 5) { print "5th true\n"; }

# The same value stored once as a number and once as a string.
$num = 1234;
$str = '1234';

print $num, " ", $str, "\n";

# The operator decides how the operands are treated:
# + converts both to numbers and adds them (2468) ...
$num_or_str = $num + $str;
print $num_or_str, "\n";

# ... while . converts both to strings and joins them (12341234).
$num_or_str = $num . $str;
print $num_or_str, "\n";

exit;

• +, -, *, **, /, %

• +=, -=, *=, **=, /=, %=

• ++, --

# NOTE(review): the snippet as transcribed used $x before assigning it, so
# every result was computed from an undefined value. 10 is an assumed
# starting value -- confirm against the original slide.
$x = 10;

$x = $x*1.5;

# An assignment operator returns the new value, so it can be printed directly.
print $x*=3, "\n";

# Post-increment: prints the old value, then adds 1.
print $x++, "\n";
print $x, "\n";

# Pre-increment: adds 1 first, then prints the new value.
print ++$x, "\n";
print $x, "\n";

# Remainder after division by 3.
print $x % 3, "\n";

# Exponentiation: $x squared.
print $x**2, "\n";

• Read a sequence from a fasta file

• Ask the user to input a motif

• Check if the sequence has the motif

# Ask for the name of the sequence file; chomp removes the trailing newline.
print "Please type the filename: ";
$fname = <STDIN>;
chomp $fname;

# Three-argument open: the text typed by the user is used only as a file
# name, never as an open mode or a command. Stop if the file cannot be read.
unless ( open(PROTEINFILE, '<', $fname) ) {
    print "Could not open file $fname!\n";
    exit;
}

# The first line is read separately into $name; the remaining lines
# are kept as the sequence data.
$name = <PROTEINFILE>;
@protein = <PROTEINFILE>;
close PROTEINFILE;

# Join the lines into one string and remove all whitespace, so that a motif
# spanning a line break can still be found.
$protein = join('', @protein);
$protein =~ s/\s//g;

print "Enter a motif to search for: ";
$motif = <STDIN>;
chomp $motif;

# The motif is interpolated into the pattern, so it is interpreted as a
# regular expression (for example A[DS] or K.+R), not only as literal text.
if ( $protein =~ /$motif/ ) {
    print "I found it!\n\n";
}
else {
    print "I couldn't find it.\n\n";
}

# Ask for the name of the sequence file; chomp removes the trailing newline.
print "Please type the filename of the DNA sequence data: ";
$dna_filename = <STDIN>;
chomp $dna_filename;

# Three-argument open: the text typed by the user is used only as a file
# name, never as an open mode or a command. Stop if the file cannot be read.
unless ( open(DNAFILE, '<', $dna_filename) ) {
    print "Could not open file $dna_filename!\n";
    exit;
}

# The first line is read separately into $name; the remaining lines
# are kept as the sequence data.
$name = <DNAFILE>;
@DNA = <DNAFILE>;
close DNAFILE;

# Join the lines into one string and remove all whitespace.
$DNA = join('', @DNA);
$DNA =~ s/\s//g;

# Without any sequence the percentage below would divide by zero.
unless ( length($DNA) ) {
    print "No sequence data found in $dna_filename!\n";
    exit;
}

# Walk through the sequence one base at a time with a while loop.
$count_of_CG = 0;
$position = 0;

while ( $position < length $DNA ) {
    # uc makes the test case-insensitive, matching the /i regex version.
    $base = uc substr($DNA, $position, 1);

    if ( $base eq 'C' or $base eq 'G' ) {
        ++$count_of_CG;
    }

    $position++;
}

print "CG content is ", $count_of_CG/(length $DNA)*100, "%\n";

# Ask for the name of the sequence file; chomp removes the trailing newline.
print "Please type the filename of the DNA sequence data: ";
$dna_filename = <STDIN>;
chomp $dna_filename;

# Three-argument open: the text typed by the user is used only as a file
# name, never as an open mode or a command. Stop if the file cannot be read.
unless ( open(DNAFILE, '<', $dna_filename) ) {
    print "Could not open file $dna_filename!\n";
    exit;
}

# The first line is read separately into $name; the remaining lines
# are kept as the sequence data.
$name = <DNAFILE>;
@DNA = <DNAFILE>;
close DNAFILE;

# Join the lines into one string and remove all whitespace.
$DNA = join('', @DNA);
$DNA =~ s/\s//g;

# Without any sequence the percentage below would divide by zero.
unless ( length($DNA) ) {
    print "No sequence data found in $dna_filename!\n";
    exit;
}

$count_of_CG = 0;

# The for loop keeps initialisation, test and increment on a single line.
for ( $position = 0 ; $position < length $DNA ; ++$position ) {
    # uc makes the test case-insensitive, matching the /i regex version.
    $base = uc substr($DNA, $position, 1);

    if ( $base eq 'C' or $base eq 'G' ) {
        ++$count_of_CG;
    }
}

print "CG content is ", $count_of_CG/(length $DNA)*100, "%\n";

# Ask for the name of the sequence file; chomp removes the trailing newline.
print "Please type the filename of the DNA sequence data: ";
$dna_filename = <STDIN>;
chomp $dna_filename;

# Three-argument open: the text typed by the user is used only as a file
# name, never as an open mode or a command. Stop if the file cannot be read.
unless ( open(DNAFILE, '<', $dna_filename) ) {
    print "Could not open file $dna_filename!\n";
    exit;
}

# The first line is read separately into $name; the remaining lines
# are kept as the sequence data.
$name = <DNAFILE>;
@DNA = <DNAFILE>;
close DNAFILE;

# Join the lines into one string and remove all whitespace.
$DNA = join('', @DNA);
$DNA =~ s/\s//g;

# Without any sequence the percentage below would divide by zero.
unless ( length($DNA) ) {
    print "No sequence data found in $dna_filename!\n";
    exit;
}

$count_of_CG = 0;

# With /g, each match in scalar context resumes where the previous one ended,
# so the loop body runs once per occurrence; /i ignores case.
while ( $DNA =~ /c/ig ) { $count_of_CG++; }

while ( $DNA =~ /g/ig ) { $count_of_CG++; }

print "CG content is ", $count_of_CG/(length $DNA)*100, "%\n";

# Ask for the name of the sequence file; chomp removes the trailing newline.
print "Please type the filename of the DNA sequence data: ";
$dna_filename = <STDIN>;
chomp $dna_filename;

# Three-argument open: the text typed by the user is used only as a file
# name, never as an open mode or a command. Stop if the file cannot be read.
unless ( open(DNAFILE, '<', $dna_filename) ) {
    print "Could not open file $dna_filename!\n";
    exit;
}

# The first line is read separately into $name; the remaining lines
# are kept as the sequence data.
$name = <DNAFILE>;
@DNA = <DNAFILE>;
close DNAFILE;

# Join the lines into one string and remove all whitespace.
$DNA = join('', @DNA);
$DNA =~ s/\s//g;

# Without any sequence the percentage below would divide by zero.
unless ( length($DNA) ) {
    print "No sequence data found in $dna_filename!\n";
    exit;
}

$count_of_CG = 0;

# The /g modifier is essential here. Written as /c/i (without g) the match
# starts again from the beginning of the string on every test, so the
# condition stays true forever as soon as the sequence contains a single C
# and the program never finishes.
while ( $DNA =~ /c/ig ) { $count_of_CG++; }

while ( $DNA =~ /g/ig ) { $count_of_CG++; }

print "CG content is ", $count_of_CG/(length $DNA)*100, "%\n";

• Ask for a protein file in fasta format

• Ask for an amino acid

• Count the frequency of that amino acid

• TKFHSNAHFYDCWRMLQYQLDMRCMRAISTFSPHCGMEHMPDQTHNQGEMCKPRMWQVSMNQSCNHTPPFRKTYVEWDYMAKALIAPYTLGWLASTCFIW

• Ask for a DNA file in fasta format