Perl V Problem Set
====================

1. Create a hash of your favorite 5 genes and their nucleotide sequences, using the "all at once" method, see below as well as the lecture notes. Make the genes the keys and the sequences the values.

All at once:

```perl
%hash = (
    key1 => value1,
    key2 => value2,
    key3 => value3
);
```

```perl
#!/usr/bin/perl
# Perl V Problem I
# CSHL 2014 PFB

####################
# Hashes
##############

use warnings;
use strict;

## Problem I
# Build the whole hash in a single list assignment.
# Use parens, not curly brackets, when initializing a hash: curly brackets
# would create an anonymous hash reference instead of a key/value list.
my %sequences = (
    "Bden|BDEG_00353" => "MTIAGENYTPTEEDIIRIRSQTTRITETIVTVGKNTYHFYDVGGQIKYRKQWTPYFDTVHSIVFVVSLASFDQFLAEDPTINRMHDALDLFGQISDHPLLRHIPITLFLNKKDLFAQKFPTANIQQYFPDYQAKDIRKAMRYFENKFHNQNKVPGKEITCHFTCCTDTSAMGLIIMTVLEAMVKLQLKTSGMII",
    "Bden|BDEG_06990" => "MEDGSNEKPQKMGCGMSQEDQEMRRKNDEIDQALKKEKANLKNEVKMLLLDPVSFRDVCGGIGAGESGKSTILKQMTLIHGSGYSQQEKEAFKEIIFSNTAQSLRVILEAMQNMNIGLEIPANEQHRAIIFDLPNQIEAEDLPSEVTTAVKNLWTDGGVQTCFSRSREYQLNDSAKYYFESIDRIGAPGYMPTEQDVLRSRVKTTGITETTFRVGELTYRMFDVGGQRSERKKWIHCFENVTAIVFLVAISEYDQVLVEDETVNRMQEALTLFDSICNSRWFVQTSIILFLNKIDLFKEKLPRSPMAKYFPDFTAGENYEAACDYMLNRFVSLNQSEKKQIYTHFTCATDTSQIKFVMAAVNDIIIQNNLRDVGLM",
    "Bden|BDEG_00317" => "MKPEDQSIQSILAAQNRHSLEIDRQIEIDRLRRNKIKEEVHVLILGSGDSGKTTFLKQIKILYGGAYKETERKACRFQILKNIRESVMAIIAFLNFDGSMDDFSHLKKSIQVVSEHLKQQEPDGVCNISANCAVEIEMLWKDPKLQEYLKELDRTELQLQDTAGYFLSQASKYIPESYLPVNDDIVRIRSPTSQITESIYTIEGSVFHFFDVGGQLKHRKQWAPYFDHVSTIIFVVSLACYDQNLAEDATVNRMHDALELFEGVCNNPLLKKISVTLFLNKKDLFETKIATVPINNYFPDFLELNTTLKKGSKFFDRKFRYTLLDDETGSDHQRNVFTYVTCCTDTNTMEIIILTVMSSIMQKNLSSIGLQ",
    "Bden|BDEG_03796" => "MIQEKDDPRLLILGSGDSGKTTLLKQMRLFYGEGFTLEEIEAYRLLLLENVINCMSAYLALCDHLNLPIIHQHERLHYVSYSHAGRSYLPADLPPIISILWNEPSVQQVVLLGPNYHIQDTAPYFLSRVETIGKVGYKPTNQDILHVRAPTLAVSETIFKIGIHHYRLFDVGGQRGLRKQWAPFFDNCHSILFVTSIASYDQTLEEEREHSVNRLHDAIELFGGVINNRILQNSEVLLFLNKKDIFEEKLKHLPFSKFFPRYRGPNDPESIARYVAALFKAQRWDQNRKIMIHRTCCTDTR",
    "Bden|BDEG_02250" => "MGVYRHHSGNKHELKDSSSYPLADGVVPPIQVQPRGNSLVHQPPDVLLTQSVCDVEGYRDSWGQILDAKTISDEIDRMLVAERKAKRQAAEAPTMLILGSGDSGKTTFIKTLHICLGGGLTAQQKLQYHHQMLDNIVDSIKALLLASIALGYKVEPVQAKQIMLAFHRHGALGLSQQHIDYINQLWRHKGVQSCWKQSFKFKIQDTCDYFLDNIQTKAQFGYKITNQDALHVRHATTTISECVFEQNGITMRFIDVAGQRRFRKNWAAHFDNVDSTLFITSVSSYDQALEEDKTVNRMSDSIQLFSELASNIYLKNKALIVFLNKTDLLKKKLRYTKLQEYFPGYHGGNDPISVIDYFKKELGTKVRTRKDRVYMHATCCTSETAISFLVATTTDVLIKSRLDRHGLL"
);
```

2. Create the same hash with your favorite 5 genes and their nucleotide sequences, using the "one at a time" method, see below as well as the lecture notes. Make the genes the keys and the sequences the values.

One at a time:

```perl
my %hash;
$hash{key1} = value1;
$hash{key2} = value2;
$hash{key3} = value3;
```

```perl
#!/usr/bin/perl
# Perl V Problem II
# CSHL 2014 PFB

use warnings;
use strict;

# Start with an empty hash and add one gene at a time.
my %sequences;

# Each element is stored with the assignment operator '='.
# Do NOT use '=>' here: '=>' is only a comma that quotes its left side, so
# `$sequences{gene} => "SEQ";` evaluates two values and stores nothing.
$sequences{"Bden|BDEG_00353"} = "MTIAGENYTPTEEDIIRIRSQTTRITETIVTVGKNTYHFYDVGGQIKYRKQWTPYFDTVHSIVFVVSLASFDQFLAEDPTINRMHDALDLFGQISDHPLLRHIPITLFLNKKDLFAQKFPTANIQQYFPDYQAKDIRKAMRYFENKFHNQNKVPGKEITCHFTCCTDTSAMGLIIMTVLEAMVKLQLKTSGMII";
$sequences{"Bden|BDEG_06990"} = "MEDGSNEKPQKMGCGMSQEDQEMRRKNDEIDQALKKEKANLKNEVKMLLLDPVSFRDVCGGIGAGESGKSTILKQMTLIHGSGYSQQEKEAFKEIIFSNTAQSLRVILEAMQNMNIGLEIPANEQHRAIIFDLPNQIEAEDLPSEVTTAVKNLWTDGGVQTCFSRSREYQLNDSAKYYFESIDRIGAPGYMPTEQDVLRSRVKTTGITETTFRVGELTYRMFDVGGQRSERKKWIHCFENVTAIVFLVAISEYDQVLVEDETVNRMQEALTLFDSICNSRWFVQTSIILFLNKIDLFKEKLPRSPMAKYFPDFTAGENYEAACDYMLNRFVSLNQSEKKQIYTHFTCATDTSQIKFVMAAVNDIIIQNNLRDVGLM";
$sequences{"Bden|BDEG_00317"} = "MKPEDQSIQSILAAQNRHSLEIDRQIEIDRLRRNKIKEEVHVLILGSGDSGKTTFLKQIKILYGGAYKETERKACRFQILKNIRESVMAIIAFLNFDGSMDDFSHLKKSIQVVSEHLKQQEPDGVCNISANCAVEIEMLWKDPKLQEYLKELDRTELQLQDTAGYFLSQASKYIPESYLPVNDDIVRIRSPTSQITESIYTIEGSVFHFFDVGGQLKHRKQWAPYFDHVSTIIFVVSLACYDQNLAEDATVNRMHDALELFEGVCNNPLLKKISVTLFLNKKDLFETKIATVPINNYFPDFLELNTTLKKGSKFFDRKFRYTLLDDETGSDHQRNVFTYVTCCTDTNTMEIIILTVMSSIMQKNLSSIGLQ";
$sequences{"Bden|BDEG_03796"} = "MIQEKDDPRLLILGSGDSGKTTLLKQMRLFYGEGFTLEEIEAYRLLLLENVINCMSAYLALCDHLNLPIIHQHERLHYVSYSHAGRSYLPADLPPIISILWNEPSVQQVVLLGPNYHIQDTAPYFLSRVETIGKVGYKPTNQDILHVRAPTLAVSETIFKIGIHHYRLFDVGGQRGLRKQWAPFFDNCHSILFVTSIASYDQTLEEEREHSVNRLHDAIELFGGVINNRILQNSEVLLFLNKKDIFEEKLKHLPFSKFFPRYRGPNDPESIARYVAALFKAQRWDQNRKIMIHRTCCTDTR";
$sequences{"Bden|BDEG_02250"} = "MGVYRHHSGNKHELKDSSSYPLADGVVPPIQVQPRGNSLVHQPPDVLLTQSVCDVEGYRDSWGQILDAKTISDEIDRMLVAERKAKRQAAEAPTMLILGSGDSGKTTFIKTLHICLGGGLTAQQKLQYHHQMLDNIVDSIKALLLASIALGYKVEPVQAKQIMLAFHRHGALGLSQQHIDYINQLWRHKGVQSCWKQSFKFKIQDTCDYFLDNIQTKAQFGYKITNQDALHVRHATTTISECVFEQNGITMRFIDVAGQRRFRKNWAAHFDNVDSTLFITSVSSYDQALEEDKTVNRMSDSIQLFSELASNIYLKNKALIVFLNKTDLLKKKLRYTKLQEYFPGYHGGNDPISVIDYFKKELGTKVRTRKDRVYMHATCCTSETAISFLVATTTDVLIKSRLDRHGLL";
```

3. With a foreach loop, iterate through each key (gene name) of your hash.
   - Print each key and value in this format "gene:seq\n";
   - Are the genes printing in the order you added them to the hash? Why?

```perl
#!/usr/bin/perl
# Perl V Problem III

use strict;
use warnings;

# Same five genes as in Problem I, keyed by gene name.
my %sequences = (
    "Bden|BDEG_00353" => "MTIAGENYTPTEEDIIRIRSQTTRITETIVTVGKNTYHFYDVGGQIKYRKQWTPYFDTVHSIVFVVSLASFDQFLAEDPTINRMHDALDLFGQISDHPLLRHIPITLFLNKKDLFAQKFPTANIQQYFPDYQAKDIRKAMRYFENKFHNQNKVPGKEITCHFTCCTDTSAMGLIIMTVLEAMVKLQLKTSGMII",
    "Bden|BDEG_06990" => "MEDGSNEKPQKMGCGMSQEDQEMRRKNDEIDQALKKEKANLKNEVKMLLLDPVSFRDVCGGIGAGESGKSTILKQMTLIHGSGYSQQEKEAFKEIIFSNTAQSLRVILEAMQNMNIGLEIPANEQHRAIIFDLPNQIEAEDLPSEVTTAVKNLWTDGGVQTCFSRSREYQLNDSAKYYFESIDRIGAPGYMPTEQDVLRSRVKTTGITETTFRVGELTYRMFDVGGQRSERKKWIHCFENVTAIVFLVAISEYDQVLVEDETVNRMQEALTLFDSICNSRWFVQTSIILFLNKIDLFKEKLPRSPMAKYFPDFTAGENYEAACDYMLNRFVSLNQSEKKQIYTHFTCATDTSQIKFVMAAVNDIIIQNNLRDVGLM",
    "Bden|BDEG_00317" => "MKPEDQSIQSILAAQNRHSLEIDRQIEIDRLRRNKIKEEVHVLILGSGDSGKTTFLKQIKILYGGAYKETERKACRFQILKNIRESVMAIIAFLNFDGSMDDFSHLKKSIQVVSEHLKQQEPDGVCNISANCAVEIEMLWKDPKLQEYLKELDRTELQLQDTAGYFLSQASKYIPESYLPVNDDIVRIRSPTSQITESIYTIEGSVFHFFDVGGQLKHRKQWAPYFDHVSTIIFVVSLACYDQNLAEDATVNRMHDALELFEGVCNNPLLKKISVTLFLNKKDLFETKIATVPINNYFPDFLELNTTLKKGSKFFDRKFRYTLLDDETGSDHQRNVFTYVTCCTDTNTMEIIILTVMSSIMQKNLSSIGLQ",
    "Bden|BDEG_03796" => "MIQEKDDPRLLILGSGDSGKTTLLKQMRLFYGEGFTLEEIEAYRLLLLENVINCMSAYLALCDHLNLPIIHQHERLHYVSYSHAGRSYLPADLPPIISILWNEPSVQQVVLLGPNYHIQDTAPYFLSRVETIGKVGYKPTNQDILHVRAPTLAVSETIFKIGIHHYRLFDVGGQRGLRKQWAPFFDNCHSILFVTSIASYDQTLEEEREHSVNRLHDAIELFGGVINNRILQNSEVLLFLNKKDIFEEKLKHLPFSKFFPRYRGPNDPESIARYVAALFKAQRWDQNRKIMIHRTCCTDTR",
    "Bden|BDEG_02250" => "MGVYRHHSGNKHELKDSSSYPLADGVVPPIQVQPRGNSLVHQPPDVLLTQSVCDVEGYRDSWGQILDAKTISDEIDRMLVAERKAKRQAAEAPTMLILGSGDSGKTTFIKTLHICLGGGLTAQQKLQYHHQMLDNIVDSIKALLLASIALGYKVEPVQAKQIMLAFHRHGALGLSQQHIDYINQLWRHKGVQSCWKQSFKFKIQDTCDYFLDNIQTKAQFGYKITNQDALHVRHATTTISECVFEQNGITMRFIDVAGQRRFRKNWAAHFDNVDSTLFITSVSSYDQALEEDKTVNRMSDSIQLFSELASNIYLKNKALIVFLNKTDLLKKKLRYTKLQEYFPGYHGGNDPISVIDYFKKELGTKVRTRKDRVYMHATCCTSETAISFLVATTTDVLIKSRLDRHGLL"
);

# Visit every gene name and print it with its sequence.
foreach my $key (keys %sequences) {
    print "$key: $sequences{$key}\n";
}

## No: hashes are unordered, so keys() hands back the gene names in an
## arbitrary order rather than the order in which they were added.
```

4. After printing the hash in the above question, reassign one of the sequences to be AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA. Print out the hash again.
```perl
#!/usr/bin/perl
# Perl V Problem IV

use strict;
use warnings;

# Five genes keyed by gene name; each value is that gene's sequence.
my %sequences = (
    "Bden|BDEG_00353" => "MTIAGENYTPTEEDIIRIRSQTTRITETIVTVGKNTYHFYDVGGQIKYRKQWTPYFDTVHSIVFVVSLASFDQFLAEDPTINRMHDALDLFGQISDHPLLRHIPITLFLNKKDLFAQKFPTANIQQYFPDYQAKDIRKAMRYFENKFHNQNKVPGKEITCHFTCCTDTSAMGLIIMTVLEAMVKLQLKTSGMII",
    "Bden|BDEG_06990" => "MEDGSNEKPQKMGCGMSQEDQEMRRKNDEIDQALKKEKANLKNEVKMLLLDPVSFRDVCGGIGAGESGKSTILKQMTLIHGSGYSQQEKEAFKEIIFSNTAQSLRVILEAMQNMNIGLEIPANEQHRAIIFDLPNQIEAEDLPSEVTTAVKNLWTDGGVQTCFSRSREYQLNDSAKYYFESIDRIGAPGYMPTEQDVLRSRVKTTGITETTFRVGELTYRMFDVGGQRSERKKWIHCFENVTAIVFLVAISEYDQVLVEDETVNRMQEALTLFDSICNSRWFVQTSIILFLNKIDLFKEKLPRSPMAKYFPDFTAGENYEAACDYMLNRFVSLNQSEKKQIYTHFTCATDTSQIKFVMAAVNDIIIQNNLRDVGLM",
    "Bden|BDEG_00317" => "MKPEDQSIQSILAAQNRHSLEIDRQIEIDRLRRNKIKEEVHVLILGSGDSGKTTFLKQIKILYGGAYKETERKACRFQILKNIRESVMAIIAFLNFDGSMDDFSHLKKSIQVVSEHLKQQEPDGVCNISANCAVEIEMLWKDPKLQEYLKELDRTELQLQDTAGYFLSQASKYIPESYLPVNDDIVRIRSPTSQITESIYTIEGSVFHFFDVGGQLKHRKQWAPYFDHVSTIIFVVSLACYDQNLAEDATVNRMHDALELFEGVCNNPLLKKISVTLFLNKKDLFETKIATVPINNYFPDFLELNTTLKKGSKFFDRKFRYTLLDDETGSDHQRNVFTYVTCCTDTNTMEIIILTVMSSIMQKNLSSIGLQ",
    "Bden|BDEG_03796" => "MIQEKDDPRLLILGSGDSGKTTLLKQMRLFYGEGFTLEEIEAYRLLLLENVINCMSAYLALCDHLNLPIIHQHERLHYVSYSHAGRSYLPADLPPIISILWNEPSVQQVVLLGPNYHIQDTAPYFLSRVETIGKVGYKPTNQDILHVRAPTLAVSETIFKIGIHHYRLFDVGGQRGLRKQWAPFFDNCHSILFVTSIASYDQTLEEEREHSVNRLHDAIELFGGVINNRILQNSEVLLFLNKKDIFEEKLKHLPFSKFFPRYRGPNDPESIARYVAALFKAQRWDQNRKIMIHRTCCTDTR",
    "Bden|BDEG_02250" => "MGVYRHHSGNKHELKDSSSYPLADGVVPPIQVQPRGNSLVHQPPDVLLTQSVCDVEGYRDSWGQILDAKTISDEIDRMLVAERKAKRQAAEAPTMLILGSGDSGKTTFIKTLHICLGGGLTAQQKLQYHHQMLDNIVDSIKALLLASIALGYKVEPVQAKQIMLAFHRHGALGLSQQHIDYINQLWRHKGVQSCWKQSFKFKIQDTCDYFLDNIQTKAQFGYKITNQDALHVRHATTTISECVFEQNGITMRFIDVAGQRRFRKNWAAHFDNVDSTLFITSVSSYDQALEEDKTVNRMSDSIQLFSELASNIYLKNKALIVFLNKTDLLKKKLRYTKLQEYFPGYHGGNDPISVIDYFKKELGTKVRTRKDRVYMHATCCTSETAISFLVATTTDVLIKSRLDRHGLL"
);

# Assigning to a key that already exists replaces its value; the hash still
# holds five genes afterwards.
$sequences{"Bden|BDEG_02250"} = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";

# Print every gene with its (possibly reassigned) sequence.
foreach my $key (keys %sequences) {
    print "$key: $sequences{$key}\n";
}
```

5.
Use the included file "Perl_V.genesAndSeq.txt". Open file and build hash with the first column for your key and the second for your value.

6. With a foreach loop, iterate through each key (gene name) of your hash.
   - Calculate the length of each seq
   - Print each key and value in this format "key:value\n"; (gene:seq_len)

```perl
#!/usr/bin/perl
# Perl V Problems V & VI
# CSHL 2014 PFB

####################
# Hashes from a file
#######################

use warnings;
use strict;

## Problem V
# Build a hash from a tab-delimited file: column 1 is the key (gene name),
# column 2 is the value (sequence). The file name is the first argument.
my $infile = shift @ARGV;
die "Usage: $0 <genes_and_seqs.txt>\n" unless defined $infile;

my %sequences;    # gene name => sequence, filled from the file

open(my $in, '<', $infile) or die "Can't open $infile: $!\n";
while (my $line = <$in>) {
    chomp $line;
    my ($key, $value) = split(/\t/, $line);

    # Blank lines and lines without a tab have no usable key/value pair;
    # skipping them avoids "uninitialized value" warnings further down.
    next unless defined $key && defined $value;

    $sequences{$key} = $value;
}
close($in);

## Problem VI
# Check the contents of the hash: report each gene with its sequence length.
foreach my $key (keys %sequences) {
    my $len = length($sequences{$key});
    print "$key:$len\n";
}
```

7. Create your hash with gene names and sequences from a FASTA file (Perl_V.fasta). You will have to parse out the gene names and sequences from the file.

8. With a foreach loop, iterate through each gene name (key) in your hash.
   - Print each key/value pair. "gene:seq\n"
   - Now, add a sort (default) of the keys and print each value.
   - Now, sort (numeric, smallest to biggest) by the length of the sequence, and print each key/value pair as well as the length of the sequence.
   - Now, sort biggest to smallest.
Print each key/value pair as well as the length of the sequence

```perl
#!/usr/bin/perl
# Perl V Problems VII & VIII
# CSHL 2014 PFB

####################
# Hashes from a file
#######################

use warnings;
use strict;

## Problem VII
# Read a FASTA file (first argument) into a hash of gene name => sequence.
my $fasta = shift @ARGV;
die "Usage: $0 <file.fasta>\n" unless defined $fasta;

my %sequences;    # gene name => sequence, joined across all of its lines
my $name;         # gene name of the record being read; undef before the first header

open(my $fas, '<', $fasta) or die "Can't open $fasta: $!\n";
while (my $line = <$fas>) {
    chomp $line;
    if ($line =~ m/^>\s*(\S*)/) {
        # Header line: drop the leading '>' and keep the first
        # whitespace-delimited token as the gene name.
        # NOTE(review): assumes that token alone identifies the gene in
        # Perl_V.fasta -- confirm against the file.
        $name = $1;
        $sequences{$name} = '';
    }
    elsif (defined $name) {
        # Sequence line: append to the current record. Lines before the
        # first header belong to no gene and are ignored.
        $sequences{$name} .= $line;
    }
}
close($fas);

## Problem VIII
# Check the contents of the hash (arbitrary hash order)
foreach my $key (keys %sequences) {
    print "$key:$sequences{$key}\n";
}

print "\n\n\n\n";

# Default sort of the gene names
foreach my $key (sort keys %sequences) {
    print "$key:$sequences{$key}\n";
}

print "\n\n\n\n";

# Sort numeric, small to large by sequence length.
# Ties are broken by gene name so the output is the same on every run.
foreach my $key (
    sort { length($sequences{$a}) <=> length($sequences{$b}) or $a cmp $b }
    keys %sequences
) {
    print "$key:$sequences{$key}\t", length($sequences{$key}), "\n";
}

print "\n\n\n\n";

# Sort numeric, large to small by sequence length (same tie-break)
foreach my $key (
    sort { length($sequences{$b}) <=> length($sequences{$a}) or $a cmp $b }
    keys %sequences
) {
    print "$key:$sequences{$key}\t", length($sequences{$key}), "\n";
}
```

9. In a new script, determine and report the codon usage for a DNA sequence
   - Create a string containing a DNA sequence
   - Within a for loop
     - use the function substr ($seq, $offset, 3) to extract codons
     - store each codon and the number of times it has occurred in a hash

```perl
#!/usr/bin/perl
# PerlV Prob IX

use warnings;
use strict;

my $seq = 'CGTGACCGCCATGGATGTGGTGTACGCTCTGAAGAGGCAGGGCCGCACTCTGTACGGCTTCGGCGGTTAA';

my %codons;    # codon => number of times it occurs in $seq

# Walk the sequence three bases at a time. The loop stops as soon as fewer
# than three bases remain, so a trailing partial codon (this sequence is not
# a multiple of three long) is not counted as if it were a codon.
for (my $offset = 0; $offset + 3 <= length($seq); $offset += 3) {
    my $codon = substr($seq, $offset, 3);
    $codons{$codon}++;
}

# Report the codon usage, one "codon<TAB>count" line per codon, sorted by codon.
foreach my $key (sort keys %codons) {
    print "$key\t$codons{$key}\n";
}
```