PFB2013PFB2013 | Programming for Biology @ CSHL Programming for Biology @ CSHL Fri, 01 Nov 2013 18:09:32 +0000 en-US hourly 1 http://wordpress.org/?v=3.6.1 BioPerl Lecture Scripts bioperllecturescripts/ bioperllecturescripts/#comments Tue, 29 Oct 2013 14:26:35 +0000 Sofia Robb ?p=1266
  • inFasta_doStuff_outFasta.pl
  • inFasta_loop.pl
  • convert_genbank2fasta.pl
  • createSeqOnFly.pl
  • inFasta_outGenBank.pl
  • local_seq_query.pl
  • multi_align_convert.pl
  • getSeq_genbank.pl
  • get_annot_from_genbank.pl
  • blast_parser_intro.pl
  • sample_blast_parser.pl
  • sample_blast_parser_1.pl
  • sample_blast_parser_2.pl
  • ]]>
    bioperllecturescripts/feed/ 0
    Gbrowse Key gbrowse-key/ gbrowse-key/#comments Wed, 23 Oct 2013 18:06:08 +0000 Sofia Robb ?p=1091 gbrowse_key]]> gbrowse-key/feed/ 0 AWS logins for MAKER 10/20 aws-logins-for-maker-1020/ aws-logins-for-maker-1020/#comments Sun, 20 Oct 2013 18:24:52 +0000 Sofia Robb ?p=883
    Name
    User name
    ssh link
    Samuel Adeogunsadeogunssh ubuntu@ec2-54-242-97-202.compute-1.amazonaws.com Najim Amezianenamezianessh ubuntu@ec2-54-226-209-193.compute-1.amazonaws.com Ian Blabyiblabyssh ubuntu@ec2-23-23-21-109.compute-1.amazonaws.com John Boylejboylessh ubuntu@ec2-23-20-103-199.compute-1.amazonaws.com Nicolas Chevriernchevrierssh ubuntu@ec2-204-236-203-229.compute-1.amazonaws.com Victoria Clarkvclarkssh ubuntu@ec2-50-16-101-128.compute-1.amazonaws.com Ana Dugganadugganssh ubuntu@ec2-50-16-170-67.compute-1.amazonaws.com Karl Frankekfrankessh ubuntu@ec2-23-20-107-73.compute-1.amazonaws.com Sean Gallahersgallaherssh ubuntu@ec2-54-211-134-1.compute-1.amazonaws.com Kira Glover-Cutterkgloverssh ubuntu@ec2-54-234-175-158.compute-1.amazonaws.com Johanna Goldmannjgoldmannssh ubuntu@ec2-23-22-164-129.compute-1.amazonaws.com Longhua Guolguossh ubuntu@ec2-54-227-51-35.compute-1.amazonaws.com Kyra Joneskjonesssh ubuntu@ec2-54-221-134-14.compute-1.amazonaws.com Michael Kantarmkantarssh ubuntu@ec2-174-129-105-33.compute-1.amazonaws.com Sarah Keaseyskeaseyssh ubuntu@ec2-107-22-41-168.compute-1.amazonaws.com Dan Landaudlandaussh ubuntu@ec2-54-221-92-74.compute-1.amazonaws.com David MacHughdmachughssh ubuntu@ec2-54-226-37-83.compute-1.amazonaws.com Colin Malonecmalonessh ubuntu@ec2-23-22-117-239.compute-1.amazonaws.com Shana McDevittsmcdevittssh ubuntu@ec2-23-22-184-96.compute-1.amazonaws.com Anirban Paulapaulssh ubuntu@ec2-54-243-7-119.compute-1.amazonaws.com Sandra Rehansrehanssh ubuntu@ec2-174-129-87-144.compute-1.amazonaws.com Nicole Ruiznruizssh ubuntu@ec2-23-20-27-197.compute-1.amazonaws.com Jill Turnerjturnerssh ubuntu@ec2-54-224-24-32.compute-1.amazonaws.com Hsiao-Lin Wanghwangssh ubuntu@ec2-184-72-155-101.compute-1.amazonaws.com ]]>
    aws-logins-for-maker-1020/feed/ 0
    BioPerl Lecture Scripts bioperl-lecture-scripts/ bioperl-lecture-scripts/#comments Sun, 20 Oct 2013 14:49:25 +0000 Sofia Robb ?p=855
  • inFasta_doStuff_outFasta.pl
  • inFasta_loop.pl
  • convert_genbank2fasta.pl
  • createSeqOnFly.pl
  • inFasta_outGenBank.pl
  • local_seq_query.pl
  • multi_align_convert.pl
  • getSeq_genbank.pl
  • get_annot_from_genbank.pl
  • blast_parser_intro.pl
  • sample_blast_parser.pl
  • sample_blast_parser_1.pl
  • sample_blast_parser_2.pl
  • ]]>
    bioperl-lecture-scripts/feed/ 0
    Lecture Code: Perl V: Hashes lecture-code-perl-v-hashes/ lecture-code-perl-v-hashes/#comments Wed, 16 Oct 2013 15:06:08 +0000 Sofia Robb ?p=601 read more)]]>
    #!/usr/bin/perl
    ## Lecture demo: build a small codon => amino-acid hash, read and overwrite
    ## one value, and compare several ways of printing a whole hash.
    use warnings;
    use strict;
    use Data::Dumper;

    ## create a hash all at once
    my %genetic_code = (
     "ATG" => "Met",
     "AAA" => "Lys",
     "CCA" => "Pro",
    );

    ## print a single key/value pair
    print "Before/ATG: " , $genetic_code{"ATG"} ,"\n";

    ## print out the entire hash one key/value pair at a time
    ## (keys returns the keys in no particular order)
    foreach my $codon (keys %genetic_code){
       ## get, or retrieve, a value using a key
       my $aa = $genetic_code{$codon};
       print "$codon translates to $aa\n";
    }

    ## overwrite or reset a single key's value
    $genetic_code{"ATG"} = "start_codon";

    ## print a single key/value pair after reset
    print "After/ATG: " , $genetic_code{"ATG"} ,"\n";

    ## print the entire hash after reset
    foreach my $codon (keys %genetic_code){
       my $aa = $genetic_code{$codon};
       print "$codon translates to $aa\n";
    }

    ## what are other ways to print out an entire hash?
    ## in and outside of quotes is useless:
    print "---other ways to print out an entire hash---\n";
    ## a hash does not interpolate inside double quotes, so the text
    ## '%genetic_code' is printed literally
    print "Printing the hash within quotes does not work: %genetic_code\n";
    ## in list context the hash flattens to key,value,key,value... with no separators
    print "Printing outside of quotes is swished and pretty useless: ", %genetic_code,"\n";

    ## Data::Dumper is useful for debugging
    print "Print hash using the module Data::Dumper:\n";
    ## make sure to use the '\' before '%'. can also use to print arrays: \@array
    ## the parentheses matter: without them "\n" becomes a second argument to
    ## Dumper and is dumped as an extra $VAR2 instead of being printed
    print Dumper(\%genetic_code), "\n";
    tab_parser.pl
    #!/usr/bin/perl
    ## tab_parser.pl
    ## Reads the tab-delimited file named on the command line, stores one
    ## key/value pair per line in a hash, then prints the hash sorted by key.
    use strict;
    use warnings;

    my $file = shift @ARGV;
    die "usage: $0 <tab-delimited file>\n" unless defined $file;

    ## 3-argument open with a lexical filehandle
    open (my $infile, '<', $file)
    or die "can't open file $file $!\n";
    my %hash;
    while (my $line = <$infile>){
      chomp $line;
      ## a blank line has no columns to use as a key
      next unless length $line;

      ## this works!!
      ## split on tabs and store the value of the first column in a single line to $key.
      ## this is inside of a while, so this will repeat for every line. 
      ## this will ignore every column past the second column
      #my ($key, $value) = split /\t/, $line;
      #$hash{$key} = $value;

      ## this works!!
      ## 
      #my @array = split /\t/, $line;
      #print join ('--',@array), "\n";
      ## use the element at index 0 as the key and the element at index 2 as the value
      #$hash{$array[0]} = $array[2];

       ## this works!!
       ## this assigns the first three columns to variables
       ## uses the $animal as the key and $food as the value. $count is not used here
       my ($animal,$count,$food) = split /\t/, $line;

       ## the value is the third column ($food)
       $hash{$animal} = $food;
    }
    close $infile;

    ## print out the entire hash one key/value pair at a time
    foreach my $key (sort keys %hash){
            my $value = $hash{$key};
            print "key:$key value:$value\n";
    }
    tab.2col.txt: tab-delimited file with 2 columns
    geneA	ATGC
    geneB	GCTA
    geneC	AACT
    tab.3col.txt: tab-delimited file with 3 columns
    cat	2	cat food
    dog	4	cats
    ferret	10	cat food
    bird	15	seeds
    ]]>
    lecture-code-perl-v-hashes/feed/ 0
    Review Session Scripts Perl II & III review-session-scripts-perl-ii-iii/ review-session-scripts-perl-ii-iii/#comments Wed, 16 Oct 2013 01:18:27 +0000 Sofia Robb ?p=558 read more)]]>
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    
    #!/usr/bin/perl
    #Perl_3-5.pl
    #finds occurrences of Nobody and somebody within text using index
    #For every input line, the position of the first 'Nobody' is written to
    #Nobody.txt and the position of the first 'somebody' to Somebody.txt.
    #index() returns -1 when the substring is not present on the line.

    use strict;
    use warnings;

    my $nobody_infile = 'perl_III_nobody.txt';
    my $nobody_outfile = 'Nobody.txt';
    my $somebody_outfile = 'Somebody.txt';

    #the search strings never change, so define them once outside the loop
    my $substr1 = 'Nobody';
    my $substr2 = 'somebody';

    #lexical filehandles are closed automatically at scope exit and cannot
    #collide with a handle of the same name elsewhere
    open (my $in, '<', $nobody_infile) or die "Cannot open infile: $!\n";
    open (my $out1, '>', $nobody_outfile) or die "Cannot open outfile1: $!\n";
    open (my $out2, '>', $somebody_outfile) or die "Cannot write to outfile2: $!\n";

    while (my $line = <$in>){
      my $position_nobody = index ($line, $substr1);
      my $position_somebody = index ($line, $substr2);

      #anything greater than -1 means 'somebody' was found on this line
      if ($position_somebody > -1){
        warn ("somebody is here");
      }

      print {$out1} $position_nobody,"\n";
      print {$out2} $position_somebody,"\n";
    }

    close $in;
    #buffered write errors only surface at close, so check the output handles
    close $out1 or die "Cannot close outfile1: $!\n";
    close $out2 or die "Cannot close outfile2: $!\n";
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    
     
     
    #!/usr/bin/perl -w
    #Perl_3-3.pl
    #Find reverse complement sequence
    #Expects two lines per record: a header line followed by a single
    #sequence line. Writes each record's reverse complement to fasta.out.

    use strict;

    my $infile = 'perl_III.fasta';
    my $outfile = 'fasta.out';

    open (my $in, "<", $infile) or die "Cannot read infile: $!\n";
    open (my $out, '>', $outfile) or die "Cannot write to outfile $!\n";

    while (my $header = <$in>){
      chomp $header;
      #skip blank lines (e.g. a trailing empty line) instead of treating them as headers
      next if $header eq '';

      #the sequence is the line right after the header
      my $sequence = <$in>;
      if (!defined $sequence){
        #the file ended after a header: there is nothing to reverse complement
        warn "No sequence line found for header '$header'\n";
        last;
      }
      chomp $sequence;

      #reverse the string, then swap each base for its complement;
      #characters other than ACGTacgt are left unchanged by tr
      my $complement = scalar reverse ($sequence);
      $complement =~ tr/ACGTacgt/TGCAtgca/;

      print {$out} "$header-reverse_complement\n$complement\n";
    }

    close $in;
    #buffered write errors only surface at close
    close $out or die "Cannot close outfile $!\n";
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    
     
    #!/usr/bin/perl -w
    #Perl_3-4.pl
    #Find character and line totals within fastq file
    #Writes the number of lines, the average line length and the total
    #number of characters (newlines excluded) to fastq.out.

    use strict;

    my $infile = 'perl_III.fastq';
    my $outfile = 'fastq.out';

    open (my $in, "<", $infile) or die "Cannot read infile: $!\n";
    open (my $out, '>', $outfile) or die "Cannot write to outfile $!\n";

    my $number_of_lines = 0;
    my $total_length = 0;

    while (my $line = <$in>){
      #remove the newline so it is not counted as a character
      chomp $line;
      $number_of_lines++;
      $total_length += length ($line);
    }
    close $in;

    #an empty file has zero lines; report an average of 0 instead of dividing by zero
    my $average_line_length = $number_of_lines ? $total_length/$number_of_lines : 0;

    print {$out} "Total number of lines = $number_of_lines\nAverage line length = $average_line_length\nTotal line length or number of characters = $total_length\n";

    #buffered write errors only surface at close
    close $out or die "Cannot close outfile $!\n";
    ]]>
    			review-session-scripts-perl-ii-iii/feed/
    		0
    		
    		
    		Lecture: Unix
    		lecture-unix/
    		lecture-unix/#comments
    		Tue, 15 Oct 2013 01:45:33 +0000
    		Sofia Robb
    				
    
    		?p=508
    		
    				UNIX – Command-Line Survival Guide]]>
    			lecture-unix/feed/
    		0
    		
    		
    		Problem Set: Perl IV
    		problem-set-perl-iv/
    		problem-set-perl-iv/#comments
    		Tue, 15 Oct 2013 01:40:39 +0000
    		Sofia Robb
    				
    
    		?p=502
    		read more)]]>
    				Perl IV Problem Set
    
    Files for Perl IV Problem Set
    
    1. Perl_IV.fasta
    Perl IV Problem Set
    ==================
    1. Iterate through each element of this array using a foreach loop: (101,2,15,22,95,33,2,27,72,15,52);
            - Print out only the values that are even (use modulus operator).
    2. Iterate through each of the elements of the above array, but sort them numerically.
            - Print each element.
            - Create two cumulative sums, one of all the even values and one of all the odd values. 
            - Print the two sums.
    3. Iterate through each element in the above array using a for loop.
            - Print only the values of the indices that are odd    
    4. Create a shuffled sequence
    	Turn a DNA string into an array with split()
    	Use a for loop to perform the following procedure N times (N = length of seq)
    		Select a random position A with rand()
    		Select a random position B with rand()
    		Exchange the letters at array indices A and B
    	Print the now shuffled sequence
     
    5a. Start with 2 very similar DNA sequences. 
    	Align with ClustalW, TCoffee, or some other web alignment application. 
    	Output should be in fasta format.
    	Store (copy and paste) the sequence, including dashes, from each ClustalW fasta output in a separate string variable inside your script.
    	Turn each string into an array with split()
    	Use a for loop to compare each index for nucleotide differences.
    	Report the nucleotide position of each difference.
     
    5b. Do the same as above but instead of copying and pasting into string variables
    import from a file.
     
    6. Calculate GC content
    	Turn a DNA string into an array with split()
    	Use a foreach loop to look at each nucleotide in turn
    	Calculate total length of the sequence
    	Keep a running total of C's and G's
    	Print the calculated GC content as a percent.
     
     
    7.  Run this code. Is its output what you expect? Why?
                      for (my $i = 0; $i < 10; $i++) {
                       if ($i = 2) {
                           print "\$i = $i\n";
                       }
                      }
    ]]>
    problem-set-perl-iv/feed/ 0
    Problem Set: Perl III problem-set-perl-iii/ problem-set-perl-iii/#comments Tue, 15 Oct 2013 01:18:34 +0000 Sofia Robb ?p=488 read more)]]> Perl III Problem Set:
    1. Perl_III.nobody.txt
    2. Perl_III.fastq
    3. Perl_III.fasta
    Perl III Problem Set
    ====================
     
    1.  Create a script that divides two numbers provided on the command line.
            Your script should have the following requirements:
    	Two numbers are required.
    	The numbers have to be positive.
    	The divisor cannot be zero.
     
            You should take care of the following in your Perl script
            =========================================================
    	Write the quotient to STDOUT
    	Write any errors to STDERR
     
            You should take care of the following on the command line in UNIX
            ==================================================================
    	Redirect STDOUT to an output file (out.txt)
    	Redirect STDERR to an error file (err.txt)
     
    2. Open a file using the open function.
            As you read in lines from the file, make all the letters in each line uppercase. (There's a built-in
    	Perl function which will do this.)
     
            Open a new file for output using the open function.
    	Write the output to this file 
     
    3. Open the provided fasta file (File 3). Print the reverse complement of each
        sequence. Make sure to print the output in fasta format including
        the sequence name and a note in the description that this is the
        reverse complement. Print to STDOUT and capture the output into a file
        with a command line redirect '>'.
     
    4. Open the provided fastq file (File 2). Go through each line of the file. Count
        the number of lines and the number of characters per line.
     
        Have your program report the:
             a. total number of lines
             b. total number of characters
             c. average line length 
     
    5. Create a script that uses <> to read in the contents of the provided
        text file (File 1). Use the function index() to
     
          a. find the first position of 'Nobody' on every line
          b. find the first position of 'somebody' on every line
     
        Use the warn() function to warn the user that 'somebody is here'
     
    ** You can look up how to use the index() and warn() functions in your books, from
       the command line with the perldoc command, or on perldoc.perl.org.
    ]]>
    problem-set-perl-iii/feed/ 0
    Command line Cheat Sheet command-line-cheat-sheet/ command-line-cheat-sheet/#comments Mon, 14 Oct 2013 20:44:53 +0000 Sofia Robb ?p=484 fwunixref]]> command-line-cheat-sheet/feed/ 0