1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | #!/usr/bin/perl -w =pod =head1 NAME merge_hits.pl =head1 SYNOPSIS merge_hits.pl file1 file2 file3 ... merge_hits.pl gstt1_drome_hits.bp62 gstt1_drome_hits.vt80 gstt1_drome_hits.vt20 =head1 OPTIONS -h short help --help include description --expect show E()-value --all show all scores (default) --bits show bits --f_id show fraction identical --a_len show alignment length =head1 DESCRIPTION C takes a set of results files produced by the C or C(bp_blastp.pl> scripts, and merges the results in these files, so that they can be compared. The C<*_hits.*> are expected to contain lines containing an accession, E()-value, bit score, fraction identical, and alignment length, ending with a description: #Hits: ACC E() bits f_id a_len descr NP_000552 2e-145 409.0 1.000 218 glutathione S-transferase Mu 1 isoform 1 [Homo sapiens] Results from each of the files are merged using the unique accession (and ordered according to the results in the first file). =head1 AUTHOR William R. Pearson, wrp@virginia.edu =cut use strict; use Pod::Usage; use Getopt::Long; use vars qw($file $fd %results @file_list $matrix @hit_list $shelp $help); use vars qw($expect $a_len $f_id $bits $all); ($expect,$a_len,$f_id, $bits, $all) = ( 0, 0, 0, 0, 0); pod2usage(1) unless @ARGV; GetOptions("h|?" => \$shelp, "help" => \$help, "all" => \$all, "expect" => \$expect, "bits" => \$bits, "a_len" => \$a_len, "f_id" => \$f_id, ); pod2usage(1) if $shelp; pod2usage(exitstatus => 0, verbose => 2) if $help; my @res_fields = (); if ($all) { @res_fields = qw( expect bits f_id a_len ); } elsif ($expect || $bits || $f_id || $a_len) { if ($expect) { push @res_fields, "expect";} if ($bits) { push @res_fields, "bits";} if ($f_id) { push @res_fields, "f_id";} if ($a_len) { push @res_fields, "a_len";} } else { @res_fields = qw( expect bits f_id a_len ); } for (my $i=0; $i < @ARGV; $i++) { $file = $ARGV[$i]; # open the file unless (open($fd,$file)) { warn "Cannot open $file\n"; next; } # extract the suffix ($matrix) = ($file =~ m/\w+\.(\w+)$/); push @file_list, $matrix; while (my $line = <$fd>) { # skip over the comments next if ($line =~ /^#/); chomp($line); # read in the accessions, other info my @data = split(/\t/,$line); my $acc = $data[0]; if ($i == 0) { push @hit_list, $acc;} my %values = (); @values{qw(expect bits f_id a_len)} = @data[1..4]; unless ($results{$acc}) { $results{$acc} = {descr => $data[-1]}; } $results{$acc}{$matrix} = \%values; } } #print Dumper(%results); my $tab_fill = "\t" x scalar(@res_fields); print "#\t\t".join($tab_fill,@file_list) . "\n"; for my $acc ( @hit_list) { print $acc; for $matrix ( @file_list ) { if ($results{$acc}{$matrix}) { my $result_p = $results{$acc}{$matrix}; print "\t",join("\t",@$result_p{@res_fields}); } else { print $tab_fill; } } print "\t".$results{$acc}{descr} . "\n"; } __END__ |
Comments are closed.