#!/usr/bin/perl
##############################################
###              Huang He                  ###
###    statistics from summary table       ###
##############################################
#
# Aggregates a tab-separated summary table at one taxonomy level.
#
# Usage: perl <script> <summary table> <level>
#   <summary table>  tab-separated file; a line containing "ID", "Genome"
#                    and "Count" is treated as the header and skipped.
#   <level>          taxonomy level to group by, an integer from 1 to 6.
#
# Columns read from each row (0-based):
#   1      count
#   2      a percentage-like value (presumably percent identity -- the
#          original script called its accumulator "ide"; TODO confirm)
#   3      a name field, only inspected for the word "uncultured"
#   4..9   taxonomy levels 1..6
#
# Output, one line per distinct value found at the requested level:
#   <levels 1..N, each followed by a tab> TAB <summed count> TAB <mean>
# where <mean> is the count-weighted mean of column 2.  Because the lineage
# keeps its trailing tab, two tabs separate the lineage from the count; the
# group named "unknown" is printed with the lineage "unknown" instead.

use strict;
use warnings;
use Data::Dumper;    # debugging aid only; not needed for normal operation

if (@ARGV != 2) {
    print "Usage: perl $0 <summary table> [number of level in taxonomy]\n";
    exit;
}
my ($summary, $num) = @ARGV;

open(my $in, '<', $summary) or die "can't open the $summary: $!\n";

# Accept only a plain integer in 1..6; anything else gets the usage hint.
if ($num !~ /\A\d+\z/ or $num < 1 or $num > 6) {
    print "Usage:[number of level in taxonomy] should be a number <= 6\n";
    exit;
}

my $level           = $num + 0;    # requested taxonomy level, 1..6
my $first_level_col = 4;           # column index holding taxonomy level 1
my $max_levels      = 6;           # deepest level that can be requested
my $key_col         = $level + $first_level_col - 1;

my %count_for;           # group name -> summed count
my %weighted_sum_for;    # group name -> sum of (column 2 * count)
my %lineage_for;         # group name -> "level1\t...\tlevelN\t"

LINE:
while (my $line = <$in>) {
    chomp $line;

    # Header line.
    next LINE if $line =~ /ID/ and $line =~ /Genome/ and $line =~ /Count/;

    $line =~ s/\c@*//g;    # drop NUL characters
    $line =~ s/\cM*$//;    # drop trailing carriage returns

    my @fields   = split /\t/, $line;
    my $n_levels = @fields - $first_level_col;

    # A row without any taxonomy column has no usable group name.
    next LINE if $n_levels < 1;

    my ($count, $perc) = @fields[ 1, 2 ];

    # Normalise the capitalisation of "unclassified" in columns 4..10.
    my $last_checked = $#fields < 10 ? $#fields : 10;
    for my $idx ($first_level_col .. $last_checked) {
        $fields[$idx] = 'Unclassified' if $fields[$idx] eq 'unclassified';
    }

    if ($n_levels < $max_levels) {
        # Short lineage: pad up to level 6 with "unknown".
        # NOTE(review): the fill starts AT the last level present, so that
        # level is overwritten too -- kept as in the original; confirm intent.
        $fields[ $_ + 3 ] = 'unknown' for $n_levels .. $max_levels;
    }
    elsif ($n_levels > 7) {
        # Over-long lineage: levels 3..6 are replaced by "unknown".
        $fields[ $_ + 3 ] = 'unknown' for 3 .. $max_levels;
    }

    # An unknown level 1 is split according to whether the name field
    # mentions "uncultured".
    if ($fields[$first_level_col] eq 'unknown') {
        $fields[$first_level_col] =
            $fields[3] =~ /uncultured/
            ? 'unknown_without_name'
            : 'unknown_with_name';
    }

    my $key = $fields[$key_col];

    # Group names without a word character are never reported.
    next LINE if $key !~ /\w/;
    next LINE if $key eq 'uncultured';

    {
        # Empty or non-numeric cells count as 0, silently, as they always did.
        no warnings 'numeric';
        $count_for{$key}        += $count;
        $weighted_sum_for{$key} += $perc * $count;
    }

    # Remember the lineage of the first row seen for each group.
    next LINE if exists $lineage_for{$key};
    $lineage_for{$key} =
        join("\t", @fields[ $first_level_col .. $key_col ]) . "\t";
}
close $in;

# Sorted so that the report is reproducible from run to run.
for my $key (sort keys %count_for) {
    my $lineage = $key eq 'unknown' ? 'unknown' : $lineage_for{$key};
    my $total   = $count_for{$key};

    # A group whose counts sum to zero has no defined mean; report 0 rather
    # than dying on a division by zero.
    my $mean = $total ? $weighted_sum_for{$key} / $total : 0;

    print "$lineage\t$total\t", $mean, "\n";
}