#!/usr/bin/perl require "/home/vossman/lib/filetools.pl"; require "/home/vossman/lib/fixnum.pl"; require 'ctime.pl'; use Text::Tabs; use Getopt::Std; use POSIX; use constant FACTOR => 3.45193784733; print STDERR "\nHISTOGRAM MAKER, hist.pl v 0.9 (3/10/04)\n"; # HISTOGRAM MAKER, hist.pl v 0.9 (3/10/04) # creates histograms from packing out file # requires: # - one of atom name, atom type or residue specified # - input packing out file # outputs: # - (x,y) data for histogram # # -i input packing out file # -o output hst file (volume number of atoms) # -P Number of points on x-axis # -H Number of point on displayed y-axis (does not affect output file) # -r residue restriction (e.g. ADE) # -a atom name restriction (e.g. N1) # -t atom type restriction (e.g. N3H1s) # -n minimum volumes value # -x maximum volumes value # -y averaging factor (e.g. 1 means average +/- 1 point; default: 0) # -b boxplot file name # -R residue type RNA or PROT # # changelog: # v 0.8 add boxplot info 5%ile,25%ile,75%ile,95%ile # v 0.7 classic my %args; getopt("sioatrPHnxybRh", \%args); #Set input variables my %isRNA = ("ADE",1,"GUA",1,"CYT",1,"URI",1); $resitype = $args{R} || "rna"; #$resitype = $args{R} || "prot"; if($resitype eq "rna") { $in = $args{i} || "/home/vossman/packing/out/rnaonly_all_trun.out22222"; } elsif($resitype eq "prot") { $in = $args{i} || "/home/vossman/packing/out/protein-edit.out"; } else { $in = $args{i}; } $boxplot = $args{b} || "./boxplot.dat"; $type = $args{t}; # leave blank for unrestricted $atom = $args{a}; # leave blank for unrestricted $resi = $args{r}; # leave blank for unrestricted my $hist = $args{h}; if($args{o}) { $out = $args{o}; } else { my $info; foreach my $item ($resi,$atom,$type) { if($item) { $info .= $item."-"; } } $info =~ s/-$//; $out = $in; substr($out,-4) = "-$info.hst"; } #if(!$type && !$atom && !$resi) { # $type = "P4H0u"; #} $POINTS = $args{P} || 50; $HEIGHT = $args{H} || 20; $defmin = $args{n} || 0; $defmax = $args{x} || 0; $defincr = $args{s} || 0; $aver = $args{y} || 0; #Initialize Variables $max = 0; $min = 1500; $numatoms = 0; $vol = 0; $volsq = 0; my $numlines = getNumLines($in); open(IN, "< $in") or die "Couldn't open $in for reading: $!\n"; print "\nReading $in...\n"; my $count=0; my $carrot=$numlines/76; printBar(76); while( defined($line = ) ) { $count++; if($count > $carrot) { print STDERR "^"; $carrot+=$numlines/76; } #Read Variables @chunks = split('\t', $line); foreach $test (@chunks) { $test =~ s/\s+$//; $test =~ s/^\s+//; } if($chunks[13] eq "\[OK\]") { if($type) { $TYPE = $chunks[6]; } if($atom) { $ATOM = $chunks[4]; } if($resi) { $RESI = $chunks[3]; } if((!$type || $TYPE =~ $type) && (!$atom || $ATOM eq $atom) && (!$resi || $RESI eq $resi || ($resi eq "SUG" && $isRNA{$RESI}))) { $numatoms++; push(@hist,$chunks[10]); if($chunks[10] > $max) { $max = $chunks[10]; } if($chunks[10] < $min) { $min = $chunks[10]; } $vol += $chunks[10]; $volsq += $chunks[10]*$chunks[10]; } } } close(IN); print "\nFound $numatoms \"OK\" atoms...\nContinuing with histogram info...\n"; open(OUT, "> $out") or die "Couldn't open $out for writing: $!\n"; print OUT "\# HISTOGRAM MAKER, hist.pl v 0.9 (3/10/04)\n"; print OUT "\# ",&ctime(time); if($defmin) { $min=$defmin; } if($defmax) { $max=$defmax; } if($defincr) { $incr = $defincr; if(!$defmin) { $min -= $incr*1.5; } $POINTS = int(($max-$min)/$incr)+2; $max = $incr*$POINTS+$min; } else { $incr = fixnum52(($max - $min) / ($POINTS-2)); if(!$defmin) { $min -= $incr*1.5; } } print STDERR "max=$max\tmin=$min\n"; for($i=0; $i <= $POINTS; $i++) { push(@counts,0); } foreach $item (@hist) { $i = floor( ($item - $min - $incr/2)/$incr ); $counts[$i]++; } #Output to file if(!$aver) { for($i=0; $i < $POINTS; $i++) { printf OUT "%.3f\t$counts[$i]\n",$min+$i*$incr; } } else { for($i=0; $i < $POINTS; $i++) { $val=0; for($j=($i-$aver);$j<=($i+$aver);$j++) { $val+=$counts[$j]; } if($val) { $val/=($aver*2+1); } printf OUT "%.3f\t%.2f\n",$min+$i*$incr,$val; } } #Draw Graph $maxcnt = 0; $#counts = $POINTS; foreach $num (@counts) { if($num > $maxcnt) { $maxcnt = $num; } } $incr2 = $maxcnt / $HEIGHT; print "\n"; if($atom) { print "ATOM=$atom\n"; } if($type) { print "TYPE=$type\n"; } if($resi) { print "RESI=$resi\n"; } if(!$hist) { $temp = $HEIGHT*$incr2; print "\n\n$temp\t__ "; for($i=0; $i<=$POINTS+1; $i++) { print "_"; } for($i=$HEIGHT; $i>=0; $i--) { print "\n\t |"; foreach $num (@counts) { if($num != 0 && $num/$incr2 >= $i) { if($num/$incr2 <= $i+0.15 || ($num - $i*$incr2) < 1.5) { print "."; } elsif($num/$incr2 <= $i+0.5) { print "*"; } else { print "\#"; } } else { print " "; } } print " |"; } print "\n0 atoms\t-- "; for($i=0; $i<=$POINTS+1; $i++) { print "-"; } for($j=0; $j<2; $j++) { print "\n\t |"; for($i=0; $i<=$POINTS+1; $i++) { print " "; } print "|"; } } if($numatoms != 0) { if(!$hist) { printf("\n\t %.1f",$min); for($i=0; $i<=$POINTS-2; $i++) { print " "; } printf("%.1f\n",$max); } $avg = int($vol/$numatoms*1000)/1000; $stdev = sqrt( ($volsq - $vol*$vol/$numatoms) / ($numatoms - 1) ); $incr2 = int($incr2*100)/100; print "\n\t Legend: * = $incr2 atoms (total: $numatoms)\n\t x = $incr units\n", " $POINTS points\n"; printf("\t Average: %.2f\t StdDev: %.2f\tRad: %.2f\n\n",$avg,$stdev,($avg/FACTOR)**(1/3)); open(BOXPLOT, ">> $boxplot") or die; @sorted = sort { $a <=> $b } @hist; print BOXPLOT "\tn\t",$sorted[int($#sorted*0.5)],"\t",$sorted[int($#sorted*0.75+0.5)],"\t", $sorted[int($#sorted*0.25+0.5)],"\t",$sorted[int($#sorted*0.98+0.5)],"\t", $sorted[int($#sorted*0.02+0.5)],"\n"; #print BOXPLOT "\tn\t",$sorted[int($#sorted*0.5)],"\t",$sorted[int($#sorted*0.75)],"\t", # $sorted[int($#sorted*0.25)],"\t",$sorted[$#sorted-1],"\t", # $sorted[1],"\n"; my $info; foreach my $item ($resi,$atom,$type) { if($item) { $info .= $item."-"; } } print BOXPLOT "\tn\t",$avg,"\t\#$info$resitype\n"; close(BOXPLOT); } else { print "\n\nError: no ATOMS found.\n\n"; } print OUT "&\n"; close(OUT);