#/usr/bin/perl -w use strict; require "/home/vossman/lib/fixnum.pl"; #amount (in %) to remove each of the data #1 --> keep 98% of data my $cutoff; my $debug = 0; my @cutoffs = (1.25); #my @cutoffs = (0,0.01,0.1,0.2,0.4,0.8,1.0,1.1,1.2,1.3,1.4,1.6,1.8,2.0,4,8,16); #my @cutoffs = (0,0.1); main(); sub main { my $exit; my @files = ; foreach my $file (@files) { foreach $cutoff (@cutoffs) { print STDERR $file,"--- $cutoff\n"; $exit = AnalyzeVol($file,$cutoff); } } if($exit) { print STDERR "good exit\n\n"; } } sub AnalyzeVol { my ($outf,$cutoff) = @_; my %min; my %max; #PASS ONE -- get central limits $cutoff% and (100-$cutoff)% my %list; my @keys; my $totatoms = readPDB($outf,\%list,\@keys); if($debug) { print STDERR "Found $totatoms total atoms\n"; print STDERR "RES-ATM count [old --> new] ". " median\n"; @keys = sort { $a cmp $b } @keys; } my ($perdrop,$drop,$bigrange) = parseList(\%list,\@keys,\%min,\%max,$cutoff); print STDOUT "$cutoff\t$drop\t$bigrange\t$perdrop\n"; #return 1; #PASS TWO --- edit the main file for limiters my $chng = editFile($outf,\%max,\%min); print STDERR "Changed $chng atoms\n\n"; my $bigchng=0; my @files = ; foreach my $file (@files) { $bigchng += editFile($file,\%max,\%min); } print STDERR "Changed $bigchng of expected $chng atoms\n\n"; return int($bigchng/$chng); }; sub isRNA { my ($resi) = @_; $resi = uc($resi); $resi =~ s/^\s+//; $resi =~ s/\s+$//; my %hash = ("ADE",1,"GUA",1,"URI",1,"CYT",1); if($resi && $hash{$resi}) { return 1; } return 0; }; sub getKey { my ($res,$atm) = @_; if($atm eq "P" || $atm eq "O1P" || $atm eq "O2P") { return "SUG-".$atm; } elsif(length($atm) == 3 && substr($atm,2,1) eq "\'") { return "SUG-".$atm; } return $res."-".$atm; }; sub fillRight { my ($str,$num) = @_; while(length($str) < $num) { $str.=" "; } return $str; }; sub readPDB { #my %isKEY; my %list; my @keys; my $outf; #($outf,\%isKEY,\%list,\@keys) = @_; #isKEY local??? my ($outf,$list,$keys) = @_; my $totatoms=0; my %isKEY; open(OUTF, "< $outf") or die "Can't open $outf for reading:\n"; my $count=0; my $carrot=1000; while(defined(my $line = ) ) { $count++; if($count > $carrot) { print STDERR "."; $carrot*=1.1; } my @chunks = split('\t',$line); foreach my $test (@chunks) { $test =~ s/\s+$//; $test =~ s/^\s+//; } my $ATOM = $chunks[4]; my $RESI = $chunks[3]; if(isRNA($RESI) && $chunks[13] eq "\[OK\]") { $totatoms++; my $key = getKey($RESI,$ATOM); my $vol = $chunks[10]; push(@{$$list{$key}},$vol); if(!$isKEY{$key}) { push(@$keys,$key); $isKEY{$key} = 1; } } } close(OUTF); print STDERR "\n"; return $totatoms; }; sub parseList { my ($list,$keys,$min,$max,$cutoff) = @_; my $drop=0; my $bigrange=0; my $perdrop=0; my $count; foreach my $key (@$keys) { $count++; @{$$list{$key}} = sort { $a <=> $b } @{$$list{$key}}; my $minInd = int($cutoff/100*($#{$$list{$key}})+0.5); my $maxInd = int((1-$cutoff/100) * ($#{$$list{$key}})); $$min{$key} = ${$$list{$key}}[$minInd]; $$max{$key} = ${$$list{$key}}[$maxInd]; my $oldrange = fixnum22(${$$list{$key}}[$#{$$list{$key}}] - ${$$list{$key}}[0]); my $newrange = fixnum22($$max{$key} - $$min{$key}); $drop += ($oldrange-$newrange); my $median = ${$$list{$key}}[int($#{$$list{$key}}/2)]; $perdrop += $newrange/$median; $bigrange += $newrange; if($debug) { print STDERR fillRight($key,8),fixnum50($#{$$list{$key}}+1), " [ ",$oldrange," --> ",$newrange," ] ", fixnum22($median)," ",fixnum32($newrange*100/$median),"\%\n"; } else { print STDERR "."; } } print STDERR "\n"; $bigrange /= $count; $perdrop /= $count; $drop /= $count; return ($perdrop,$drop,$bigrange); }; sub editFile { #PASS TWO my ($outf,$max,$min) = @_; my $count=0; my $carrot=1024; my @keys; my %ovol; my %okay; my %ovolsq; my %isKEY; my $numatoms =0; my $rnaatoms = 0; open(OUTF, "< $outf") or die "Can't open $outf for reading\n"; my $editf = $outf; my $chng=0; substr($editf,-4) = "-edit.out"; print STDERR "analyzing $outf --> $editf\n"; open(EDITF, "> $editf") or die "Can't open $editf for writing\n"; while(defined(my $line = ) ) { $count++; if($count > $carrot) { print STDERR "."; $carrot*=1.25; } my @chunks = split('\t',$line); foreach my $test (@chunks) { $test =~ s/\s+$//; $test =~ s/^\s+//; } my $ATOM = $chunks[4]; my $RESI = $chunks[3]; if(isRNA($RESI)) { $rnaatoms++; } if(isRNA($RESI) && $chunks[13] eq "\[OK\]") { my $key = getKey($RESI,$ATOM); my $vol = $chunks[10]; if($vol > $$min{$key} && $vol < $$max{$key}) { $numatoms++; if(!$isKEY{$key}) { push(@keys,$key); $isKEY{$key} = 1; $okay{$key} = 1; $ovol{$key} = $vol; $ovolsq{$key} = ($vol**2); } else { $okay{$key}++; $ovol{$key} += $vol; $ovolsq{$key} += ($vol**2); } } else { $chng++; $line =~ s/\[OK\]/possible/; } } print EDITF $line; } close(OUTF); close(EDITF); #ANALYSIS --- ANALYSIS --- ANALYSIS --- ANALYSIS --- ANALYSIS if($debug) { print STDERR "key mean stdev totperdev\n"; @keys = sort { $a cmp $b } @keys; my %stdev; my %mean; my $totdev=0; my $totperdev=0; my $count=0; foreach my $key (@keys) { if($okay{$key} > 10) { $count++; $stdev{$key} = sqrt(($ovolsq{$key}-($ovol{$key}**2)/$okay{$key})/($okay{$key}-1)); $mean{$key} = $ovol{$key} / $okay{$key}; $totdev += $stdev{$key}; $totperdev += $stdev{$key} / $mean{$key}; if($debug) { print STDERR fillRight($key,8),fixnum32($mean{$key})," ", fixnum22($stdev{$key})," ",fixnum32($totperdev),"\%\n"; } else { print STDERR "."; } } } } print STDERR "$chng\n"; return $chng; };