#!/bin/perl -w use Getopt::Std; use constant PI => 3.14159265358979; #SUPERPAK v0.8 # starting over... # - read symmetry matrices from PDB file itself # - ignore symop.lib altogether # - perform trasnlation on generate unit cell # - no transforming single point; transform prism with center pt. # - fix for overlap (x,y,z) < cut # - only include close transformations # - includes own matrix subroutines getopt("oisgcmrtb", \%args); $outfile = $args{o} || "out.pdb"; #output file $pdbfile = $args{i} || "in.pdb"; #input file #$symfile = $args{s} || "./symop.lib"; #CCP4 symop.lib file $gen_zero = $args{g} || 1; #output original PDB x,y,z if($gen_zero != 1) { $gen_zero = 0; } $cutoff_plus = $args{c} || 25; $cutoff_mult = $args{m} || 20; #this isn't working... use $cutoff_mult/=100; print STDERR "[ cutoff = $cutoff_mult\*size + $cutoff_plus ]\n"; $range = $args{r} || 3; $maxtrans = $args{t} || 1000; $mintrans = $args{b} || 0; print STDOUT "\n####################################################\n" ."$pdbfile\n" ."####################################################\n"; open (PDB, "< $pdbfile") or die "failed opening $pdbfile for reading: $!\n"; $cryst=0; $fscale=0; $smtry=0; while(!($cryst && $fscale && $smtry) && defined($line=)) { if(substr($line,0,19) eq "REMARK 290 SMTRY1") { print STDERR "Entering SMTRY loop..."; do { chomp($line); @chunks=split(' ',$line); $opnum = $chunks[3]; print STDERR "[ $opnum ] "; $x11=$chunks[4]; $x21=$chunks[5]; $x31=$chunks[6]; $y1=$chunks[7]; $line=; chomp($line); @chunks=split(' ',$line); $opnum2 = $chunks[3]; $x12=$chunks[4]; $x22=$chunks[5]; $x32=$chunks[6]; $y2=$chunks[7]; $line=; chomp($line); @chunks=split(' ',$line); $opnum3 = $chunks[3]; $x13=$chunks[4]; $x23=$chunks[5]; $x33=$chunks[6]; $y3=$chunks[7]; $matrix{$opnum} = [ [ $x11, $x21, $x31 ],[ $x12, $x22, $x32 ],[ $x13, $x23, $x33 ],]; $vector{$opnum} = [ [ $y1 ],[ $y2 ],[ $y3 ],]; if($opnum != $opnum2 || $opnum != $opnum3) { die "opnums don't match on read in $opnum,$opnum2,$opnum3 $!"; } push(@opers,$opnum); } while(defined($line=) && substr($line,0,18) eq "REMARK 290 SMTRY"); print STDERR "done\n"; print STDOUT "found ".($#opers+1)." of $opnum operations (including identity)\n"; $smtry=1; } elsif(substr($line,0,5) eq "CRYST") { $a = substr($line,6,8); $a =~ s/\s+$//; $a =~ s/^\s+//; $b = substr($line,15,8); $b =~ s/\s+$//; $b =~ s/^\s+//; $c = substr($line,24,8); $c =~ s/\s+$//; $c =~ s/^\s+//; $alpha = substr($line,34,6); $beta = substr($line,41,6); $gamma = substr($line,48,6); $alpha =~ s/\s+$//; $alpha =~ s/^\s+//; $beta =~ s/\s+$//; $beta =~ s/^\s+//; $gamma =~ s/\s+$//; $gamma =~ s/^\s+//; $symgrp=substr($line,67,3); $symgrp=~s/\s+$//; $symgrp=~s/^\s+//; $symm=substr($line,55,10); $symm=~s/\s+$//; $symm=~s/^\s+//; $symm2 = $symm; $symm2=~s/ //g; print STDOUT "a=$a, b=$b, c=$c, alpha=$alpha, beta=$beta, gamma=$gamma, symm=$symm2\n"; $cryst=1; } elsif(substr($line,0,5) eq "SCALE") { chomp($line); @chunks=split(' ',$line); $x11=$chunks[1]; $x21=$chunks[2]; $x31=$chunks[3]; $line=; chomp($line); @chunks=split(' ',$line); $x12=$chunks[1]; $x22=$chunks[2]; $x32=$chunks[3]; $line=; chomp($line); @chunks=split(' ',$line); $x13=$chunks[1]; $x23=$chunks[2]; $x33=$chunks[3]; $scale = [ [ $x11, $x21, $x31 ],[ $x12, $x22, $x32 ],[ $x13, $x23, $x33 ],]; print STDERR "Found SCALE matrix:\n"; matprint(mmult($scale,[[$a,0,0],[0,$b,0],[0,0,$c]])); $invscale = matinv($scale); $fscale=1; } } #find average x,y,z $residues = 0; $atoms=0; $xmin=10000; $xmax=0; $ymin=10000; $ymax=0; $zmin=10000; $zmax=0; do { if(substr($line,0,4) eq "ATOM" || substr($line,0,6) eq "HETATM") { $x=substr($line,30,8); $y=substr($line,38,8); $z=substr($line,46,8); $x =~ s/\s+$//; $x =~ s/^\s+//; $y =~ s/\s+$//; $y =~ s/^\s+//; $z =~ s/\s+$//; $z =~ s/^\s+//; $res = substr($line,22,4); $res =~ s/\s+$//; $res =~ s/^\s+//; if($x < $xmin) { $xmin = $x; $xminy = $y; $xminz = $z; } if($x > $xmax) { $xmax = $x; $xmaxy = $y; $xmaxz = $z; } if($y < $ymin) { $ymin = $y; $yminx = $x; $yminz = $z; } if($y > $ymax) { $ymax = $y; $ymaxx = $x; $ymaxz = $z; } if($z < $zmin) { $zmin = $z; $zminy = $y; $zminx = $x; } if($z > $zmax) { $zmax = $z; $zmaxy = $y; $zmaxx = $x; } $atoms++; $xavg+=$x; $yavg+=$y; $zavg+=$z; if($residues < $res) { $residues = $res; } } } while(defined($line=)); close(PDB); $xavg=int($xavg*1000/$atoms)/1000; $yavg=int($yavg*1000/$atoms)/1000; $zavg=int($zavg*1000/$atoms)/1000; print STDOUT "#atoms: $atoms\t #residues: $residues\n"; print STDOUT "max (x,y,z) = ($xmax, $ymax, $zmax)\n"; print STDOUT "avg (x,y,z) = ($xavg, $yavg, $zavg)\n"; $xavg2=int(($xmax+$xmin)*500)/1000; $yavg2=int(($ymax+$ymin)*500)/1000; $zavg2=int(($zmax+$zmin)*500)/1000; $length=int(sqrt( ($xavg2-$xmin)**2 + ($yavg2-$ymin)**2 + ($zavg2-$zmin)**2 )*10)/10; $shift=int(sqrt( ($xavg-$xavg2)**2 + ($yavg-$yavg2)**2 + ($zavg-$zavg2)**2 )*10)/10; $percent = int($shift/$length*1000)/10; print STDOUT "av2 (x,y,z) = ($xavg2, $yavg2, $zavg2) -- [ $shift A ($percent\%) ]\n"; print STDOUT "min (x,y,z) = ($xmin, $ymin, $zmin)\n"; #Find MAX RADIUS of pdb $maxdist=-1; $i=0; #insert points... $mainpoint=[ [$xavg,],[$yavg,],[$zavg,],]; push(@points,$mainpoint); push(@points,[ [$xmin,],[$xminy,],[$xminz,],]); push(@points,[ [$xmax,],[$xmaxy,],[$xmaxz,],]); push(@points,[ [$yminx,],[$ymin,],[$yminz,],]); push(@points,[ [$ymaxx,],[$ymax,],[$ymaxz,],]); push(@points,[ [$zminx,],[$zminy,],[$zmin,],]); push(@points,[ [$zmaxx,],[$zmaxy,],[$zmax,],]); #FIND MAX DISTANCE foreach $x2 ($xmax,$xmin) { foreach $y2 ($ymax,$ymin) { foreach $z2 ($zmax,$zmin) { $i++; $dist=int(sqrt( ($xavg-$x2)**2 + ($yavg-$y2)**2 + ($zavg-$z2)**2 )*10)/10; if($dist > $maxdist) { $maxdist=$dist; } } } } print STDOUT "max radius = [ $maxdist A ]\n"; $cutoff = $cutoff_mult*$maxdist + $shift + $cutoff_plus; $percent = int($cutoff/$length*1000)/10; print STDOUT "CUTOFF: [ $cutoff A ($percent\%) ]\n"; #$opnum=1 is identity, so start at 2, 0 is undefined for($opnum=2;$opnum<=$#opers+1;$opnum++) { $included=0; for($i=0;!$included && $i<=$#points;$i++) { $point = $points[$i]; $newpoint = matadd(mmult($matrix{$opnum},$point),$vector{$opnum}); foreach $point2 (@points) { #check if its withing distance of any points } # $point2=$mainpoint; $val = matsubt($newpoint,$point2); $num = mmult(transpose($val),$val); $dist = sqrt($num->[0][0]); # push(@unitcell,$newpoint); # print STDERR "$opnum: ".(int($dist))."\t"; if($dist < $cutoff) { #accepted, check for duplicates $dup=0; if($ingoodlist{$opnum}) { $dup=1; } if(!$dup) { push(@goodlist,$opnum); $ingoodlist{$opnum}=1; $included=1; } } } } } print STDOUT "Kept ".($#goodlist+1)." of $#opers non-identity transformations\n"; if($#goodlist+1 < $#opers) { $range+=2; } #create shifts $changes=0; do { $maxrefnum=0; for(my $i=-$range;$i<=$range;$i++) { for(my $j=-$range;$j<=$range;$j++) { for(my $k=-$range;$k<=$range;$k++) { $refnum=($i+$range)*(($range*2+1)**2)+($j+$range)*($range*2+1)+($k+$range)+1; if(!($i==0 && $j==0 && $k==0)) { $shifts{$refnum} =[[$i],[$j],[$k],]; if($refnum > $maxrefnum) { $maxrefnum = $refnum; } } else { $zeronum=$refnum; } }}} if($maxrefnum*($#opers+1) > $maxtrans) { $range--; $changes++; } elsif($maxrefnum*($#opers+1) < $mintrans) { $range++; $changes++; } #NEED TO BE CAREFUL ABOUT INFINITE LOOPS IF ($#opers+1) is LARGE #Fixed with changes requirement... } while($changes < 6 && ($maxrefnum*($#opers+1) > $maxtrans || $maxrefnum*($#opers+1) < $mintrans)); $shifts{$zeronum}=$shifts{$maxrefnum}; #need to account for lack of (0,0,0) $maxrefnum--; # $refnum = 0 reserved for identity $shifts{0}=[[0],[0],[0]]; print STDOUT "Using range=[-$range\-$range\] for ".($maxrefnum*($#opers+1))." translations\n"; print STDERR "Starting translations... [-$range\...$range: ".($maxrefnum*($#opers+1))."]\n"; print STDERR "This takes awhile, but it is worth it!!!\n"; print STDERR "|----+----+----+----+----|----+----+----+----+----|\n"; #translate each symmetry and find distance... $cut=int($maxrefnum*($#opers+1)/50); $count=0; for($refnum=1; $refnum<=$maxrefnum; $refnum++) { $shift=$shifts{$refnum}; for($opnum=1;$opnum<=$#opers+1;$opnum++) { $included=0; $unique_id=$refnum."_".$opnum; $count++; if($count % $cut == 0) { print STDERR "^"; } for($i=0;!$included && $i<=$#points;$i++) { $point = $points[$i]; #operate $newpoint = matadd(mmult($matrix{$opnum},$point),$vector{$opnum}); #scale and shift then transform back $newpoint = mmult($scale,$newpoint); $newpoint = matadd($newpoint,$shift); $newpoint = mmult($invscale,$newpoint); #check distance. foreach $point2 (@points) { #check if its withing distance of any points # $point2 = $mainpoint; $val = matsubt($newpoint,$point2); $dist = sqrt(mmult(transpose($val),$val)->[0][0]); if($dist < $cutoff) { #accepted, check for duplicates $dup=0; if($ingoodlist{$unique_id}) { $dup=1; } if(!$dup) { push(@goodlist,$unique_id); $ingoodlist{$unique_id}=1; $included=1; } } } } } } print STDERR "\n"; print STDOUT "Now have ".($#goodlist+1)." of ".(($#opers+1)*$maxrefnum) ." non-identity molecules!\n"; print STDERR "Time to generate new molecules...\n"; my $i=0; if($gen_zero) { print STDERR "Generating Identity Molecule as well\n"; push(@goodlist,1); $ingoodlist{1}=1; } else { print STDERR "Ignoring Identity Molecule\n"; } print STDOUT "$atoms atoms to generate\n"; $cut=int($atoms/50); open (OUT, "> $outfile") or die "failed opening $outfile for writing: $!\n"; my $struct=0; foreach $id (@goodlist) { $struct++; print STDERR "Working on structure $struct of ".($#goodlist+1).", ID=$id\n"; print STDERR "|----+----+----+----+----|----+----+----+----+----|\n"; #$unique_id=$refnum."_".$opnum; ($refnum,$opnum)=split('_',$id); if(!$opnum) { $opnum=$refnum; $refnum=0; } $rnum=mmult(transpose($shifts{$refnum}),$shifts{$refnum})->[0][0]; open (PDB, "< $pdbfile") or die "failed opening $pdbfile for reading: $!\n"; $count=0; #QUEUE UP FILE while(defined($line=) && !(substr($line,0,4) eq "ATOM" || substr($line,0,6) eq "HETATM")) {} do { if(substr($line,0,4) eq "ATOM" || substr($line,0,6) eq "HETATM") { $x=substr($line,30,8); $y=substr($line,38,8); $z=substr($line,46,8); $line =~ s/\s+$//; $line =~ s/^\s+//; # print STDERR $line."\n"; $x =~ s/\s+$//; $x =~ s/^\s+//; $y =~ s/\s+$//; $y =~ s/^\s+//; $z =~ s/\s+$//; $z =~ s/^\s+//; $res = substr($line,22,4); $res =~ s/\s+$//; $res =~ s/^\s+//; $vec=[[$x],[$y],[$z]]; #symmerty operate (opnum=1 => identity) $newvec = matadd(mmult($matrix{$opnum},$vec),$vector{$opnum}); #scale and shift (refnum=0 => identity) $newvec = matadd(mmult($scale,$newvec),$shifts{$refnum}); #transform back $newvec = mmult($invscale,$newvec); $x2=$newvec->[0][0]; $y2=$newvec->[1][0]; $z2=$newvec->[2][0]; substr($line,46,8)=proper($z2); substr($line,38,8)=proper($y2); substr($line,30,8)=proper($x2); if(!($refnum==0 && $opnum==1)) { substr($line,56,1)=0; $bfact = proper($rnum+$opnum/10); substr($bfact,-1)=""; substr($bfact,0,2)=""; substr($line,61,5)=$bfact; $newres = proper(($residues*$struct + $res)%10000); substr($newres,-4)=""; substr($line,22,4)=$newres; my $label = makelabel($struct); substr($line,20,1)=substr($label,0,1); } print OUT $line."\n"; $count++; if($count % $cut == 0) { print STDERR "^"; } } } while(defined($line=)); print OUT "COMPND\n"; print STDERR "\nfinal count: $count\n"; } print OUT "END\n"; sub makelabel { my $struct = $_[0]; my $label; if($struct < 10) {$label=$struct;$label=~tr/1-9/A-I/;} elsif($struct<20) {$label=$struct-10;$label=~tr/0-9/J-S/;} elsif($struct<27) {$label=$struct-20;$label=~tr/0-6/T-Z/;} elsif($struct<37) {$label=$struct-27;$label=~tr/0-9/a-j/;} elsif($struct<47) {$label=$struct-37;$label=~tr/0-9/k-t/;} elsif($struct<53) {$label=$struct-47;$label=~tr/0-5/u-z/;} else {$label=($struct-52)%10;} return $label; } sub proper { my ($num) = @_; $toler = 0.00001; $num = int($num*1000)/1000; $text=""; if($num >= 0) { if($num < 10) { $text = " ".$num; } elsif($num < 100) { $text = " ".$num; } elsif($num < 1000) { $text = " ".$num; } else { $text = $num; } } elsif($num < 0) { if($num > -10) { $text = " ".$num; } elsif($num > -100) { $text = " ".$num; } else { $text = $num; } } if(abs(int($num) - $num) < $toler || (1 - abs(int($num) - $num)) < $toler) { $text = $text."\.000"; } elsif(abs(int($num*10) - $num*10) < $toler || (1 - abs(int($num*10) - $num*10)) < $toler) { $text = $text."00"; } elsif(abs(int($num*100) - $num*100) < $toler || (1 - abs(int($num*100) - $num*100)) < $toler) { $text = $text."0"; } else { $text = $text; } if(length($text) != 8) { if((int($num*100) - $num*100) == 0) { $text = $text."0"; } print STDERR "ERROR: Incorrect Length: \'$text\'\,\'" .abs($num*100 - int($num*100))."\'\n"; return " 0.000"; } return $text; } sub mmult { my ($m1,$m2) = @_; my ($m1rows,$m1cols) = matdim($m1); my ($m2rows,$m2cols) = matdim($m2); if($m1cols != $m2rows && ($m2rows == 1 || $m2cols == 1) ) { print "multiply $m1rows x $m1cols by $m2rows x $m2cols error\n"; $m2 = transpose($m2); ($m2rows,$m2cols) = matdim($m2); print "transposed: multiply $m1rows x $m1cols by $m2rows x $m2cols\n"; } unless($m1cols == $m2rows) { # raise exception print "multiply $m1rows x $m1cols by $m2rows x $m2cols\n"; die "Cannot multiply these matrices bad array sizes: $m1cols != $m2rows $!"; } my $result = []; my ($i,$j,$k); for($i=0;$i<$m1rows;$i++) { for($j=0;$j<$m2cols;$j++) { for($k=0;$k<$m1cols;$k++) { $result->[$i][$j] += $m1->[$i][$k] * $m2->[$k][$j]; } } } return $result; } sub matsubt { my ($m1,$m2) = @_; my ($m1rows,$m1cols) = matdim($m1); my ($m2rows,$m2cols) = matdim($m2); if($m1cols != $m2cols && ($m2rows == 1 || $m2cols == 1) ) { print "multiply $m1rows x $m1cols by $m2rows x $m2cols error\n"; $m2 = transpose($m2); ($m2rows,$m2cols) = matdim($m2); print "transposed: multiply $m1rows x $m1cols by $m2rows x $m2cols\n"; } unless($m1cols == $m2cols && $m1rows == $m2rows ) { # raise exception print "subtract $m1rows x $m1cols by $m2rows x $m2cols\n"; die "Cannot subtract these matrices bad array sizes $!"; } my $result = []; my ($i,$j); for($i=0;$i<$m1rows;$i++) { for($j=0;$j<$m1cols;$j++) { $result->[$i][$j] = $m1->[$i][$j] - $m2->[$i][$j]; } } return $result; } sub matadd { my ($m1,$m2) = @_; my ($m1rows,$m1cols) = matdim($m1); my ($m2rows,$m2cols) = matdim($m2); if($m1cols != $m2cols && ($m2rows == 1 || $m2cols == 1) ) { print "multiply $m1rows x $m1cols by $m2rows x $m2cols error\n"; $m2 = transpose($m2); ($m2rows,$m2cols) = matdim($m2); print "transposed: multiply $m1rows x $m1cols by $m2rows x $m2cols\n"; } unless($m1cols == $m2cols && $m1rows == $m2rows ) { # raise exception print "subtract $m1rows x $m1cols by $m2rows x $m2cols\n"; die "Cannot subtract these matrices bad array sizes $!"; } my $result = []; my ($i,$j); for($i=0;$i<$m1rows;$i++) { for($j=0;$j<$m1cols;$j++) { $result->[$i][$j] = $m1->[$i][$j] + $m2->[$i][$j]; } } return $result; } sub veclen { my $ary_ref = $_[0]; my $type = ref $ary_ref; if($type ne "ARRAY") { die "$type is bad array ref for $ary_ref $!"; } return scalar(@$ary_ref); } sub matdim { my $m = $_[0]; my $rows = veclen($m); my $cols = veclen($m->[0]); return ($rows,$cols); } sub matprint { my $m = $_[0]; my ($rows,$cols) = matdim($m); print STDERR "Matrix size: $rows x $cols\n"; for(my $i=0;$i<$rows;$i++) { print STDERR "[\t"; for(my $j=0;$j<$cols;$j++) { print STDERR (int($m->[$i][$j]*10000)/10000)."\t"; } print STDERR "]\n"; } } sub matinv { #Assume 3x3 my $m = $_[0]; my ($rows,$cols) = matdim($m); if(!($rows == 3 && $cols == 3)) { die "cannot invert non-3x3 matrix $!"; } my $det = matdet($m); if($det == 0) { die "matrix is singular, cannot inverse $!"; } my $result = []; for(my $i=0;$i<$cols;$i++) { for(my $j=0;$j<$rows;$j++) { $result->[$j][$i] = ( $m->[($i+1)%3][($j+1)%3]*$m->[($i+2)%3][($j+2)%3] - $m->[($i+2)%3][($j+1)%3]*$m->[($i+1)%3][($j+2)%3] ); $result->[$j][$i]/=$det; } } # matprint($result); return $result; } sub matdet { #assume 3x3 my $m = $_[0]; my ($rows,$cols) = matdim($m); if(!($rows == 3 && $cols == 3)) { die "cannot do non-3x3 matrix $!"; } my $det = $m->[0][0]*($m->[1][1]*$m->[2][2] - $m->[1][2]*$m->[2][1]) - $m->[1][0]*($m->[0][1]*$m->[2][2] - $m->[0][2]*$m->[2][1]) + $m->[2][0]*($m->[0][1]*$m->[1][2] - $m->[0][2]*$m->[1][1]); return $det; } sub transpose { my $m = $_[0]; my ($rows,$cols) = matdim($m); my $result = []; # print "transpose $rows x $cols"; for(my $i=0;$i<$cols;$i++) { for(my $j=0;$j<$rows;$j++) { $result->[$i][$j]=$m->[$j][$i]; } } my ($rows2,$cols2) = matdim($result); # print " into $rows2 x $cols2\n"; return $result; }