#include #include "pdb.h" file_records * current_file ; int model_number = -1 ; int read_nmr_files = 0 ; /* This file contain a code to read pdb files in and write them out. variable name conventions : ff FILE * file file_records * Y 25/10/92 start current version just know about brookhaven format. volume and access have to be added */ int check_sulphur_bridges = 1; int deal_with_disorder = 1; void invert_the_scale(file_records *file) { symmetry_operation *so = &file->scale ; a_matrix *ma = &file->inverse_scale ; a_matrix temp ; a_matrix * tma = &temp ; double div; tma->xx = so->yy * so->zz - so->zy * so->yz ; tma->yy = so->xx * so->zz - so->zx * so->xz ; tma->zz = so->yy * so->xx - so->xy * so->yx ; tma->yx = so->xz * so->zy - so->zz * so->xy ; tma->zx = so->xy * so->yz - so->yy * so->xz ; tma->xy = so->yz * so->zx - so->zz * so->yx ; tma->zy = so->yx * so->xz - so->xx * so->yz ; tma->xz = so->zy * so->yx - so->yy * so->zx ; tma->yz = so->zx * so->xy - so->xx * so->zy ; div = so->xx * tma->xx +so->yx * tma->yx +so->zx * tma->zx ; if (fabs(div) > 0.000001) { ma->xx = tma->xx / div ; ma->yx = tma->xy / div ; ma->zx = tma->xz / div ; ma->xy = tma->yx / div ; ma->yy = tma->yy / div ; ma->zy = tma->yz / div ; ma->xz = tma->zx / div ; ma->yz = tma->zy / div ; ma->zz = tma->zz / div ; } } /* the strings maybe wrong */ translation record_types[] = { { RECORD_ATOM, "ATOM "}, { RECORD_HETATM, "HETATM"}, { RECORD_DUMMY, "DUMMY "}, { RECORD_SSBOND, "SSBOND"}, { RECORD_REMARK, "REMARK"} , { RECORD_EXPDTA, "EXPDTA"} , { RECORD_MODEL, "MODEL "} , { RECORD_ENDMDL, "ENDMDL"} , { RECORD_HEADER, "HEADER"} , { RECORD_CRYST, "CRYST1"}, { RECORD_SCALE, "SCALE1"}, { RECORD_SCALE, "SCALE2"}, { RECORD_SCALE, "SCALE3"} } ; /* find the type of the record from the string in the beginning record. return 0 if it is unknown */ int find_record_type (char *string) { int ii ; for (ii= 0 ; ii< sizeof ( record_types)/ sizeof(translation); ii++) if (! strncmp (string , record_types[ii].string, 6)) return record_types[ii].number ; return 0; } /* */ void copy_all_record (char *string,atom_record *newrec) { strncpy(newrec->ident, string, 100) ; } /* SSBOND 1 CYS 5 CYS 55 5PTI 67 SSBOND 3 CYS D 87H CYS D 133 01234567890123456789012345678901234567890 00000000001111111111222222222233333333334 */ static int ss_bonded[200] ; static int ss_bonded_x[200] ; static int ss_bonded_c[200] ; static int max_ssbonded ; void record_ssbond(char * string) { if(string[11] == 'C' && string[12] == 'Y' && string[13] == 'S') { read_residue_spec(string + 15, &ss_bonded[max_ssbonded], &ss_bonded_c[max_ssbonded], &ss_bonded_x[max_ssbonded]) ; max_ssbonded++ ; } if(string[25] == 'C' && string[26] == 'Y' && string[27] == 'S') { read_residue_spec(string + 29, &ss_bonded[max_ssbonded], &ss_bonded_c[max_ssbonded], &ss_bonded_x[max_ssbonded]) ; max_ssbonded++ ; } } int action_flag = 0 ; void check_for_nmr (char *string) { if (strstr(string , "NMR") != NULL) action_flag = -1 ; } char * file_header ; void record_header(char *string) { maybe_free(file_header) ; file_header = new_string( string) ; } void start_model (char *string) { model_number = atoi(string + 11 ) ; } void record_end_model(char *string) { action_flag = ACTION_END_MODEL ; } void record_cryst (char *string) { current_file->a = strtod (string + 7,NULL) ; current_file->b = strtod (string + 16,NULL) ; current_file->c = strtod (string + 25,NULL) ; current_file->alpha = strtod (string + 34,NULL) ; current_file->beta = strtod (string + 41,NULL) ; current_file->gamma = strtod (string + 48,NULL) ; { char *fill = current_file->sg.s_name; char *read = string +55 ; while (read < string + 66) { char cc = *read ++ ; if (!isspace (cc)) *fill++ = cc; } *fill = '\0' ; } } void record_scale(char *string) { double *xpoi,*ypoi,*zpoi, *tpoi ; symmetry_operation *so = ¤t_file->scale; switch (string[5]) { case '1' : zpoi = &so->zx ; ypoi = &so->yx; xpoi = &so->xx; tpoi = &so->addx ; break; case '2' : zpoi = &so->zy ; ypoi = &so->yy; xpoi = &so->xy; tpoi = &so->addy ; break; case '3' : zpoi = &so->zz ; ypoi = &so->yz; xpoi = &so->xz; tpoi = &so->addz ; break; } sscanf(string +11 , LF_STRING_16, xpoi,ypoi,zpoi,tpoi) ; } function_translation other_type_functions[] = {{ RECORD_SSBOND, record_ssbond} , {RECORD_EXPDTA, check_for_nmr} , {RECORD_SCALE, record_scale} , {RECORD_CRYST, record_cryst} , {RECORD_HEADER, record_header} , {RECORD_MODEL , start_model} , {RECORD_ENDMDL , record_end_model}} ; void maybe_call_specific_function (int record_type, char *string) { int ii ; for (ii= 0 ; ii< sizeof ( other_type_functions)/ sizeof(function_translation); ii++) if (record_type == other_type_functions[ii].number) { (*other_type_functions[ii].function )(string) ; return ; } } /* The following functions record any cys sg atoms, and chek if it is in ssbond distance from any other sg atom. This is done only if the file does not contain ssbond records, which are assumed to be correct */ atom_record *sg_atoms[300] ; int max_sg_atom ; void add_suphur_bridges(file_records *file) { int ii, jj; for(ii= 0 ; ii< max_sg_atom - 1 ; ii++) { atom_record * atom = sg_atoms[ii] ; if(!(atom->flags &(IGNORE_ATOM | SULPHUR_BRIDGE))) { for (jj = ii +1 ; jj < max_sg_atom ; jj++) { atom_record * atom1 = sg_atoms[jj] ; if (!(atom1->flags &IGNORE_ATOM) && atoms_distance(atom,atom1) < 2.5 && (atom->resnum != atom1->resnum || atom->chain != atom1->chain || atom->x_char != atom1->x_char )) { print_an_atom(atom, stderr); print_an_atom(atom1, stderr); fprintf(stderr, " adding sulpur bridge\n") ; SWITCH_ON(atom,SULPHUR_BRIDGE); SWITCH_ON(atom1,SULPHUR_BRIDGE); { residue_record * res = residue_by_number(file, atom->resnum,atom->chain,atom->x_char) ; if(res != NULL) res->flags |= SULPHUR_BRIDGE ; } { residue_record * res = residue_by_number(file, atom1->resnum,atom1->chain,atom1->x_char) ; if(res != NULL) res->flags |= SULPHUR_BRIDGE ; } } } } } } void record_sg_atom(atom_record *atom) { sg_atoms[max_sg_atom++] = atom; } /* read a record from the file. if always is non-zero, return records for non-atoms records, too, otherwise skip them. Return nil on end of file */ /* brookhaven format : ATOM 131 N ASN I 19 13.270 -37.510 4.242 1.00 57.52 100.25 100.25 12345|1234| 123||123||123|| 1234567|1234567|1234567|12345|12345| 12345| 12345| 0000000000111111111122222222223333333333444444444455555555556666666666777777777788888888889999 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123 */ atom_record *read_record (FILE *ff,int format) { char string [200] ; int ii , type; int allrecords = format & IO_ALL_RECORDS ; int read_hydrogen = format & IO_HYDROGEN ; while (1) { char *sa = fgets (string, 200, ff) ; if (sa == NULL) return NULL ; if (strlen(string) < 6) continue; type = find_record_type(string) ; if (RECORD_ATOM > type) { maybe_call_specific_function (type,string) ; if (file_header == NULL) file_header = new_string(string) ; if (allrecords) { atom_record * newrec = (atom_record *) malloc(sizeof(atom_record)); newrec->atomtype = type ; copy_all_record (string, newrec); return newrec ; } if (action_flag == ACTION_END_MODEL) return NULL; } else { char t12 = string[12] ; char t13= string[13] ; if (t12 == ' ' || isdigit(t12)) t12 = t13 ; if (read_hydrogen || (t12 != 'H' & t12 != 'D' )) { atom_record * newrec = (atom_record *) malloc(sizeof(atom_record)); char *name = newrec->name ; int flags = 0 ; newrec->atomtype = type ; strncpy(newrec->ident,string, 6) ; newrec->ident[6] = 0 ; sscanf (string +6 ,"%5d" , &newrec->number) ; strncpy(newrec->resname,string+17, 3) ; if (string[12] == ' ') { strncpy(name,string+13, 3) ; name[3] = '\0'; if(name[0] == 'S') { if ( name[1] == 'G' && name[2] == ' ' &&name[3] == '\0' && newrec->resname[0] == 'C' && newrec->resname[1] == 'Y' && newrec->resname[2] == 'S') record_sg_atom(newrec) ; } else if(type == RECORD_ATOM && ((name[0] == 'C' && name[1] == ' ') || (name[0] == 'C' && name[1] == 'A' && name[2] == ' ')|| (name[0] == 'O' && name[1] == ' ') || (name[0] == 'N' && name[1] == ' ') )) flags |= MAIN_CHAIN_ATOM ; } else strncpy(name,string+12, 4) ; newrec->extraflag= string[16] ; if (newrec->extraflag != ' ' && newrec->extraflag != 'A') flags |= IGNORE_ATOM ; newrec->resname[3] = 0 ; { char chain = string[21] ; if (newrec->atomtype != RECORD_ATOM) chain = tolower(chain) ; newrec->chain = chain; } newrec->x_char = string[26] ; sscanf(string+22, "%4d", &newrec->resnum); sscanf(string+30, LF_STRING_07, &newrec->x,&newrec->y,&newrec->z, &newrec->occ,&newrec->b); newrec->restype = find_residue_type(newrec->resname) ; newrec->definition = find_atom_definition(newrec->restype,newrec->name) ; sscanf(string+67, LF_STRING_08,&newrec->surface, &newrec->volume) ; newrec->flags = flags ; return newrec; } } } } /* reads a pdb file. This ignores non-atom records */ file_records *read_pdb_file (FILE *ff,int format) { int num = 0 ; atom_record * res ; int do_break = 0 ; int max = 200 ; int len = max * sizeof(res); atom_record **temp_atom = (atom_record **)malloc (len) ; file_records temp_file ; bzero(&temp_file, sizeof(file_records)) ; current_file = & temp_file; max_sg_atom = 0; model_number = -1 ; action_flag = 0 ; while (!do_break) { atom_record *res = read_record(ff, format) ; if (res == NULL) break; temp_atom[num++] = res ; switch(action_flag) { case ACTION_NMR : if (read_nmr_files <= 0) { for(max = 0 ; max < num ; max++) free_atom_record(temp_atom[max]) ; return NULL ; } ; break; case ACTION_END_MODEL :do_break = 1 ; break ; } if (num >= max ) { atom_record **new =(atom_record **) malloc (len + len) ; bcopy(temp_atom, new, len) ; free (temp_atom) ; temp_atom = new ; len = len +len ; max = max + max ; } } { file_records * file =(file_records *) malloc (sizeof (file_records )) ; int ii ; bcopy(&temp_file,file, sizeof(file_records)) ; invert_the_scale(file) ; file->atoms = temp_atom ; file->atomnum = num ; make_residues(file) ; for(ii = 0; ii< max_ssbonded ; ii++) { residue_record *res = residue_by_number(file, ss_bonded[ii], ss_bonded_c[ii], ss_bonded_x[ii]); if (res != NULL) { atom_record *atom = find_atom_in_residue(res,"SG") ; if (atom != NULL) SWITCH_ON(atom,SULPHUR_BRIDGE) ; res->flags |= SULPHUR_BRIDGE ; } } if (((!max_ssbonded && check_sulphur_bridges ) || check_sulphur_bridges == 2) && max_sg_atom ) add_suphur_bridges(file) ; file->name = NULL ; LinkUp_Residues_Atoms(file); return file ; } } /* Take a name of a file and return a file_records of it, or NULL if failed to open it */ file_records *open_and_read_pdb (char * name,int format) { if (dumm_residue == NULL) serious_error(1, "do_initialization was not called before reading a file","") ; fprintf(stderr,"open_and_read_pdb(): Trying to read %s\n", name) ; { FILE * ff= fopen(name,"r") ; file_header = NULL ; max_ssbonded = 0 ; if (ff == NULL) { fprintf(stderr,"open_and_read_pdb(): Unable to open file, returning NULL.\n"); return(NULL) ; } else { file_records *new = read_pdb_file ( ff, format) ; fclose(ff) ; if (new != NULL) { new->name = new_string(name); new->header = file_header; } fprintf(stderr,"open_and_read_pdb(): Finished reading %s\n", name) ; return new ; } } } file_records *repeat_read_pdb (FILE *ff, file_records *file,int format) { file_records * new = read_pdb_file(ff,format) ; if (action_flag != ACTION_END_MODEL) { if (file != NULL) free_file_records(file) ; if (new != NULL) free_file_records(new) ; return NULL; } if(file != NULL) { new->name = file->name; new->header = file->header ; file->header = NULL; file->name = NULL; free_file_records(file) ; } else new->header = file_header ; return new ; } FILE *open_for_repeat_read (char *name) { read_nmr_files = 1; max_ssbonded = 0 ; max_sg_atom = 0; return fopen(name,"r"); } /* write the header for a pdb file. */ void write_pdb_header(FILE * ff , file_records *file, int format) { } /* brookhaven format : ATOM 131 N ASN I 19 13.270 -37.510 4.242 1.00 57.52 100.25 100.25 12345|1234| 123||123||123| 1234567|1234567|1234567|12345|12345| 12345| 12345| 0000000000111111111122222222223333333333444444444455555555556666666666777777777788888888889999 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123 */ void write_pdb_record (FILE *ff, atom_record * atom,int format) { if (atom == NULL) { fprintf(stderr,"write_pdb_record(): atom == NULL\n"); } else if (RECORD_ATOM > atom->atomtype) { fputs(atom->ident, ff) ; } else { if( (format & IO_ONLY_SELECTED) && (atom->flags & (IGNORE_ATOM | DONT_CALCULATE_ATOM))) return ; if( (format & IO_NOT_IGNORED) && (atom->flags & IGNORE_ATOM)) return ; if (format & IO_SHORT) fprintf (ff , "%s%3.3d:%4.4s ", atom->resname,atom->resnum, atom->name) ; else fprintf(ff , atom->name[3] == '\0' ? LF_STRING_09 : LF_STRING_10 atom->ident, atom->number, atom->name, atom->extraflag, atom->resname, atom->chain, atom->resnum, atom->x_char, atom->x, atom->y, atom->z, atom->occ, atom->b) ; if (format & IO_FOOTPLUS) { char * footnote = (atom->footnote == NULL ? " " : atom->footnote) ; char * strg71on = (atom->strg71on == NULL ? " " : atom->strg71on) ; /* PDB format from here on is 1X,I3 */ fprintf(ff," %3s%-10s",footnote,strg71on); } if (format & IO_SURFACE) fprintf(ff,LF_STRING_11, atom->surface); if (format & IO_VOLUME) fprintf(ff,(format & IO_SURFACE) ? LF_STRING_12 : LF_STRING_13, atom->volume,atom->dumm_solvents); fputc('\n', ff) ; } } file_records * write_pdb_file (FILE *ff, file_records *file,int format) { int num ; for (num = 0 ; num < file->atomnum ; num++) write_pdb_record (ff , file->atoms[num],format) ; return file ; } /* Take a name of a file and return a file_records of it, or NULL if failed to open it */ file_records * open_and_write_pdb (char *name, file_records * file,int format) { FILE * ff = fopen (name , "w") ; if (ff == NULL) return NULL ; write_pdb_header(ff, file,format) ; write_pdb_file(ff, file,format) ; return file ; } /* MBG */ /* ============== */ file_records *open_pdb_stdin (int format) /* ============== */ { if (dumm_residue == NULL) serious_error(1, "do_initialization was not called before reading a file","") ; fprintf(stderr, "Trying to read stdin\n") ; file_header = NULL ; max_ssbonded = 0 ; { file_records *new = read_pdb_file ( stdin, format) ; if (new != NULL) { new->name = "stdin" ; new->header = file_header; } return new ; } }