# MACROS that need to be set: # TARGET= SET_IN_Makefile # REDO-SEED if set, refetch seed from orf_trans # REDO-SEARCHES if set, causes the template library to be # included in the dependencies of the searches. # (if not defined, then template library not in dependencies # REDO-T2K if set, causes NR to be included in the dependencies # for the t2k iterative search, usually triggering rebuilding # the t2k.a2m.gz file. # macros that control the t2k iterative search # BLAST_MAX default 10,000, set it smaller for long proteins with many # homologs that take a long time to run. # ADPSTYLE default 5, set it to 1 for long proteins that cause hmmscore # to crash in the final alignment step of target2k # macros that control compression # NOGZIP_PDB if set causes undertaker.pdb file not to be gzipped. # macros used in recursive makes for pairwise alignments: # PRED needs to be set to template chain ID in recursive makes for # pairwise alignments # PRED2 first two letters of PRED, set at the same time as PRED. # ALIGN_VITERBI if set, causes only ADP=1 and not ADP=5 alignments to # be tried # The following macros are set automatically when making pairwise alignments. # ALIGN-TYPE local or global, set when generating pairwise alignments # ALIGN-NAME # ADP # TWO_TRACK_ADP (used to set ADP for two-track alignments) # SW # FSSP # FSSP2 # STRUCT-ALPH # STRUCT-WEIGHT # TO DO: # * Add more pseudo-targets to get finer-grained output to summary.html # # get the version of the compiler used on this machine # (not needed for running on cluster, as we are not checking GCC_VERSION # and g++ is not installed) # ifndef GCC_VERSION # GCC_VERSION := ${shell g++ --version} # endif # ifneq '${words ${GCC_VERSION}}' '1' # GCC_VERSION := ${word 3,${GCC_VERSION}} # endif # Normally compute mutual information for all column pairs, # but turn it off if NO-MUTUAL is set. 
# (computation is now fast enough that this shouldn't be necessary) ifndef NO-MUTUAL MUTUAL-DEPEND = mutual_info_start_section mutual_info 8.end_section else MUTUAL-DEPEND = endif # what is first column for numbering alignments and mutual information ifndef START-COL START-COL := 1 endif AL_METHOD= t2k TARG_AL = ${TARGET}.${AL_METHOD} WEBROOTDIR=/projects/compbio/experiments/protein-predict/SAM_T02 YEAST = /projects/compbio/experiments/protein-predict/yeast YEAST-SCRIPTS = ${YEAST}/scripts UNDERTAKER-SCRIPTS = /cse/faculty/karplus/undertaker/scripts ifndef UNDERTAKER # UNDERTAKER = /cse/faculty/karplus/undertaker/undertaker # UNDERTAKER = ${PCB-SUB}/undertaker UNDERTAKER = /projects/compbio/programs/undertaker/undertaker endif WORKDIR := $(shell pwd) HOST := $(shell hostname) PCL = /projects/compbio/lib PCB = /projects/compbio/bin PCBS = ${PCB}/scripts EXTRACT = ${PCBS}/extract-from-fasta # how many residues wide should each row of the logos be? # Ideally, we'd like this to be computed from the sequence length, # with length<=200 yielding 50 # 200 ${TARGET}.ids ${EXTRACT} ${TARGET}.ids < ${MASTER_LIST} > $@ #create html results page ifndef CREATE_SUMMARY_SCRIPT CREATE_SUMMARY_SCRIPT = ${YEAST-SCRIPTS}/create_summary_html endif summary_create: ${TARGET}.a2m ${CREATE_SUMMARY_SCRIPT} \ ${TARGET} ${TARGET}.a2m> ${WORKDIR}/summary.html #start the inputs section summary_inputs: ${YEAST-SCRIPTS}/add_section_head_summary_html \ Inputs >> ${WORKDIR}/summary.html #acknowledge sequence receipt #add pointer to sequence file to the html results page receipt_ack: ${TARGET}.a2m echo Received sequence ${TARGET}.a2m ${YEAST-SCRIPTS}/add_summary_html \ "Submitted sequence(s)" \ ${TARGET}.a2m >> ${WORKDIR}/summary.html ${YEAST-SCRIPTS}/add_summary_html \ "README file" \ README >> ${WORKDIR}/summary.html ${YEAST-SCRIPTS}/add_summary_html \ "Provided documentation" \ ${TARGET}.doc.html >> ${WORKDIR}/summary.html summary_alignment: ${YEAST-SCRIPTS}/add_section_head_summary_html \ 
'Multiple alignment' >> ${WORKDIR}/summary.html build_t2k_alignment: ${TARG_AL}.a2m.gz ${YEAST-SCRIPTS}/add_summary_html \ "SAM_T02 multiple alignment in a2m format" \ $^ >> ${WORKDIR}/summary.html build_pretty_alignment: ${TARG_AL}.pa.html ${YEAST-SCRIPTS}/add_summary_html \ "SAM_T02 multiple alignment in pretty html format" \ $^ >> ${WORKDIR}/summary.html build_mod: ${TARG_AL}-w0.5.mod ${YEAST-SCRIPTS}/add_summary_html \ "SAM_T02 target hidden Markov model" \ $^ >> ${WORKDIR}/summary.html #start the two-track section summary_2track: ${YEAST-SCRIPTS}/add_section_head_summary_html \ '\ Secondary Structure Prediction\ \ (Explanation of secondary-structure predictions) \ ' \ >> ${WORKDIR}/summary.html 2track: ${TARG_AL}-thin90.a2m.gz \ do_str2 str2_logo_record \ do_dssp dssp_logo_record \ do_stride stride_logo_record \ do_alpha alpha_logo_record \ do_CB_burial_14_7 CB_burial_14_7_logo_record \ do_dssp-ehl2 dssp-ehl2_logo_record do_dssp: ${TARG_AL}.dssp-ebghstl.rdb \ ${TARG_AL}.dssp-ebghstl.seq \ ${TARG_AL}.dssp-ebghstl.mod \ ${TARG_AL}.dssp-color.rasmol ${YEAST-SCRIPTS}/add_summary_multiple_formats \ "DSSP EBGHSTL structure prediction" \ ${TARG_AL}.dssp-ebghstl \ rdb seq \ >> ${WORKDIR}/summary.html do_stride: ${TARG_AL}.stride-ebghtl.rdb \ ${TARG_AL}.stride-ebghtl.seq \ ${TARG_AL}.stride-ebghtl.mod \ ${TARG_AL}.stride-color.rasmol ${YEAST-SCRIPTS}/add_summary_multiple_formats \ "Stride EBGHTL structure prediction" \ ${TARG_AL}.stride-ebghtl \ rdb seq \ >> ${WORKDIR}/summary.html do_str2: ${TARG_AL}.str2.rdb \ ${TARG_AL}.str2.seq \ ${TARG_AL}.str2.mod \ ${TARG_AL}.str2-color.rasmol ${YEAST-SCRIPTS}/add_summary_multiple_formats \ "Str2 structure prediction" \ ${TARG_AL}.str2 \ rdb seq \ >> ${WORKDIR}/summary.html do_alpha: ${TARG_AL}.alpha.rdb \ ${TARG_AL}.alpha.seq \ ${TARG_AL}.alpha.mod \ ${TARG_AL}.alpha-color.rasmol ${YEAST-SCRIPTS}/add_summary_multiple_formats \ "Alpha angle prediction" \ ${TARG_AL}.alpha\ rdb seq \ >> ${WORKDIR}/summary.html do_CB_burial_14_7: 
${TARG_AL}.CB_burial_14_7.rdb \ ${TARG_AL}.CB_burial_14_7.seq \ ${TARG_AL}.CB_burial_14_7.mod \ ${TARG_AL}.CB_burial_14_7-color.rasmol ${YEAST-SCRIPTS}/add_summary_multiple_formats \ "Burial (CB_14_7) prediction" \ ${TARG_AL}.CB_burial_14_7\ rdb seq \ >> ${WORKDIR}/summary.html do_dssp-ehl2: ${TARG_AL}.dssp-ehl2.rdb \ ${TARG_AL}.dssp-ehl2 \ ${TARG_AL}.dssp-ehl2.mod ${YEAST-SCRIPTS}/add_summary_multiple_formats \ "DSSP 3-value prediction" \ ${TARG_AL}.dssp-ehl2 \ rdb seq \ >> ${WORKDIR}/summary.html # BUG: don't have rdb->seq format conversion for dssp-ehl2 # ${TARG_AL}.dssp-ehl2-color.rasmol # ${YEAST-SCRIPTS}/add_summary_html \ # "DSSP 3-value prediction sequence format" \ # ${TARG_AL}.dssp-ehl2.seq >> ${WORKDIR}/summary.html w0.5_logo: ${TARG_AL}.w0.5-logo.eps ${TARG_AL}.w0.5-logo.pdf w0.5_logo_record: ${TARG_AL}.w0.5-logo.eps ${TARG_AL}.w0.5-logo.pdf ${YEAST-SCRIPTS}/add_summary_multiple_formats \ "SAM_T02 multiple alignment---sequence logo" \ ${TARG_AL}.w0.5-logo \ eps pdf >> ${WORKDIR}/summary.html dssp_logo: ${TARG_AL}.dssp-ebghstl-logo.eps ${TARG_AL}.dssp-ebghstl-logo.pdf dssp_logo_record: ${TARG_AL}.dssp-ebghstl-logo.eps ${TARG_AL}.dssp-ebghstl-logo.pdf ${YEAST-SCRIPTS}/add_summary_multiple_formats \ "DSSP EBGHSTL prediction---sequence logo" \ ${TARG_AL}.dssp-ebghstl-logo \ eps pdf \ >> ${WORKDIR}/summary.html echo '
' >> ${WORKDIR}/summary.html stride_logo: ${TARG_AL}.stride-ebghtl-logo.eps ${TARG_AL}.stride-ebghtl-logo.pdf stride_logo_record: ${TARG_AL}.stride-ebghtl-logo.eps ${TARG_AL}.stride-ebghtl-logo.pdf ${YEAST-SCRIPTS}/add_summary_multiple_formats \ "Stride EBGHTL prediction---sequence logo" \ ${TARG_AL}.stride-ebghtl-logo \ eps pdf \ >> ${WORKDIR}/summary.html echo '
' >> ${WORKDIR}/summary.html str2_logo: ${TARG_AL}.str2-logo.eps ${TARG_AL}.str2-logo.pdf str2_logo_record: ${TARG_AL}.str2-logo.eps ${TARG_AL}.str2-logo.pdf ${YEAST-SCRIPTS}/add_summary_multiple_formats \ "Str2 prediction---sequence logo" \ ${TARG_AL}.str2-logo \ eps pdf >> ${WORKDIR}/summary.html echo '
' >> ${WORKDIR}/summary.html alpha_logo: ${TARG_AL}.alpha-logo.eps ${TARG_AL}.alpha-logo.pdf alpha_logo_record: ${TARG_AL}.alpha-logo.eps ${TARG_AL}.alpha-logo.pdf ${YEAST-SCRIPTS}/add_summary_multiple_formats \ "Alpha angle prediction---sequence logo" \ ${TARG_AL}.alpha-logo \ eps pdf \ >> ${WORKDIR}/summary.html echo '
' >> ${WORKDIR}/summary.html CB_burial_14_7_logo: ${TARG_AL}.CB_burial_14_7-logo.eps ${TARG_AL}.CB_burial_14_7-logo.pdf CB_burial_14_7_logo_record: ${TARG_AL}.CB_burial_14_7-logo.eps ${TARG_AL}.CB_burial_14_7-logo.pdf ${YEAST-SCRIPTS}/add_summary_multiple_formats \ "Burial CB_14_7 prediction---sequence logo" \ ${TARG_AL}.CB_burial_14_7-logo \ eps pdf \ >> ${WORKDIR}/summary.html echo '
' >> ${WORKDIR}/summary.html dssp-ehl2_logo: ${TARG_AL}.dssp-ehl2-logo.eps ${TARG_AL}.dssp-ehl2-logo.pdf dssp-ehl2_logo_record: ${TARG_AL}.dssp-ehl2-logo.eps ${TARG_AL}.dssp-ehl2-logo.pdf ${YEAST-SCRIPTS}/add_summary_multiple_formats \ "DSSP 3-value prediction---sequence logo" \ ${TARG_AL}.dssp-ehl2-logo \ eps pdf \ >> ${WORKDIR}/summary.html # echo '
' >> ${WORKDIR}/summary.html #start the target model scores section summary_target_mod_scores: ${YEAST-SCRIPTS}/add_section_head_summary_html \ "Target model scores" >> ${WORKDIR}/summary.html 2track_target_mod_scores: \ ${TARG_AL}-100-30-str2.mlib \ ${TARG_AL}-100-30-str2.dist \ ${AL_METHOD}-100-30-str2-scores \ ${TARG_AL}-100-30-dssp-ebghstl.mlib \ ${TARG_AL}-100-30-dssp-ebghstl.dist \ ${AL_METHOD}-100-30-dssp-ebghstl-scores \ ${TARG_AL}-100-30-stride-ebghtl.mlib \ ${TARG_AL}-100-30-stride-ebghtl.dist \ ${AL_METHOD}-100-30-stride-ebghtl-scores \ ${TARG_AL}-100-30-alpha.mlib \ ${TARG_AL}-100-30-alpha.dist \ ${AL_METHOD}-100-30-alpha-scores \ ${TARG_AL}-100-30-dssp-ehl2.mlib \ ${TARG_AL}-100-30-dssp-ehl2.dist \ ${AL_METHOD}-100-30-dssp-ehl2-scores \ ${TARG_AL}-100-30-CB_burial_14_7.mlib \ ${TARG_AL}-100-30-CB_burial_14_7.dist \ ${AL_METHOD}-100-30-CB_burial_14_7-scores \ ${TARG_AL}-100-40-40-str2+CB_burial_14_7.mlib \ ${TARG_AL}-100-40-40-str2+CB_burial_14_7.dist \ ${AL_METHOD}-100-40-40-str2+CB_burial_14_7-scores ############################################################ # BUG: The add_summary command picks up the date from the # html file, but really should use the date from the underlying # .dist file, which is when the search was done. 
############################################################

# Bookkeeping targets in this section do not correspond to files; they
# exist to append an entry to ${WORKDIR}/summary.html (or to remove
# stale results).  Declaring them phony keeps a stray file of the same
# name from silently disabling the step.  Pattern targets cannot be
# declared phony and are therefore not listed.
.PHONY: 1track_target_mod_scores summary_template_mod_scores \
	template_mod_scores summary_top_hits top_hits \
	summary_top_alignments build_top_alignments final_predictions \
	many-alignments top-alignments remove-top-reported-alignments \
	remove-best-scores rasmol_scripts conserved_script \
	undertaker_multi_align undertaker_from_many \
	undertaker_show_alignment undertaker_add_pictures frag-a2m \
	summary_end

# Record the annotated scores of one two-track target model.
# The stem ($*) is the model name, e.g. 100-30-str2.
${AL_METHOD}-%-scores: ${TARG_AL}-%-scores.rdb \
		${TARG_AL}-%-scores.html
	${YEAST-SCRIPTS}/add_summary_html \
		"Annotated amino acid/$* two-track target model scores" \
		${TARG_AL}-$*-scores.html >> ${WORKDIR}/summary.html

# Record the annotated scores of the single-track (w0.5) target model.
1track_target_mod_scores: ${TARG_AL}-w0.5.mlib \
		${TARG_AL}-w0.5.dist \
		${TARG_AL}-w0.5-scores.rdb \
		${TARG_AL}-w0.5-scores.html
	${YEAST-SCRIPTS}/add_summary_html \
		"Annotated amino acid single-track target model scores of PDB" \
		${TARG_AL}-w0.5-scores.html >> ${WORKDIR}/summary.html

#start the template model scores section
summary_template_mod_scores:
	${YEAST-SCRIPTS}/add_section_head_summary_html \
		"Template model scores" >> ${WORKDIR}/summary.html

template_mod_scores: ${TARGET}.template-lib-scores.rdb \
		${TARGET}.template-lib-scores.html
	${YEAST-SCRIPTS}/add_summary_html \
		"Annotated template model scores" \
		${TARGET}.template-lib-scores.html >> ${WORKDIR}/summary.html

#start the top hits section
summary_top_hits:
	${YEAST-SCRIPTS}/add_section_head_summary_html \
		"Top Hits" >> ${WORKDIR}/summary.html

top_hits: ${TARG_AL}.best-scores.rdb \
		${TARG_AL}.best-scores.html
	${YEAST-SCRIPTS}/add_summary_html \
		"Best scoring hits from all models" \
		${TARG_AL}.best-scores.html >> ${WORKDIR}/summary.html

#start the top alignments section
summary_top_alignments:
	${YEAST-SCRIPTS}/add_section_head_summary_html \
		"Alignments for Top Hits" >> ${WORKDIR}/summary.html

# Feed the best-scores table ($<) to make-alignments.  When
# VITERBI_ALIGN is set, the setting is forwarded on the command line.
build_top_alignments: ${TARG_AL}.best-scores.rdb ${TARGET}.mod
ifdef VITERBI_ALIGN
	${YEAST-SCRIPTS}/make-alignments ${TARGET} VITERBI_ALIGN=1 < $<
else
	${YEAST-SCRIPTS}/make-alignments ${TARGET} < $<
endif

final_predictions: many-alignments top-alignments

many-alignments: ${TARG_AL}.predicted_alignments.rdb \
		${TARG_AL}.many_alignments.html
	${YEAST-SCRIPTS}/add_summary_html \
		"T02 Many Alignments Summary" \
		${TARG_AL}.many_alignments.html >> ${WORKDIR}/summary.html

top-alignments: ${TARG_AL}.top_reported_alignments.rdb \
		${TARG_AL}.top_reported_alignments.html
	${YEAST-SCRIPTS}/add_summary_html \
		"T02 Top Models Summary" \
		${TARG_AL}.top_reported_alignments.html >> ${WORKDIR}/summary.html

# Cleanup helpers.  The leading '-' lets make continue when the files
# are already absent.
remove-top-reported-alignments:
	-rm ${TARG_AL}.top_reported_alignments.rdb

remove-best-scores:
	-rm ${TARG_AL}.best-scores.*

rasmol_scripts: \
	dssp_color_script \
	stride_color_script \
	str2_color_script \
	CB_burial_14_7_color_script \
	alpha_color_script \
	conserved_script
# BUG: don't have way to create color script for dssp-ehl2 yet
#	dssp-ehl2_color_script

# Record one rasmol colouring script; the stem ($*) names the
# prediction type (dssp, stride, ...).
%_color_script: ${TARG_AL}.%-color.rasmol
	${YEAST-SCRIPTS}/add_summary_html \
		"Script for $* coloring in rasmol" \
		$^ >> ${WORKDIR}/summary.html

# This is an explicit rule, so there is no pattern stem: $* would
# expand to the empty string and leave a hole in the label.  The name
# is therefore spelled out.
conserved_script: conserved
	${YEAST-SCRIPTS}/add_summary_html \
		"Script for conserved coloring in rasmol" \
		$^ >> ${WORKDIR}/summary.html

undertaker_multi_align: ${TARG_AL}-2track-undertaker.a2m
	${YEAST-SCRIPTS}/add_summary_html \
		"Multiple alignment for undertaker templates" \
		$^ >> ${WORKDIR}/summary.html

undertaker_from_many: ${TARG_AL}.undertaker-align.under
	${YEAST-SCRIPTS}/add_summary_html \
		"Undertaker input for top alignments" \
		$^ >> ${WORKDIR}/summary.html

# The undertaker model is kept gzipped unless NOGZIP_PDB is set.
ifdef NOGZIP_PDB
PDBEXT = pdb
else
PDBEXT = pdb.gz
endif

# NOTE(review): the label below contains two bare continuation lines;
# presumably markup (an HTML link?) was lost there -- confirm against
# the original file.
undertaker_show_alignment: ${TARG_AL}.undertaker-align.${PDBEXT}
	${YEAST-SCRIPTS}/add_summary_html \
	'PDB file with \
	\
	model(s) for top alignments' \
	$^ >> ${WORKDIR}/summary.html

undertaker_add_pictures: jpeg
	${YEAST-SCRIPTS}/add_jpeg_views_html ${TARGET} >> ${WORKDIR}/summary.html

frag-a2m: ${TARG_AL}.frag.gz
	${YEAST-SCRIPTS}/add_summary_html \
		"Fragment list for undertaker (from fragfinder)" \
		$^ >> ${WORKDIR}/summary.html

#end html results page
summary_end:
	${YEAST-SCRIPTS}/end_summary_html \
		>> ${WORKDIR}/summary.html

#################
# web-interface #
#################

# Close the current section of the summary page; `date` prints a
# timestamp to the make output first.
%.end_section:
	date
	${YEAST-SCRIPTS}/end_section_summary_html \
		>> ${WORKDIR}/summary.html

########################################
# Building a ${AL_METHOD} alignment from a seed #
########################################

# Suffix of the target alignment file; may be overridden by the caller.
ifndef A2M
A2M = ${AL_METHOD}.a2m.gz
endif
TARGET-A2M = ${TARGET}.${A2M}

# Locate the NR database, preferring local copies:
#   1. /scratch/data/nrp/nr
#   2. /var/tmp/nrp/nr
#   3. /projects/compbio/data/nrp/nr
# Each test uses $(wildcard ...), which expands to nothing when the
# path does not exist.  (The first test used to read "($wildcard ...",
# which expands $w instead of calling the function and never compares
# equal to the empty string, so the /var/tmp copy was never selected.)
NR:=/scratch/data/nrp/nr
ifeq ($(wildcard ${NR}),)
NR:=/var/tmp/nrp/nr
endif
ifeq ($(wildcard ${NR}),)
NR:=/projects/compbio/data/nrp/nr
endif

# Final-alignment ADP style passed to ${TARGET2K}.
ifndef ADPSTYLE
ADPSTYLE = 5
endif

# Value passed to ${TARGET2K} as -blast_max_report.
ifndef BLAST_MAX
BLAST_MAX = 10000
endif

# With REDO-T2K set, the NR database becomes a prerequisite of the
# t2k alignment, so a newer NR triggers a rebuild.
ifdef REDO-T2K
T2K-DEPEND = ${NR}
else
T2K-DEPEND =
endif

# Build the t2k alignment from the seed ($<), then compress it.
%.t2k.a2m.gz: %.a2m ${T2K-DEPEND}
	${TARGET2K} -out $*.t2k \
		-final_adpstyle ${ADPSTYLE} \
		-blast_max_report ${BLAST_MAX} \
		-db ${NR} \
		-seed $< -tmp_dir /var/tmp
	gzip -f $*.t2k.a2m

# Remove empty files below the current directory (best effort).
rm_empty:
	-find . -empty -exec rm -f '{}' \; -print -prune

#thin the alignment to 90% sequence identity for use with the neural nets
#(which were trained on thinned alignments)
# NOTE: all thinning rules share the scratch name unique-tmp, so they
# must not run concurrently.
%-thin90.a2m.gz: %.a2m.gz
	${BIN-SAM}/uniqueseq unique-tmp -alignfile $*.a2m.gz -percent_id 0.90
	gzip -9f unique-tmp.a2m
	mv unique-tmp.a2m.gz $@

%-thin62.a2m.gz: %.a2m.gz
	${BIN-SAM}/uniqueseq unique-tmp -alignfile $*.a2m.gz -percent_id 0.62
	gzip -9f unique-tmp.a2m
	mv unique-tmp.a2m.gz $@

%-thin50.a2m.gz: %.a2m.gz
	${BIN-SAM}/uniqueseq unique-tmp -alignfile $*.a2m.gz -percent_id 0.50
	gzip -9f unique-tmp.a2m
	mv unique-tmp.a2m.gz $@

%-thin40.a2m.gz: %.a2m.gz
	${BIN-SAM}/uniqueseq unique-tmp -alignfile $*.a2m.gz -percent_id 0.40
	gzip -9f unique-tmp.a2m
	mv unique-tmp.a2m.gz $@

%-thin30.a2m.gz: %.a2m.gz
	${BIN-SAM}/uniqueseq unique-tmp -alignfile $*.a2m.gz -percent_id 0.30
	gzip -9f unique-tmp.a2m
	mv unique-tmp.a2m.gz $@

#convert an a2m.gz alignment to a human-readable model
# The w0.5 script writes to $@.tmp, hmmconvert is then run on that
# file under the run name $*-w0.5, and the temporary is removed.
%-w0.5.mod: %.a2m.gz
	${PCBS}/w0.5 $^ $@.tmp
	${BIN-SAM}/hmmconvert $*-w0.5 -model_file $@.tmp
	-rm -f $@.tmp

%-w1.0.mod: %.a2m.gz
	${PCBS}/w1.0 $^ $@

#compress a file
%.gz: %
	gzip -f $^

#make a pretty-aligned alignment from a compressed a2m alignment
%.pa: %.a2m.gz
	${BIN-SAM}/prettyalign $^ -m8 -i -n -L3333 > $@

# same, from an uncompressed a2m alignment
%.pa: %.a2m
	${BIN-SAM}/prettyalign $^ -m8 -i -n -L3333 > $@

# Make a2m file with dots.
%.dotted-a2m: %.a2m.gz
	${BIN-SAM}/prettyalign $^ -f \
	| grep -v '^;' > $@

%.dotted-a2m: %.a2m
	${BIN-SAM}/prettyalign $^ -f \
	| grep -v '^;' > $@

# HTML rendering of an alignment.  The compressed variant unpacks to
# the fixed scratch file tmp.a2m, so it must not run concurrently.
%.pa.html: %.a2m.gz
	gunzip -c $^ > tmp.a2m
	${A2M2HTML} -a2m_in tmp.a2m > $@
	rm tmp.a2m

%.pa.html: %.a2m
	${A2M2HTML} -a2m_in $^ > $@

##################################
# ANNOTATING A TARGET            #
##################################
# secondary structure prediction #
##################################

PCEM-INDEXES = /projects/compbio/experiments/models.97/indexes

# PREDICT-2ND = /cse/faculty/karplus/dna/predict-2nd/predict-2nd
PREDICT-2ND = ${PCB-SUB}/predict-2nd

TEMPLATE-SEQS = ${PCEM-INDEXES}/t2k.x-seqs

MIXTURE = ${PCL}/recode3.20comp
TRANS-REG = ${PCL}/fssp-trained.regularizer

# Every prediction family below follows the same chain of rules:
#   %.X.rdb / %.X.seq     predict-2nd run on the thin90 alignment
#   %.X.mod               prediction converted with ${RDB_TO_SAM}
#   %-100-30-X.mlib       two-track model library (hmmscore -calibrate 1)
#   %-100-30-X.dist       scores of that library against the template dbs
#   %-100-30-X-scores.*   annotated rdb and html versions of the scores
#   %.X-logo.eps          logo drawn by makelogo
#   %.X-color.rasmol      rasmol colouring script plus a short symlink
# The predict-2nd recipes all write the scratch file tmp.script, so
# they must not run concurrently.

# DSSP-related stuff:
EBGHSTL-NET = ${FREEZELIB-PREDICT2ND}/t2k-5740-IDaaHr-5-15-7-15-9-15-13-ebghstl-seeded.net
TEMPLATE-EBGHSTL = ${PCEM-INDEXES}/t2k.dssps
EBGHSTL-TWOTRACKDBS = ${TEMPLATE-SEQS},${TEMPLATE-EBGHSTL}

# With REDO-SEARCHES set, the template databases become prerequisites
# of the search (the same scheme is used for every family below).
ifdef REDO-SEARCHES
EBGHSTL-DEPEND = ${TEMPLATE-SEQS} ${TEMPLATE-EBGHSTL}
else
EBGHSTL-DEPEND =
endif

%.dssp-ebghstl %.dssp-ebghstl.rdb %.dssp-ebghstl.seq : %-thin90.a2m.gz ${EBGHSTL-NET}
	echo ReadAlphabet ${PCL}/alphabet/DSSP.alphabet >tmp.script
	echo ReadNeuralNet ${EBGHSTL-NET} >> tmp.script
	echo ReadA2M $< >> tmp.script
#	echo PrintPrediction $*.dssp-ebghstl >> tmp.script
	echo PrintPredictionFasta $*.dssp-ebghstl.seq >> tmp.script
	echo PrintRDB $*.dssp-ebghstl.rdb >> tmp.script
	${PREDICT-2ND} < tmp.script
	rm tmp.script

%.dssp-ebghstl.mod: %.dssp-ebghstl.rdb ${RDB_TO_SAM}
	${RDB_TO_SAM} -alphabet EBGHSTL $< $@

#this target creates the mlib and dist file with no scores
# we then remove the dist file so it will be created when
# %-100-30-dssp-ebghstl.dist is called
%-100-30-dssp-ebghstl.mlib: %-w0.5.mod %.dssp-ebghstl.mod
	${HMMSCORE} $*-100-30-dssp-ebghstl \
		-calibrate 1 \
		-alphabet protein,EBGHSTL \
		-trackmod $*-w0.5.mod,$*.dssp-ebghstl.mod \
		-db ${EBGHSTL-TWOTRACKDBS} \
		-trackcoeff 1.0,0.3 \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 0
	-rm -f $*-100-30-dssp-ebghstl.dist
#		-trackprior rsdb-comp2.32comp,t99-2d-comp.9comp

# hmmscore is run under the scratch name dsspfoo; its output
# dsspfoo.1.$@ is then moved onto the real target.
%-100-30-dssp-ebghstl.dist: %-100-30-dssp-ebghstl.mlib \
		${EBGHSTL-DEPEND}
	${HMMSCORE} dsspfoo \
		-modellibrary $< \
		-db ${EBGHSTL-TWOTRACKDBS} \
		-db_size ${LIBSIZE} \
		-select_score 4 -Emax 40.
	sleep 20
	mv -f dsspfoo.1.$@ $@

%-100-30-dssp-ebghstl-scores.rdb: %-100-30-dssp-ebghstl.dist ${SCOP}
	${YEAST-SCRIPTS}/annotate_target_scores ${AL_METHOD}-100-30-dssp-ebghstl < $< > $@

%-100-30-dssp-ebghstl-scores.html: %-100-30-dssp-ebghstl-scores.rdb
	${YEAST-SCRIPTS}/oneway_hits_rdb2html $*-100-30-dssp-ebghstl-scores < $^ > $@

# NOTE(review): this DSSP logo is coloured with stride.colors although
# a dssp.colors file is used by the dssp-ehl2 logo rule -- confirm
# that this is intended.
%.dssp-ebghstl-logo.eps: %.dssp-ebghstl.mod
	${BIN-SAM}/makelogo $*.dssp-ebghstl-logo -i $^ \
		-logo_start_num ${START-COL} \
		-logo_rel_entropy 1 \
		-logo_bars_per_line ${LOGO-WIDTH} -logo_title "$* EBGHSTL" \
		-logo_caption_f ${TARG_AL}.dssp-ebghstl.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/stride.colors

%.dssp-color.rasmol: %.dssp-ebghstl.seq
	${YEAST-SCRIPTS}/rasmol_color_from_burial -pdb ${TARGET}.blank.pdb \
		-start_col ${START-COL} < $^ > $@
	ln -sf $@ dssp

# STRIDE-related stuff:
EBGHTL-NET= ${FREEZELIB-PREDICT2ND}/t2k-5651-IDaaHr-5-15-7-15-9-15-13-ebghtl-stride-seeded.net
TEMPLATE-EBGHTL = ${PCEM-INDEXES}/t2k.2ds
EBGHTL-TWOTRACKDBS = ${TEMPLATE-SEQS},${TEMPLATE-EBGHTL}

ifdef REDO-SEARCHES
EBGHTL-DEPEND = ${TEMPLATE-SEQS} ${TEMPLATE-EBGHTL}
else
EBGHTL-DEPEND =
endif

%.stride-ebghtl %.stride-ebghtl.rdb %.stride-ebghtl.seq : %-thin90.a2m.gz ${EBGHTL-NET}
	echo ReadAlphabet ${PCL}/alphabet/DSSP.alphabet >tmp.script
	echo ReadNeuralNet ${EBGHTL-NET} >> tmp.script
	echo ReadA2M $< >> tmp.script
#	echo PrintPrediction $*.stride-ebghtl >> tmp.script
	echo PrintPredictionFasta $*.stride-ebghtl.seq >> tmp.script
	echo PrintRDB $*.stride-ebghtl.rdb >> tmp.script
	${PREDICT-2ND} < tmp.script
	rm tmp.script

%.stride-ebghtl.mod: %.stride-ebghtl.rdb ${RDB_TO_SAM}
	${RDB_TO_SAM} -alphabet EBGHTL $< $@

#this target creates the mlib and dist file with no scores
# we then remove the dist file so it will be created when
# %-100-30-stride-ebghtl.dist is called
#
# An earlier one-step variant (hmmscore with -select_score 8, followed
# by an add_summary_html "Calibration results ..." entry in the same
# recipe) created mlib and dist file together, but was dropped because
# it made it awkward to get the dist file printed to the web page in a
# separate section.  The same holds for the str2, alpha and
# CB_burial_14_7 mlib rules below.
%-100-30-stride-ebghtl.mlib: %-w0.5.mod %.stride-ebghtl.mod
	${HMMSCORE} $*-100-30-stride-ebghtl \
		-calibrate 1 \
		-alphabet protein,EBGHTL \
		-trackmod $*-w0.5.mod,$*.stride-ebghtl.mod \
		-db ${EBGHTL-TWOTRACKDBS} \
		-trackcoeff 1.0,0.3 \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 0
	-rm -f $*-100-30-stride-ebghtl.dist
#		-trackprior rsdb-comp2.32comp,t99-ebghtl-comp.6comp

%-100-30-stride-ebghtl.dist: %-100-30-stride-ebghtl.mlib \
		${EBGHTL-DEPEND}
	${HMMSCORE} stridefoo \
		-modellibrary $< \
		-db ${EBGHTL-TWOTRACKDBS} \
		-db_size ${LIBSIZE} \
		-select_score 4 -Emax 40.
	sleep 20
	mv -f stridefoo.1.$@ $@

%-100-30-stride-ebghtl-scores.rdb: %-100-30-stride-ebghtl.dist ${SCOP}
	${YEAST-SCRIPTS}/annotate_target_scores ${AL_METHOD}-100-30-stride-ebghtl < $< > $@

%-100-30-stride-ebghtl-scores.html: %-100-30-stride-ebghtl-scores.rdb
	${YEAST-SCRIPTS}/oneway_hits_rdb2html $*-100-30-stride-ebghtl-scores < $^ > $@

#make the logo files with TXXX.t2k.stride-ebghtl-logo.eps
#	NOT TXXX.stride-ebghtl-logo.eps
%.stride-ebghtl-logo.eps: %.stride-ebghtl.mod
	${BIN-SAM}/makelogo $*.stride-ebghtl-logo -i $^ \
		-logo_start_num ${START-COL} \
		-logo_rel_entropy 1 \
		-logo_bars_per_line ${LOGO-WIDTH} -logo_title "$* EBGHTL" \
		-logo_caption_f ${TARG_AL}.stride-ebghtl.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/stride.colors

%.stride-ebghtl-logo-small.eps: %.stride-ebghtl.mod
	${BIN-SAM}/makelogo $*.stride-ebghtl-logo-small -i $^ \
		-logo_start_num ${START-COL} \
		-logo_rel_entropy 1 \
		-logo_bars_per_line 110 \
		-logo_title "$* EBGHTL" \
		-logo_caption_f ${TARG_AL}.stride-ebghtl.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/stride.colors

%.stride-color.rasmol: %.stride-ebghtl.seq
	${YEAST-SCRIPTS}/rasmol_color_from_burial -pdb ${TARGET}.blank.pdb \
		-start_col ${START-COL} < $^ > $@
	ln -sf $@ stride

# STR2 (extended DSSP) stuff:
STR2-NET = ${EXPLIB-PREDICT2ND}/str2/networks/t2k-5651-IDaaHr-5-15-7-15-9-15-13-str2-from-empty.net
TEMPLATE-STR2 = ${PCEM-INDEXES}/t2k.str2s
STR2-TWOTRACKDBS = ${TEMPLATE-SEQS},${TEMPLATE-STR2}

ifdef REDO-SEARCHES
STR2-DEPEND = ${TEMPLATE-SEQS} ${TEMPLATE-STR2}
else
STR2-DEPEND =
endif

%.str2 %.str2.rdb %.str2.seq : %-thin90.a2m.gz ${STR2-NET}
	echo ReadAlphabet /projects/compbio/lib/alphabet/str.alphabet > tmp.script
	echo ReadNeuralNet ${STR2-NET} >> tmp.script
	echo ReadA2M $< >> tmp.script
#	echo PrintPrediction $*.str2 >> tmp.script
	echo PrintPredictionFasta $*.str2.seq >> tmp.script
	echo PrintRDB $*.str2.rdb >> tmp.script
	${PREDICT-2ND} < tmp.script
	rm tmp.script

%.str2.mod: %.str2.rdb ${RDB_TO_SAM}
	${RDB_TO_SAM} -alphabet STR2 $< $@

#this target creates the mlib and dist file with no scores
# we then remove the dist file so it will be created when
# %-100-30-str2.dist is called
%-100-30-str2.mlib: %-w0.5.mod %.str2.mod
	${HMMSCORE} $*-100-30-str2 \
		-calibrate 1 \
		-alphabet protein,STR2 \
		-trackmod $*-w0.5.mod,$*.str2.mod \
		-db ${STR2-TWOTRACKDBS} \
		-trackcoeff 1.0,0.3 \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 0
	-rm -f $*-100-30-str2.dist

%-100-30-str2.dist: %-100-30-str2.mlib \
		${STR2-DEPEND}
	${HMMSCORE} str2foo \
		-modellibrary $< \
		-db ${STR2-TWOTRACKDBS} \
		-db_size ${LIBSIZE} \
		-select_score 4 -Emax 40.
	sleep 20
	mv -f str2foo.1.$@ $@

%-100-30-str2-scores.rdb: %-100-30-str2.dist ${SCOP}
	${YEAST-SCRIPTS}/annotate_target_scores ${AL_METHOD}-100-30-str2 < $< > $@

%-100-30-str2-scores.html: %-100-30-str2-scores.rdb
	${YEAST-SCRIPTS}/oneway_hits_rdb2html $*-100-30-str2-scores < $^ > $@

#make the logo files with TXXX.t2k.str2-logo.eps
#	NOT TXXX.str2-logo.eps
%.str2-logo.eps: %.str2.mod
	${BIN-SAM}/makelogo $*.str2-logo -i $^ \
		-logo_start_num ${START-COL} \
		-logo_rel_entropy 1 \
		-logo_bars_per_line ${LOGO-WIDTH} \
		-logo_title "$* STR2" \
		-logo_caption_f ${TARG_AL}.str2.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/str.colors

%.str2-logo-small.eps: %.str2.mod
	${BIN-SAM}/makelogo $*.str2-logo-small -i $^ \
		-logo_start_num ${START-COL} \
		-logo_rel_entropy 1 \
		-logo_bars_per_line 110 \
		-logo_title "$* STR2" \
		-logo_caption_f ${TARG_AL}.str2.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/str.colors

%.str2-color.rasmol: %.str2.seq
	${YEAST-SCRIPTS}/rasmol_color_from_burial -pdb ${TARGET}.blank.pdb \
		-start_col ${START-COL} < $^ > $@
	ln -sf $@ str2

# ALPHA angle stuff:
ALPHA-NET = ${EXPLIB-PREDICT2ND}/alpha/networks/t2k-5651-IDaaHr-5-15-7-15-9-15-13-alpha-seeded.net
TEMPLATE-ALPHA = ${PCEM-INDEXES}/t2k.alphas
ALPHA-TWOTRACKDBS = ${TEMPLATE-SEQS},${TEMPLATE-ALPHA}

ifdef REDO-SEARCHES
ALPHA-DEPEND = ${TEMPLATE-SEQS} ${TEMPLATE-ALPHA}
else
ALPHA-DEPEND =
endif

%.alpha %.alpha.rdb %.alpha.seq : %-thin90.a2m.gz ${ALPHA-NET}
	echo ReadAlphabet /projects/compbio/lib/alphabet/alpha.alphabet > tmp.script
	echo ReadNeuralNet ${ALPHA-NET} >> tmp.script
	echo ReadA2M $< >> tmp.script
#	echo PrintPrediction $*.alpha >> tmp.script
	echo PrintPredictionFasta $*.alpha.seq >> tmp.script
	echo PrintRDB $*.alpha.rdb >> tmp.script
	${PREDICT-2ND} < tmp.script
	rm tmp.script

%.alpha.mod: %.alpha.rdb ${RDB_TO_SAM}
	${RDB_TO_SAM} -alphabet ALPHA $< $@

#this target creates the mlib and dist file with no scores
# we then remove the dist file so it will be created when
# %-100-30-alpha.dist is called
%-100-30-alpha.mlib: %-w0.5.mod %.alpha.mod
	${HMMSCORE} $*-100-30-alpha \
		-calibrate 1 \
		-alphabet protein,ALPHA \
		-trackmod $*-w0.5.mod,$*.alpha.mod \
		-db ${ALPHA-TWOTRACKDBS} \
		-trackcoeff 1.0,0.3 \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 0
	-rm -f $*-100-30-alpha.dist

%-100-30-alpha.dist: %-100-30-alpha.mlib \
		${ALPHA-DEPEND}
	${HMMSCORE} alphafoo \
		-modellibrary $< \
		-db ${ALPHA-TWOTRACKDBS} \
		-db_size ${LIBSIZE} \
		-select_score 4 -Emax 40.
	sleep 20
	mv -f alphafoo.1.$@ $@

%-100-30-alpha-scores.rdb: %-100-30-alpha.dist ${SCOP}
	${YEAST-SCRIPTS}/annotate_target_scores ${AL_METHOD}-100-30-alpha < $< > $@

%-100-30-alpha-scores.html: %-100-30-alpha-scores.rdb
	${YEAST-SCRIPTS}/oneway_hits_rdb2html $*-100-30-alpha-scores < $^ > $@

%.alpha-logo.eps: %.alpha.mod
	${BIN-SAM}/makelogo $*.alpha-logo -i $^ \
		-logo_start_num ${START-COL} \
		-logo_rel_entropy 1 \
		-logo_bars_per_line ${LOGO-WIDTH} -logo_title "$* ALPHA" \
		-logo_caption_f ${TARG_AL}.alpha.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/alpha.colors

%.alpha-logo-small.eps: %.alpha.mod
	${BIN-SAM}/makelogo $*.alpha-logo-small -i $^ \
		-logo_start_num ${START-COL} \
		-logo_rel_entropy 1 \
		-logo_bars_per_line 80 \
		-logo_title "$* ALPHA" \
		-logo_caption_f ${TARG_AL}.alpha.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/alpha.colors

%.alpha-color.rasmol: %.alpha.seq
	${YEAST-SCRIPTS}/rasmol_color_from_burial -pdb ${TARGET}.blank.pdb \
		-start_col ${START-COL} < $^ > $@
	ln -sf $@ alpha

# CB_BURIAL_14_7 angle stuff:
CB_BURIAL_14_7-NET = ${EXPLIB-PREDICT2ND}/CB-burial-14-7/networks/t2k-5631-IDaaHr-5-15-7-15-9-15-13-CB-burial-14-7-from-empty.net
TEMPLATE-CB_BURIAL_14_7 = ${PCEM-INDEXES}/t2k.CB-burial-14-7s
CB_BURIAL_14_7-TWOTRACKDBS = ${TEMPLATE-SEQS},${TEMPLATE-CB_BURIAL_14_7}

ifdef REDO-SEARCHES
CB_BURIAL_14_7-DEPEND = ${TEMPLATE-SEQS} ${TEMPLATE-CB_BURIAL_14_7}
else
CB_BURIAL_14_7-DEPEND =
endif

%.CB_burial_14_7 %.CB_burial_14_7.rdb %.CB_burial_14_7.seq : %-thin90.a2m.gz ${CB_BURIAL_14_7-NET}
	echo ReadAlphabet /projects/compbio/lib/alphabet/burial.alphabet > tmp.script
	echo ReadNeuralNet ${CB_BURIAL_14_7-NET} >> tmp.script
	echo ReadA2M $< >> tmp.script
#	echo PrintPrediction $*.CB_burial_14_7 >> tmp.script
	echo PrintPredictionFasta $*.CB_burial_14_7.seq >> tmp.script
	echo PrintRDB $*.CB_burial_14_7.rdb >> tmp.script
	${PREDICT-2ND} < tmp.script
	rm tmp.script

%.CB_burial_14_7.mod: %.CB_burial_14_7.rdb ${RDB_TO_SAM}
	${RDB_TO_SAM} -alphabet CB_BURIAL_14_7 $< $@

#this target creates the mlib and dist file with no scores
# we then remove the dist file so it will be created when
# %-100-30-CB_burial_14_7.dist is called
%-100-30-CB_burial_14_7.mlib: %-w0.5.mod %.CB_burial_14_7.mod
	${HMMSCORE} $*-100-30-CB_burial_14_7 \
		-calibrate 1 \
		-alphabet protein,CB_BURIAL_14_7 \
		-trackmod $*-w0.5.mod,$*.CB_burial_14_7.mod \
		-db ${CB_BURIAL_14_7-TWOTRACKDBS} \
		-trackcoeff 1.0,0.3 \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 0
	-rm -f $*-100-30-CB_burial_14_7.dist

%-100-30-CB_burial_14_7.dist: %-100-30-CB_burial_14_7.mlib \
		${CB_BURIAL_14_7-DEPEND}
	${HMMSCORE} CB_burial_14_7foo \
		-modellibrary $< \
		-db ${CB_BURIAL_14_7-TWOTRACKDBS} \
		-db_size ${LIBSIZE} \
		-select_score 4 -Emax 40.
	sleep 20
	mv -f CB_burial_14_7foo.1.$@ $@

%-100-30-CB_burial_14_7-scores.rdb: %-100-30-CB_burial_14_7.dist ${SCOP}
	${YEAST-SCRIPTS}/annotate_target_scores ${AL_METHOD}-100-30-CB_burial_14_7 < $< > $@

%-100-30-CB_burial_14_7-scores.html: %-100-30-CB_burial_14_7-scores.rdb
	${YEAST-SCRIPTS}/oneway_hits_rdb2html $*-100-30-CB_burial_14_7-scores < $^ > $@

%.CB_burial_14_7-logo.eps: %.CB_burial_14_7.mod
	${BIN-SAM}/makelogo $*.CB_burial_14_7-logo -i $^ \
		-logo_start_num ${START-COL} \
		-logo_rel_entropy 1 \
		-logo_bars_per_line ${LOGO-WIDTH} -logo_title "$* CB_BURIAL_14_7" \
		-logo_caption_f ${TARG_AL}.CB_burial_14_7.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/burial7.colors

%.CB_burial_14_7-logo-small.eps: %.CB_burial_14_7.mod
	${BIN-SAM}/makelogo $*.CB_burial_14_7-logo-small -i $^ \
		-logo_start_num ${START-COL} \
		-logo_rel_entropy 1 \
		-logo_bars_per_line 80 \
		-logo_title "$* CB_BURIAL_14_7" \
		-logo_caption_f ${TARG_AL}.CB_burial_14_7.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/burial7.colors

%.CB_burial_14_7-color.rasmol: %.CB_burial_14_7.seq
	${YEAST-SCRIPTS}/rasmol_color_from_burial -pdb ${TARGET}.blank.pdb \
		-start_col ${START-COL} \
		-color burial7 < $^ > $@
	ln -sf $@ CB_burial_14_7

# three-track str2 + CB_BURIAL_14_7 angle stuff:
STR2+CB_BURIAL_14_7-THREETRACKDBS = ${TEMPLATE-SEQS},${TEMPLATE-STR2},${TEMPLATE-CB_BURIAL_14_7}

ifdef REDO-SEARCHES
STR2+CB_BURIAL_14_7-DEPEND = ${TEMPLATE-SEQS} ${TEMPLATE-STR2} ${TEMPLATE-CB_BURIAL_14_7}
else
STR2+CB_BURIAL_14_7-DEPEND =
endif

#this target creates the mlib and dist file with no scores
# we then remove the dist file so it will be created when
# %-100-40-40-str2+CB_burial_14_7.dist is called
# NOTE(review): the alphabet is spelled "str2" here but "STR2" in the
# two-track str2 rule -- confirm that hmmscore accepts both spellings.
%-100-40-40-str2+CB_burial_14_7.mlib: %-w0.5.mod %.str2.mod %.CB_burial_14_7.mod
	${HMMSCORE} $*-100-40-40-str2+CB_burial_14_7 \
		-calibrate 1 \
		-alphabet protein,str2,CB_BURIAL_14_7 \
		-trackmod $*-w0.5.mod,$*.str2.mod,$*.CB_burial_14_7.mod \
		-db ${STR2+CB_BURIAL_14_7-THREETRACKDBS} \
		-trackcoeff 1.0,0.4,0.4 \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 0
	-rm -f $*-100-40-40-str2+CB_burial_14_7.dist

# Shares the scratch run name CB_burial_14_7foo with the two-track
# burial search; the moved file name still differs because it ends
# in $@.
%-100-40-40-str2+CB_burial_14_7.dist: %-100-40-40-str2+CB_burial_14_7.mlib \
		${STR2+CB_BURIAL_14_7-DEPEND}
	${HMMSCORE} CB_burial_14_7foo \
		-modellibrary $< \
		-db ${STR2+CB_BURIAL_14_7-THREETRACKDBS} \
		-db_size ${LIBSIZE} \
		-select_score 4 -Emax 40.
	sleep 20
	mv -f CB_burial_14_7foo.1.$@ $@

%-100-40-40-str2+CB_burial_14_7-scores.rdb: %-100-40-40-str2+CB_burial_14_7.dist ${SCOP}
	${YEAST-SCRIPTS}/annotate_target_scores ${AL_METHOD}-100-40-40-str2+CB_burial_14_7 < $< > $@

%-100-40-40-str2+CB_burial_14_7-scores.html: %-100-40-40-str2+CB_burial_14_7-scores.rdb
	${YEAST-SCRIPTS}/oneway_hits_rdb2html $*-100-40-40-str2+CB_burial_14_7-scores < $^ > $@

# DSSP-EHL2 merged prediction stuff:
TEMPLATE-DSSP-EHL2 = ${PCEM-INDEXES}/t2k.dssps
DSSP-EHL2-TWOTRACKDBS = ${TEMPLATE-SEQS},${TEMPLATE-DSSP-EHL2}

ifdef REDO-SEARCHES
DSSP-EHL2-DEPEND = ${TEMPLATE-SEQS} ${TEMPLATE-DSSP-EHL2}
else
DSSP-EHL2-DEPEND =
endif

# Combine the four per-alphabet predictions into one rdb file.
# The -a label used to read SAM-TO2 (letter O); it now matches the
# SAM-T02 spelling passed to rdb2casp below.
%.t2k.dssp-ehl2.rdb: %.t2k.dssp-ebghstl.rdb \
		%.t2k.stride-ebghtl.rdb \
		%.t2k.str2.rdb \
		%.t2k.alpha.rdb
	${YEAST-SCRIPTS}/RDBCombine $^ -a SAM-T02 > $@

%.t2k.dssp-ehl2: %.t2k.dssp-ehl2.rdb
	${YEAST-SCRIPTS}/rdb2casp $^ SAM-T02 > $@

%.dssp-ehl2.mod: %.dssp-ehl2.rdb ${RDB_TO_SAM}
	${RDB_TO_SAM} -alphabet EHL2 $< $@

#this target creates the mlib and dist file with no scores
# we then remove the dist file so it will be created when
# %-100-30-dssp-ehl2.dist is called
%-100-30-dssp-ehl2.mlib: %-w0.5.mod %.dssp-ehl2.mod
	${HMMSCORE} $*-100-30-dssp-ehl2 \
		-calibrate 1 \
		-alphabet protein,EHL2 \
		-trackmod $*-w0.5.mod,$*.dssp-ehl2.mod \
		-db ${DSSP-EHL2-TWOTRACKDBS} \
		-trackcoeff 1.0,0.3 \
		-sw 2 -dpstyle 0 -subtract_null 4 \
		-select_score 0
	-rm -f $*-100-30-dssp-ehl2.dist

%-100-30-dssp-ehl2.dist: %-100-30-dssp-ehl2.mlib \
		${DSSP-EHL2-DEPEND}
	${HMMSCORE} dssp-ehl2foo \
		-modellibrary $< \
		-db ${DSSP-EHL2-TWOTRACKDBS} \
		-db_size ${LIBSIZE} \
		-select_score 4 -Emax 40.
	sleep 20
	mv -f dssp-ehl2foo.1.$@ $@

%-100-30-dssp-ehl2-scores.rdb: %-100-30-dssp-ehl2.dist ${SCOP}
	${YEAST-SCRIPTS}/annotate_target_scores ${AL_METHOD}-100-30-dssp-ehl2 < $< > $@

%-100-30-dssp-ehl2-scores.html: %-100-30-dssp-ehl2-scores.rdb
	${YEAST-SCRIPTS}/oneway_hits_rdb2html $*-100-30-dssp-ehl2-scores < $^ > $@

# BUG: don't have dssp-ehl2.seq, so using dssp-ebghstl.seq
# (only the model, $<, is passed to -i; the .seq prerequisite is used
# as the caption file)
%.dssp-ehl2-logo.eps: %.dssp-ehl2.mod %.dssp-ebghstl.seq
	${BIN-SAM}/makelogo $*.dssp-ehl2-logo -i $< \
		-logo_start_num ${START-COL} \
		-logo_rel_entropy 1 \
		-logo_bars_per_line ${LOGO-WIDTH} -logo_title "$* DSSP-EHL2" \
		-logo_caption_f $*.dssp-ebghstl.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_color_file /projects/compbio/lib/dssp.colors

%.dssp-ehl2-color.rasmol: %.dssp-ehl2.seq
	${YEAST-SCRIPTS}/rasmol_color_from_burial -pdb ${TARGET}.blank.pdb \
		-start_col ${START-COL} < $^ > $@
	ln -sf $@ ehl

ifdef REDO-SEARCHES
TEMPLATE-LIB-DEPEND = ${PCEM-INDEXES}/t2k-w0.5-db.mlib
else
TEMPLATE-LIB-DEPEND =
endif

#template library scores
# hmmscore writes $*.template-lib.dist-rdb; the pipeline drops '#'
# lines, shortens model names, keeps rows whose SEQID equals
# ${TARGET}, and sorts on EVALUE.  The raw dist-rdb is then removed.
%.template-lib-scores.rdb: ${TEMPLATE-LIB-DEPEND} ${TARGET}.a2m
	${HMMSCORE} $*.template-lib \
		-modellibrary ${PCEM-INDEXES}/t2k-w0.5-db.mlib \
		-db_size ${LIBSIZE} \
		-db ${TARGET}.a2m -rdb 1 \
		-select_score 4 -Emax 40
	grep -v '^[#]' < $*.template-lib.dist-rdb \
	| ${YEAST-SCRIPTS}/shorten_mod_names \
	| ${PCB}/row SEQID eq ${TARGET} \
	| ${PCB}/sorttbl EVALUE \
	> $@
	-rm $*.template-lib.dist-rdb

# Only the first 500 lines of the score table are annotated and
# rendered as html.
%.template-lib-scores.html: %.template-lib-scores.rdb ${SCOP}
	head -n 500 < $< \
	| ${YEAST-SCRIPTS}/annotate_template_scores \
	| ${YEAST-SCRIPTS}/oneway_hits_rdb2html $*.template-lib-scores \
	> $@

# single-track model pdb scoring
# Note: all-protein has the NCBI names,
#	while SAM_T02/data has our short names for chain ids
# PDB_DB = /projects/compbio/data/pdb/all-protein
# PDB_DB = /projects/compbio/experiments/protein-predict/SAM_T02/data/pdbaa
PDB_DB = /projects/compbio/data/pdb/dunbrack-pdbaa

#calibrate the single track model
#this target creates
the mlib and dist file with no scores # we then remove the dist file so it will be created when # %-w0.5.dist is called %-w0.5.mlib: %-w0.5.mod ${HMMSCORE} $*-w0.5 \ -calibrate 1 \ -i $*-w0.5.mod \ -db ${TEMPLATE-SEQS} \ -sw 2 -dpstyle 0 -subtract_null 4 \ -select_score 0 -rm -f $*-w0.5.dist # -trackprior rsdb-comp2.32comp ifdef REDO-SEARCHES PDB-LIB-DEPEND = ${PDB_DB} else PDB-LIB-DEPEND = endif %-w0.5.dist: %-w0.5.mlib ${PDB-LIB-DEPEND} ${HMMSCORE} w0.5foo \ -modellibrary $< \ -db ${PDB_DB} \ -dbsize ${LIBSIZE} \ -select_score 4 -Emax 40. sleep 20 mv -f w0.5foo.1.$*-w0.5.mod.dist $@ %-w0.5-scores.rdb: %-w0.5.dist ${SCOP} ${YEAST-SCRIPTS}/annotate_target_scores $*-w0.5 < $< > $@ %-w0.5-scores.html: %-w0.5-scores.rdb ${YEAST-SCRIPTS}/oneway_hits_rdb2html $*-w0.5-scores < $^ > $@ ############# # LOGOS # ############# #what is the target.seq if an alignment is submitted ? %.w0.5-logo.eps %.w0.5.saves: %-w0.5.mod %.dssp-ebghstl.seq ${BIN-SAM}/makelogo $*.w0.5-logo -i $< \ -logo_start_num ${START-COL} \ -logo_rel_entropy 1 \ -logo_bars_per_line ${LOGO-WIDTH} -logo_title "$* w0.5" \ -logo_caption_f $*.dssp-ebghstl.seq \ -logo_under_file ${TARGET}.a2m \ -logo_savings_output $*.w0.5.saves ############### # TOP HITS # ############### # report at least this many hits ifndef MIN_NUM_BEST MIN_NUM_BEST := 12 endif # report at most this many hits ifndef MAX_NUM_BEST MAX_NUM_BEST := 50 endif # define the threshold below which you want hits reported. 
# If ANY of the methods reports a hit this good, it will be included
# in ${TARGET}.${AL_METHOD}.best-scores.rdb (up to the limit MAX_NUM_BEST)
ifndef BEST_EVALUE
BEST_EVALUE := 1.e-05
endif

#find the best hits (include dupes)
# Merges the score tables of every search method; all prerequisites ($^) are
# handed to best_scores in the order listed here.
%.${AL_METHOD}.best-scores.rdb: \
		%.${AL_METHOD}-w0.5-scores.rdb \
		%.template-lib-scores.rdb \
		%.${AL_METHOD}-100-30-dssp-ebghstl-scores.rdb \
		%.${AL_METHOD}-100-30-stride-ebghtl-scores.rdb \
		%.${AL_METHOD}-100-30-str2-scores.rdb \
		%.${AL_METHOD}-100-30-alpha-scores.rdb \
		%.${AL_METHOD}-100-30-CB_burial_14_7-scores.rdb \
		%.${AL_METHOD}-100-40-40-str2+CB_burial_14_7-scores.rdb \
		%.${AL_METHOD}-100-30-dssp-ehl2-scores.rdb
	${YEAST-SCRIPTS}/best_scores \
		-num ${MIN_NUM_BEST} -E ${BEST_EVALUE} \
		-maxnum ${MAX_NUM_BEST} \
		-lib_size ${LIBSIZE} \
		$^ > $@

# HTML version of the best-scores table.
%.best-scores.html: %.best-scores.rdb
	${YEAST-SCRIPTS}/oneway_hits_rdb2html $*.best_hits < $^ > $@

###########################################
# ALIGNMENTS #
###########################################

#track models to be used in pairwise alignments
EBGHSTL_TRACKMOD = ${TARG_AL}.dssp-ebghstl.mod
EBGHTL_TRACKMOD = ${TARG_AL}.stride-ebghtl.mod
STR2_TRACKMOD = ${TARG_AL}.str2.mod
W05_MLIB = ${TARG_AL}-w0.5.mlib
EBGHSTL_MLIB = ${TARG_AL}-100-30-dssp-ebghstl.mlib
EBGHTL_MLIB = ${TARG_AL}-100-30-stride-ebghtl.mlib
STR2_MLIB = ${TARG_AL}-100-30-str2.mlib
ALPHA_MLIB = ${TARG_AL}-100-30-alpha.mlib
DSSP-EHL2_MLIB = ${TARG_AL}-100-30-dssp-ehl2.mlib

#settings of how many templates to predict vs. number of
#alignments to convert to CASP format must be done carefully
# need to add error checking so these numbers don't conflict
#with each other

# how many alignments to select from best templates
ifndef NUM_ALIGNMENTS
NUM_ALIGNMENTS = 250
endif
#top alignments we report for top_reported
ifndef NUM_TOP
NUM_TOP = 5
endif

#build an HMM from target sequence only to produce alignments
#similar to simple Smith-Waterman. We observe that the
#T2K HMMs are so general that they may drift away from the
#original seed sequence
# (Every continuation now has a space before the backslash so that adjacent
# options can never be glued together by the shell.)
%.mod: %.a2m
	${BIN-SAM}/modelfromalign $* \
		-alignfile $^ \
		-insert /projects/compbio/lib/fssp-trained.regularizer \
		-aweight_bits 0.8 \
		-fimtrans -1 \
		-fimstrength 1 \
		-ins_jump_conf 1 \
		-match_jump_conf 1 \
		-del_jump_conf 1 \
		-binary_output 1 \
		-prior_library ${MIXTURE} \
		-a2mdots 0 \
		-a protein \
		-sw 2 -jump_in_prob 0.2 -jump_out_prob 1 \
		-aweight_method 1 \
		-aweight_exponent 10

#report sorted list of the pairwise alignments
# The per-template directories (names starting with 1-9) present when the
# makefile is parsed are prerequisites, so a changed directory triggers a
# rebuild.
%.predicted_alignments.rdb: %.best-scores.rdb $(wildcard [1-9]*)
	${YEAST-SCRIPTS}/gather_best_align_scores ${NUM_ALIGNMENTS} ${WORKDIR} ${TARGET} < $< > $@
#	${YEAST-SCRIPTS}/add_summary_html \
#	"T02 Sorted list of top alignments of ${TARGET} and top hits" \
#	$@ >> ${WORKDIR}/summary.html

#get the ${NUM_TOP} best alignments
%.top_reported_alignments.rdb: \
		%.predicted_alignments.rdb \
		%.best-scores.rdb
	${YEAST-SCRIPTS}/top_reported_alignments ${NUM_TOP} $^ > $@

#convert this to html and add to the summary page
%.many_alignments.html: %.predicted_alignments.rdb %.best-scores.rdb
	${YEAST-SCRIPTS}/casp_summary_report_html \
		--align $*.predicted_alignments.rdb \
		--best $*.best-scores.rdb \
		--target ${TARGET} > $@

%.top_reported_alignments.html: %.top_reported_alignments.rdb %.best-scores.rdb
	${YEAST-SCRIPTS}/casp_summary_report_html \
		--align $*.top_reported_alignments.rdb \
		--best $*.best-scores.rdb \
		--target ${TARGET} --make_al > $@

# The targets below only append to summary.html and never create a file of
# their own name, so declare them phony: a stray file called e.g.
# "mutual_info" must not silently disable them.
.PHONY: mutual_info_start_section mutual_info undertaker_start_section \
	do_mutual_info_30 do_mutual_info_40 do_mutual_info_50 do_mutual_info_62

mutual_info_start_section:
	${YEAST-SCRIPTS}/add_section_head_summary_html \
		"Significant mutual information between column pairs" >> ${WORKDIR}/summary.html

mutual_info: do_mutual_info_30 do_mutual_info_40 do_mutual_info_50 do_mutual_info_62

# One static pattern rule replaces four copy-pasted rules; the stem ($*) is
# the percent-identity thinning level (30, 40, 50 or 62).
do_mutual_info_30 do_mutual_info_40 do_mutual_info_50 do_mutual_info_62: \
do_mutual_info_%: ${TARG_AL}-thin%.a2m.gz ${TARG_AL}-thin%.mi.rdb
	${YEAST-SCRIPTS}/add_summary_html \
		"Mutual information table (align thinned to $*% ID)" \
		${TARG_AL}-thin$*.mi.rdb >> ${WORKDIR}/summary.html

# Tunables for the mutual-information computation; each may be preset by
# the including makefile or on the command line.
ifndef CORR-MIN-SEP
CORR-MIN-SEP = 7
endif
ifndef CORR-SIGNIF
CORR-SIGNIF = 0.1
endif
ifndef CORR-OCC-FRAC
CORR-OCC-FRAC = 0.7
endif
ifndef CORR-START-COL
CORR-START-COL = ${START-COL}
endif

ifeq (${GCC_VERSION},2.96)
# This version of correlated-columns is compiled to run
# using the 2.96 g++ libraries, but is rather obsolete.
CORR-COLUMNS = /cse/faculty/karplus/commands/2.96/correlated-columns
else
CORR-COLUMNS = /projects/compbio/programs/correlated-columns/bin/${UNAME-M}/opt/correlated-columns
endif

# Mutual information from a gzipped alignment.  The command script is
# written to a per-target scratch file ($@.script) rather than a shared
# tmp.script: "mutual_info" asks for four of these tables, and with a shared
# name the recipes overwrite each other's script under "make -j".
%.mi.rdb: %.a2m.gz
	echo "SetSignif ${CORR-SIGNIF}" > $@.script
	echo "SetMinPairs ${CORR-OCC-FRAC}" >> $@.script
	echo "SetMinSep ${CORR-MIN-SEP}" >> $@.script
	echo "ReadA2M $< ${CORR-START-COL}" >> $@.script
	echo "MutualInfoSignif $@ ${@:.rdb=.constraints}" >> $@.script
	${CORR-COLUMNS} < $@.script
	rm $@.script

# Same computation from an uncompressed alignment.
# NOTE(review): unlike the .a2m.gz variant this one sets neither SetMinSep
# nor a start column for ReadA2M -- confirm whether that is intentional.
%.mi.rdb: %.a2m
	echo "SetSignif ${CORR-SIGNIF}" > $@.script
	echo "SetMinPairs ${CORR-OCC-FRAC}" >> $@.script
	echo "ReadA2M $<" >> $@.script
	echo "MutualInfoSignif $@ ${@:.rdb=.constraints}" >> $@.script
	${CORR-COLUMNS} < $@.script
	rm $@.script

undertaker_start_section:
	${YEAST-SCRIPTS}/add_section_head_summary_html \
		"Undertaker (3d) files" >> ${WORKDIR}/summary.html

# Undertaker command list derived from the top reported alignments.
%.undertaker-align.under: %.top_reported_alignments.rdb
	${YEAST-SCRIPTS}/make_undertaker_alignment_list < $^ > $@

##
## FRAGFINDER
##
FRAGFINDER-SEQS = ${PCEM-INDEXES}/dunbrack-50pc-2621.x-seqs
FRAGFINDER-STR2 = ${PCEM-INDEXES}/dunbrack-50pc-2621.str2s
FRAGFINDER-CB_BURIAL_14_7 = ${PCEM-INDEXES}/dunbrack-50pc-2621.CB-burial-14-7s
FRAGFIND-STR2-TWOTRACKDBS = ${FRAGFINDER-SEQS},${FRAGFINDER-STR2}
FRAGFIND-STR2+CB_BURIAL_14_7-THREETRACKDBS = ${FRAGFINDER-SEQS},${FRAGFINDER-STR2},${FRAGFINDER-CB_BURIAL_14_7}

# Two-track fragment search (fragment length 9, 6 per match); both output
# files (.frag and .fstat) are gzipped afterwards.
${TARG_AL}.frag.gz: ${TARG_AL}-w0.5.mod ${STR2_TRACKMOD}
	${FRAGFINDER} ${TARG_AL} \
		-a protein,STR2 \
		-trackmod ${TARG_AL}-w0.5.mod,${TARG_AL}.str2.mod \
		-track_coeff 1.0,0.3 \
		-db ${FRAGFIND-STR2-TWOTRACKDBS} \
		-firstsequence ${TARGET}.a2m,- \
		-fraglen 9 -numpermatch 6
	gzip -9f ${TARG_AL}.frag
	gzip -9f ${TARG_AL}.fstat

# Three-track fragment search (fragment length 9, 30 per match).
${TARG_AL}.many.frag.gz: ${TARG_AL}-w0.5.mod ${TARG_AL}.str2.mod ${TARG_AL}.CB_burial_14_7.mod
	${FRAGFINDER} ${TARG_AL}.many \
		-alphabet protein,str2,CB_BURIAL_14_7 \
		-trackmod ${TARG_AL}-w0.5.mod,${TARG_AL}.str2.mod,${TARG_AL}.CB_burial_14_7.mod \
		-trackcoeff 1.0,0.4,0.4 \
		-db ${FRAGFIND-STR2+CB_BURIAL_14_7-THREETRACKDBS} \
		-firstsequence ${TARGET}.a2m,-,- \
		-fraglen 9 -numpermatch 30
	gzip -9f ${TARG_AL}.many.frag
	gzip -9f ${TARG_AL}.many.fstat

# Multiple alignment of the target with every template scoring better than
# BEST_EVALUE; by default it is produced directly from the two track models.
ifndef USE_MLIB_FOR_UNDERTAKER
%-2track-undertaker.a2m: %-w0.5.mod %.str2.mod \
		${TEMPLATE-SEQS} ${TEMPLATE-STR2}
	${HMMSCORE} $*-2track-undertaker \
		-calibrate 1 \
		-a protein,STR2 \
		-trackmod $*-w0.5.mod,$*.str2.mod \
		-track_coeff 1.0,0.3 \
		-sw 2 -adpstyle ${ADPSTYLE} \
		-db ${TARGET}.a2m,$*.str2.seq \
		-db ${STR2-TWOTRACKDBS} \
		-select_score 8 -Emax ${BEST_EVALUE} \
		-select_align 4
else
# WARNING: THIS OPTION NOT DEBUGGED YET!
# There seems to be a bug in HMMSCORE that causes the db list on the
# command line to be misparsed when using a model library
%-2track-undertaker.a2m: ${STR2_MLIB} ${STR2-DEPEND}
	${HMMSCORE} str2foo \
		-modellibrary $< \
		-db ${TARGET}.a2m,$*.str2.seq \
		-db ${STR2-TWOTRACKDBS} \
		-select_score 8 -Emax ${BEST_EVALUE} \
		-select_align 4
	sleep 20
	mv -f str2foo.1.$*-100-30-str2.a2m $@
endif

# will need to create a "decoys" directory before any full 3D building
# with undertaker can be done.
# Undertaker script that reads every decoy PDB file of this target: each
# path matching decoys/*${TARGET}*pdb* becomes a ReadConformPDB command,
# bracketed by InfilePrefix lines.  Group ownership and permissions are
# adjusted on a best-effort basis ("-" prefix).
read-decoys.under: decoys
	echo "InfilePrefix decoys/"> $@
	ls decoys/*${TARGET}*pdb* \
		| sed 's;decoys/;ReadConformPDB ;' \
		>> $@
	echo "InfilePrefix" >> $@
	-chgrp protein $@
	-chmod g+w $@

# Score all decoys with undertaker and sort the resulting table by cost.
# Only score-decoys.rdb is the target of this recipe.  Previously both
# files were listed as targets of one explicit rule, which make treats as
# two independent rules: building score-decoys.breaks re-ran undertaker and
# then redirected the sorted table into score-decoys.breaks via "> $@".
score-decoys.rdb: read-decoys.under \
		define-score.under ${YEAST-SCRIPTS}/score-decoys.under
	${UNDERTAKER} < ${YEAST-SCRIPTS}/score-decoys.under
	${PCB}/sorttbl cost < score-decoys-unsorted.rdb > $@
	-rm score-decoys-unsorted.rdb
	-chgrp protein $@ score-decoys.breaks
	-chmod g+w $@ score-decoys.breaks

# NOTE(review): assumes the undertaker run above writes score-decoys.breaks
# as a side effect (score-decoys.under is not visible here) -- confirm.
score-decoys.breaks: score-decoys.rdb ;

# Build the 3D model from the alignments with undertaker.
# stdout and stderr both go to show-align.log; the portable "> file 2>&1"
# form is used because ">&" is a csh/bash extension that a POSIX /bin/sh
# (e.g. dash) rejects.
# NOTE(review): the gzip step names "$*.undertaker-align.pdb" literally, so
# PDBEXT is presumably "pdb.gz" unless NOGZIP_PDB is set -- confirm.
%.undertaker-align.${PDBEXT}: show-align.under \
		%.undertaker-align.under \
		%-2track-undertaker.a2m
	${UNDERTAKER} < show-align.under > show-align.log 2>&1
ifndef NOGZIP_PDB
	gzip -f $*.undertaker-align.pdb
endif

# The pictures depend on the undertaker model unless NO-REDO-UNDERTAKER is
# set (REDO-SEARCHES always forces the dependency).
ifdef REDO-SEARCHES
JPEG-DEPEND := ${TARGET}.t2k.undertaker-align.${PDBEXT}
else
ifdef NO-REDO-UNDERTAKER
JPEG-DEPEND :=
else
JPEG-DEPEND := ${TARGET}.t2k.undertaker-align.${PDBEXT}
endif
endif

CONVERT_200_OPTIONS := -resize 200x200 -quality 85 -frame 1x1 -mattecolor '\#000000'
CONVERT_500_OPTIONS := -resize 500x500 -quality 75 -frame 1x1 -mattecolor '\#000000'

.PHONY: jpeg
jpeg: ${TARGET}.view1_200.jpg ${TARGET}.view2_200.jpg ${TARGET}.view3_200.jpg \
		${TARGET}.view1_500.jpg ${TARGET}.view2_500.jpg ${TARGET}.view3_500.jpg

# A pattern rule with several targets: one run of the recipe produces all
# six pictures.  rasmol is driven by make-eps.rasmol and the tmp1..3.eps
# files are converted at two sizes, then removed.
%.view1_200.jpg %.view2_200.jpg %.view3_200.jpg \
%.view1_500.jpg %.view2_500.jpg %.view3_500.jpg : ${JPEG-DEPEND} ${YEAST}/starter-directory/make-eps.rasmol
	rasmol -nodisplay $*.t2k.undertaker-align.${PDBEXT} < ${YEAST}/starter-directory/make-eps.rasmol
	convert ${CONVERT_200_OPTIONS} tmp1.eps $*.view1_200.jpg
	convert ${CONVERT_200_OPTIONS} tmp2.eps $*.view2_200.jpg
	convert ${CONVERT_200_OPTIONS} tmp3.eps $*.view3_200.jpg
	convert ${CONVERT_500_OPTIONS} tmp1.eps $*.view1_500.jpg
	convert ${CONVERT_500_OPTIONS} tmp2.eps $*.view2_500.jpg
	convert ${CONVERT_500_OPTIONS} tmp3.eps $*.view3_500.jpg
	-rm -f tmp*eps

# Per-directory undertaker read scripts.  "&&" (not ";") so the generator
# does not run in the wrong directory when the cd fails.
%/read-alignments.under: %
	cd $* && \
	${UNDERTAKER-SCRIPTS}/make-read-fragments.csh > read-alignments.under

%/read-pdb.under: %
	cd $* && \
	${UNDERTAKER-SCRIPTS}/make-read-decoys.csh > read-pdb.under

# for close homology modeling, may want to pick out best scores using
# single sequences
# A small csh script greps each template directory's *SW*dist files for the
# directory's own name; the lines are sorted numerically on the fourth field
# and de-duplicated.  "-k 4" replaces the obsolete "+3" key syntax, which
# current GNU sort takes for a file name.  The scratch script is named
# after the target so it cannot clash with other recipes.
sw-best: [1-9]*
	echo 'foreach x ([0-9]*)' > $@.script
	echo 'grep -h "$$x " $$x/*SW*dist' >> $@.script
	echo 'end' >> $@.script
	chmod +x $@.script
	csh $@.script \
		| sort -n -k 4 \
		| uniq \
		> $@
	rm $@.script

ifdef PRED
ifdef PRED2
PRED-NOSTRUCT = ${PCEM}/pdb/${PRED2}/${PRED}/nostruct-align
PRED-INFO = ${PCEM}/pdb/${PRED2}/${PRED}/info

# SW is the value passed to hmmscore's -sw option: 2 for local and simplesw
# alignments, 0 for global ones.
ifdef ALIGN-TYPE
ifeq (${ALIGN-TYPE},local)
SW=2
endif
ifeq (${ALIGN-TYPE},global)
SW=0
endif
ifeq (${ALIGN-TYPE},simplesw)
SW=2
endif
endif

# MASTER selects which model drives the alignment (target, template or
# fssp) and with it the model file and the output name.
ifeq (${MASTER},target)
ifeq (${ALIGN-TYPE},local)
ALIGN-MODEL=${TARG_AL}-w0.5.mod
endif
ifeq (${ALIGN-TYPE},global)
ALIGN-MODEL=${TARG_AL}-w0.5.mod
endif
ifeq (${ALIGN-TYPE},simplesw)
ALIGN-MODEL=${TARGET}.mod
endif
ALIGN-NAME = ${PRED}/${TARGET}-${PRED}-${ALIGN-TYPE}-adpstyle${ADP}
endif

ifeq (${MASTER},template)
ALIGN-MODEL = ${PRED-NOSTRUCT}/${PRED}.t2k-w0.5.mod
ALIGN-NAME = ${PRED}/${PRED}-${TARGET}-${ALIGN-TYPE}-adpstyle${ADP}
endif

ifeq (${MASTER},fssp)
FSSP-STRUCT = ${PCEM}/pdb/${FSSP2}/${FSSP}/struct-align
ALIGN-MODEL = ${FSSP-STRUCT}/${FSSP}.fssp-w0.5.mod
ALIGN-NAME = ${PRED}/${FSSP}-${TARGET}-fssp-${ALIGN-TYPE}-adpstyle${ADP}
endif

ifdef ALIGN-NAME
.PHONY: single-track-alignment
single-track-alignment: ${ALIGN-NAME}.a2m
	echo $^ made.

# One pairwise alignment of target and template; the .dist file written by
# the same hmmscore run is gzipped afterwards.
${ALIGN-NAME}.a2m: ${ALIGN-MODEL} ${PRED}/${PRED}.seq
	${HMMSCORE} ${ALIGN-NAME} \
		-alphabet protein -i $< -db ${TARGET}.a2m \
		-db ${PRED}/${PRED}.seq \
		-db_size ${LIBSIZE} \
		-simple_threshold 10000 \
		-sw ${SW} -dpstyle 0 -subtract_null 4 \
		-adpstyle ${ADP} \
		-select_align 8
	gzip -f ${ALIGN-NAME}.dist
endif

# The driver targets below re-invoke make once per alignment variant.
# ${MAKE} is used instead of a literal "make" so that the sub-makes run the
# same make program and honour -n and the -j jobserver.  Each sub-make keeps
# its "-" prefix: one failed alignment must not stop the others.
# ADP=5 variants are skipped when ALIGN_VITERBI is set.
.PHONY: single-track-target-alignments template-alignments

# Fetch the template sequence (from the info directory if it has one, else
# extracted from ${PDB_DB}; a failed extraction removes the partial file),
# then align with the target as master.
single-track-target-alignments:
	mkdir -p ${PRED}
	test -e ${PRED}/${PRED}.seq -o '!' -e ${PRED-INFO}/${PRED}.stride-mixed.seq \
		|| cp -p ${PRED-INFO}/${PRED}.stride-mixed.seq ${PRED}/${PRED}.seq
	test -e ${PRED}/${PRED}.seq \
		|| ${YEAST-SCRIPTS}/extract-one-seq ${PRED} < ${PDB_DB} > ${PRED}/${PRED}.seq \
		|| { echo removing rm ${PRED}/${PRED}.seq; rm ${PRED}/${PRED}.seq ;}
	-${MAKE} -k ALIGN-TYPE=simplesw \
		PRED=${PRED} PRED2=${PRED2} \
		ADP=1 MASTER=target single-track-alignment
ifndef ALIGN_VITERBI
	-${MAKE} -k ALIGN-TYPE=simplesw \
		PRED=${PRED} PRED2=${PRED2} \
		ADP=5 MASTER=target single-track-alignment
endif
	-${MAKE} -k ALIGN-TYPE=local \
		PRED=${PRED} PRED2=${PRED2} \
		ADP=1 MASTER=target single-track-alignment
ifndef ALIGN_VITERBI
	-${MAKE} -k ALIGN-TYPE=local \
		PRED=${PRED} PRED2=${PRED2} \
		ADP=5 MASTER=target single-track-alignment
endif
	-${MAKE} -k ALIGN-TYPE=global \
		PRED=${PRED} PRED2=${PRED2} \
		ADP=1 MASTER=target single-track-alignment
ifndef ALIGN_VITERBI
	-${MAKE} -k ALIGN-TYPE=global \
		PRED=${PRED} PRED2=${PRED2} \
		ADP=5 MASTER=target single-track-alignment
endif

# Same alignments with the template's model as master.
template-alignments:
	mkdir -p ${PRED}
	-${MAKE} -k ALIGN-TYPE=local \
		PRED=${PRED} PRED2=${PRED2} ADP=1 MASTER=template single-track-alignment
ifndef ALIGN_VITERBI
	-${MAKE} -k ALIGN-TYPE=local \
		PRED=${PRED} PRED2=${PRED2} ADP=5 MASTER=template single-track-alignment
endif
	-${MAKE} -k ALIGN-TYPE=global \
		PRED=${PRED} PRED2=${PRED2} ADP=1 MASTER=template single-track-alignment
ifndef ALIGN_VITERBI
	-${MAKE} -k ALIGN-TYPE=global \
		PRED=${PRED} PRED2=${PRED2} ADP=5 MASTER=template single-track-alignment
endif

########################################
# FSSP-based alignments
########################################
ifdef FSSP
ifdef FSSP2
.PHONY: fssp-template-alignments
fssp-template-alignments:
	mkdir -p ${PRED}
	-${MAKE} -k ALIGN-TYPE=local \
		PRED=${PRED} PRED2=${PRED2} ADP=5 \
		MASTER=fssp FSSP=${FSSP} FSSP2=${FSSP2} single-track-alignment
	-${MAKE} -k ALIGN-TYPE=global \
		PRED=${PRED} PRED2=${PRED2} ADP=5 \
		MASTER=fssp FSSP=${FSSP} FSSP2=${FSSP2} single-track-alignment
endif
endif

########################################
# Two-track target alignments
########################################
ifndef STRUCT-WEIGHT
STRUCT-WEIGHT=0.3
endif

ifdef STRUCT-ALPH
# copy local structure alphabet name to HMMSCORE-ALPH, renaming as needed to
# match HMMSCORE alphabet names
HMMSCORE-ALPH = ${STRUCT-ALPH}
ifeq (${STRUCT-ALPH},stride-ebghtl)
HMMSCORE-ALPH = EBGHTL
endif
ifeq (${STRUCT-ALPH},dssp-ebghstl)
HMMSCORE-ALPH = EBGHSTL
endif
ifeq (${STRUCT-ALPH},dssp-ehl2)
HMMSCORE-ALPH = EHL2
endif

# copy local structure alphabet name to INFO-ALPH, renaming as needed to
# match sequence names in info directories
INFO-ALPH = ${STRUCT-ALPH}
ifeq (${STRUCT-ALPH},stride-ebghtl)
INFO-ALPH = 2d
endif
ifeq (${STRUCT-ALPH},dssp-ebghstl)
INFO-ALPH = dssp
endif
ifeq (${STRUCT-ALPH},dssp-ehl2)
INFO-ALPH = dssp
endif

TWO-ALIGN-NAME = ${PRED}/${TARGET}-${PRED}-${ALIGN-TYPE}-${STRUCT-ALPH}-${STRUCT-WEIGHT}-adpstyle${ADP}

.PHONY: two-track-alignment
two-track-alignment: ${TWO-ALIGN-NAME}.a2m
	echo $^ made.

# One two-track (amino acid + local structure) pairwise alignment of the
# target models against the template's sequences from its info directory.
# "mkdir -p" creates the template directory without complaining when it is
# already there; the .dist file of the run is gzipped afterwards.
${TWO-ALIGN-NAME}.a2m: ${TARG_AL}-w0.5.mod ${TARG_AL}.${STRUCT-ALPH}.mod
	mkdir -p ${PRED}
	${HMMSCORE} ${TWO-ALIGN-NAME} \
		-alphabet protein,${HMMSCORE-ALPH} \
		-trackmod ${TARG_AL}-w0.5.mod,${TARG_AL}.${STRUCT-ALPH}.mod \
		-trackcoeff 1.0,${STRUCT-WEIGHT} \
		-db ${TARGET}.a2m,${TARG_AL}.${STRUCT-ALPH}.seq \
		-db ${PRED-INFO}/${PRED}.stride-mixed.seq,${PRED-INFO}/${PRED}.stride-mixed.${INFO-ALPH} \
		-db_size ${LIBSIZE} \
		-simple_threshold 10000 \
		-sw ${SW} -dpstyle 0 -subtract_null 4 \
		-adpstyle ${ADP} \
		-select_align 8
	gzip -f ${TWO-ALIGN-NAME}.dist
endif

# Note: no two-track alignment for dssp-ehl2, since we aren't creating
# a sequence for that alphabet yet.
ifdef ALIGN_VITERBI
TWO_TRACK_ADP=1
else
TWO_TRACK_ADP=5
endif

# Driver: one sub-make per (alignment type, structure alphabet) pair, plus
# two extra str2 runs with STRUCT-WEIGHT=1.5.  ${MAKE} replaces the literal
# "make" so the sub-makes use the same make program and honour -n and the
# -j jobserver.  Each sub-make keeps its "-" prefix so that one failed
# alignment does not stop the rest.
.PHONY: two-track-alignments
two-track-alignments:
	mkdir -p ${PRED}
	-${MAKE} -k ALIGN-TYPE=local ADP=${TWO_TRACK_ADP} \
		PRED=${PRED} PRED2=${PRED2} \
		MASTER=target STRUCT-ALPH=str2 \
		two-track-alignment
	-${MAKE} -k ALIGN-TYPE=global ADP=${TWO_TRACK_ADP} \
		PRED=${PRED} PRED2=${PRED2} \
		MASTER=target STRUCT-ALPH=str2 \
		two-track-alignment
	-${MAKE} -k ALIGN-TYPE=local ADP=${TWO_TRACK_ADP} \
		PRED=${PRED} PRED2=${PRED2} \
		MASTER=target STRUCT-ALPH=stride-ebghtl \
		two-track-alignment
	-${MAKE} -k ALIGN-TYPE=global ADP=${TWO_TRACK_ADP} \
		PRED=${PRED} PRED2=${PRED2} \
		MASTER=target STRUCT-ALPH=stride-ebghtl \
		two-track-alignment
	-${MAKE} -k ALIGN-TYPE=local ADP=${TWO_TRACK_ADP} \
		PRED=${PRED} PRED2=${PRED2} \
		MASTER=target STRUCT-ALPH=alpha \
		two-track-alignment
	-${MAKE} -k ALIGN-TYPE=global ADP=${TWO_TRACK_ADP} \
		PRED=${PRED} PRED2=${PRED2} \
		MASTER=target STRUCT-ALPH=alpha \
		two-track-alignment
	-${MAKE} -k ALIGN-TYPE=local ADP=${TWO_TRACK_ADP} \
		PRED=${PRED} PRED2=${PRED2} \
		MASTER=target STRUCT-ALPH=dssp-ebghstl \
		two-track-alignment
	-${MAKE} -k ALIGN-TYPE=global ADP=${TWO_TRACK_ADP} \
		PRED=${PRED} PRED2=${PRED2} \
		MASTER=target STRUCT-ALPH=dssp-ebghstl \
		two-track-alignment
	-${MAKE} -k ALIGN-TYPE=local ADP=${TWO_TRACK_ADP} \
		PRED=${PRED} PRED2=${PRED2} \
		STRUCT-WEIGHT=1.5 \
		MASTER=target STRUCT-ALPH=str2 \
		two-track-alignment
	-${MAKE} -k ALIGN-TYPE=global ADP=${TWO_TRACK_ADP} \
		PRED=${PRED} PRED2=${PRED2} \
		STRUCT-WEIGHT=1.5 \
		MASTER=target STRUCT-ALPH=str2 \
		two-track-alignment

########################################
# Three-track target alignments
########################################
ifndef BURIAL-WEIGHT
BURIAL-WEIGHT=0.4
endif
ifndef BURIAL-ALPH
BURIAL-ALPH = CB_burial_14_7
endif

# Name of the burial alphabet as used in the info-directory file names.
BURIAL-INFO-ALPH = ${BURIAL-ALPH}
ifeq (${BURIAL-ALPH},CB_burial_14_7)
BURIAL-INFO-ALPH = CB-burial-14-7
endif

THREE-ALIGN-NAME = ${PRED}/${TARGET}-${PRED}-${ALIGN-TYPE}-${STRUCT-ALPH}+${BURIAL-ALPH}-${STRUCT-WEIGHT}+${BURIAL-WEIGHT}-adpstyle${ADP}

.PHONY: three-track-alignment three-track-alignments
three-track-alignment: ${THREE-ALIGN-NAME}.a2m
	echo $^ made.

# One three-track (amino acid + local structure + burial) pairwise alignment.
# NOTE(review): this rule sits outside the "ifdef STRUCT-ALPH" block but
# uses HMMSCORE-ALPH and INFO-ALPH, which are only set inside it.
${THREE-ALIGN-NAME}.a2m: ${TARG_AL}-w0.5.mod \
		${TARG_AL}.${STRUCT-ALPH}.mod \
		${TARG_AL}.${BURIAL-ALPH}.mod
	mkdir -p ${PRED}
	${HMMSCORE} ${THREE-ALIGN-NAME} \
		-alphabet protein,${HMMSCORE-ALPH},${BURIAL-ALPH} \
		-trackmod ${TARG_AL}-w0.5.mod,${TARG_AL}.${STRUCT-ALPH}.mod,${TARG_AL}.${BURIAL-ALPH}.mod \
		-trackcoeff 1.0,${STRUCT-WEIGHT},${BURIAL-WEIGHT} \
		-db ${TARGET}.a2m,${TARG_AL}.${STRUCT-ALPH}.seq,${TARG_AL}.${BURIAL-ALPH}.seq \
		-db ${PRED-INFO}/${PRED}.stride-mixed.seq,${PRED-INFO}/${PRED}.stride-mixed.${INFO-ALPH},${PRED-INFO}/${PRED}.stride-mixed.${BURIAL-INFO-ALPH} \
		-db_size ${LIBSIZE} \
		-simple_threshold 10000 \
		-sw ${SW} -dpstyle 0 -subtract_null 4 \
		-adpstyle ${ADP} \
		-select_align 8
	gzip -f ${THREE-ALIGN-NAME}.dist

ifdef ALIGN_VITERBI
THREE_TRACK_ADP=1
else
THREE_TRACK_ADP=5
endif

# Driver for the single three-track variant (local, str2 + CB_burial_14_7).
three-track-alignments:
	mkdir -p ${PRED}
	-${MAKE} -k ALIGN-TYPE=local ADP=${THREE_TRACK_ADP} \
		PRED=${PRED} PRED2=${PRED2} \
		MASTER=target STRUCT-ALPH=str2 BURIAL-ALPH=CB_burial_14_7 \
		STRUCT-WEIGHT=0.4 BURIAL-WEIGHT=0.4 \
		three-track-alignment

endif #if PRED2
endif #if PRED

# tree building (not done by default)
# programs in non-standard places
PHYTREE = /projects/compbio/usr/karplus/src/phytree/phytree
DG = /projects/compbio/usr/karplus/src/phytree/dg
DTREE = /projects/compbio/usr/karplus/src/phytree/dtree

# phytree works on the uncompressed alignment; the alignment and the sorted
# alignment are re-compressed afterwards.  All clean-up steps are
# best-effort ("-" prefix).
# NOTE(review): the recipe removes $*.phytrace although %.phytrace is one
# of the rule's targets -- confirm that this is intended.
%_sorted.ids %.tree %_sorted.a2m.gz %.phytrace: %.a2m.gz
	-gunzip -f $*.a2m.gz
	${PHYTREE} -f -o -i -r flat $* $*.a2m ${MIXTURE}
	-gzip -f $*.a2m
	-gzip -f $*_sorted.a2m
	-rm $*.phytrace $*.tree_weight

%tree.ps: %tree
	${DG} $^

%tree-unroot.ps: %tree
	${DTREE} $^

# extract the ids stripping off the muldomain-added section.
%.bare-ids: %.a2m.gz
	gunzip -c $^ \
		| ${PCBS}/ids-from-fasta -nodom \
		>$@

# WARNING: DISTILL is not installed on most SoE computers,
#	but ps2pdf produces very verbose pdf files.
# Runs distill on the host "apache"; "&&" keeps distill from running in the
# wrong directory when the remote cd fails.
%.pdf: %.eps
	ssh apache 'cd ${WORKDIR} && distill $^'

# This section is for realignment using key residues and selecting
# sequences that have those key residues.
# Convenience targets that only group files to build; they create no file
# of their own name, so they are declared phony.
.PHONY: t2k-selected t2k-realign t2k-realign-w0.5

t2k-selected: \
		${TARGET}.t2k.w0.5.key-residues \
		${TARGET}.t2k.selected.a2m.gz

t2k-realign: \
		${TARGET}.t2k.w0.5.key-residues \
		${TARGET}.t2k.selected.a2m.gz \
		${TARGET}.t2k.realign.a2m.gz

t2k-realign-w0.5: \
		${TARGET}.t2k.w0.5.key-residues \
		${TARGET}.t2k.realign.a2m.gz \
		${TARGET}.t2k.realign-w0.5.mod \
		${TARGET}.t2k.realign.w0.5-logo.eps \
		${TARGET}.t2k.realign.w0.5-logo.pdf

# Thresholds handed to pick-key-residues (-minbits and -minfreq).
ifndef KEY-MIN-SAVINGS
KEY-MIN-SAVINGS = 1.5
endif
ifndef KEY-MIN-FREQ
KEY-MIN-FREQ = 0.04
endif

# Pick the key residues out of a .saves file.
%.key-residues: %.saves
	${PCEM-SCRIPTS2K}/pick-key-residues \
		-minbits ${KEY-MIN-SAVINGS} \
		-minfreq ${KEY-MIN-FREQ} \
		-first_residue ${START-COL} \
		<$^ >$@

# Filter the alignment through select-by-key-residues, using the key
# residues of the same stem; only the alignment ($<) is decompressed.
%.selected.a2m.gz: %.a2m.gz %.w0.5.key-residues
	gunzip -c $< \
		| ${PCEM-SCRIPTS2K}/select-by-key-residues \
			-first_residue ${START-COL} \
			-residues $*.w0.5.key-residues \
		| gzip \
		>$@

# Realign the full alignment to the model built from the selected
# sequences.  ${HMMSCORE} is used, as in every other scoring rule of this
# file, instead of whichever "hmmscore" happens to be first on PATH.
%.realign.a2m.gz: %.selected-w0.5.mod %.a2m.gz
	${HMMSCORE} $*.realign -i $< -db $*.a2m.gz \
		-adpstyle 5 -sw 2 -selectalign 8
	gzip -8f $*.realign.a2m

# RasMol script from the key residues.
%.conserved.rasmol: %.key-residues
	${YEAST-SCRIPTS}/key-to-rasmol < $^ > $@

# "conserved" is a symlink to the rasmol script; a failing ln (for example
# because the link already exists) is ignored.
conserved: ${TARG_AL}.w0.5.conserved.rasmol
	-ln -s $^ $@

# This section is for realignment using Bob Edgar's "muscle" program.
# Realign with muscle (limited to two hours); output is gzipped.
%.muscle.gz: %.a2m.gz
	gunzip -c $^ \
		| muscle -maxhours 2.0 \
		| gzip \
		> $@

# Convert the muscle output to a2m with a2m_from_muscle.
%.muscle.a2m.gz: %.muscle.gz
	${PCEM-SCRIPTS}/a2m_from_muscle -in $^ -out $@ -guide 1

# Merge every pairwise alignment found in the sub-directories, guided by
# the target sequence.
all-align.a2m.gz: */*.a2m*
	${PCEM-SCRIPTS}/merge_a2m -guide ${TARGET} -out $@ $^

all-align.pa: all-align.a2m.gz
	${BIN-SAM}/prettyalign $^ -m5 > $@

# Model built from the full (unthinned) alignment with explicit sequence
# weights.  The weight and log scratch files are named after the target
# instead of the shared tmp.weight/tmp.log, so two of these recipes running
# at once (make -j) cannot overwrite each other's weights.
%.no-thin.mod: %.a2m.gz
	${PCL}/make-weights.pl $^ $@.weight ${MIXTURE} \
		"EntropyWeight 0.7 10" 1.0 > $@.log
	${BIN-SAM}/modelfromalign $*.no-thin -alignfile $^ \
		-prior_library ${MIXTURE} \
		-alignment_weights $@.weight
	-rm -f $@.weight $@.log

# Logo of the unthinned model; also writes $*.no-thin.saves through
# -logo_savings_output.
%.no-thin.logo.eps: %.no-thin.mod
	${BIN-SAM}/makelogo $*.no-thin.logo -i $< \
		-logo_start_num ${START-COL} \
		-logo_rel_entropy 1 \
		-logo_bars_per_line ${LOGO-WIDTH} -logo_title "$* no thinning" \
		-logo_caption_f ${TARG_AL}.dssp-ebghstl.seq \
		-logo_under_file ${TARGET}.a2m \
		-logo_savings_output $*.no-thin.saves

# This section is for SAM-T04 alignments and related information.
# With REDO-T04 set, the alignment additionally depends on "ALWAYS"
# (presumably a force target defined elsewhere in the file -- confirm).
ifdef REDO-T04
T04-DEPEND = ALWAYS
else
T04-DEPEND=
endif

${TARGET}.t04.a2m.gz: ${TARGET}.a2m ${T04-DEPEND}
	echo "making T04 alignment"
	${PCEM-SCRIPTS04}/target04 \
		-seed $< -out $@ \
		-tmp /var/tmp -db ${NR} \
		-final_align viterbi