00001
00002
00003
00005
00006 static int INTERNAL_INT_ID;
00007 static int INITIAL_INT_ID;
00008 static int TERMINAL_INT_ID;
00009 static int SINGLE_INT_ID;
00010
00011 static bool checkForAdjacentStopCodon(const RegionPrediction& predReg, const RegionPrediction& itemReg,
00012 const DnaStr& theSequence) {
00013 if(itemReg.getEnd5()-predReg.getEnd3() > 3) {
00014 char codon[3];
00015 int idx = predReg.getEnd3()+1;
00016 codon[0] = theSequence[idx];
00017 codon[1] = theSequence[idx+1];
00018 codon[2] = theSequence[idx+2];
00019 return theSequence.isStop(codon);
00020 }
00021 return false;
00022 }
00023
00024 static bool isValidFrameFromLen(int start, int stop, int bp_need, int frame) {
00025 const int raw_len = stop-start+1;
00026 if(raw_len == 1) {
00027 if(bp_need == 1 && frame == 0) return true;
00028 if(bp_need == 2 && frame == 1) return true;
00029 if(bp_need == 3 && frame == 2) return true;
00030 } else if(raw_len == 2) {
00031 if(bp_need == 1 && frame == 2) return true;
00032 if(bp_need == 2 && frame == 0) return true;
00033 if(bp_need == 3 && frame == 1) return true;
00034 } else {
00035 int leading_bp = (raw_len-bp_need) % 3;
00036 int predFrame = AnnotationItem::leadingBp2Frame(leading_bp);
00037 if(predFrame == frame) return true;
00038 }
00039 return false;
00040 }
00041
00042 static char justDontAskAboutThisOptimizationHackShit[3];
00043 static const char* extractCodonFrom3to5(const AnnotationItem& item, const UnitLoc& ul,
00044 const DnaStr& theSequence, int bp_need) {
00045 char* codon = justDontAskAboutThisOptimizationHackShit;
00046 if(bp_need <= 0) return codon;
00047 const int start = item.getEnd5(), stop = item.getEnd3();
00048 int clen = stop-start+1, cnt = 0;
00049
00050 if(!isValidFrameFromLen(start,stop,bp_need,ul.getFrame())) return NULL;
00051 while(bp_need > 0 && clen > 0) {
00052 --bp_need;
00053 codon[bp_need] = theSequence[(stop-cnt)];
00054 --clen;
00055 ++cnt;
00056 }
00057
00058 if(bp_need == 0) return codon;
00059
00060 assert(ul.getStrnd()==theSequence.getStrnd());
00061 const UnitScore& unit = item.getMatrix().getUnitScore(ul);
00062 const AnnotationItem* predCell = unit.getPredCell();
00063 if(predCell) {
00064 const AnnotationItem& predItem = *predCell;
00065 const UnitLoc& pul = unit.getPredUnitLoc();
00066 return extractCodonFrom3to5(predItem,pul,theSequence,bp_need);
00067 }
00068 return NULL;
00069 }
00070
00075 static bool checkFirstCodonOfNewModel(const AnnotationItem& item, int itemType, int currFrame,
00076 const AnnotationItem& pred, const UnitLoc& pl) {
00077 int predType = pl.getType();
00078 int predFrame = pl.getFrame();
00079 bool rghtBnd = pl.isRghtBnd();
00080 bool isBeginParse = pl.isBeginParse();
00081
00082 assert(currFrame==0||currFrame==1);
00083 assert((item.length()==1&&currFrame==1) || (currFrame==0&&(item.length()==1||item.length()==2)));
00084 if( itemType == Internal::getInstance().getScoreIdx() || predType == Internal::getInstance().getScoreIdx())
00085 return false;
00086 if( item.getStrnd() == dsu::ePos && (itemType==Terminal::getInstance().getScoreIdx())) return false;
00087 if( item.getStrnd() == dsu::eNeg && (itemType==Initial::getInstance().getScoreIdx())) return false;
00088
00089 bool isContig = genemodels::isContiguous(pred.getEnd3(),pred.getStrnd(),item.getEnd5(),item.getStrnd());
00090
00091
00092
00093
00094
00095 if( currFrame==0 && !isContig ) return true;
00096
00097
00098
00099
00100 bool predIsStart = false;
00101 int exonCnt = 0;
00102 const UnitScore& us = pred.getMatrix().getUnitScore(UnitLoc(predType,predFrame,exonCnt,pred.getStrnd(),rghtBnd,isBeginParse));
00103 if(us.getPredCell()) {
00104 const AnnotationItem& predPred = *us.getPredCell();
00105 predIsStart = !genemodels::isContiguous(predPred.getEnd3(), us.getPredStrnd(),pred.getEnd5(),pred.getStrnd());
00106 } else {
00107 predIsStart = true;
00108 }
00109 if( currFrame==1 && pred.length()==1 && predIsStart ) return true;
00110
00111
00112
00113 return false;
00114 }
00115
00119 static bool passSpecialLookAhead(const AnnotationItem& item, int itemType, int currFrame,
00120 const AnnotationItem& pred, const UnitLoc& pul, bool newModel) {
00121
00122 int predType = pul.getType();
00123 assert((item.length()==1&&currFrame==1) || (currFrame==0&&(item.length()==1||item.length()==2)));
00124 bool isContig = genemodels::isContiguous(pred.getEnd3(),pred.getStrnd(),item.getEnd5(),item.getStrnd());
00125 const DnaStr& str = item.getSeq();
00126
00127
00128 if( (isContig && pred.length()==1) || !isContig) {
00129
00130 const UnitScore& us = pred.getMatrix().getUnitScore(pul);
00131 if(us.getPredCell() || !isContig) {
00132 bool checkCodon = !isContig;
00133 if(us.getPredCell() && isContig) {
00134 const AnnotationItem& predPred = *us.getPredCell();
00135 checkCodon = !genemodels::isContiguous(predPred.getEnd3(),us.getPredStrnd(),pred.getEnd5(),pred.getStrnd());
00136 }
00137 char codon[3];
00138 bool isStop = false;
00139 if(checkCodon && currFrame == 1) {
00140
00141 if(item.getEnd3()+1 >= (signed)item.getSeq().length()) return true;
00142 assert(us.getStrnd()==pred.getStrnd());
00143 codon[0] = str[pred.getEnd3()];
00144 codon[1] = str[item.getEnd5()];
00145 codon[2] = str[item.getEnd5()+1];
00146 isStop = str.isStop(codon);
00147 } else if(checkCodon && currFrame == 0) {
00148
00149 if(item.getEnd5()+2 >= (signed)str.length()) return true;
00150 codon[0] = str[item.getEnd5()];
00151 codon[1] = str[item.getEnd5()+1];
00152 codon[2] = str[item.getEnd5()+2];
00153 isStop = str.isStop(codon);
00154 }
00155 if(checkCodon) {
00156 if( !newModel ) {
00157 if(item.getStrnd()==dsu::ePos &&
00158
00159 (predType != Terminal::getInstance().getScoreIdx() || predType != Single::getInstance().getScoreIdx()) &&
00160 (itemType != Terminal::getInstance().getScoreIdx() || itemType != Single::getInstance().getScoreIdx()) && isStop) {
00161 return false;
00162 }
00163 else if(item.getStrnd()==dsu::eNeg && isStop)
00164 return false;
00165 } else if( newModel ) {
00166
00167 assert(itemType != Internal::getInstance().getScoreIdx());
00168 if(dsu::ePos==item.getStrnd())
00169 assert(itemType == Initial::getInstance().getScoreIdx() || itemType == Single::getInstance().getScoreIdx());
00170 else if(dsu::eNeg==item.getStrnd()) {
00171 assert(itemType == Terminal::getInstance().getScoreIdx() || itemType == Single::getInstance().getScoreIdx());
00172 }
00173
00174 bool isAdjStop = checkForAdjacentStopCodon(pred.getRegionPrediction(),item.getRegionPrediction(),str);
00175 if(item.getStrnd() == dsu::ePos && isStop) {
00176 return false;
00177
00178 } else if( item.getStrnd() == dsu::eNeg && (!isStop && !isAdjStop) )
00179 return false;
00180 }
00181 }
00182 }
00183 }
00184
00185 return true;
00186 }
00187
00188 static bool predEndsModel(const AnnotationItem& predItem, const UnitLoc& pul, int len) {
00189 const UnitScore& pc = predItem.getMatrix().getUnitScore(pul);
00190 if( len == predItem.length() ) return true;
00191 else if(pc.getPredCell()) {
00192 const AnnotationItem& prev = *pc.getPredCell();
00193 if( prev.getStrnd() != predItem.getStrnd() ||
00194 !genemodels::isContiguous(prev.getEnd3(),pc.getPredStrnd(),predItem.getEnd5(),predItem.getStrnd()) ) {
00195 return false;
00196 } else {
00197 const UnitLoc& npc = pc.getPredUnitLoc();
00198 return predEndsModel(prev,npc,len-predItem.length());
00199 }
00200 } else {
00201 return false;
00202 }
00203 }
00204
00205
00210 bool doProcess(const AnnotationItem& item, const AnnotationItem& predItem, const UnitLoc& pul) {
00211 bool isContig = genemodels::isContiguous(predItem.getEnd3(), predItem.getStrnd(), item.getEnd5(), item.getStrnd());
00212 if(isContig) return true;
00213
00214
00215
00216
00217 const UnitScore& unit = predItem.getMatrix().getUnitScore(pul);
00218 if(!unit.isValid()) return false;
00219 if(predItem.length() >= 3) return true;
00220
00221
00222 const AnnotationItem* predPredIter = unit.getPredCell();
00223 if(!predPredIter) return false;
00224 const AnnotationItem& predPredItem = *predPredIter;
00225 int end3 = predPredItem.getEnd3();
00226
00227 if(end3+1 != predItem.getEnd5()) return false;
00228 if( predPredItem.length() + predItem.length() >= 3) return true;
00229
00230
00231 const UnitScore& nextUnit =
00232 predPredItem.getMatrix().getUnitScore(unit.getPredUnitLoc());
00233 if(!nextUnit.isValid()) return false;
00234
00235 const AnnotationItem* nextPredIter = nextUnit.getPredCell();
00236 if(!nextPredIter) return false;
00237 const AnnotationItem& nextPredItem = *nextPredIter;
00238 int nextEnd3 = nextPredItem.getEnd3();
00239 if(nextEnd3+1 != end3) return false;
00240 return true;
00241 }
00242
00243 static int currentFrame2NumNeed(int frm) {
00244 switch(frm) {
00245 case 2: return 2;
00246 case 1: return 1;
00247 case 0: return 0;
00248 default:
00249 assert(0);
00250 return -1;
00251 }
00252 }
00253
00254 static int getProperFrameForCurrFrameOne(int len) {
00255 int predFrame = -1;
00256 const int curr_frame = 0;
00257 if(len>=3) {
00258 int leading_bp = (len-currentFrame2NumNeed(curr_frame)) % 3;
00259 predFrame = AnnotationItem::leadingBp2Frame(leading_bp);
00260 } else if(len == 2) {
00261 predFrame = 1;
00262 } else {
00263 predFrame = 2;
00264 }
00265 return predFrame;
00266 }
00267
00268 #if _RUNTIME_DEBUG >= 1
00269 static bool noStops(const DnaStr& seq, int start, int stop, bool verbose) {
00270 #else
00271 static bool noStops(const DnaStr& seq, int start, int stop) {
00272 #endif
00273 for(int i = start; i <= stop-2; i+=3) {
00274 #if _RUNTIME_DEBUG >= 1
00275 if(verbose)
00276 seq.printCodon(i);
00277 #endif
00278 if(seq.isStop(i)) {
00279 return false;
00280 }
00281 }
00282 return true;
00283 }
00284
00285 static bool hasAdjStop(const AnnotationItem& curr, const AnnotationItem& pred) {
00286 assert(curr.getStrnd() == dsu::eNeg);
00287 if((curr.getEnd5()-pred.getEnd3()) >= 4) {
00288 const DnaStr& str = curr.getSeq();
00289 if(str.isStop(curr.getEnd5()-3)) return true;
00290 }
00291 return false;
00292 }
00293
00294 static bool check4PredContig(const AnnotationItem& item, const UnitLoc& ul, int bp_need) {
00295 const int start = item.getEnd5(), stop = item.getEnd3();
00296 int clen = stop-start+1;
00297 bp_need -= clen;
00298 if( bp_need <= 0 ) return true;
00299 const UnitScore& unit = item.getMatrix().getUnitScore(ul);
00300 const AnnotationItem* predCell = unit.getPredCell();
00301 const UnitLoc& pul = unit.getPredUnitLoc();
00302 if(predCell) {
00303 const AnnotationItem& predItem = *predCell;
00304 bool isContig = genemodels::isContiguous(predItem.getEnd3(),pul.getStrnd(),item.getEnd5(),item.getStrnd());
00305 if( !isContig && bp_need == 1) {
00306 return false;
00307 } else
00308 return check4PredContig(predItem,pul,bp_need);
00309 }
00310 return false;
00311 }
00312
00313 inline static bool ignoreHelp(int itemPost, int itemPre, dsu::Strand_t strnd, bool isContig) {
00314
00315 if(itemPost == TERMINAL_INT_ID && itemPre == TERMINAL_INT_ID && !isContig) return true;
00316 if(itemPost == INITIAL_INT_ID && itemPre == INITIAL_INT_ID && !isContig) return true;
00317
00318 if(strnd == dsu::ePos) {
00319 if(itemPost == SINGLE_INT_ID &&
00320 (itemPre == INITIAL_INT_ID || itemPre == INTERNAL_INT_ID)) return true;
00321 if(itemPost == TERMINAL_INT_ID &&
00322 itemPre == SINGLE_INT_ID) return true;
00323 if(itemPost == INTERNAL_INT_ID &&
00324 (itemPre != INTERNAL_INT_ID && itemPre != INITIAL_INT_ID))
00325 return true;
00326 if(itemPost == INITIAL_INT_ID && itemPre == INTERNAL_INT_ID) return true;
00327 } else if(strnd == dsu::eNeg) {
00328 if(itemPost == SINGLE_INT_ID &&
00329 (itemPre == TERMINAL_INT_ID || itemPre == INTERNAL_INT_ID)) return true;
00330 if(itemPost == INITIAL_INT_ID &&
00331 itemPre == SINGLE_INT_ID) return true;
00332 if(itemPost == INTERNAL_INT_ID &&
00333 (itemPre != INTERNAL_INT_ID && itemPre != TERMINAL_INT_ID))
00334 return true;
00335 if(itemPost == TERMINAL_INT_ID && itemPre == INTERNAL_INT_ID) return true;
00336 } else assert(0);
00337
00338 return false;
00339 }
00340
00341 inline static bool ignoreConnection(int currType, dsu::Strand_t currStrnd, int predType, dsu::Strand_t predStrnd, bool isContig) {
00342 if(currStrnd == predStrnd && isContig) return false;
00343
00344 if(currStrnd == dsu::ePos && predStrnd == dsu::ePos) {
00345 return ignoreHelp(currType,predType,currStrnd,isContig);
00346 } else if(currStrnd == dsu::ePos && predStrnd == dsu::eNeg) {
00347 if((currType==SINGLE_INT_ID||currType==INITIAL_INT_ID) &&
00348 (predType==SINGLE_INT_ID||predType==INITIAL_INT_ID)) return false;
00349 return true;
00350 } else if(currStrnd == dsu::eNeg && predStrnd == dsu::ePos) {
00351 if((currType==SINGLE_INT_ID||currType==TERMINAL_INT_ID) &&
00352 (predType==SINGLE_INT_ID||predType==TERMINAL_INT_ID)) return false;
00353 return true;
00354 } else if(currStrnd == dsu::eNeg && predStrnd == dsu::eNeg) {
00355 return ignoreHelp(currType,predType,currStrnd,isContig);
00356 }
00357 return true;
00358 }
00359
00363 static bool isEndOfStartCodonPass(const AnnotationItem& item, int itemType, int currFrame,
00364 const AnnotationItem& pred, int predType, int predFrame,
00365 const DnaStr& theSequence) {
00366 return true;
00367 }