/* This program utilizes string information in the preexisting file t4klist. It makes no use of the file of strings; all data are in the file T4klist.Thetaktree. The file t4klist contains, not the treelengths of the various trees, but rather the numbers of sites favoring each of the three binary trees for the given quartet. Thus kaabb usually tells the number of sites at which the symbol SA = SB, SC = SD, SA <> SC; kabab tells the number where SA = SC, SB = SD, SA <> SB; etc. The species must be arranged so that a record is A B C D kaabb kabab kabba with kaabb >= kabab and kaabb >= kabba. The file ends with a fake record. 0 0 0 0 0 0 0 The "absolute" inconsistency measure uses the statistic gap = min {kaabb - kabab, kaabb - kabba} {to tell the reliability of a given quartet. This is recorded as current^.abs. When it is large, the quartet is very reliable. When it is small, the quartet is questionable. The input will be a data file T4klist.Thetaktree with the following format: (1) The first line gives number of species then a code. The code is 1 if gaps were ignored to the extent possible in the production of the list, 0 if gaps were treated as a new symbol. (2) The second line consists of a string identifying the source of the data, terminated by a line break. (3) the next successive lines contain an integer label and then strings of length labellength identifying the species (4) the next successive lines give the data for T4list in the form first second third fourth strength strength3 strength4 where strength is the site strength of ((first second) (third fourth)) and this is the maximum parsimony tree (tree of largest strength), strength3 is the site strength of ((first third) (second fourth)) strength4 is the site strength of ((first fourth) (second third)). (5) The last line has form 0 0 0 0 0 0 0 For example, T4list might begin as follows: 11 1 File ROO9DOM1.PIL from 5/15/97. 1 btrac 2 r22 3 mopn 4 6bc 5 b577atcc 6 fpatcc 7 gpicatcc 8 fml12 9 e58 10 kahane 11 pmar 1 2 3 4 127 15 12 1 2 3 5 131 110 8 1 2 3 6 129 22 22 There should be no blank lines after the last data line. The output will be another file Thetaktree.output. */ #define maxleaves 40 #define maxnodes 100 #define labellength 100 #define maxstringlength 800 #define boolean int #define true 1 #define false 0 #define CR '\015' #include #include struct t4 { int first; int second; int third; int fourth; float aabb; float abab; float abba; float abs; struct t4 *next; }; /* {description of a tree with 4 nodes; the tree has form (( first second) ( third fourth)) aabb gives the number of sites favoring ((1 2) (3 4)) abab gives the number of sites favoring ((1 3) (2 4)) abba gives the number of sites favoring ((1 4) (2 3))} */ struct insertionpossibilities{ int above; int below; float eliminationabs; /* eliminationrel: real; eliminationscale: real; eliminationnorm: real; eliminationexpt: real;*/ int inconstreecount; }; struct bestinsertion { int bestplace; float bestabs; float runnerup; }; /* {tells the result of using quartets on the insertion between node above and node below into the tree treetrue.} */ int gap = 3, mismatch=1; /*{penalties, so d(A,B) = mismatch, d(A,-) = gap}*/ char ch; int u, vv; int datatype; boolean aligned, trial; boolean ignoregaps; int counter[maxleaves], stringlength[maxleaves]; int actualleaves; int actuallength; int flag; int idenlength; int ssize; int newest; char labels[maxleaves+1][labellength]; char result[10]; int errorcode; char identification[maxstringlength]; FILE *t4list, *outcomment; struct t4 t4record; char result[10]; int outgroup; float globalinconsabs, localinconsabs, inconsabs; float globalinconsnorm, localinconsnorm, inconsnorm; float globalinconsexpt, localinconsexpt, inconsexpt; float globalinconsscale, localinconsscale, inconsscale; int inconstreecount; float inconsabsavg, inconsnormavg; int temp; int blocklength; int insertioncounter; float diff, diff2; struct t4 *head, * current, *pending, *bestpt, *last; boolean writemonitor, quickly, writeperm, writequartets, place, casechanged; struct insertionpossibilities possibilities[maxleaves+1][maxnodes+1]; /* {possibilities[i,k] tells the result of using quartets on the insertion #k // of species i between node above and node below into the tree treetrue.} */ struct bestinsertion overall[maxleaves+1] ; int insertioncounter; float minstrength, maxstrength; float bestdiff; float theta; float topdiff; int testflag; int totalallowed; float totalsite; int insertioncount; int actualleaves; FILE *outcomment; FILE *int4list; boolean speciescheck[maxleaves+1] ; boolean consider[maxleaves+1]; boolean localonly; int localspecies; int actualnode; int toptrue[maxnodes], topnew[maxnodes]; int belowtrueleft[maxnodes], belownewleft[maxnodes]; int belowtrueright[maxnodes], belownewright[maxnodes] ; int speciestrue[maxnodes], speciesnew[maxnodes]; int species[maxnodes]; float minsignalabs; int relabel[maxnodes+1], backlabel[maxnodes+1] ; int prettylabels; boolean nodedone[maxnodes+1] ; boolean passfoundone; int topneat[maxnodes+1]; int belowneatleft[maxnodes+1], belowneatright[maxnodes+1]; int speciesneat[maxnodes+1]; int rootneat; boolean neatnodedone[maxnodes+1]; int locationneat, pastlocationneat; void addpermanently( int,int,int); void readlnt4(void); void finishup(void); void addnewestinteractively(void); void trynewestwrite(void); void checkconsistencytrue(void); void checkconsistencylocal(int, int); void summarizechoices(void); void writetree(int); void setupbestsignal(void); void readlnt4(void) { char ch; ch = fgetc(int4list); while ( (ch != '\r') && (ch != EOF) && (ch != '\n')) {ch = fgetc(int4list); } } void finishup(void) { fclose(outcomment); fclose(int4list); printf("Program has ended."); } int main(void) { int i1, j1, i; int temp1, temp2, temp3; int answer; void readlnt4(void); void finishup(void); void addnewestinteractively(void); void trynewestwrite(void); void checkconsistencytrue(void); void setupbestsignal(void); void writetree(); boolean found, passdone, done; float bestdiff; blocklength = 4* sizeof (t4record.first) + 4*sizeof (t4record.aabb) + sizeof (t4record.next); writeperm = false; writequartets = false; int4list = fopen("T4klist.Thetaktree", "r"); outcomment = fopen ("Thetaktree.output", "w"); fscanf(int4list, "%d%d", &actualleaves, &flag); printf("Actualleaves = %d, flag = %d\n", actualleaves, flag); readlnt4(); if (flag == 1) ignoregaps = true; else ignoregaps = false; if ((actualleaves > maxleaves) || (actualleaves < 0)) { fprintf (outcomment, "Wrong number of leaves\n"); printf("Wrong number of leaves\n"); finishup(); } for (i1 = 0; i1= maxstringlength) done = true; } } if (idenlength >= maxstringlength) readlnt4(); printf("Idenlength = %d\n", idenlength); for (i1 = 1; i1 <= idenlength; i1++) { fputc( identification[i1], outcomment); putchar(identification[i1]); } fputc('\n',outcomment); putchar('\n'); /* {read in the labels for species} */ for (i1 = 1; i1 <= actualleaves; i1++) for (j1 = 0; j1= labellength-1) done = true; } } if (i != i1) { fprintf(outcomment, "****** Mismatch of species in file T4list.Thetaktree\n"); fprintf(outcomment, " Species should be in numerical order.\n"); finishup(); } } /* print out the labels of the species */ for (i1=1; i1 <= actualleaves; i1++) {fprintf(outcomment, "%2d ", i1); for (j1=0; j1< labellength; j1++) fputc(labels[i1][j1], outcomment); fputc('\n', outcomment); } for (i1=1; i1 <= actualleaves; i1++) {printf( "%2d ", i1); for (j1=0; j1< labellength; j1++) putchar(labels[i1][j1]); putchar('\n'); } printf("Enter the number of the outgroup taxon.\n"); scanf("%d", &outgroup); if ((outgroup < 1) || (outgroup > actualleaves) ) { printf(" Illegal choice of outgroup. "); outgroup = actualleaves; } printf("Outgroup selected is species %d\n", outgroup); fprintf(outcomment, "Outgroup selected is species %d\n", outgroup); /* {read in the actual quartets} */ head = (struct t4 *) malloc(blocklength); done = false; current = head; while (! done) { fscanf(int4list,"%d%d%d%d%f%f%f", &((*current).first), &((*current).second), &((*current).third), &((*current).fourth), &((*current).aabb), &((*current).abab), &((*current).abba)); diff= (*current).aabb - (*current).abab; diff2 = (*current).aabb - (*current).abba; if (diff2 < diff) diff = diff2; (*current).abs = diff; if ((*current).first == 0) { done = true; (*last).next = NULL; } else { pending = (struct t4 *) malloc(blocklength); (*current).next = pending; last = current; current = pending; } } fputc('\n', outcomment); fprintf(outcomment, "The head quartet is %d %d %d %d with sites %5.2f %5.2f %5.2f\n", (*head).first, (*head).second, (*head).third, (*head).fourth, (*head).aabb, (*head).abab, (*head).abba); fprintf(outcomment, "The last quartet is %d %d %d %d with sites %5.2f %5.2f %5.2f\n", (*last).first, (*last).second, (*last).third, (*last).fourth, (*last).aabb, (*last).abab, (*last).abba); /* {find a quartet for starting the induction} // {we suggest the best allowed quartet} */ found = false; current = head; bestdiff = -100000.0; bestpt = NULL; while (! found) { if ((*current).abs > bestdiff) { bestpt = current; bestdiff = (*current).abs; } if ((*current).next == NULL) found = true; else current = (*current).next; } /*{Now bestpt contains the best quartet that is allowed, or else NULL if none is allowed.} */ if (bestpt == NULL) finishup(); globalinconsabs = -(*bestpt).abs; localinconsabs = -(*bestpt).abs; species[1] = (*bestpt).first; species[2] = (*bestpt).second; species[3] = (*bestpt).third; species[4] = (*bestpt).fourth; ssize = 4; speciescheck[species[1]] = true; speciescheck[species[2]] = true; speciescheck[species[3]] = true; speciescheck[species[4]] = true; fprintf(outcomment, "We start with the following tree: "); fprintf(outcomment, "(( %d %d ) ( %d %d))\n", (*bestpt).first, (*bestpt).second, (*bestpt).third, (*bestpt).fourth ); fprintf(outcomment, "It has site measure %6.3f and runnerup measures %6.3f and %6.3f\n\n", (*bestpt).aabb, (*bestpt).abab, (*bestpt).abba ); printf( "We start with the following tree: "); printf( "(( %d %d ) ( %d %d))\n", (*bestpt).first, (*bestpt).second, (*bestpt).third, (*bestpt).fourth ); printf( "It has site measure %6.3f and runnerup measures %6.3f and %6.3f\n\n", (*bestpt).aabb, (*bestpt).abab, (*bestpt).abba ); /*{create the initial tree using the most parsimonious tree of the first four species} */ toptrue[1] = 5; belowtrueleft[1] = 0; belowtrueright[1] = 0; speciestrue[1] = (*bestpt).first; toptrue[2] = 5; belowtrueleft[2] = 0; belowtrueright[2] = 0; speciestrue[2] = (*bestpt).second; toptrue[3] = 6; belowtrueleft[3] = 0; belowtrueright[3] = 0; speciestrue[3] = (*bestpt).third; toptrue[4] = 6; belowtrueleft[4] = 0; belowtrueright[4] = 0; speciestrue[4] = (*bestpt).fourth; toptrue[5] = 7; belowtrueleft[5] = 1; belowtrueright[5] = 2; speciestrue[5] = 0; toptrue[6] = 7; belowtrueleft[6] = 3; belowtrueright[6] = 4; speciestrue[6] = 0; toptrue[7] = 0; belowtrueleft[7] = 5; belowtrueright[7] = 6; speciestrue[7] = 0; actualnode = 7; done = false; while (! done) { line40: printf("Enter 1 to quit, doing slow summaries for the remaining species.\n"); printf("Enter 2 to quit quickly, without slow computations for remaining species.\n"); printf("Enter 3 to give the results of placements for all species and use one placement with strongest signal.\n"); printf("Enter 4 to use strongest signal placements automatically until all possible species are placed.\n"); printf("Enter 5 to choose a placement interactively.\n"); scanf("%d", &answer); if ((answer <1) || (answer > 5)) goto line40; if (answer == 3) { place = true; setupbestsignal(); goto line40; } if (answer == 4) { place = true; done = false; while (! done) { casechanged = false; setupbestsignal(); if (! casechanged) done = true; } goto line40; } if (answer == 5) { printf("The following species remain:"); for (i1 = 1; i1<= actualleaves; i1++) if ( ! speciescheck[i1]) printf("%d ", i1); putchar('\n'); printf(" Enter the next species to be used, or enter 0 to exit loop.\n"); scanf("%d", &newest); if (newest < 1) goto line40; if (newest > actualleaves) { printf(" Species meaningless."); goto line40; } printf("We try to insert species %d\n", newest); for (i1 = 1; i1<= ssize; i1++) if (species[i1]==newest) { printf(" Species was already used. Try again. \n"); goto line40; } addnewestinteractively(); goto line40; } if (answer == 1) { done = true; quickly = false; } if (answer == 2) { done = true; quickly = true; } } for (i1 = 1; i1<= maxnodes; i1++) { relabel[i1] = 0; nodedone[i1] = false; topnew[i1] = 0; speciesnew[i1] = 0; belownewleft[i1] = 0; belownewright[i1] = 0; } for (i1 = 1; i1<= maxnodes; i1++) backlabel[i1] = 0; for (i1 = 1; i1<= actualleaves; i1++) speciesnew[i1] = i1; /*{identify the leaves with their species} */ for (i1 = 1; i1<= actualnode;i1++) if (belowtrueleft[i1] == 0) { relabel[i1] = speciestrue[i1]; backlabel[speciestrue[i1]]= i1; nodedone[i1] = true; } prettylabels = actualleaves; relabel[0] = 0; backlabel[0] = 0; /*{go through the actual tree. If both nodes below it have relabel defined, //then give it a pretty label relabel.} */ done = false; while (! done) { passfoundone= false; for (i1 = 1; i1<= actualnode; i1++) if(! nodedone[i1]) if (nodedone[belowtrueleft[i1]] && nodedone[belowtrueright[i1]]) { prettylabels = prettylabels + 1; nodedone[i1] = true; relabel[i1] = prettylabels; backlabel[prettylabels] = i1; passfoundone = true; belownewleft[prettylabels] = relabel[ belowtrueleft[i1]]; belownewright[prettylabels] = relabel[belowtrueright[i1]]; topnew[ relabel[belowtrueleft[i1]]] = prettylabels; topnew[ relabel[belowtrueright[i1]]] = prettylabels; } if (! passfoundone) done = true; } /* {prepare a neat version of the tree that takes into account the location of the outgroup. The root is at rootneat. It has three children. One child is the outgroup. Its other children form the rest of the tree. Species correspond to their own number.} {initially the neat tree is a copy of the pretty tree} */ for (i1 = 1; i1<= maxnodes; i1++) { topneat[i1] = 0; belowneatleft[i1] = 0; belowneatright[i1] = 0; speciesneat[i1] = 0; neatnodedone[i1]= false; } neatnodedone[0] = false; for (i1 = 1; i1<= prettylabels; i1++) { topneat[i1] = relabel[toptrue[backlabel[i1]]]; belowneatleft[i1] = relabel[belowtrueleft[backlabel[i1]]]; belowneatright[i1] = relabel[belowtrueright[backlabel[i1]]]; } for (i1 = 1; i1<= actualleaves; i1++) speciesneat[i1] = i1; if (topneat[outgroup] == 0) { for (i1 = 1; i1<= actualleaves; i1++) if (topneat[i1] != 0) outgroup = i1; fprintf(outcomment, "The indicated outgroup was missing from the final tree.\n"); fprintf(outcomment, "The outgroup was instead selected to be %d.\n", outgroup); } rootneat = topneat[outgroup]; temp1 = topneat[rootneat]; temp2 = belowneatleft[rootneat]; temp3 = belowneatright[rootneat]; if (temp2 == outgroup) { if (temp1 < temp3) { belowneatleft[rootneat] = temp1; belowneatright[rootneat] = temp3; } else { belowneatleft[rootneat] = temp3; belowneatright[rootneat] = temp1; } } else { if (temp1 < temp2) { belowneatleft[rootneat] = temp1; belowneatright[rootneat] = temp2; } else { belowneatleft[rootneat] = temp2; belowneatright[rootneat] = temp1; } } topneat[rootneat] = 0; neatnodedone[rootneat] = true; neatnodedone[outgroup] = true; /* {now we modify the neat tree, changing directions; for each node other // than root or outgroup, down means away from the root} */ done = false; while (! done) { passdone = true; for (i1 = 1; i1<= prettylabels; i1++) { if (! neatnodedone[i1]) if ( neatnodedone[topneat[i1]] || neatnodedone[belowneatleft[i1]] || neatnodedone[belowneatright[i1]]) { passdone= false; temp1 = topneat[i1]; temp2 = belowneatleft[i1]; temp3 = belowneatright[i1]; if (neatnodedone[temp1]) { topneat[i1] = temp1; if (temp2 < temp3) { belowneatleft[i1] = temp2; belowneatright[i1] = temp3; } else { belowneatleft[i1] = temp3; belowneatright[i1] = temp2; } } else if (neatnodedone[temp2]) { topneat[i1] = temp2; if (temp1 < temp3) { belowneatleft[i1] = temp1; belowneatright[i1] = temp3; } else { belowneatleft[i1] = temp3; belowneatright[i1] = temp1; } } else { topneat[i1] = temp3; if (temp1 < temp2) { belowneatleft[i1] = temp1; belowneatright[i1] = temp2; } else { belowneatleft[i1] = temp2; belowneatright[i1] = temp1; } } neatnodedone[i1] = true; } } if (passdone) done = true; } fprintf(outcomment, "Here is a description of the final tree in neat form.\n"); fprintf(outcomment, "The root is at node %d.\n", rootneat); for (i1 = 1; i1<= prettylabels; i1++) { fputc('\n',outcomment); fprintf(outcomment, "Node %d\n", i1); if (speciesneat[i1] != 0) { fprintf(outcomment, "It is a leaf for "); for (j1=0; j1< labellength; j1++) fputc( labels[speciesneat[i1]][j1], outcomment); fputc('\n', outcomment); } else { fprintf(outcomment, "This node reaches down to left and right nodes "); fprintf(outcomment,"%d and %d\n", belowneatleft[i1], belowneatright[i1]); if (i1 == rootneat) fprintf(outcomment, "This node is the root and also reaches down to the outgroup %d\n", outgroup); } if (topneat[i1] != 0) { fprintf(outcomment, "This node reaches up to node %d\n", topneat[i1]); } } fputc('\n',outcomment); fprintf(outcomment, "\n**********************************************\n\n"); fprintf(outcomment, "Here is a description of the final tree in parenthesis form.\n"); writetree(rootneat); fprintf(outcomment, "\n\n"); done = true; for (i1 = 1; i1<= actualleaves; i1++) if (! speciescheck[i1]) done = false; if (! done) if (! quickly) { fputc('\n', outcomment); fprintf(outcomment, "Here is placement information on the remaining species.\n"); for (newest = 1; newest <= actualleaves; newest++) if (! speciescheck[newest]) { writequartets = false; writeperm = true; writemonitor = false; trynewestwrite(); } fputc('\n', outcomment); } fprintf(outcomment, "Following are any quartets which are inconsistent with the final tree.\n\n"); writemonitor = false; writequartets = true; localonly = false; checkconsistencytrue(); finishup(); return(1); } void addnewestinteractively(void) { int j1, k1, l1; int bestplacement; void addpermanently(); void checkconsistencylocal(); void summarizechoices(); insertioncounter= 0; for (j1 = 1; j1<= actualnode; j1++) for (k1 = 1; k1 <= actualnode; k1++) if ((k1 == belowtrueleft[j1]) || (k1 == belowtrueright[j1])) { insertioncounter = insertioncounter+1; possibilities[newest][ insertioncounter].above = j1; possibilities[newest][ insertioncounter].below = k1; } for (l1 = 1; l1<= insertioncounter; l1++) { checkconsistencylocal(newest,l1); } printf("Possible placements for species %d\n", newest); fputc('\n',outcomment); fprintf(outcomment, "Possible placements for species %d\n", newest); for (l1 = 1; l1<= insertioncounter; l1++) { printf("Placement %d has Iabs= %6.3f, badtreecount = %d\n", l1, possibilities[newest][l1].eliminationabs, possibilities[newest][l1].inconstreecount); fprintf(outcomment, "Placement %d has Iabs= %6.3f, badtreecount = %d\n", l1, possibilities[newest][l1].eliminationabs, possibilities[newest][l1].inconstreecount); } printf("** The placements "); fprintf(outcomment, "** The placements "); for (l1 = 1; l1<= insertioncounter; l1++) if ((possibilities[newest][l1].above == 7) || (possibilities[newest][l1].below == 7)) { printf("%d ", l1); fprintf(outcomment, "%d ", l1); } printf("are equivalent and redundant.\n"); fprintf(outcomment, "are equivalent and redundant.\n"); writemonitor = true; summarizechoices(); printf("Enter the placement number to use for placing species %d or enter 0 to choose none.\n", newest); scanf("%d", &bestplacement); if ((1 <= bestplacement) && (bestplacement <= insertioncounter)) { fprintf(outcomment,"******Add species %d at placement %d.\n",newest, bestplacement); printf("******Add species %d at placement %d.\n",newest, bestplacement); addpermanently(newest,possibilities[newest][bestplacement].above, possibilities[newest][bestplacement].below); } else fprintf(outcomment, "******No placement was selected.\n"); } void trynewestwrite(void) /* {This procedure tries to tell the result of inserting species 'newest' into the tree. It assumes that newest has not already been used. It summarizes the results of the trial but it never actually changes the tree. It writes out the results of trials. */ { int j1, k1, l1; void addpermanently(); void checkconsistencylocal(); void summarizechoices(); insertioncounter= 0; for (j1 = 1; j1<= actualnode; j1++) for (k1 = 1; k1 <= actualnode; k1++) if ((k1 == belowtrueleft[j1]) || (k1 == belowtrueright[j1])) /* {insert the link between j1 (above) and k1 (below)} */ { insertioncounter = insertioncounter+1; possibilities[newest][ insertioncounter].above = j1; possibilities[newest][ insertioncounter].below = k1; } for (l1 = 1; l1<= insertioncounter; l1++) { checkconsistencylocal(newest,l1); } fputc('\n',outcomment); fprintf(outcomment, "Possible placements for species %d\n", newest); for (l1 = 1; l1<= insertioncounter; l1++) { fprintf(outcomment, "Placement %d between nodes %d and %d has Iabs= %6.3f, badtreecount = %d\n", l1, relabel[possibilities[newest][l1].above], relabel[possibilities[newest][l1].below], possibilities[newest][l1].eliminationabs, possibilities[newest][l1].inconstreecount); } fprintf(outcomment, "** The placements "); for (l1 = 1; l1<= insertioncounter; l1++) if ((possibilities[newest][l1].above == 7) || (possibilities[newest][l1].below == 7)) { fprintf(outcomment, "%d ", l1); } fprintf(outcomment, "are equivalent and redundant.\n"); writemonitor = false; summarizechoices(); } void addpermanently( besti,above,below) int besti; int above; int below; /*{procedure addpermanently(besti,bestplace: integer); //this procedure adds species besti at placement between nodes above and below //permanently to treetrue} */ { void checkconsistencytrue(); actualnode = actualnode + 1; speciestrue[actualnode] = besti; toptrue[actualnode] = actualnode + 1; belowtrueleft[actualnode] = 0; belowtrueright[actualnode] = 0; actualnode = actualnode + 1; speciestrue[actualnode] = 0; belowtrueleft[actualnode] = actualnode -1; belowtrueright[actualnode] = below; toptrue[actualnode] = above; toptrue[below]= actualnode; if (belowtrueleft[above] == below) belowtrueleft[above] = actualnode; else belowtrueright[above] = actualnode; speciescheck[besti] = true; ssize = ssize + 1; species[ssize] = besti; localonly = true; localspecies = besti; checkconsistencytrue(); fprintf(outcomment, "With regard to quartets involving species %d we have:\n", besti); fprintf(outcomment, " The measure of absolute inconsistency was %6.3f\n",inconsabs); fprintf(outcomment, " The number of inconsistent quartets was %d.\n\n", inconstreecount); printf( "With regard to quartets involving species %d we have:\n", besti); printf( " The measure of absolute inconsistency was %6.3f\n",inconsabs); printf( " The number of inconsistent quartets was %d.\n\n", inconstreecount); } void checkconsistencytrue() /*{This procedure tells whether the tree with toptrue, speciestrue, belowtrueleft, etc. is consistent with the members of t4list. If so, then allconsistent is set equal to true, while otherwise allconsistent is set equal to false. It is possible that not all the species lie in the tree; in that case, consistency with a tree of fewer than 4 vertices is regarded as trivial and is regarded as true. The absolute inconsistency is placed in inconsabs; the normed inconsistency in inconsnorm; the experimental inconsistency in inconsexpt; the number of trees inconsistent in inconstreecount. Nothing is written out. This procedure is the workhorse for much of the program. If the flag localonly = true, then only quartets including the species localspecies are considered in the computations. } */ { int first2,second2,third2,fourth2; int k1; float temp; int topcopy[maxnodes+1] ; boolean done; int target[5]; int belowcopyright[maxnodes+1], belowcopyleft[maxnodes+1] ; int speciescopy[maxnodes+1] ; int leafspecies; int targetleaf; int father, brother, grandpa; int test1, test2; int countinconsistent; float maxabs; boolean remainingleaves[maxleaves+1]; current = head; done = false; countinconsistent = 0; maxabs= -100000.0; while (! done) { /* {make a copy of the treetrue to be manipulated; the copy is topcopy,nodecopy,belowcopy} */ for (k1 = 1; k1<= actualnode; k1++) { topcopy[k1] = toptrue[k1]; belowcopyleft[k1] = belowtrueleft[k1]; belowcopyright[k1] = belowtrueright[k1]; speciescopy[k1] = speciestrue[k1]; } first2 = (*current).first; second2 = (*current).second; third2 = (*current).third; fourth2 = (*current).fourth; target[1] = first2; target[2] = second2; target[3] = third2; target[4] = fourth2; if (localonly) if (!(localspecies == target[1])) if (!(localspecies == target[2])) if (!(localspecies == target[3])) if (!(localspecies == target[4])) { goto line35; } for (k1= 1; k1 <= actualnode; k1++) { if ((belowcopyleft[k1] == 0) && (belowcopyright[k1] == 0)) if (topcopy[k1] != 0) { leafspecies = speciescopy[k1]; if (leafspecies == (*current).first) targetleaf = k1; if (!(leafspecies == target[1])) if (!(leafspecies == target[2])) if (!(leafspecies == target[3])) if (!(leafspecies == target[4])) { father = topcopy[k1]; grandpa = topcopy[father]; brother = belowcopyleft[father]; if (brother == k1) brother = belowcopyright[father]; if (grandpa != 0) { topcopy[k1] = 0; topcopy[brother] = grandpa; topcopy[father] = 0; if (belowcopyright[grandpa] == father) belowcopyright[grandpa] = brother; else belowcopyleft[grandpa] = brother; belowcopyright[father] = 0; belowcopyleft[father] = 0; } else { topcopy[k1] = 0; topcopy[father] = 0; topcopy[brother] = 0; belowcopyright[father] = 0; belowcopyleft[father] = 0; } } } } /* {now the copied tree should contain only the leaves of the target plus internal nodes plus some isolated vertices with top 0 and empty below} {we need to check for consistency only if all four of the target leaves are still leaves of the main tree body; otherwise consistency is trivial} {find the set of leaves connected to the main tree body} */ for (k1 = 1; k1 <= actualleaves; k1++) remainingleaves [k1] = false; for (k1 = 1; k1 <= actualnode; k1++) { if ((belowcopyleft[k1] == 0) && (belowcopyright[k1] == 0)) if (topcopy[k1] != 0) remainingleaves [speciescopy[k1]] = true; } if (remainingleaves[target[1]]) if (remainingleaves[target[2]]) if (remainingleaves[target[3]]) if (remainingleaves[target[4]]) { father = topcopy[targetleaf]; brother = belowcopyright[father]; if (brother == targetleaf) brother = belowcopyleft[father]; /* {This is the true brother if it is a leaf; otherwise we must switch} */ if (belowcopyleft[brother] != 0) if (topcopy[father] == 0) { /* {if the father is the root then we must switch so the new brother is the son of the apparent brother that is also a leaf} */ test1 = belowcopyleft[brother]; test2 = belowcopyright[brother]; if (belowcopyleft[test1] == 0) brother = test1; else brother = test2; } else /* {if the father is not the root then // we must switch so the new brother is the leaf on the other side of the root} */ { grandpa = topcopy[father]; brother = belowcopyleft[grandpa]; if (brother == father) brother = belowcopyright[grandpa]; } if (speciescopy[brother] == (*current).second) { if (maxabs < -(*current).abs) maxabs = -(*current).abs; } else { if (speciescopy[brother] == (*current).third) temp = (*current).aabb - (*current).abab; else temp = (*current).aabb - (*current).abba; if (temp > 0.0) { countinconsistent = countinconsistent + 1; if (maxabs < temp) maxabs = temp; if (writequartets) { fprintf(outcomment, "Tree ((%d %d) (%d %d)) has gap %6.3f\n", (*current).first, (*current).second, (*current).third, (*current).fourth, temp); fprintf(outcomment, " It has site measures %6.3f %6.3f %6.3f while the tree matches %d with %d.\n", (*current).aabb, (*current).abab, (*current).abba, (*current).first, speciescopy[brother]); } } } } line35: if ((*current).next == NULL) done = true; else current = (*current).next; } inconstreecount = countinconsistent; inconsabs = maxabs; if (writequartets) { fprintf(outcomment, "\n\nThe global inconsistency measure is %6.3f.\n", maxabs); fprintf(outcomment, "The number of inconsistent quartets is %d.\n", countinconsistent); } } void checkconsistencylocal(i1,l1) int i1; int l1; /*{This procedure tells whether the tree with toptrue, speciestrue, belowtrueleft, etc. with insertions of i1 at possibility l1 is consistent with the members of t4list. It alters the entry possibilities[i1,l1].eliminationabs to the local absolute inconsistency, and the entry possibilities[i1,l1].eliminationnorm to the local normed inconsistency. Only quartets containing i1 are considered. } */ { actualnode = actualnode + 1; speciestrue[actualnode] = i1; newest = i1; toptrue[actualnode] = actualnode + 1; belowtrueleft[actualnode] = 0; belowtrueright[actualnode] = 0; actualnode = actualnode + 1; speciestrue[actualnode] = 0; belowtrueleft[actualnode] = actualnode -1; belowtrueright[actualnode] = possibilities[i1][l1].below; toptrue[actualnode] = possibilities[i1][l1].above; toptrue[possibilities[i1][l1].below] = actualnode; if (belowtrueleft[possibilities[i1][l1].above] == possibilities[i1][l1].below) belowtrueleft[possibilities[i1][l1].above] = actualnode; else belowtrueright[possibilities[i1][l1].above] = actualnode; localonly = true; localspecies = i1; checkconsistencytrue(); possibilities[i1][l1].eliminationabs = inconsabs; possibilities[i1][l1].inconstreecount = inconstreecount; toptrue[possibilities[i1][l1].below] = possibilities[i1][l1].above; if (belowtrueleft[possibilities[i1][l1].above] == actualnode) belowtrueleft[possibilities[i1][l1].above] = possibilities[i1][l1].below; else belowtrueright[possibilities[i1][l1].above] = possibilities[i1][l1].below; actualnode = actualnode - 2; } void summarizechoices() /*{this procedure summarizes all the possible placement choices //for placing species newest.} */ { int l1; float minimum, minimumrunnerup; int minimumint, minimumrunnerupint; boolean minatroot; int minimumcount; minimum = 100000.0; minimumrunnerup = 100000.0; minatroot = false; for (l1 = 1; l1 <= insertioncounter; l1++) if (possibilities[newest][l1].eliminationabs < minimum) minimum = possibilities[newest][l1].eliminationabs; minimumcount = 0; for (l1 = 1; l1<= insertioncounter; l1++) if (possibilities[newest][l1].eliminationabs == minimum) { minimumcount = minimumcount + 1; if ((possibilities[newest][l1].above == 7) || (possibilities[newest][l1].below == 7)) minatroot = true; } if (minatroot) minimumcount = minimumcount -1; for (l1 = 1; l1<= insertioncounter; l1++) if ((possibilities[newest][l1].eliminationabs > minimum) && (possibilities[newest][l1].eliminationabs < minimumrunnerup)) minimumrunnerup = possibilities[newest][l1].eliminationabs; if (writemonitor) printf("** Lowest absolute inconsistency is at placements "); fprintf(outcomment, "** Lowest absolute inconsistency is at placements "); for (l1 = 1; l1<= insertioncounter; l1++) if ( possibilities[newest][l1].eliminationabs == minimum) { if (writemonitor) printf("%d ", l1); fprintf(outcomment, "%d ", l1); } if (writemonitor) printf("with I= %6.3f and group signal strength %6.3f\n",minimum, (minimumrunnerup - minimum)); fprintf(outcomment, "with I= %6.3f and group signal strength %6.3f\n",minimum, (minimumrunnerup - minimum)); minimumint = 32000; minimumrunnerupint = 32000; minatroot = false; for (l1 = 1; l1<= insertioncounter; l1++) if (possibilities[newest][l1].inconstreecount < minimumint) minimumint= possibilities[newest][l1].inconstreecount; minimumcount = 0; for (l1 = 1; l1<= insertioncounter; l1++) if (possibilities[newest][l1].inconstreecount == minimumint) { minimumcount = minimumcount + 1; if ((possibilities[newest][l1].above == 7) || (possibilities[newest][l1].below == 7)) minatroot = true; } if (minatroot) minimumcount = minimumcount -1; for (l1 = 1; l1<= insertioncounter; l1++) if ((possibilities[newest][l1].inconstreecount > minimumint) && (possibilities[newest][l1].inconstreecount < minimumrunnerupint)) minimumrunnerupint = possibilities[newest][l1].inconstreecount; if (writemonitor) printf("** Lowest number of bad quartets is at placements "); fprintf(outcomment, "** Lowest number of bad quartets is at placements "); for (l1 = 1; l1<= insertioncounter; l1++) if (possibilities[newest][l1].inconstreecount == minimumint) { if (writemonitor) printf("%d ", l1); fprintf(outcomment,"%d ", l1); } if (writemonitor) printf("with count= %d and group signal strength %d\n", minimumint, (minimumrunnerupint - minimumint)); fprintf(outcomment, "with count= %d and group signal strength %d\n", minimumint, (minimumrunnerupint - minimumint)); } void writetree(node) int node; { if (node == rootneat) { fprintf(outcomment, "(%d ", outgroup); if (belowneatleft[node] != 0 ) writetree(belowneatleft[node]); fputc(' ',outcomment); if (belowneatright[node] != 0) writetree(belowneatright[node]); fputc(')', outcomment); } else if (speciesneat[node] != 0) fprintf(outcomment,"%d", speciesneat[node]); else if ((belowneatleft[node] != 0) && (belowneatleft[node] != 0)) { fputc('(', outcomment); writetree(belowneatleft[node]); fputc(' ', outcomment); writetree(belowneatright[node]); fputc(')', outcomment); } else { if (belowneatleft[node] != 0) writetree(belowneatleft[node]); if (belowneatright[node] != 0) writetree(belowneatright[node]); } } void setupbestsignal(void) /* {this procedure sets up the data structure for comparing choices of which species to be inserted into the tree treetrue. For each allowed species, it computes for each possible insertion the theta value if any that would eliminate that possibility using quartets with that species and species already in the tree. It then selects the insertion of strongest signal strength.} */ { int i1, j1, k1, l1; float minimum, minimumrunnerup; int bestsp; float bestabs; int bestplacement, minimumcount; boolean minatroot; insertioncounter = 0; for (i1 = 1; i1<= actualleaves; i1++) if (!speciescheck[i1]) { for (j1 = 1; j1<= actualnode; j1++) for (k1 = 1; k1 <= actualnode; k1++) if ((k1 == belowtrueleft[j1]) || (k1 == belowtrueright[j1])) { insertioncounter = insertioncounter+1; possibilities[i1][ insertioncounter].above = j1; possibilities[i1][ insertioncounter].below = k1; } for (l1 = 1; l1<= insertioncounter; l1++) { checkconsistencylocal(i1,l1); } minimum = 100000.0; minimumrunnerup = 100000.0; minatroot = false; bestplacement = 0; for (l1 = 1; l1 <= insertioncounter; l1++) if (possibilities[i1][l1].eliminationabs < minimum) { minimum = possibilities[i1][l1].eliminationabs; bestplacement = l1; } minimumcount = 0; for (l1 = 1; l1<= insertioncounter; l1++) if (possibilities[i1][l1].eliminationabs == minimum) { minimumcount = minimumcount + 1; if ((possibilities[i1][l1].above == 7) || (possibilities[i1][l1].below == 7)) minatroot = true; } if (minatroot) minimumcount = minimumcount -1; for (l1 = 1; l1<= insertioncounter; l1++) if ((possibilities[i1][l1].eliminationabs > minimum) && (possibilities[i1][l1].eliminationabs < minimumrunnerup)) minimumrunnerup = possibilities[i1][l1].eliminationabs; if (minimumcount == 1) { overall[i1].bestabs = minimum; overall[i1].runnerup = minimumrunnerup; overall[i1].bestplace = bestplacement; } else { overall[i1].bestabs = minimum; overall[i1].runnerup = minimum; overall[i1].bestplace = 0; } } printf("Summary of the requirements for insertion.\n"); fprintf(outcomment, "Summary of the requirements for insertion.\n"); for (i1 = 1; i1<= actualleaves; i1++) if (! speciescheck[i1]) { if (overall[i1].bestabs < overall[i1].runnerup) { printf("Species %d can be placed with strength = %6.3f and with inconsistency %6.3f.\n", i1, (overall[i1].runnerup-overall[i1].bestabs), overall[i1].bestabs); fprintf(outcomment, "Species %d can be placed with strength = %6.3f and with inconsistency %6.3f.\n", i1, (overall[i1].runnerup-overall[i1].bestabs), overall[i1].bestabs); } else { printf("Species %d cannot be placed because the best and the runnerup are %6.3f.\n", i1, overall[i1].bestabs); fprintf(outcomment, "Species %d cannot be placed because the best and the runnerup are %6.3f.\n", i1, overall[i1].bestabs); } } bestsp = 0; bestabs = -100000.0; for (i1 = 1l; i1<= actualleaves; i1++) if (! speciescheck[i1]) if (overall[i1].runnerup - overall[i1].bestabs > bestabs) { bestsp = i1; bestabs = overall[i1].runnerup - overall[i1].bestabs; } if (bestsp != 0) { if (overall[bestsp].bestplace != 0) { casechanged = true; printf("****Placement of species %d via signal strength %6.3f.\n", bestsp, overall[bestsp].runnerup - overall[bestsp].bestabs); fprintf(outcomment, "****Placement of species %d via signal strength %6.3f.\n", bestsp, overall[bestsp].runnerup - overall[bestsp].bestabs); addpermanently(bestsp,possibilities[bestsp][overall[bestsp].bestplace].above, possibilities[bestsp][overall[bestsp].bestplace].below); } } else casechanged = false; }