24 #ifndef DOXYGEN_SHOULD_SKIP_THIS 27 #define NO_CHILD ((int32_t)-1073741824) 29 #define WEIGHTS_IN_TRIE 32 #ifdef TRIE_CHECK_EVERYTHING 33 #define TRIE_ASSERT_EVERYTHING(x) ASSERT(x) 35 #define TRIE_ASSERT_EVERYTHING(x) 39 #define TRIE_ASSERT(x) 41 #define TRIE_TERMINAL_CHARACTER 7 59 #ifdef TRIE_CHECK_EVERYTHING 88 #ifdef TRIE_CHECK_EVERYTHING 106 struct TreeParseInfo {
133 #endif // DOXYGEN_SHOULD_SKIP_THIS 137 #define IGNORE_IN_CLASSLIST 168 CTrie(int32_t d,
bool p_use_compact_terminal_nodes=
true);
171 CTrie(
const CTrie & to_copy);
175 const CTrie & operator=(
const CTrie & to_copy);
184 bool compare_traverse(
185 int32_t
node,
const CTrie & other, int32_t other_node);
192 bool compare(
const CTrie & other);
200 bool find_node(int32_t node, int32_t * trace, int32_t &trace_len)
const;
208 int32_t find_deepest_node(
209 int32_t start_node, int32_t &deepest_node)
const;
215 void display_node(int32_t node)
const;
224 void set_degree(int32_t d);
232 void create(int32_t len,
bool p_use_compact_terminal_nodes=
true);
239 void delete_trees(
bool p_use_compact_terminal_nodes=
true);
252 int32_t i, int32_t seq_offset, int32_t* vec,
float32_t alpha,
253 float64_t *weights,
bool degree_times_position_weights);
261 float64_t compute_abs_weights_tree(int32_t tree, int32_t depth);
268 float64_t* compute_abs_weights(int32_t &len);
283 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
285 bool degree_times_position_weights) ;
301 void compute_by_tree_helper(
302 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
304 int32_t mkl_stepsize,
float64_t * weights,
305 bool degree_times_position_weights);
321 void compute_scoring_helper(
322 int32_t tree, int32_t i, int32_t j,
float64_t weight, int32_t d,
323 int32_t max_degree, int32_t num_feat, int32_t num_sym,
324 int32_t sym_offset, int32_t offs,
float64_t* result);
338 void add_example_to_tree_mismatch_recursion(
339 int32_t tree, int32_t i,
float64_t alpha, int32_t *vec,
340 int32_t len_rem, int32_t degree_rec, int32_t mismatch_rec,
341 int32_t max_mismatch,
float64_t * weights);
353 int32_t tree,
const int32_t p,
struct TreeParseInfo info,
354 const int32_t depth, int32_t*
const x,
const int32_t k);
367 const struct TreeParseInfo info,
const int32_t p, int32_t* x,
376 int32_t compact_nodes(int32_t start_node, int32_t depth,
float64_t * weights);
387 int32_t pos, uint64_t seq, int32_t deg,
float64_t* weights);
398 void fill_backtracking_table_recursion(
399 Trie* tree, int32_t depth, uint64_t seq,
float64_t value,
400 DynArray<ConsensusEntry>* table,
float64_t* weights);
410 void fill_backtracking_table(
411 int32_t pos, DynArray<ConsensusEntry>* prev,
412 DynArray<ConsensusEntry>* cur,
bool cumulative,
420 void POIMs_extract_W(
float64_t*
const*
const W,
const int32_t K);
426 void POIMs_precalc_SLR(
const float64_t*
const distrib);
438 const int32_t parentIdx,
const int32_t sym,
const int32_t depth,
448 float64_t*
const*
const poims,
const int32_t K,
449 const int32_t debug);
457 return use_compact_terminal_nodes ;
466 bool p_use_compact_terminal_nodes)
468 use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
486 position_weights=p_position_weights;
495 int32_t ret = TreeMemPtr++;
500 for (int32_t q=0; q<4; q++)
501 TreeMem[ret].child_weights[q]=0.0;
505 for (int32_t q=0; q<4; q++)
506 TreeMem[ret].children[q]=NO_CHILD;
508 #ifdef TRIE_CHECK_EVERYTHING 509 TreeMem[ret].has_seq=false ;
510 TreeMem[ret].has_floats=false ;
512 TreeMem[ret].weight=0.0;
519 if (TreeMemPtr+10 < TreeMemPtrMax)
521 SG_DEBUG(
"Extending TreeMem from %i to %i elements\n",
522 TreeMemPtrMax, (int32_t) ((
float64_t)TreeMemPtrMax*1.2));
523 int32_t old_sz=TreeMemPtrMax;
524 TreeMemPtrMax = (int32_t) ((
float64_t)TreeMemPtrMax*1.2);
525 TreeMem = SG_REALLOC(Trie, TreeMem, old_sz, TreeMemPtrMax);
534 weights_in_tree = weights_in_tree_;
543 return weights_in_tree;
555 void POIMs_extract_W_helper(
556 const int32_t nodeIdx,
const int32_t depth,
const int32_t offset,
557 const int32_t y0,
float64_t*
const*
const W,
const int32_t K);
571 void POIMs_calc_SLR_helper1(
572 const float64_t*
const distrib,
const int32_t i,
573 const int32_t nodeIdx, int32_t left_tries_idx[4],
574 const int32_t depth, int32_t
const lastSym,
float64_t* S,
588 void POIMs_calc_SLR_helper2(
589 const float64_t*
const distrib,
const int32_t i,
590 const int32_t nodeIdx, int32_t left_tries_idx[4],
603 void POIMs_add_SLR_helper1(
604 const int32_t nodeIdx,
const int32_t depth,
const int32_t i,
605 const int32_t y0,
float64_t*
const*
const poims,
const int32_t K,
606 const int32_t debug);
621 void POIMs_add_SLR_helper2(
622 float64_t*
const*
const poims,
const int32_t K,
const int32_t k,
623 const int32_t i,
const int32_t y,
const float64_t valW,
625 const int32_t debug);
628 virtual const char*
get_name()
const {
return "Trie"; }
660 template <
class Trie>
662 :
CSGObject(), degree(0), position_weights(NULL),
663 use_compact_terminal_nodes(false),
664 weights_in_tree(true)
677 template <
class Trie>
693 template <
class Trie>
698 if (to_copy.position_weights!=NULL)
715 for (int32_t i=0; i<
length; i++)
716 trees[i]=to_copy.trees[i];
721 template <
class Trie>
729 if (to_copy.position_weights!=NULL)
749 for (int32_t i=0; i<
length; i++)
750 trees[i]=to_copy.trees[i] ;
755 template <
class Trie>
757 int32_t start_node, int32_t& deepest_node)
const 759 #ifdef TRIE_CHECK_EVERYTHING 761 SG_DEBUG(
"start_node=%i\n", start_node)
763 if (start_node==NO_CHILD)
765 for (int32_t i=0; i<
length; i++)
767 int32_t my_deepest_node ;
769 SG_DEBUG(
"start_node %i depth=%i\n", i, depth)
772 deepest_node=my_deepest_node ;
779 if (
TreeMem[start_node].has_seq)
781 for (int32_t q=0; q<16; q++)
782 if (
TreeMem[start_node].seq[q]!=TRIE_TERMINAL_CHARACTER)
784 deepest_node=start_node ;
787 if (
TreeMem[start_node].has_floats)
789 deepest_node=start_node ;
793 for (int32_t q=0; q<4; q++)
795 int32_t my_deepest_node ;
796 if (
TreeMem[start_node].children[q]==NO_CHILD)
801 deepest_node=my_deepest_node ;
812 template <
class Trie>
814 int32_t start_node, int32_t depth,
float64_t * weights)
820 if (start_node==NO_CHILD)
822 for (int32_t i=0; i<
length; i++)
831 TRIE_ASSERT_EVERYTHING(
TreeMem[start_node].has_floats)
833 for (int32_t q=0; q<4; q++)
834 if (
TreeMem[start_node].child_weights[q]!=0.0)
840 TRIE_ASSERT_EVERYTHING(!
TreeMem[start_node].has_floats)
842 int32_t num_used = 0 ;
845 for (int32_t q=0; q<4; q++)
847 if (
TreeMem[start_node].children[q]==NO_CHILD)
856 for (int32_t q=0; q<4; q++)
858 if (
TreeMem[start_node].children[q]==NO_CHILD)
865 int32_t last_node=
TreeMem[start_node].children[q] ;
868 ASSERT(weights[depth]!=0.0)
874 #ifdef TRIE_CHECK_EVERYTHING 877 memset(
TreeMem[node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
878 for (int32_t n=0; n<num; n++)
881 ASSERT(last_node!=NO_CHILD)
884 TRIE_ASSERT_EVERYTHING(
TreeMem[last_node].has_floats)
887 if (
TreeMem[last_node].child_weights[k]!=0.0)
896 TRIE_ASSERT_EVERYTHING(!
TreeMem[last_node].has_floats)
899 if (
TreeMem[last_node].children[k]!=NO_CHILD)
904 last_node=
TreeMem[last_node].children[k] ;
907 TreeMem[start_node].children[q]=-node ;
921 template <
class Trie>
925 SG_DEBUG(
"checking nodes %i and %i\n", node, other_node)
926 if (fabs(
TreeMem[node].weight-other.TreeMem[other_node].weight)>=1e-5)
928 SG_DEBUG(
"CTrie::compare: TreeMem[%i].weight=%f!=other.TreeMem[%i].weight=%f\n", node,
TreeMem[node].weight, other_node,other.TreeMem[other_node].weight)
929 SG_DEBUG(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n")
931 SG_DEBUG(
"============================================================\n")
932 other.display_node(other_node) ;
933 SG_DEBUG(
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n")
937 #ifdef TRIE_CHECK_EVERYTHING 938 if (
TreeMem[node].has_seq!=other.TreeMem[other_node].has_seq)
940 SG_DEBUG(
"CTrie::compare: TreeMem[%i].has_seq=%i!=other.TreeMem[%i].has_seq=%i\n", node,
TreeMem[node].has_seq, other_node,other.TreeMem[other_node].has_seq)
941 SG_DEBUG(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n")
943 SG_DEBUG(
"============================================================\n")
944 other.display_node(other_node) ;
945 SG_DEBUG(
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n")
948 if (
TreeMem[node].has_floats!=other.TreeMem[other_node].has_floats)
950 SG_DEBUG(
"CTrie::compare: TreeMem[%i].has_floats=%i!=other.TreeMem[%i].has_floats=%i\n", node,
TreeMem[node].has_floats, other_node, other.TreeMem[other_node].has_floats)
953 if (other.TreeMem[other_node].has_floats)
955 for (int32_t q=0; q<4; q++)
956 if (fabs(
TreeMem[node].child_weights[q]-other.TreeMem[other_node].child_weights[q])>1e-5)
958 SG_DEBUG(
"CTrie::compare: TreeMem[%i].child_weights[%i]=%e!=other.TreeMem[%i].child_weights[%i]=%e\n", node, q,
TreeMem[node].child_weights[q], other_node,q,other.TreeMem[other_node].child_weights[q])
959 SG_DEBUG(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n")
961 SG_DEBUG(
"============================================================\n")
962 other.display_node(other_node) ;
963 SG_DEBUG(
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n")
967 if (other.TreeMem[other_node].has_seq)
969 for (int32_t q=0; q<16; q++)
970 if ((
TreeMem[node].seq[q]!=other.TreeMem[other_node].seq[q]) && ((
TreeMem[node].seq[q]<4)||(other.TreeMem[other_node].seq[q]<4)))
972 SG_DEBUG(
"CTrie::compare: TreeMem[%i].seq[%i]=%i!=other.TreeMem[%i].seq[%i]=%i\n", node,q,
TreeMem[node].seq[q], other_node,q,other.TreeMem[other_node].seq[q])
973 SG_DEBUG(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n")
975 SG_DEBUG(
"============================================================\n")
976 other.display_node(other_node) ;
977 SG_DEBUG(
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n")
981 if (!other.TreeMem[other_node].has_seq && !other.TreeMem[other_node].has_floats)
983 for (int32_t q=0; q<4; q++)
985 if ((
TreeMem[node].children[q]==NO_CHILD) && (other.TreeMem[other_node].children[q]==NO_CHILD))
987 if ((
TreeMem[node].children[q]==NO_CHILD)!=(other.TreeMem[other_node].children[q]==NO_CHILD))
989 SG_DEBUG(
"CTrie::compare: TreeMem[%i].children[%i]=%i!=other.TreeMem[%i].children[%i]=%i\n", node,q,
TreeMem[node].children[q], other_node,q,other.TreeMem[other_node].children[q])
990 SG_DEBUG(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n")
992 SG_DEBUG(
"============================================================\n")
993 other.display_node(other_node) ;
994 SG_DEBUG(
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n")
1008 template <
class Trie>
1012 for (int32_t i=0; i<
length; i++)
1016 SG_DEBUG(
"two tries at %i identical\n", i)
1021 template <
class Trie>
1023 int32_t
node, int32_t * trace, int32_t& trace_len)
const 1025 #ifdef TRIE_CHECK_EVERYTHING 1028 if (
TreeMem[trace[trace_len-1]].has_seq)
1030 if (
TreeMem[trace[trace_len-1]].has_floats)
1033 for (int32_t q=0; q<4; q++)
1035 if (
TreeMem[trace[trace_len-1]].children[q]==NO_CHILD)
1037 int32_t tl=trace_len+1 ;
1038 if (
TreeMem[trace[trace_len-1]].children[q]>=0)
1039 trace[trace_len]=
TreeMem[trace[trace_len-1]].children[q] ;
1041 trace[trace_len]=-
TreeMem[trace[trace_len-1]].children[q] ;
1043 if (trace[trace_len]==node)
1062 template <
class Trie>
1065 #ifdef TRIE_CHECK_EVERYTHING 1066 int32_t * trace=SG_MALLOC(int32_t, 2*
degree);
1067 int32_t trace_len=-1 ;
1068 bool found = false ;
1070 for (tree=0; tree<
length; tree++)
1072 trace[0]=
trees[tree] ;
1074 found=
find_node(node, trace, trace_len) ;
1079 SG_PRINT(
"position %i trace: ", tree)
1081 for (int32_t i=0; i<trace_len-1; i++)
1084 for (int32_t q=0; q<4; q++)
1085 if (abs(
TreeMem[trace[i]].children[q])==trace[i+1])
1091 char acgt[5]=
"ACGT" ;
1097 for (int32_t q=0; q<4; q++)
1098 SG_PRINT(
"child_weighs[%i] = %f\n", q,
TreeMem[node].child_weights[q])
1102 for (int32_t q=0; q<16; q++)
1107 for (int32_t q=0; q<4; q++)
1109 if (
TreeMem[node].children[q]!=NO_CHILD)
1115 SG_PRINT(
"children[%i] = NO_CHILD -| \n", q,
TreeMem[node].children[q])
1139 for (int32_t i=0; i<
length; i++)
1140 trees[i] = NO_CHILD;
1156 int32_t len,
bool p_use_compact_terminal_nodes)
1160 trees=SG_MALLOC(int32_t, len);
1162 for (int32_t i=0; i<len; i++)
1171 bool p_use_compact_terminal_nodes)
1177 for (int32_t i=0; i<
length; i++)
1183 template <
class Trie>
1190 TRIE_ASSERT(tree>=0)
1196 for (int32_t k=0; k<4; k++)
1197 ret+=(
TreeMem[tree].child_weights[k]) ;
1204 for (int32_t i=0; i<4; i++)
1205 if (
TreeMem[tree].children[i]!=NO_CHILD)
1212 template <
class Trie>
1216 for (int32_t i=0; i<
length*4; i++)
1220 for (int32_t i=0; i<
length; i++)
1222 TRIE_ASSERT(
trees[i]!=NO_CHILD)
1223 for (int32_t k=0; k<4; k++)
1232 template <
class Trie>
1234 int32_t tree, int32_t i,
float64_t alpha,
1235 int32_t *vec, int32_t len_rem,
1236 int32_t degree_rec, int32_t mismatch_rec,
1237 int32_t max_mismatch,
float64_t * weights)
1241 TRIE_ASSERT(tree!=NO_CHILD)
1243 if ((len_rem<=0) || (mismatch_rec>max_mismatch) || (degree_rec>
degree))
1245 const int32_t other[4][3] = { {1,2,3},{0,2,3},{0,1,3},{0,1,2} } ;
1247 int32_t subtree = NO_CHILD ;
1249 if (degree_rec==
degree-1)
1251 TRIE_ASSERT_EVERYTHING(
TreeMem[tree].has_floats)
1253 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+
degree*mismatch_rec];
1255 if (weights[degree_rec]!=0.0)
1256 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+
degree*mismatch_rec]/weights[degree_rec];
1257 if (mismatch_rec+1<=max_mismatch)
1258 for (int32_t o=0; o<3; o++)
1261 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+
degree*(mismatch_rec+1)];
1263 if (weights[degree_rec]!=0.0)
1264 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+
degree*(mismatch_rec+1)]/weights[degree_rec];
1270 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_floats)
1271 if (
TreeMem[tree].children[vec[0]]!=NO_CHILD)
1273 subtree=
TreeMem[tree].children[vec[0]] ;
1275 TreeMem[subtree].weight += alpha*weights[degree_rec+
degree*mismatch_rec];
1277 if (weights[degree_rec]!=0.0)
1278 TreeMem[subtree].weight += alpha*weights[degree_rec+
degree*mismatch_rec]/weights[degree_rec];
1284 TreeMem[tree].children[vec[0]]=tmp ;
1286 #ifdef TRIE_CHECK_EVERYTHING 1287 if (degree_rec==
degree-2)
1288 TreeMem[subtree].has_floats=true ;
1291 TreeMem[subtree].weight = alpha*weights[degree_rec+
degree*mismatch_rec] ;
1293 if (weights[degree_rec]!=0.0)
1294 TreeMem[subtree].weight = alpha*weights[degree_rec+
degree*mismatch_rec]/weights[degree_rec] ;
1296 TreeMem[subtree].weight = 0.0 ;
1300 degree_rec+1, mismatch_rec, max_mismatch, weights) ;
1302 if (mismatch_rec+1<=max_mismatch)
1304 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_floats)
1305 for (int32_t o=0; o<3; o++)
1307 int32_t ot = other[vec[0]][o] ;
1308 if (
TreeMem[tree].children[ot]!=NO_CHILD)
1310 subtree=
TreeMem[tree].children[ot] ;
1312 TreeMem[subtree].weight += alpha*weights[degree_rec+
degree*(mismatch_rec+1)];
1314 if (weights[degree_rec]!=0.0)
1315 TreeMem[subtree].weight += alpha*weights[degree_rec+
degree*(mismatch_rec+1)]/weights[degree_rec];
1321 TreeMem[tree].children[ot]=tmp ;
1323 #ifdef TRIE_CHECK_EVERYTHING 1324 if (degree_rec==
degree-2)
1325 TreeMem[subtree].has_floats=true ;
1329 TreeMem[subtree].weight = alpha*weights[degree_rec+
degree*(mismatch_rec+1)] ;
1331 if (weights[degree_rec]!=0.0)
1332 TreeMem[subtree].weight = alpha*weights[degree_rec+
degree*(mismatch_rec+1)]/weights[degree_rec] ;
1334 TreeMem[subtree].weight = 0.0 ;
1339 degree_rec+1, mismatch_rec+1, max_mismatch, weights) ;
1345 template <
class Trie>
1347 int32_t tree, int32_t i, int32_t j,
float64_t weight, int32_t d,
1348 int32_t max_degree, int32_t num_feat, int32_t num_sym, int32_t sym_offset,
1359 for (int32_t k=0; k<num_sym; k++)
1361 if (
TreeMem[tree].children[k]!=NO_CHILD)
1363 int32_t child=
TreeMem[tree].children[k];
1366 compute_scoring_helper(child, i, j+1, weight+decay*
TreeMem[child].weight, d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
1368 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*
TreeMem[child].weight;
1372 compute_scoring_helper(child, i, j+1, 0.0, 0, max_degree, num_feat, num_sym, sym_offset, offs, result);
1378 for (int32_t k=0; k<num_sym; k++)
1381 if (d<max_degree-1 && i<num_feat-1)
1382 compute_scoring_helper(
trees[i+1], i+1, 0, weight+decay*
TreeMem[tree].child_weights[k], d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
1384 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*
TreeMem[tree].child_weights[k];
1390 template <
class Trie>
1392 int32_t tree,
const int32_t p,
struct TreeParseInfo info,
1393 const int32_t depth, int32_t*
const x,
const int32_t k)
1395 const int32_t num_sym = info.num_sym;
1396 const int32_t y0 = info.y0;
1397 const int32_t y1 = (k==0) ? 0 : y0 - ( (depth<k) ? 0 : info.nofsKmers[k-1] * x[depth-k] );
1408 for( sym=0; sym<num_sym; ++sym ) {
1409 const int32_t childNum =
TreeMem[tree].children[ sym ];
1410 if( childNum != NO_CHILD ) {
1411 int32_t child = childNum ;
1413 info.substrs[depth+1] = y0 + sym;
1414 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
1417 traverse( child, p, info, depth+1, x, k );
1422 else if( depth ==
degree-1 )
1424 for( sym=0; sym<num_sym; ++sym ) {
1428 info.substrs[depth+1] = y0 + sym;
1429 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
1431 count( w, depth, info, p, x, k );
1440 template <
class Trie>
1442 const float64_t w,
const int32_t depth,
const struct TreeParseInfo info,
1443 const int32_t p, int32_t* x,
const int32_t k)
1452 const int32_t nofKmers = info.nofsKmers[k];
1453 const float64_t margWeight = w * info.margFactors[ depth-k ];
1454 const int32_t m_a = depth - k + 1;
1455 const int32_t m_b = info.num_feat - p;
1456 const int32_t m = ( m_a < m_b ) ? m_a : m_b;
1458 const int32_t offset0 = nofKmers * p;
1460 register int32_t offset;
1462 for( i = 0; i < m; ++i ) {
1463 const int32_t y = info.substrs[i+k+1];
1464 info.C_k[ y + offset ] += margWeight;
1469 const int32_t offsR = info.substrs[k+1] + offset0;
1470 info.R_k[offsR] += margWeight;
1472 if( p+depth-k < info.num_feat ) {
1473 const int32_t offsL = info.substrs[depth+1] + nofKmers * (p+depth-k);
1474 info.L_k[offsL] += margWeight;
1494 template <
class Trie>
1496 int32_t i, int32_t seq_offset, int32_t * vec,
float32_t alpha,
1497 float64_t *weights,
bool degree_times_position_weights)
1499 int32_t tree =
trees[i] ;
1502 int32_t max_depth = 0 ;
1504 if (degree_times_position_weights)
1505 weights_column = &weights[(i+seq_offset)*
degree] ;
1507 weights_column = weights ;
1519 for (int32_t j=0; (j<max_depth) && (i+j+seq_offset<
length); j++)
1521 TRIE_ASSERT((vec[i+j+seq_offset]>=0) && (vec[i+j+seq_offset]<4))
1522 if ((j<
degree-1) && (
TreeMem[tree].children[vec[i+j+seq_offset]]!=NO_CHILD))
1524 if (
TreeMem[tree].children[vec[i+j+seq_offset]]<0)
1527 TRIE_ASSERT(j >=
degree-16)
1529 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_seq)
1530 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_floats)
1531 int32_t
node = -
TreeMem[tree].children[vec[i+j+seq_offset]] ;
1534 TRIE_ASSERT_EVERYTHING(
TreeMem[node].has_seq)
1535 TRIE_ASSERT_EVERYTHING(!
TreeMem[node].has_floats)
1538 int32_t mismatch_pos = -1 ;
1541 for (k=0; (j+k<max_depth) && (i+j+seq_offset+k<
length); k++)
1543 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4))
1545 if ((
TreeMem[node].seq[k]>=4) && (
TreeMem[node].seq[k]!=TRIE_TERMINAL_CHARACTER))
1546 fprintf(stderr,
"+++i=%i j=%i seq[%i]=%i\n", i, j, k,
TreeMem[node].seq[k]) ;
1547 TRIE_ASSERT((
TreeMem[node].seq[k]<4) || (
TreeMem[node].seq[k]==TRIE_TERMINAL_CHARACTER))
1549 if (
TreeMem[node].seq[k]!=vec[i+j+seq_offset+k])
1559 if (mismatch_pos==-1)
1569 int32_t last_node=tree ;
1573 for (k=0; k<mismatch_pos; k++)
1575 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4))
1576 TRIE_ASSERT(vec[i+j+seq_offset+k]==
TreeMem[node].seq[k])
1579 TreeMem[last_node].children[vec[i+j+seq_offset+k]]=tmp ;
1582 TreeMem[last_node].weight = (
TreeMem[node].weight+alpha)*weights_column[j+k] ;
1585 TRIE_ASSERT(j+k!=
degree-1)
1587 if ((
TreeMem[node].seq[mismatch_pos]>=4) && (
TreeMem[node].seq[mismatch_pos]!=TRIE_TERMINAL_CHARACTER))
1588 fprintf(stderr,
"**i=%i j=%i seq[%i]=%i\n", i, j, k,
TreeMem[node].seq[mismatch_pos]) ;
1589 ASSERT((
TreeMem[node].seq[mismatch_pos]<4) || (
TreeMem[node].seq[mismatch_pos]==TRIE_TERMINAL_CHARACTER))
1590 TRIE_ASSERT(vec[i+j+seq_offset+mismatch_pos]!=
TreeMem[node].seq[mismatch_pos])
1597 for (int32_t q=0; q<4; q++)
1598 TreeMem[last_node].child_weights[q]=0.0 ;
1601 if (
TreeMem[node].seq[mismatch_pos]<4)
1603 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] += alpha*weights_column[
degree-1] ;
1607 if (
TreeMem[node].seq[mismatch_pos]<4)
1609 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] = alpha ;
1612 #ifdef TRIE_CHECK_EVERYTHING 1613 TreeMem[last_node].has_floats=true ;
1619 if (
TreeMem[node].seq[mismatch_pos]<4)
1621 TreeMem[last_node].children[
TreeMem[node].seq[mismatch_pos]] = -node ;
1624 for (int32_t q=0; q<16; q++)
1626 if ((j+q+mismatch_pos<
degree) && (i+j+seq_offset+q+mismatch_pos<
length))
1629 TreeMem[node].seq[q] = TRIE_TERMINAL_CHARACTER ;
1631 #ifdef TRIE_CHECK_EVERYTHING 1637 TRIE_ASSERT((vec[i+j+seq_offset+mismatch_pos]>=0) && (vec[i+j+seq_offset+mismatch_pos]<4))
1639 TreeMem[last_node].children[vec[i+j+seq_offset+mismatch_pos]] = -tmp ;
1642 TreeMem[last_node].weight = alpha ;
1643 #ifdef TRIE_CHECK_EVERYTHING 1644 TreeMem[last_node].has_seq = true ;
1646 memset(
TreeMem[last_node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
1647 for (int32_t q=0; (j+q+mismatch_pos<
degree) && (i+j+seq_offset+q+mismatch_pos<
length); q++)
1648 TreeMem[last_node].seq[q] = vec[i+j+seq_offset+mismatch_pos+q] ;
1655 tree=
TreeMem[tree].children[vec[i+j+seq_offset]] ;
1657 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_seq)
1659 TreeMem[tree].weight += alpha*weights_column[j];
1661 TreeMem[tree].weight += alpha ;
1667 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_seq)
1668 TRIE_ASSERT_EVERYTHING(
TreeMem[tree].has_floats)
1670 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha*weights_column[j] ;
1672 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha;
1679 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_seq)
1680 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_floats)
1684 TreeMem[tree].children[vec[i+j+seq_offset]] = -tmp ;
1686 TreeMem[tree].children[vec[i+j+seq_offset]] = tmp ;
1690 #ifdef TRIE_CHECK_EVERYTHING 1691 TreeMem[tree].has_seq = use_seq ;
1695 TreeMem[tree].weight = alpha ;
1697 memset(
TreeMem[tree].seq, TRIE_TERMINAL_CHARACTER, 16) ;
1698 for (int32_t q=0; (j+q<
degree) && (i+j+seq_offset+q<
length); q++)
1701 TreeMem[tree].seq[q]=vec[i+j+seq_offset+q] ;
1708 TreeMem[tree].weight = alpha*weights_column[j] ;
1710 TreeMem[tree].weight = alpha ;
1711 #ifdef TRIE_CHECK_EVERYTHING 1713 TreeMem[tree].has_floats = true ;
1720 template <
class Trie>
1722 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
1724 bool degree_times_position_weights)
1726 int32_t tree =
trees[tree_pos] ;
1732 if (degree_times_position_weights)
1733 weights_column=&weights[weight_pos*
degree] ;
1735 weights_column=weights ;
1738 for (int32_t j=0; seq_pos+j < len; j++)
1740 TRIE_ASSERT((vec[seq_pos+j]<4) && (vec[seq_pos+j]>=0))
1742 if ((j<
degree-1) && (
TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
1744 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_floats)
1745 if (
TreeMem[tree].children[vec[seq_pos+j]]<0)
1747 tree = -
TreeMem[tree].children[vec[seq_pos+j]];
1748 TRIE_ASSERT(tree>=0)
1749 TRIE_ASSERT_EVERYTHING(
TreeMem[tree].has_seq)
1751 for (int32_t k=0; (j+k<
degree) && (seq_pos+j+k<
length); k++)
1753 TRIE_ASSERT((vec[seq_pos+j+k]<4) && (vec[seq_pos+j+k]>=0))
1754 if (
TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
1756 this_weight += weights_column[j+k] ;
1758 sum +=
TreeMem[tree].weight * this_weight ;
1763 tree=
TreeMem[tree].children[vec[seq_pos+j]];
1764 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_seq)
1768 sum +=
TreeMem[tree].weight * weights_column[j] ;
1773 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_seq)
1776 TRIE_ASSERT_EVERYTHING(
TreeMem[tree].has_floats)
1778 sum +=
TreeMem[tree].child_weights[vec[seq_pos+j]] ;
1780 sum +=
TreeMem[tree].child_weights[vec[seq_pos+j]] * weights_column[j] ;
1783 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_floats)
1795 template <
class Trie>
1797 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
1799 int32_t mkl_stepsize,
float64_t * weights,
1800 bool degree_times_position_weights)
1802 int32_t tree =
trees[tree_pos] ;
1811 if (!degree_times_position_weights)
1813 for (int32_t j=0; seq_pos+j<len; j++)
1815 if ((j<
degree-1) && (
TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
1817 if (
TreeMem[tree].children[vec[seq_pos+j]]<0)
1819 tree = -
TreeMem[tree].children[vec[seq_pos+j]];
1820 TRIE_ASSERT_EVERYTHING(
TreeMem[tree].has_seq)
1821 for (int32_t k=0; (j+k<
degree) && (seq_pos+j+k<
length); k++)
1823 if (
TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
1826 LevelContrib[weight_pos/mkl_stepsize] += factor*
TreeMem[tree].weight ;
1828 LevelContrib[weight_pos/mkl_stepsize] += factor*
TreeMem[tree].weight*weights[j+k] ;
1834 tree=
TreeMem[tree].children[vec[seq_pos+j]];
1835 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_seq)
1837 LevelContrib[weight_pos/mkl_stepsize] += factor*
TreeMem[tree].weight ;
1839 LevelContrib[weight_pos/mkl_stepsize] += factor*
TreeMem[tree].weight*weights[j] ;
1844 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_seq)
1848 LevelContrib[weight_pos/mkl_stepsize] += factor*
TreeMem[tree].child_weights[vec[seq_pos+j]] ;
1850 LevelContrib[weight_pos/mkl_stepsize] += factor*
TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
1857 for (int32_t j=0; seq_pos+j<len; j++)
1859 if ((j<
degree-1) && (
TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
1861 if (
TreeMem[tree].children[vec[seq_pos+j]]<0)
1863 tree = -
TreeMem[tree].children[vec[seq_pos+j]];
1864 TRIE_ASSERT_EVERYTHING(
TreeMem[tree].has_seq)
1865 for (int32_t k=0; (j+k<
degree) && (seq_pos+j+k<
length); k++)
1867 if (
TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
1870 LevelContrib[weight_pos/mkl_stepsize] += factor*
TreeMem[tree].weight ;
1872 LevelContrib[weight_pos/mkl_stepsize] += factor*
TreeMem[tree].weight*weights[j+k+weight_pos*
degree] ;
1878 tree=
TreeMem[tree].children[vec[seq_pos+j]];
1879 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_seq)
1881 LevelContrib[weight_pos/mkl_stepsize] += factor*
TreeMem[tree].weight ;
1883 LevelContrib[weight_pos/mkl_stepsize] += factor*
TreeMem[tree].weight*weights[j+weight_pos*
degree] ;
1888 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_seq)
1892 LevelContrib[weight_pos/mkl_stepsize] += factor*
TreeMem[tree].child_weights[vec[seq_pos+j]] ;
1894 LevelContrib[weight_pos/mkl_stepsize] += factor*
TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j+weight_pos*
degree] ;
1901 else if (!degree_times_position_weights)
1903 for (int32_t j=0; seq_pos+j<len; j++)
1905 if ((j<
degree-1) && (
TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
1907 if (
TreeMem[tree].children[vec[seq_pos+j]]<0)
1909 tree = -
TreeMem[tree].children[vec[seq_pos+j]];
1910 TRIE_ASSERT_EVERYTHING(
TreeMem[tree].has_seq)
1911 for (int32_t k=0; (j+k<
degree) && (seq_pos+j+k<
length); k++)
1913 if (
TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
1916 LevelContrib[(j+k)/mkl_stepsize] += factor*
TreeMem[tree].weight ;
1918 LevelContrib[(j+k)/mkl_stepsize] += factor*
TreeMem[tree].weight*weights[j+k] ;
1924 tree=
TreeMem[tree].children[vec[seq_pos+j]];
1925 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_seq)
1927 LevelContrib[j/mkl_stepsize] += factor*
TreeMem[tree].weight ;
1929 LevelContrib[j/mkl_stepsize] += factor*
TreeMem[tree].weight*weights[j] ;
1934 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_seq)
1938 LevelContrib[j/mkl_stepsize] += factor*
TreeMem[tree].child_weights[vec[seq_pos+j]] ;
1940 LevelContrib[j/mkl_stepsize] += factor*
TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
1966 for (int32_t j=0; seq_pos+j<len; j++)
1968 if ((j<
degree-1) && (
TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
1970 if (
TreeMem[tree].children[vec[seq_pos+j]]<0)
1972 tree = -
TreeMem[tree].children[vec[seq_pos+j]];
1973 TRIE_ASSERT_EVERYTHING(
TreeMem[tree].has_seq)
1974 for (int32_t k=0; (j+k<
degree) && (seq_pos+j+k<
length); k++)
1976 if (
TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
1979 LevelContrib[(j+k+
degree*weight_pos)/mkl_stepsize] += factor*
TreeMem[tree].weight ;
1981 LevelContrib[(j+k+
degree*weight_pos)/mkl_stepsize] += factor*
TreeMem[tree].weight*weights[j+k+weight_pos*
degree] ;
1987 tree=
TreeMem[tree].children[vec[seq_pos+j]];
1988 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_seq)
1990 LevelContrib[(j+
degree*weight_pos)/mkl_stepsize] += factor *
TreeMem[tree].weight ;
1992 LevelContrib[(j+
degree*weight_pos)/mkl_stepsize] += factor *
TreeMem[tree].weight * weights[j+weight_pos*
degree] ;
1997 TRIE_ASSERT_EVERYTHING(!
TreeMem[tree].has_seq)
2001 LevelContrib[(j+
degree*weight_pos)/mkl_stepsize] += factor *
TreeMem[tree].child_weights[vec[seq_pos+j]] ;
2003 LevelContrib[(j+
degree*weight_pos)/mkl_stepsize] += factor *
TreeMem[tree].child_weights[vec[seq_pos+j]] * weights[j+weight_pos*
degree] ;
2011 template <
class Trie>
2013 Trie* tree, int32_t depth, uint64_t seq,
float64_t value,
2019 value+=tree->weight;
2023 value+=weights[depth-1]*tree->weight;
2028 for (int32_t sym=0; sym<4; sym++)
2033 ConsensusEntry entry;
2035 entry.score=value+v;
2036 entry.string=seq | ((uint64_t) sym) << (2*(
degree-depth-1));
2044 for (int32_t sym=0; sym<4; sym++)
2046 uint64_t str=seq | ((uint64_t) sym) << (2*(
degree-depth-1));
2047 if (tree->children[sym] != NO_CHILD)
2053 template <
class Trie>
2055 int32_t pos, uint64_t seq, int32_t deg,
float64_t* weights)
2061 for (int32_t i=pos; i<pos+deg && i<
length; i++)
2066 for (int32_t d=0; d<deg-i+pos; d++)
2070 int32_t sym = (int32_t) (seq >> (2*(deg-1-d-i+pos)) & 3);
2076 ASSERT(tree->children[sym] != NO_CHILD)
2077 tree=&
TreeMem[tree->children[sym]];
2078 result+=w*tree->weight;
2085 template <
class Trie>
2101 for (int32_t i=0; i<num_cur; i++)
2117 for (int32_t i=0; i<num_cur; i++)
2120 uint64_t str_cur= cur->
get_element(i).string >> 2;
2126 for (int32_t j=0; j<num_prev; j++)
2130 ((((uint64_t)0)-1) ^ (((uint64_t) 3) << (2*(
degree-1))));
2131 uint64_t str_prev= mask & prev->
get_element(j).string;
2134 if (str_cur == str_prev)
2137 if (bt==-1 || sc>max_score)
2148 ConsensusEntry entry;
2150 entry.score=max_score;
2158 #endif // _TRIE_H___
void add_example_to_tree_mismatch_recursion(int32_t tree, int32_t i, float64_t alpha, int32_t *vec, int32_t len_rem, int32_t degree_rec, int32_t mismatch_rec, int32_t max_mismatch, float64_t *weights)
T get_element(int32_t index) const
void set_degree(int32_t d)
float compare(const d_node< P > *p1, const d_node< P > *p2)
float64_t * compute_abs_weights(int32_t &len)
void display_node(int32_t node) const
bool append_element(T element)
virtual const char * get_name() const
void fill_backtracking_table(int32_t pos, DynArray< ConsensusEntry > *prev, DynArray< ConsensusEntry > *cur, bool cumulative, float64_t *weights)
bool compare_traverse(int32_t node, const CTrie &other, int32_t other_node)
float64_t * position_weights
float64_t compute_by_tree_helper(int32_t *vec, int32_t len, int32_t seq_pos, int32_t tree_pos, int32_t weight_pos, float64_t *weights, bool degree_times_position_weights)
void set_use_compact_terminal_nodes(bool p_use_compact_terminal_nodes)
Template class Trie implements a suffix trie, i.e. a tree in which all suffixes up to a certain lengt...
void fill_backtracking_table_recursion(Trie *tree, int32_t depth, uint64_t seq, float64_t value, DynArray< ConsensusEntry > *table, float64_t *weights)
int32_t get_num_elements() const
void add_to_trie(int32_t i, int32_t seq_offset, int32_t *vec, float32_t alpha, float64_t *weights, bool degree_times_position_weights)
void create(int32_t len, bool p_use_compact_terminal_nodes=true)
void compute_scoring_helper(int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d, int32_t max_degree, int32_t num_feat, int32_t num_sym, int32_t sym_offset, int32_t offs, float64_t *result)
bool get_use_compact_terminal_nodes()
void traverse(int32_t tree, const int32_t p, struct TreeParseInfo info, const int32_t depth, int32_t *const x, const int32_t k)
int32_t get_num_used_nodes()
Class SGObject is the base class of all shogun objects.
void set_weights_in_tree(bool weights_in_tree_)
#define IGNORE_IN_CLASSLIST
Template Dynamic array class that creates an array that can be used like a list or an array...
void count(const float64_t w, const int32_t depth, const struct TreeParseInfo info, const int32_t p, int32_t *x, const int32_t k)
int32_t compact_nodes(int32_t start_node, int32_t depth, float64_t *weights)
bool set_element(T element, int32_t index)
void delete_trees(bool p_use_compact_terminal_nodes=true)
int32_t get_node(bool last_node=false)
const CTrie & operator=(const CTrie &to_copy)
bool use_compact_terminal_nodes
bool compare(const CTrie &other)
all of classes and functions are contained in the shogun namespace
int32_t find_deepest_node(int32_t start_node, int32_t &deepest_node) const
bool find_node(int32_t node, int32_t *trace, int32_t &trace_len) const
bool get_weights_in_tree()
float64_t get_cumulative_score(int32_t pos, uint64_t seq, int32_t deg, float64_t *weights)
void set_position_weights(float64_t *p_position_weights)
static T abs(T a)
return the absolute value of a number
float64_t compute_abs_weights_tree(int32_t tree, int32_t depth)