25 #define VAL_MACRO log((default_value == 0) ? (CMath::random(MIN_RAND, MAX_RAND)) : default_value)
26 #define ARRAY_SIZE 65336
75 const char Model::FIX_DISALLOWED=0 ;
76 const char Model::FIX_ALLOWED=1 ;
77 const char Model::FIX_DEFAULT=-1 ;
116 fix_pos_state[i] = FIX_DEFAULT ;
138 SG_FREE(fix_pos_state);
150 trans_list_forward_cnt=NULL;
151 trans_list_backward_cnt=NULL;
152 trans_list_forward=NULL;
153 trans_list_backward=NULL;
154 trans_list_forward_val=NULL;
166 #ifdef USE_LOGSUMARRAY
169 #ifdef USE_HMMPARALLEL_STRUCTURES
181 mem_initialized =
false;
187 #ifdef USE_HMMPARALLEL_STRUCTURES
205 #ifdef USE_HMMPARALLEL_STRUCTURES
221 #ifdef USE_HMMPARALLEL_STRUCTURES
236 trans_list_forward = NULL ;
237 trans_list_forward_cnt = NULL ;
238 trans_list_forward_val = NULL ;
239 trans_list_backward = NULL ;
240 trans_list_backward_cnt = NULL ;
242 mem_initialized = false ;
251 #ifdef USE_HMMPARALLEL_STRUCTURES
267 mem_initialized = true ;
289 trans_list_forward = NULL ;
290 trans_list_forward_cnt = NULL ;
291 trans_list_forward_val = NULL ;
292 trans_list_backward = NULL ;
293 trans_list_backward_cnt = NULL ;
295 mem_initialized = false ;
304 #ifdef USE_HMMPARALLEL_STRUCTURES
320 mem_initialized = true ;
322 trans_list_forward_cnt=NULL ;
324 trans_list_forward = SG_MALLOC(
T_STATES*,
N);
325 trans_list_forward_val = SG_MALLOC(
float64_t*,
N);
326 trans_list_forward_cnt = SG_MALLOC(
T_STATES,
N);
329 for (int32_t j=0; j<
N; j++)
331 int32_t old_start_idx=start_idx;
333 while (start_idx<num_trans && a_trans[start_idx+num_trans]==j)
337 if (start_idx>1 && start_idx<num_trans)
338 ASSERT(a_trans[start_idx+num_trans-1]<=
339 a_trans[start_idx+num_trans]);
342 if (start_idx>1 && start_idx<num_trans)
343 ASSERT(a_trans[start_idx+num_trans-1]<=
344 a_trans[start_idx+num_trans]);
346 int32_t len=start_idx-old_start_idx;
349 trans_list_forward_cnt[j] = 0 ;
353 trans_list_forward[j] = SG_MALLOC(
T_STATES, len);
354 trans_list_forward_val[j] = SG_MALLOC(
float64_t, len);
358 trans_list_forward[j] = NULL;
359 trans_list_forward_val[j] = NULL;
363 for (int32_t i=0; i<num_trans; i++)
365 int32_t from = (int32_t)a_trans[i+num_trans] ;
366 int32_t to = (int32_t)a_trans[i] ;
372 trans_list_forward[from][trans_list_forward_cnt[from]]=to ;
373 trans_list_forward_val[from][trans_list_forward_cnt[from]]=val ;
374 trans_list_forward_cnt[from]++ ;
392 #ifdef USE_HMMPARALLEL_STRUCTURES
403 if (trans_list_forward_cnt)
404 SG_FREE(trans_list_forward_cnt);
405 if (trans_list_backward_cnt)
406 SG_FREE(trans_list_backward_cnt);
407 if (trans_list_forward)
409 for (int32_t i=0; i<trans_list_len; i++)
410 if (trans_list_forward[i])
411 SG_FREE(trans_list_forward[i]);
412 SG_FREE(trans_list_forward);
414 if (trans_list_forward_val)
416 for (int32_t i=0; i<trans_list_len; i++)
417 if (trans_list_forward_val[i])
418 SG_FREE(trans_list_forward_val[i]);
419 SG_FREE(trans_list_forward_val);
421 if (trans_list_backward)
423 for (int32_t i=0; i<trans_list_len; i++)
424 if (trans_list_backward[i])
425 SG_FREE(trans_list_backward[i]);
426 SG_FREE(trans_list_backward);
433 #ifdef USE_HMMPARALLEL_STRUCTURES
448 #else // USE_HMMPARALLEL_STRUCTURES
453 #endif // USE_HMMPARALLEL_STRUCTURES
459 #ifdef USE_LOGSUMARRAY
460 #ifdef USE_HMMPARALLEL_STRUCTURES
469 #else //USE_HMMPARALLEL_STRUCTURES
471 #endif //USE_HMMPARALLEL_STRUCTURES
472 #endif //USE_LOGSUMARRAY
476 #ifdef USE_HMMPARALLEL_STRUCTURES
484 #endif //USE_HMMPARALLEL_STRUCTURES
496 SG_ERROR(
"Expected features of class string type word\n")
517 #ifdef USE_HMMPARALLEL_STRUCTURES
523 #else //USE_HMMPARALLEL_STRUCTURES
526 #endif //USE_HMMPARALLEL_STRUCTURES
529 #ifdef USE_HMMPARALLEL_STRUCTURES
531 arrayS[i]=SG_MALLOC(
float64_t, (int32_t)(this->
N/2+1));
532 #else //USE_HMMPARALLEL_STRUCTURES
533 arrayS=SG_MALLOC(
float64_t, (int32_t)(this->
N/2+1));
534 #endif //USE_HMMPARALLEL_STRUCTURES
535 #endif //LOG_SUMARRAY
541 #ifdef USE_HMMPARALLEL_STRUCTURES
543 #else //USE_HMMPARALLEL_STRUCTURES
545 #endif //USE_HMMPARALLEL_STRUCTURES
562 #ifdef USE_HMMPARALLEL_STRUCTURES
603 trans_list_forward = NULL ;
604 trans_list_forward_cnt = NULL ;
605 trans_list_forward_val = NULL ;
606 trans_list_backward = NULL ;
607 trans_list_backward_cnt = NULL ;
609 mem_initialized = false ;
620 #ifdef USE_HMMPARALLEL_STRUCTURES
628 this->beta_cache[i].table=NULL;
630 this->beta_cache[i].dimension=0;
631 this->states_per_observation_psi[i]=NULL ;
634 #else // USE_HMMPARALLEL_STRUCTURES
636 this->beta_cache.table=NULL;
638 this->beta_cache.dimension=0;
639 this->states_per_observation_psi=NULL ;
640 #endif //USE_HMMPARALLEL_STRUCTURES
645 #ifdef USE_HMMPARALLEL_STRUCTURES
654 #else // USE_HMMPARALLEL_STRUCTURES
657 #endif //USE_HMMPARALLEL_STRUCTURES
659 #ifdef USE_HMMPARALLEL_STRUCTURES
662 #endif //USE_HMMPARALLEL_STRUCTURES
665 #ifdef USE_HMMPARALLEL_STRUCTURES
667 #endif // USE_HMMPARALLEL_STRUCTURES
668 #endif //LOG_SUMARRAY
673 mem_initialized = true ;
676 return ((files_ok) &&
696 int32_t wanted_time=time;
698 if (ALPHA_CACHE(dimension).table)
700 alpha=&ALPHA_CACHE(dimension).table[0];
701 alpha_new=&ALPHA_CACHE(dimension).table[
N];
715 for (int32_t i=0; i<
N; i++)
722 for (int32_t j=0; j<
N; j++)
724 register int32_t i, num = trans_list_forward_cnt[j] ;
726 for (i=0; i < num; i++)
728 int32_t ii = trans_list_forward[j][i] ;
735 if (!ALPHA_CACHE(dimension).table)
749 if (time<p_observations->get_vector_length(dimension))
751 register int32_t i, num=trans_list_forward_cnt[state];
753 for (i=0; i<num; i++)
755 int32_t ii = trans_list_forward[state][i] ;
770 if (!ALPHA_CACHE(dimension).table)
774 ALPHA_CACHE(dimension).dimension=dimension;
775 ALPHA_CACHE(dimension).updated=
true;
776 ALPHA_CACHE(dimension).sum=sum;
778 if (wanted_time<p_observations->get_vector_length(dimension))
779 return ALPHA_CACHE(dimension).table[wanted_time*N+state];
781 return ALPHA_CACHE(dimension).sum;
799 int32_t wanted_time=time;
801 if (ALPHA_CACHE(dimension).table)
803 alpha=&ALPHA_CACHE(dimension).table[0];
804 alpha_new=&ALPHA_CACHE(dimension).table[
N];
818 for (int32_t i=0; i<
N; i++)
825 for (int32_t j=0; j<
N; j++)
828 #ifdef USE_LOGSUMARRAY
829 for (i=0; i<(N>>1); i++)
831 alpha[(i<<1)+1] +
get_a((i<<1)+1,j));
835 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
838 #else //USE_LOGSUMARRAY
844 #endif //USE_LOGSUMARRAY
847 if (!ALPHA_CACHE(dimension).table)
861 if (time<p_observations->get_vector_length(dimension))
864 #ifdef USE_LOGSUMARRAY
865 for (i=0; i<(N>>1); i++)
867 alpha[(i<<1)+1] +
get_a((i<<1)+1,state));
871 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
874 #else //USE_LOGSUMARRAY
880 #endif //USE_LOGSUMARRAY
887 #ifdef USE_LOGSUMARRAY
888 for (i=0; i<(N>>1); i++)
890 alpha[(i<<1)+1] +
get_q((i<<1)+1));
893 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
895 sum=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
896 #else //USE_LOGSUMARRAY
900 #endif //USE_LOGSUMARRAY
902 if (!ALPHA_CACHE(dimension).table)
906 ALPHA_CACHE(dimension).dimension=dimension;
907 ALPHA_CACHE(dimension).updated=
true;
908 ALPHA_CACHE(dimension).sum=sum;
910 if (wanted_time<p_observations->get_vector_length(dimension))
911 return ALPHA_CACHE(dimension).table[wanted_time*N+state];
913 return ALPHA_CACHE(dimension).sum;
928 int32_t wanted_time=time;
931 forward(time, state, dimension);
933 if (BETA_CACHE(dimension).table)
952 for (
register int32_t i=0; i<
N; i++)
958 for (
register int32_t i=0; i<
N; i++)
960 register int32_t j, num=trans_list_backward_cnt[i] ;
962 for (j=0; j<num; j++)
964 int32_t jj = trans_list_backward[i][j] ;
970 if (!BETA_CACHE(dimension).table)
985 register int32_t j, num=trans_list_backward_cnt[state] ;
987 for (j=0; j<num; j++)
989 int32_t jj = trans_list_backward[state][j] ;
996 if (BETA_CACHE(dimension).table)
999 for (
register int32_t j=0; j<
N; j++)
1001 BETA_CACHE(dimension).sum=sum;
1002 BETA_CACHE(dimension).dimension=dimension;
1003 BETA_CACHE(dimension).updated=
true;
1005 if (wanted_time<p_observations->get_vector_length(dimension))
1006 return BETA_CACHE(dimension).table[wanted_time*N+state];
1008 return BETA_CACHE(dimension).sum;
1013 for (
register int32_t j=0; j<
N; j++)
1023 int32_t time, int32_t state, int32_t dimension)
1028 int32_t wanted_time=time;
1031 forward(time, state, dimension);
1033 if (BETA_CACHE(dimension).table)
1048 return get_q(state);
1052 for (
register int32_t i=0; i<
N; i++)
1058 for (
register int32_t i=0; i<
N; i++)
1060 register int32_t j ;
1061 #ifdef USE_LOGSUMARRAY
1062 for (j=0; j<(N>>1); j++)
1068 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
1070 beta_new[i]=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
1071 #else //USE_LOGSUMARRAY
1077 #endif //USE_LOGSUMARRAY
1080 if (!BETA_CACHE(dimension).table)
1095 register int32_t j ;
1096 #ifdef USE_LOGSUMARRAY
1097 for (j=0; j<(N>>1); j++)
1103 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
1105 return CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
1106 #else //USE_LOGSUMARRAY
1112 #endif //USE_LOGSUMARRAY
1116 if (BETA_CACHE(dimension).table)
1118 #ifdef USE_LOGSUMARRAY//AAA
1119 for (int32_t j=0; j<(N>>1); j++)
1124 CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
1126 BETA_CACHE(dimension).sum=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
1127 #else //USE_LOGSUMARRAY
1129 for (
register int32_t j=0; j<
N; j++)
1131 BETA_CACHE(dimension).sum=sum;
1132 #endif //USE_LOGSUMARRAY
1133 BETA_CACHE(dimension).dimension=dimension;
1134 BETA_CACHE(dimension).updated=
true;
1136 if (wanted_time<p_observations->get_vector_length(dimension))
1137 return BETA_CACHE(dimension).table[wanted_time*N+state];
1139 return BETA_CACHE(dimension).sum;
1144 for (
register int32_t j=0; j<
N; j++)
1163 SG_INFO(
"computing full viterbi likelihood\n")
1175 if (!STATES_PER_OBSERVATION_PSI(dimension))
1181 if (PATH_PROB_UPDATED(dimension) && dimension==PATH_PROB_DIMENSION(dimension))
1186 register float64_t* delta_new= ARRAYN1(dimension);
1189 for (
register int32_t i=0; i<
N; i++)
1196 #ifdef USE_PATHDEBUG
1203 register int32_t NN=
N ;
1204 for (
register int32_t j=0; j<NN; j++)
1207 register float64_t maxj=delta[0] + matrix_a[0];
1208 register int32_t argmax=0;
1210 for (
register int32_t i=1; i<NN; i++)
1212 register float64_t temp = delta[i] + matrix_a[i];
1221 if ((!
model) || (
model->get_fix_pos_state(t,j,NN)!=Model::FIX_DISALLOWED))
1228 set_psi(t, j, argmax, dimension);
1231 #ifdef USE_PATHDEBUG
1233 for (int32_t jj=0; jj<
N; jj++)
1234 if (delta_new[jj]>best)
1235 best=delta_new[jj] ;
1239 SG_DEBUG(
"worst case at %i: %e:%e\n", t, best, worst)
1252 register int32_t argmax=0;
1254 for (
register int32_t i=1; i<
N; i++)
1272 PATH(dimension)[t-1]=
get_psi(t, PATH(dimension)[t], dimension);
1275 PATH_PROB_UPDATED(dimension)=
true;
1276 PATH_PROB_DIMENSION(dimension)=dimension;
1281 #ifndef USE_HMMPARALLEL
1300 SG_INFO(
"computing full model probablity\n")
1303 for (int32_t cpu=0; cpu<
parallel->get_num_threads(); cpu++)
1305 params[cpu].hmm=this ;
1312 pthread_create(&threads[cpu], NULL, bw_dim_prefetch, (
void*)¶ms[cpu]);
1317 pthread_join(threads[cpu], NULL);
1323 SG_FREE(params[i].p_buf);
1324 SG_FREE(params[i].q_buf);
1325 SG_FREE(params[i].a_buf);
1326 SG_FREE(params[i].b_buf);
1336 void* CHMM::bw_dim_prefetch(
void* params)
1338 CHMM* hmm=((S_BW_THREAD_PARAM*) params)->hmm;
1339 int32_t start=((S_BW_THREAD_PARAM*) params)->dim_start;
1340 int32_t stop=((S_BW_THREAD_PARAM*) params)->dim_stop;
1341 float64_t* p_buf=((S_BW_THREAD_PARAM*) params)->p_buf;
1342 float64_t* q_buf=((S_BW_THREAD_PARAM*) params)->q_buf;
1343 float64_t* a_buf=((S_BW_THREAD_PARAM*) params)->a_buf;
1344 float64_t* b_buf=((S_BW_THREAD_PARAM*) params)->b_buf;
1345 ((S_BW_THREAD_PARAM*)params)->ret=0;
1347 for (int32_t dim=start; dim<stop; dim++)
1352 hmm->ab_buf_comp(p_buf, q_buf, a_buf, b_buf, dim) ;
1353 ((S_BW_THREAD_PARAM*)params)->ret+= modprob;
1358 void* CHMM::bw_single_dim_prefetch(
void * params)
1360 CHMM* hmm=((S_BW_THREAD_PARAM*)params)->hmm ;
1361 int32_t dim=((S_DIM_THREAD_PARAM*)params)->dim ;
1366 void* CHMM::vit_dim_prefetch(
void * params)
1368 CHMM* hmm=((S_DIM_THREAD_PARAM*)params)->hmm ;
1369 int32_t dim=((S_DIM_THREAD_PARAM*)params)->dim ;
1370 ((S_DIM_THREAD_PARAM*)params)->prob_sum = hmm->
best_path(dim);
1374 #endif //USE_HMMPARALLEL
1377 #ifdef USE_HMMPARALLEL
1379 void CHMM::ab_buf_comp(
1405 a_buf[N*i+j]=a_sum-dimmodprob ;
1419 b_buf[M*i+j]=b_sum-dimmodprob ;
1457 pthread_t *threads=SG_MALLOC(pthread_t, num_threads);
1458 S_BW_THREAD_PARAM *params=SG_MALLOC(S_BW_THREAD_PARAM, num_threads);
1463 for (cpu=0; cpu<num_threads; cpu++)
1465 params[cpu].p_buf=SG_MALLOC(
float64_t, N);
1466 params[cpu].q_buf=SG_MALLOC(
float64_t, N);
1467 params[cpu].a_buf=SG_MALLOC(
float64_t, N*N);
1468 params[cpu].b_buf=SG_MALLOC(
float64_t, N*M);
1470 params[cpu].hmm=hmm;
1478 params[cpu].dim_start=start;
1479 params[cpu].dim_stop=stop;
1481 pthread_create(&threads[cpu], NULL, bw_dim_prefetch, ¶ms[cpu]);
1484 for (cpu=0; cpu<num_threads; cpu++)
1486 pthread_join(threads[cpu], NULL);
1503 fullmodprob+=params[cpu].ret;
1507 for (cpu=0; cpu<num_threads; cpu++)
1509 SG_FREE(params[cpu].p_buf);
1510 SG_FREE(params[cpu].q_buf);
1511 SG_FREE(params[cpu].a_buf);
1512 SG_FREE(params[cpu].b_buf);
1527 #else // USE_HMMPARALLEL
1566 fullmodprob+=dimmodprob ;
1574 int32_t num = trans_list_backward_cnt[i] ;
1577 for (j=0; j<num; j++)
1579 int32_t jj = trans_list_backward[i][j] ;
1652 fullmodprob+=dimmodprob ;
1697 #endif // USE_HMMPARALLEL
1734 fullmodprob+=dimmodprob ;
1742 int32_t num = trans_list_backward_cnt[i] ;
1744 for (j=0; j<num; j++)
1746 int32_t jj = trans_list_backward[i][j] ;
1773 int32_t i,j,old_i,k,t,dim;
1807 #ifdef USE_HMMPARALLEL
1809 pthread_t *threads=SG_MALLOC(pthread_t, num_threads);
1810 S_DIM_THREAD_PARAM *params=SG_MALLOC(S_DIM_THREAD_PARAM, num_threads);
1819 #ifdef USE_HMMPARALLEL
1820 if (dim%num_threads==0)
1822 for (i=0; i<num_threads; i++)
1824 if (dim+i<p_observations->get_num_vectors())
1826 params[i].hmm=estimate ;
1827 params[i].dim=dim+i ;
1828 pthread_create(&threads[i], NULL, bw_single_dim_prefetch, (
void*)¶ms[i]) ;
1831 for (i=0; i<num_threads; i++)
1833 if (dim+i<p_observations->get_num_vectors())
1835 pthread_join(threads[i], NULL);
1836 dimmodprob = params[i].prob_sum;
1842 #endif // USE_HMMPARALLEL
1845 fullmodprob+= dimmodprob;
1912 #ifdef USE_HMMPARALLEL
1971 #ifdef USE_HMMPARALLEL
1973 pthread_t *threads=SG_MALLOC(pthread_t, num_threads);
1974 S_DIM_THREAD_PARAM *params=SG_MALLOC(S_DIM_THREAD_PARAM, num_threads);
1983 #ifdef USE_HMMPARALLEL
1984 if (dim%num_threads==0)
1986 for (i=0; i<num_threads; i++)
1988 if (dim+i<p_observations->get_num_vectors())
1990 params[i].hmm=estimate ;
1991 params[i].dim=dim+i ;
1992 pthread_create(&threads[i], NULL, vit_dim_prefetch, (
void*)¶ms[i]) ;
1995 for (i=0; i<num_threads; i++)
1997 if (dim+i<p_observations->get_num_vectors())
1999 pthread_join(threads[i], NULL);
2000 allpatprob += params[i].prob_sum;
2007 #endif // USE_HMMPARALLEL
2012 set_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1],
get_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1])+1);
2018 P[estimate->PATH(dim)[0]]++;
2022 #ifdef USE_HMMPARALLEL
2059 set_p(i, log(P[i]/sum));
2067 set_q(i, log(Q[i]/sum));
2096 #ifdef USE_HMMPARALLEL
2098 pthread_t *threads=SG_MALLOC(pthread_t, num_threads);
2099 S_DIM_THREAD_PARAM *params=SG_MALLOC(S_DIM_THREAD_PARAM, num_threads);
2106 #ifdef USE_HMMPARALLEL
2107 if (dim%num_threads==0)
2109 for (i=0; i<num_threads; i++)
2111 if (dim+i<p_observations->get_num_vectors())
2113 params[i].hmm=estimate ;
2114 params[i].dim=dim+i ;
2115 pthread_create(&threads[i], NULL, vit_dim_prefetch, (
void*)¶ms[i]) ;
2118 for (i=0; i<num_threads; i++)
2120 if (dim+i<p_observations->get_num_vectors())
2122 pthread_join(threads[i], NULL);
2123 allpatprob += params[i].prob_sum;
2127 #else // USE_HMMPARALLEL
2130 #endif // USE_HMMPARALLEL
2136 set_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1],
get_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1])+1);
2142 P[estimate->PATH(dim)[0]]++;
2146 #ifdef USE_HMMPARALLEL
2262 SG_INFO(
"log(Pr[O|model])=%e, #states: %i, #observationssymbols: %i, #observations: %ix%i\n",
2269 SG_INFO(
"\ntransition matrix\n")
2282 if (fabs(checksum)>1e-5)
2283 SG_DEBUG(
" checksum % E ******* \n",checksum)
2285 SG_DEBUG(
" checksum % E\n",checksum)
2289 SG_INFO(
"\ndistribution of start states\n")
2298 if (fabs(checksum)>1e-5)
2299 SG_DEBUG(
" checksum % E ******* \n",checksum)
2301 SG_DEBUG(
" checksum=% E\n", checksum)
2304 SG_INFO(
"\ndistribution of terminal states\n")
2313 if (fabs(checksum)>1e-5)
2314 SG_DEBUG(
" checksum % E ******* \n",checksum)
2316 SG_DEBUG(
" checksum=% E\n", checksum)
2319 SG_INFO(
"\ndistribution of observations given the state\n")
2330 if (fabs(checksum)>1e-5)
2331 SG_DEBUG(
" checksum % E ******* \n",checksum)
2333 SG_DEBUG(
" checksum % E\n",checksum)
2347 SG_INFO(
"log(Pr[O|model])=%e, #states: %i, #observationssymbols: %i, #observations: %ix%i\n",
2354 SG_INFO(
"\ntransition matrix\n")
2372 SG_INFO(
"\n\ndistribution of observations given the state\n")
2559 SG_FREE(R); R=NULL ;
2588 SG_FREE(R); R=NULL ;
2723 if (mem_initialized)
2725 if (trans_list_forward_cnt)
2726 SG_FREE(trans_list_forward_cnt);
2727 trans_list_forward_cnt=NULL ;
2728 if (trans_list_backward_cnt)
2729 SG_FREE(trans_list_backward_cnt);
2730 trans_list_backward_cnt=NULL ;
2731 if (trans_list_forward)
2733 for (int32_t i=0; i<trans_list_len; i++)
2734 if (trans_list_forward[i])
2735 SG_FREE(trans_list_forward[i]);
2736 SG_FREE(trans_list_forward);
2737 trans_list_forward=NULL ;
2739 if (trans_list_backward)
2741 for (int32_t i=0; i<trans_list_len; i++)
2742 if (trans_list_backward[i])
2743 SG_FREE(trans_list_backward[i]);
2744 SG_FREE(trans_list_backward);
2745 trans_list_backward = NULL ;
2748 trans_list_len =
N ;
2749 trans_list_forward = SG_MALLOC(
T_STATES*, N);
2750 trans_list_forward_cnt = SG_MALLOC(
T_STATES, N);
2752 for (int32_t j=0; j<
N; j++)
2754 trans_list_forward_cnt[j]= 0 ;
2755 trans_list_forward[j]= SG_MALLOC(
T_STATES, N);
2756 for (int32_t i=0; i<
N; i++)
2759 trans_list_forward[j][trans_list_forward_cnt[j]]=i ;
2760 trans_list_forward_cnt[j]++ ;
2764 trans_list_backward = SG_MALLOC(
T_STATES*, N);
2765 trans_list_backward_cnt = SG_MALLOC(
T_STATES, N);
2767 for (int32_t i=0; i<
N; i++)
2769 trans_list_backward_cnt[i]= 0 ;
2770 trans_list_backward[i]= SG_MALLOC(
T_STATES, N);
2771 for (int32_t j=0; j<
N; j++)
2774 trans_list_backward[i][trans_list_backward_cnt[i]]=j ;
2775 trans_list_backward_cnt[i]++ ;
2785 #ifdef USE_HMMPARALLEL_STRUCTURES
2795 #else // USE_HMMPARALLEL_STRUCTURES
2801 #endif // USE_HMMPARALLEL_STRUCTURES
2807 while (((value=fgetc(file)) != EOF) && (value!=
'['))
2814 error(
line,
"expected \"[\" in input file");
2816 while (((value=fgetc(file)) != EOF) && (isspace(value)))
2822 ungetc(value, file);
2828 while (((value=fgetc(file)) != EOF) && (value!=
']'))
2835 error(
line,
"expected \"]\" in input file");
2841 while (((value=fgetc(file)) != EOF) && (value!=
',') && (value!=
';') && (value!=
']'))
2848 ungetc(value, file);
2849 SG_ERROR(
"found ']' instead of ';' or ','\n")
2854 error(
line,
"expected \";\" or \",\" in input file");
2856 while (((value=fgetc(file)) != EOF) && (isspace(value)))
2861 ungetc(value, file);
2869 while (((value=fgetc(file)) != EOF) &&
2870 !isdigit(value) && (value!=
'A')
2871 && (value!=
'C') && (value!=
'G') && (value!=
'T')
2872 && (value!=
'N') && (value!=
'n')
2873 && (value!=
'.') && (value!=
'-') && (value!=
'e') && (value!=
']'))
2880 ungetc(value,file) ;
2904 while (((value=fgetc(file)) != EOF) &&
2905 (isdigit(value) || (value==
'.') || (value==
'-') || (value==
'e')
2906 || (value==
'A') || (value==
'C') || (value==
'G')|| (value==
'T')
2907 || (value==
'N') || (value==
'n')) && (i<length))
2923 case '1':
case '2':
case'3':
case '4':
case'5':
2924 case '6':
case '7':
case'8':
case '9':
case '0': break ;
2925 case '.':
case 'e':
case '-': break ;
2927 SG_ERROR(
"found crap: %i %c (pos:%li)\n",i,value,ftell(file))
2931 ungetc(value, file);
2934 return (i<=length) && (i>0);
2976 int32_t received_params=0;
2989 int32_t value=fgetc(file);
3001 if (received_params &
GOTN)
3002 error(
line,
"in model file: \"p double defined\"");
3006 else if (value==
'M')
3008 if (received_params &
GOTM)
3009 error(
line,
"in model file: \"p double defined\"");
3013 else if (value==
'%')
3020 if (received_params &
GOTp)
3021 error(
line,
"in model file: \"p double defined\"");
3027 if (received_params &
GOTq)
3028 error(
line,
"in model file: \"q double defined\"");
3032 else if (value==
'a')
3034 if (received_params &
GOTa)
3035 error(
line,
"in model file: \"a double defined\"");
3039 else if (value==
'b')
3041 if (received_params &
GOTb)
3042 error(
line,
"in model file: \"b double defined\"");
3046 else if (value==
'%')
3056 this->N= atoi(buffer);
3057 received_params|=
GOTN;
3069 this->M= atoi(buffer);
3070 received_params|=
GOTM;
3084 for (i=0; i<this->
N; i++)
3088 for (j=0; j<this->
N ; j++)
3091 if (fscanf( file,
"%le", &f ) != 1)
3107 received_params|=
GOTa;
3118 for (i=0; i<this->
N; i++)
3122 for (j=0; j<this->
M ; j++)
3125 if (fscanf( file,
"%le", &f ) != 1)
3141 received_params|=
GOTb;
3152 for (i=0; i<this->
N ; i++)
3154 if (fscanf( file,
"%le", &f ) != 1)
3164 received_params|=
GOTp;
3175 for (i=0; i<this->
N ; i++)
3177 if (fscanf( file,
"%le", &f ) != 1)
3187 received_params|=
GOTq;
3194 else if (value==
'\n')
3208 SG_WARNING(
"not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n")
3278 int32_t received_params=0x0000000;
3305 int32_t value=fgetc(file);
3318 if (fgetc(file)==
'e' && fgetc(file)==
'a' && fgetc(file)==
'r' && fgetc(file)==
'n' && fgetc(file)==
'_')
3335 error(
line,
"a,b,p or q expected in train definition file");
3339 else if (value==
'c')
3341 if (fgetc(file)==
'o' && fgetc(file)==
'n' && fgetc(file)==
's'
3342 && fgetc(file)==
't' && fgetc(file)==
'_')
3359 error(
line,
"a,b,p or q expected in train definition file");
3363 else if (value==
'%')
3367 else if (value==EOF)
3376 bool finished=
false;
3380 SG_DEBUG(
"\nlearn for transition matrix: ")
3396 SG_ERROR(
"invalid value for learn_a(%i,0): %i\n",i/2,(
int)value)
3414 SG_ERROR(
"invalid value for learn_a(%i,1): %i\n",i/2-1,(
int)value)
3439 bool finished=
false;
3443 SG_DEBUG(
"\nlearn for emission matrix: ")
3451 for (int32_t j=0; j<2; j++)
3467 SG_ERROR(
"invalid value for learn_b(%i,0): %i\n",i/2,(
int)value)
3483 SG_ERROR(
"invalid value for learn_b(%i,1): %i\n",i/2-1,(
int)value)
3487 SG_DEBUG(
"%i Entries",(
int)(i/2-1))
3502 bool finished=
false;
3521 SG_ERROR(
"invalid value for learn_p(%i): %i\n",i-1,(
int)value)
3546 bool finished=
false;
3550 SG_DEBUG(
"\nlearn terminal states: ")
3564 SG_ERROR(
"invalid value for learn_q(%i): %i\n",i-1,(
int)value)
3589 bool finished=
false;
3594 SG_DEBUG(
"\nconst for transition matrix: \n")
3596 SG_DEBUG(
"\nconst for transition matrix: ")
3615 SG_ERROR(
"invalid value for const_a(%i,0): %i\n",i/2,(
int)value)
3634 SG_ERROR(
"invalid value for const_a(%i,1): %i\n",i/2-1,(
int)value)
3651 if ((dvalue>1.0) || (dvalue<0.0))
3652 SG_ERROR(
"invalid value for const_a_val(%i): %e\n",(
int)i/2-1,dvalue)
3681 bool finished=
false;
3686 SG_DEBUG(
"\nconst for emission matrix: \n")
3688 SG_DEBUG(
"\nconst for emission matrix: ")
3694 for (int32_t j=0; j<3; j++)
3711 SG_ERROR(
"invalid value for const_b(%i,0): %i\n",i/2-1,(
int)value)
3722 if ((dvalue>1.0) || (dvalue<0.0))
3723 SG_ERROR(
"invalid value for const_b_val(%i,1): %e\n",i/2-1,dvalue)
3747 SG_ERROR(
"invalid value for const_b(%i,1): %i\n",i/2-1, combine)
3749 if (verbose && !finished)
3770 bool finished=
false;
3775 SG_DEBUG(
"\nconst for start states: \n")
3777 SG_DEBUG(
"\nconst for start states: ")
3795 SG_ERROR(
"invalid value for const_p(%i): %i\n",i,(
int)value)
3813 if ((dvalue>1) || (dvalue<0))
3814 SG_ERROR(
"invalid value for const_p_val(%i): %e\n",i,dvalue)
3844 bool finished=
false;
3847 SG_DEBUG(
"\nconst for terminal states: \n")
3849 SG_DEBUG(
"\nconst for terminal states: ")
3867 SG_ERROR(
"invalid value for const_q(%i): %i\n",i,(
int)value)
3884 if ((dvalue>1) || (dvalue<0))
3885 SG_ERROR(
"invalid value for const_q_val(%i): %e\n",i,(
double) dvalue)
3913 else if (value==
'\n')
3985 fprintf(file,
"%s",
"% HMM - specification\n% N - number of states\n% M - number of observation_tokens\n% a is state_transition_matrix\n% size(a)= [N,N]\n%\n% b is observation_per_state_matrix\n% size(b)= [N,M]\n%\n% p is initial distribution\n% size(p)= [1, N]\n\n% q is distribution of end states\n% size(q)= [1, N]\n");
3986 fprintf(file,
"N=%d;\n",N);
3987 fprintf(file,
"M=%d;\n",M);
3989 fprintf(file,
"p=[");
3994 fprintf(file,
"%e,", (
double)
get_p(i));
3996 fprintf(file,
"%f,", NAN_REPLACEMENT);
4000 fprintf(file,
"%e", (
double)
get_p(i));
4002 fprintf(file,
"%f", NAN_REPLACEMENT);
4006 fprintf(file,
"];\n\nq=[");
4011 fprintf(file,
"%e,", (
double)
get_q(i));
4013 fprintf(file,
"%f,", NAN_REPLACEMENT);
4017 fprintf(file,
"%e", (
double)
get_q(i));
4019 fprintf(file,
"%f", NAN_REPLACEMENT);
4022 fprintf(file,
"];\n\na=[");
4026 fprintf(file,
"\t[");
4032 fprintf(file,
"%e,", (
double)
get_a(i,j));
4034 fprintf(file,
"%f,", NAN_REPLACEMENT);
4038 fprintf(file,
"%e];\n", (
double)
get_a(i,j));
4040 fprintf(file,
"%f];\n", NAN_REPLACEMENT);
4045 fprintf(file,
" ];\n\nb=[");
4049 fprintf(file,
"\t[");
4055 fprintf(file,
"%e,", (
double)
get_b(i,j));
4057 fprintf(file,
"%f,", NAN_REPLACEMENT);
4061 fprintf(file,
"%e];\n", (
double)
get_b(i,j));
4063 fprintf(file,
"%f];\n", NAN_REPLACEMENT);
4067 result= (fprintf(file,
" ];\n") == 5);
4081 result[i]=PATH(dim)[i];
4097 fprintf(file,
"%i. path probability:%e\nstate sequence:\n", dim, prob);
4099 fprintf(file,
"%d ", PATH(dim)[i]);
4101 fprintf(file,
"\n\n") ;
4119 fwrite(&prob,
sizeof(
float32_t), 1, file);
4133 fprintf(file,
"%% likelihood of model per observation\n%% P[O|model]=[ P[O|model]_1 P[O|model]_2 ... P[O|model]_dim ]\n%%\n");
4135 fprintf(file,
"P=[");
4146 #define FLOATWRITE(file, value) { float32_t rrr=float32_t(value); fwrite(&rrr, sizeof(float32_t), 1, file); num_floats++;}
4150 int32_t i,j,q, num_floats=0 ;
4162 SG_INFO(
"wrote %i parameters for p\n",N)
4166 SG_INFO(
"wrote %i parameters for q\n",N)
4172 SG_INFO(
"wrote %i parameters for a\n",N*N)
4177 SG_INFO(
"wrote %i parameters for b\n",N*M)
4196 int32_t num_p, num_q, num_a, num_b ;
4204 SG_INFO(
"wrote %i parameters for p\n",num_p)
4209 SG_INFO(
"wrote %i parameters for q\n",num_q)
4221 SG_INFO(
"wrote %i parameters for a\n",num_a)
4232 SG_INFO(
"wrote %i parameters for b\n",num_b)
4256 fprintf(logfile,
"%% lambda denotes the model\n%% O denotes the observation sequence\n%% Q denotes the path\n%% \n%% calculating derivatives of P[O,Q|lambda]=p_{Q1}b_{Q1}(O_1}*a_{Q1}{Q2}b_{Q2}(O2)*...*q_{T-1}{T}b_{QT}(O_T}q_{Q_T} against p,q,a,b\n%%\n");
4257 fprintf(logfile,
"%% dPr[...]=[ [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
4258 fprintf(logfile,
"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
4259 fprintf(logfile,
"%% ............................. \n");
4260 fprintf(logfile,
"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_MM]\n");
4261 fprintf(logfile,
"%% ];\n%%\n\ndPr(log()) = [\n");
4270 fprintf(logfile,
"[ ");
4288 fseek(logfile,ftell(logfile)-1,SEEK_SET);
4289 fprintf(logfile,
" ];\n");
4292 fprintf(logfile,
"];");
4302 int32_t num_floats=0 ;
4306 SG_WARNING(
"No definitions loaded -- writing derivatives of all weights\n")
4308 SG_INFO(
"writing derivatives of changed weights only\n")
4384 int32_t num_floats=0 ;
4387 SG_WARNING(
"No definitions loaded -- writing derivatives of all weights\n")
4389 SG_INFO(
"writing derivatives of changed weights only\n")
4391 #ifdef USE_HMMPARALLEL
4393 pthread_t *threads=SG_MALLOC(pthread_t, num_threads);
4394 S_DIM_THREAD_PARAM *params=SG_MALLOC(S_DIM_THREAD_PARAM, num_threads);
4408 #ifdef USE_HMMPARALLEL
4409 if (dim%num_threads==0)
4411 for (i=0; i<num_threads; i++)
4413 if (dim+i<p_observations->get_num_vectors())
4415 params[i].hmm=this ;
4416 params[i].dim=dim+i ;
4417 pthread_create(&threads[i], NULL, bw_dim_prefetch, (
void*)¶ms[i]) ;
4421 for (i=0; i<num_threads; i++)
4423 if (dim+i<p_observations->get_num_vectors())
4424 pthread_join(threads[i], NULL);
4454 SG_INFO(
"Number of parameters (including posterior prob.): %i\n", num_floats)
4485 SG_INFO(
"Number of parameters (including posterior prob.): %i\n", num_floats)
4491 #ifdef USE_HMMPARALLEL
4509 fprintf(file,
"%% lambda denotes the model\n%% O denotes the observation sequence\n%% Q denotes the path\n%%\n%% calculating derivatives of P[O|lambda]=sum_{all Q}p_{Q1}b_{Q1}(O_1}*a_{Q1}{Q2}b_{Q2}(O2)*...*q_{T-1}{T}b_{QT}(O_T}q_{Q_T} against p,q,a,b\n%%\n");
4510 fprintf(file,
"%% dPr[...]=[ [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
4511 fprintf(file,
"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
4512 fprintf(file,
"%% ............................. \n");
4513 fprintf(file,
"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_MM]\n");
4514 fprintf(file,
"%% ];\n%%\n\nlog(dPr) = [\n");
4519 fprintf(file,
"[ ");
4537 fseek(file,ftell(file)-1,SEEK_SET);
4538 fprintf(file,
" ];\n");
4542 fprintf(file,
"];");
4588 for (int32_t j=0; j<
M; j++)
4592 set_b(i,j, log(exp(old_b)-delta)) ;
4596 set_b(i,j, log(exp(old_b)+delta)) ;
4600 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
4610 SG_INFO(
"deriv_calc[%i]=%e\n",dim,deriv_calc)
4613 SG_ERROR(
"b(%i,%i)=%e db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j,exp(old_b),i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc)
4630 for (int32_t j=0; j<
N; j++)
4634 set_a(i,j, log(exp(old_a)-delta)) ;
4638 set_a(i,j, log(exp(old_a)+delta)) ;
4642 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
4648 SG_DEBUG(
"da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc)
4654 for (int32_t j=0; j<
M; j++)
4658 set_b(i,j, log(exp(old_b)-delta)) ;
4662 set_b(i,j, log(exp(old_b)+delta)) ;
4666 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
4672 SG_DEBUG(
"db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/(deriv_calc))
4681 set_p(i, log(exp(old_p)-delta)) ;
4685 set_p(i, log(exp(old_p)+delta)) ;
4688 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
4695 SG_DEBUG(
"dp(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc)
4701 set_q(i, log(exp(old_q)-delta)) ;
4705 set_q(i, log(exp(old_q)+delta)) ;
4709 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
4716 SG_DEBUG(
"dq(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc)
4724 bool CHMM::check_path_derivatives()
4735 for (int32_t j=0; j<
N; j++)
4739 set_a(i,j, log(exp(old_a)-delta)) ;
4743 set_a(i,j, log(exp(old_a)+delta)) ;
4747 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
4753 SG_DEBUG(
"da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc)
4758 for (int32_t j=0; j<
M; j++)
4762 set_b(i,j, log(exp(old_b)-delta)) ;
4766 set_b(i,j, log(exp(old_b)+delta)) ;
4770 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
4776 SG_DEBUG(
"db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/(deriv_calc))
4784 set_p(i, log(exp(old_p)-delta)) ;
4788 set_p(i, log(exp(old_p)+delta)) ;
4791 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
4798 SG_DEBUG(
"dp(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc)
4804 set_q(i, log(exp(old_q)-delta)) ;
4808 set_q(i, log(exp(old_q)+delta)) ;
4812 float64_t deriv = (prob_new-prob_old)/(2*delta) ;
4819 SG_DEBUG(
"dq(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc)
4824 #endif // USE_HMMDEBUG
4865 const int32_t num_states=app_model->
get_N();
4879 for (i=0; i<N+num_states; i++)
4884 for (j=0; j<N+num_states; j++)
4901 n_a[(N+num_states)*j+i]=
get_a(i,j);
4905 n_b[M*i+j]=
get_b(i,j);
4910 for (i=0; i<app_model->
get_N(); i++)
4912 n_q[i+
N]=app_model->
get_q(i);
4914 for (j=0; j<app_model->
get_N(); j++)
4915 n_a[(N+num_states)*(j+
N)+(i+N)]=app_model->
get_a(i,j);
4916 for (j=0; j<app_model->
get_M(); j++)
4917 n_b[M*(i+N)+j]=app_model->
get_b(i,j);
4924 for (j=N; j<N+num_states; j++)
4944 SG_WARNING(
"not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n")
4949 SG_ERROR(
"number of observations is different for append model, doing nothing!\n")
4957 const int32_t num_states=app_model->
get_N()+2;
4969 for (i=0; i<N+num_states; i++)
4974 for (j=0; j<N+num_states; j++)
4991 n_a[(N+num_states)*j+i]=
get_a(i,j);
4995 n_b[M*i+j]=
get_b(i,j);
5000 for (i=0; i<app_model->
get_N(); i++)
5002 n_q[i+N+2]=app_model->
get_q(i);
5004 for (j=0; j<app_model->
get_N(); j++)
5005 n_a[(N+num_states)*(j+N+2)+(i+N+2)]=app_model->
get_a(i,j);
5006 for (j=0; j<app_model->
get_M(); j++)
5007 n_b[M*(i+N+2)+j]=app_model->
get_b(i,j);
5015 n_b[M*N+i]=cur_out[i];
5016 n_b[M*(N+1)+i]=app_out[i];
5020 for (i=0; i<N+num_states; i++)
5024 n_a[(N+num_states)*i + N]=0;
5029 n_a[(N+num_states)*N+i]=
get_q(i);
5034 n_a[(N+num_states)*i+(N+1)]=app_model->
get_p(i-(N+2));
5053 SG_WARNING(
"not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n")
5082 n_a[(N+num_states)*j+i]=
get_a(i,j);
5085 n_b[M*i+j]=
get_b(i,j);
5088 for (i=N; i<N+num_states; i++)
5096 for (j=0; j<N+num_states; j++)
5124 for (int32_t i=0; i<
N; i++)
5128 if (exp(
get_p(i)) < value)
5131 if (exp(
get_q(i)) < value)
5136 if (exp(
get_a(i,j)) < value)
5142 if (exp(
get_b(i,j)) < value)
5155 int32_t* hist=SG_MALLOC(int32_t, histsize);
5156 int32_t* startendhist=SG_MALLOC(int32_t,
get_N());
5161 for (i=0; i<histsize; i++)
5164 for (i=0; i<
get_N(); i++)
5176 startendhist[(
get_N()-len)]++;
5185 for (i=0; i<
get_N()-1; i++)
5188 for (i=0; i<
get_N(); i++)
5191 for (i=0;i<
get_N();i++)
5193 for (int32_t j=0; j<
get_N(); j++)
5212 hist[i*
get_M() + *obs++]++;
5214 startendhist[len-1]++;
5220 for (i=1; i<
get_N(); i++)
5223 for (i=0; i<
get_N(); i++)
5228 for (i=0;i<
get_N();i++)
5230 total-= startendhist[i] ;
5232 for (int32_t j=0; j<
get_N(); j++)
5243 for (i=0;i<
get_N();i++)
5245 for (int32_t j=0; j<
get_M(); j++)
5258 SG_FREE(startendhist);
5279 #ifdef USE_HMMPARALLEL_STRUCTURES
5303 #endif //USE_HMMPARALLEL_STRUCTURES
5335 #ifdef USE_HMMPARALLEL_STRUCTURES
5359 #endif //USE_HMMPARALLEL_STRUCTURES
5368 #ifdef USE_HMMPARALLEL_STRUCTURES
5381 #endif //USE_HMMPARALLEL_STRUCTURES
5388 #ifdef USE_HMMPARALLEL_STRUCTURES
5389 SG_INFO(
"allocating mem for path-table of size %.2f Megabytes (%d*%d) each:\n", ((
float32_t)max_T)*N*
sizeof(
T_STATES)/(1024*1024), max_T, N)
5393 SG_DEBUG(
"path_table[%i] successfully allocated\n",i)
5395 SG_ERROR(
"failed allocating memory for path_table[%i].\n",i)
5398 #else // no USE_HMMPARALLEL_STRUCTURES
5399 SG_INFO(
"allocating mem of size %.2f Megabytes (%d*%d) for path-table ....", ((
float32_t)max_T)*N*
sizeof(
T_STATES)/(1024*1024), max_T, N)
5406 #endif // USE_HMMPARALLEL_STRUCTURES
5410 #ifdef USE_HMMPARALLEL_STRUCTURES
5414 SG_DEBUG(
"alpha_cache[%i].table successfully allocated\n",i)
5416 SG_ERROR(
"allocation of alpha_cache[%i].table failed\n",i)
5419 SG_DEBUG(
"beta_cache[%i].table successfully allocated\n",i)
5421 SG_ERROR(
"allocation of beta_cache[%i].table failed\n",i)
5423 #else // USE_HMMPARALLEL_STRUCTURES
5425 SG_DEBUG(
"alpha_cache.table successfully allocated\n")
5427 SG_ERROR(
"allocation of alpha_cache.table failed\n")
5430 SG_DEBUG(
"beta_cache.table successfully allocated\n")
5432 SG_ERROR(
"allocation of beta_cache.table failed\n")
5434 #endif // USE_HMMPARALLEL_STRUCTURES
5435 #else // USE_HMMCACHE
5436 #ifdef USE_HMMPARALLEL_STRUCTURES
5442 #else //USE_HMMPARALLEL_STRUCTURES
5445 #endif //USE_HMMPARALLEL_STRUCTURES
5446 #endif //USE_HMMCACHE
5457 ( sequence_number<0 || sequence_number < p_observations->get_num_vectors()))
5459 int32_t min_sequence=sequence_number;
5460 int32_t max_sequence=sequence_number;
5462 if (sequence_number<0)
5466 SG_INFO(
"numseq: %d\n", max_sequence)
5469 SG_INFO(
"min_sequence: %d max_sequence: %d\n", min_sequence, max_sequence)
5470 for (sequence_number=min_sequence; sequence_number<max_sequence; sequence_number++)
5472 int32_t sequence_length=0;
5476 int32_t histsize=
get_M();
5477 int64_t* hist=SG_MALLOC(int64_t, histsize);
5480 for (i=0; i<sequence_length-window_width; i++)
5482 for (j=0; j<histsize; j++)
5485 uint16_t* ptr=&obs[i];
5486 for (j=0; j<window_width; j++)
5492 for (j=0; j<
get_M(); j++)
5497 perm_entropy+=p*log(p);
5516 else if (num_param<2*N)
5518 else if (num_param<N*(N+2))
5520 int32_t k=num_param-2*
N;
5525 else if (num_param<N*(N+2+M))
5527 int32_t k=num_param-N*(N+2);
5540 return get_p(num_param);
5541 else if (num_param<2*N)
5542 return get_q(num_param-N);
5543 else if (num_param<N*(N+2))
5545 else if (num_param<N*(N+2+M))
5607 if (prob_max<prob_train)
5608 prob_max=prob_train;
5611 if (estimate ==
this)
int32_t get_learn_p(int32_t offset) const
get entry out of learn_p vector
int32_t * learn_p
start states to be learned
virtual int32_t get_max_vector_length()
SGVector< ST > get_feature_vector(int32_t num)
void set_observation_nocache(CStringFeatures< uint16_t > *obs)
float64_t * transition_matrix_a
transition matrix
bool mod_prob_updated
true if model probability is up to date
void chop(float64_t value)
set any model parameter with probability smaller than value to ZERO
float64_t backward_comp(int32_t time, int32_t state, int32_t dimension)
int32_t N
number of states
void convert_to_log()
convert model to log probabilities
float64_t backward(int32_t time, int32_t state, int32_t dimension)
inline proxies for backward pass
float64_t get_const_p_val(int32_t offset) const
get value out of const_p_val vector
static const int32_t GOTp
bool save_likelihood(FILE *file)
void close_bracket(FILE *file)
expect closing bracket
static int is_finite(double f)
checks whether a float is finite
float64_t * const_a_val
values for transitions that have constant probability
Model()
Constructor - initializes all variables/structures.
void set_const_p(int32_t offset, int32_t value)
set value in const_p vector
bool get_numbuffer(FILE *file, char *buffer, int32_t length)
put a sequence of numbers into the buffer
int32_t get_M() const
access function for number of observations M
float64_t pat_prob
probability of best path
float64_t get_const_b_val(int32_t line) const
get value out of const_b_val vector
int32_t get_num_threads() const
bool save_model(FILE *file)
void set_observations(CStringFeatures< uint16_t > *obs, CHMM *hmm=NULL)
void set_const_p_val(int32_t offset, float64_t value)
set value in const_p_val vector
static const float64_t INFTY
infinity
T_STATES * states_per_observation_psi
backtracking table for viterbi can be terrible HUGE O(T*N)
float64_t forward(int32_t time, int32_t state, int32_t dimension)
inline proxies for forward pass
float64_t * const_q_val
values for end states that have constant probability
bool all_path_prob_updated
true if path probability is up to date
#define FLOATWRITE(file, value)
float64_t epsilon
convergence criterion epsilon
void estimate_model_baum_welch_defined(CHMM *train)
static const int32_t GOTconst_p
viterbi only for defined transitions/observations
virtual int32_t get_num_vectors() const
int32_t get_const_p(int32_t offset) const
get entry out of const_p vector
baum welch only for defined transitions/observations
bool linear_train(bool right_align=false)
estimates linear model from observations.
bool save_model_bin(FILE *file)
int32_t * learn_b
emissions to be learned
float64_t * transition_matrix_A
matrix of absolute counts of transitions
static const int32_t GOTlearn_a
float64_t path_derivative_b(T_STATES i, uint16_t j, int32_t dimension)
computes d log p(lambda,best_path)/d b_ij
T_ALPHA_BETA beta_cache
cache for backward variables can be terrible HUGE O(T*N)
float64_t get_b(T_STATES line_, uint16_t column) const
bool path_prob_updated
true if path probability is up to date
bool save_likelihood_bin(FILE *file)
bool baum_welch_viterbi_train(BaumWelchViterbiType type)
static const int32_t GOTO
float64_t forward_comp_old(int32_t time, int32_t state, int32_t dimension)
float64_t * observation_matrix_B
matrix of absolute counts of observations within each state
float64_t get_A(T_STATES line_, T_STATES column) const
float64_t T_ALPHA_BETA_TABLE
type for alpha/beta caching table
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
floatmax_t get_original_num_symbols()
Base class Distribution from which all methods implementing a distribution are derived.
bool check_model_derivatives()
numerically check whether derivates were calculated right
static const int32_t GOTlearn_p
void open_bracket(FILE *file)
expect open bracket.
int32_t get_learn_q(int32_t offset) const
get entry out of learn_q vector
float64_t get_pseudo() const
returns current pseudo value
float64_t get_B(T_STATES line_, uint16_t column) const
float64_t * const_b_val
values for emissions that have constant probability
static const int32_t GOTlearn_q
virtual bool train(CFeatures *data=NULL)
bool save_model_derivatives(FILE *file)
static const float64_t ALMOST_NEG_INFTY
almost neg (log) infinity
virtual float64_t get_log_model_parameter(int32_t num_param)
void estimate_model_baum_welch_old(CHMM *train)
static const float64_t epsilon
int32_t path_prob_dimension
dimension for which path_prob was calculated
virtual ~Model()
Destructor - cleans up.
int32_t * learn_q
end states to be learned
bool load_model(FILE *file)
void estimate_model_baum_welch_trans(CHMM *train)
float64_t model_probability(int32_t dimension=-1)
inline proxy for model probability.
int32_t path_deriv_dimension
dimension for which path_deriv was calculated
int32_t * const_a
transitions that have constant probability
static const int32_t GOTb
ST get_masked_symbols(ST symbol, uint8_t mask)
floatmax_t get_num_symbols()
float64_t mod_prob
probability of model
bool check_model_derivatives_combined()
float64_t model_derivative_q(T_STATES i, int32_t dimension)
float64_t * const_p_val
values for start states that have constant probability
T_STATES get_psi(int32_t time, T_STATES state, int32_t dimension) const
void set_p(T_STATES offset, float64_t value)
bool permutation_entropy(int32_t window_width, int32_t sequence_number)
compute permutation entropy
static const int32_t GOTconst_a
float64_t * end_state_distribution_q
distribution of end-states
void add_states(int32_t num_states, float64_t default_val=0)
float64_t PSEUDO
define pseudocounts against overfitting
int32_t get_num_symbols() const
float64_t all_pat_prob
probability of best path
float64_t path_derivative_a(T_STATES i, T_STATES j, int32_t dimension)
computes d log p(lambda,best_path)/d a_ij
void set_const_q_val(int32_t offset, float64_t value)
set value in const_q_val vector
bool save_model_derivatives_bin(FILE *file)
float64_t get_q(T_STATES offset) const
int32_t * const_q
end states that have constant probability
CStringFeatures< uint16_t > * p_observations
observation matrix
bool save_path_derivatives(FILE *file)
virtual ST get_feature(int32_t vec_num, int32_t feat_num)
virtual EFeatureClass get_feature_class() const =0
void set_learn_a(int32_t offset, int32_t value)
set value in learn_a matrix
void set_learn_q(int32_t offset, int32_t value)
set value in learn_q vector
void set_A(T_STATES line_, T_STATES column, float64_t value)
CAlphabet * get_alphabet()
void set_q(T_STATES offset, float64_t value)
void set_B(T_STATES line_, uint16_t column, float64_t value)
int32_t get_const_q(int32_t offset) const
get entry out of const_q vector
int32_t iterations
convergence criterion iterations
float64_t get_const_q_val(int32_t offset) const
get value out of const_q_val vector
static const int32_t GOTq
static const int32_t GOTlearn_b
float64_t * observation_matrix_b
distribution of observations within each state
T_STATES * get_path(int32_t dim, float64_t &prob)
float64_t model_derivative_a(T_STATES i, T_STATES j, int32_t dimension)
computes log dp(lambda)/d a_ij.
static bool cancel_computations()
float64_t best_path(int32_t dimension)
int32_t get_const_a(int32_t line, int32_t column) const
get entry out of const_a matrix
float64_t model_probability_comp()
void set_const_b_val(int32_t offset, float64_t value)
set value in const_b_val vector
bool path_deriv_updated
true if path derivative is up to date
virtual float64_t get_log_derivative(int32_t num_param, int32_t num_example)
bool append_model(CHMM *append_model, float64_t *cur_out, float64_t *app_out)
void estimate_model_viterbi(CHMM *train)
int32_t M
number of observation symbols eg. ACGT -> 0123
void sort_learn_a()
sorts learn_a matrix
void set_learn_p(int32_t offset, int32_t value)
set value in learn_p vector
bool load_definitions(FILE *file, bool verbose, bool initialize=true)
void set_learn_b(int32_t offset, int32_t value)
set value in learn_b matrix
float64_t * initial_state_distribution_p
initial distribution of states
bool save_path_derivatives_bin(FILE *file)
baum welch only for specified transitions
float64_t get_a(T_STATES line_, T_STATES column) const
all of classes and functions are contained in the shogun namespace
void clear_model_defined()
initializes only parameters in learn_x with log(PSEUDO)
float64_t path_derivative_p(T_STATES i, int32_t dimension)
computes d log p(lambda,best_path)/d p_i
float64_t forward_comp(int32_t time, int32_t state, int32_t dimension)
float64_t model_derivative_p(T_STATES i, int32_t dimension)
float64_t get_p(T_STATES offset) const
int32_t * const_b
emissions that have constant probability
bool comma_or_space(FILE *file)
expect comma or space.
void estimate_model_viterbi_defined(CHMM *train)
void sort_learn_b()
sorts learn_b matrix
The class Features is the base class of all feature objects.
void init_model_defined()
static const int32_t GOTconst_b
void free_state_dependend_arrays()
free memory that depends on N
void set_const_a_val(int32_t offset, float64_t value)
set value in const_a_val vector
bool alloc_state_dependend_arrays()
allocates memory that depends on N
void set_const_a(int32_t offset, int32_t value)
set value in const_a matrix
void set_b(T_STATES line_, uint16_t column, float64_t value)
float64_t path_derivative_q(T_STATES i, int32_t dimension)
computes d log p(lambda,best_path)/d q_i
static void swap(T &a, T &b)
swap e.g. floats a and b
int32_t get_learn_a(int32_t line, int32_t column) const
get entry out of learn_a matrix
float64_t backward_comp_old(int32_t time, int32_t state, int32_t dimension)
static const int32_t GOTM
virtual ~CHMM()
Destructor - Cleanup.
void copy_model(CHMM *l)
copies the the modelparameters from l
void clear_model()
initializes model with log(PSEUDO)
int32_t get_const_b(int32_t line, int32_t column) const
get entry out of const_b matrix
void output_model_defined(bool verbose=false)
performs output_model only for the defined transitions etc
void output_model(bool verbose=false)
static const int32_t GOTconst_q
void set_psi(int32_t time, T_STATES state, T_STATES value, int32_t dimension)
void set_a(T_STATES line_, T_STATES column, float64_t value)
bool initialize(Model *model, float64_t PSEUDO, FILE *model_file=NULL)
void normalize(bool keep_dead_states=false)
normalize the model to satisfy stochasticity
static float64_t logarithmic_sum(float64_t p, float64_t q)
static const int32_t GOTN
T_STATES * path
best path (=state sequence) through model
int32_t * const_p
start states that have constant probability
void init_model_random()
init model with random values
void set_const_b(int32_t offset, int32_t value)
set value in const_b matrix
T_STATES get_N() const
access function for number of states N
void set_const_q(int32_t offset, int32_t value)
set value in const_q vector
int32_t get_learn_b(int32_t line, int32_t column) const
get entry out of learn_b matrix
virtual EFeatureType get_feature_type() const =0
void estimate_model_baum_welch(CHMM *train)
int32_t * learn_a
transitions to be learned
void error(int32_t p_line, const char *str)
parse error messages
bool save_path(FILE *file)
virtual int32_t get_vector_length(int32_t vec_num)
float64_t model_derivative_b(T_STATES i, uint16_t j, int32_t dimension)
computes log dp(lambda)/d b_ij.
float64_t get_const_a_val(int32_t line) const
get value out of const_a_val vector
T_ALPHA_BETA alpha_cache
cache for forward variables can be terrible HUGE O(T*N)
static const int32_t GOTa