diff options
author | lnetsch | 2017-10-03 14:25:18 -0500 |
---|---|---|
committer | lnetsch | 2017-10-03 14:25:18 -0500 |
commit | ccbfc7bce1bcb20f546ab4ea7ed6b6bca81bd1d4 (patch) | |
tree | 1390df1b6545759e667563818bcb07e90320984f | |
parent | 9e54929ab43cf41899a0696544efdaf2bb8a464a (diff) | |
download | tiesr-ccbfc7bce1bcb20f546ab4ea7ed6b6bca81bd1d4.tar.gz tiesr-ccbfc7bce1bcb20f546ab4ea7ed6b6bca81bd1d4.tar.xz tiesr-ccbfc7bce1bcb20f546ab4ea7ed6b6bca81bd1d4.zip |
[#1468] Add arbitrary mixture tying and mean macro sharing
git-svn-id: https://gforge.ti.com/svn/tiesr/trunk@159 469fd053-429d-4955-9e4c-93c0d7a8ff27
-rwxr-xr-x | TIesr_Tools/HTKdata.pm | 576 |
1 files changed, 558 insertions, 18 deletions
diff --git a/TIesr_Tools/HTKdata.pm b/TIesr_Tools/HTKdata.pm index 70ce3c5..5bfae48 100755 --- a/TIesr_Tools/HTKdata.pm +++ b/TIesr_Tools/HTKdata.pm | |||
@@ -5,9 +5,9 @@ | |||
5 | # | 5 | # |
6 | # Module to parse HTK MMF files. | 6 | # Module to parse HTK MMF files. |
7 | # | 7 | # |
8 | # Copyright (C) 2010 Texas Instruments Incorporated - http://www.ti.com/ | 8 | # Copyright (C) 2010,2014 Texas Instruments Incorporated - http://www.ti.com/ |
9 | # | 9 | # |
10 | # This program is free software; you can redistribute it and/or modify | 10 | # This program is free software; you can redistribute it and/or modify |
11 | # it under the terms of the GNU Lesser General Public License as | 11 | # it under the terms of the GNU Lesser General Public License as |
12 | # published by the Free Software Foundation version 2.1 of the License. | 12 | # published by the Free Software Foundation version 2.1 of the License. |
13 | # | 13 | # |
@@ -16,7 +16,7 @@ | |||
16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 | # Lesser General Public License for more details. | 17 | # Lesser General Public License for more details. |
18 | # | 18 | # |
19 | 19 | ||
20 | #------------------------------------------------------------- | 20 | #------------------------------------------------------------- |
21 | 21 | ||
22 | 22 | ||
@@ -159,13 +159,34 @@ | |||
159 | # deleted. $mrgsfile is a file that contains sets of states to merge, where | 159 | # deleted. $mrgsfile is a file that contains sets of states to merge, where |
160 | # all of the state macro names on each line of the file are merged as a set. | 160 | # all of the state macro names on each line of the file are merged as a set. |
161 | 161 | ||
162 | # $hmms->mergevar( $mrgvfile ) | 162 | # $hmms->mergemix( $mrgmixfile ) |
163 | # This function merges multiple variances. Merging is performed by | 163 | # This function merges multiple Gaussian mixture components into a single |
164 | # collecting all of the variances specified, and substituting the macro | 164 | # clustered mixture component. The mrgmixfile contains multiple lines specifying |
165 | # name of the first variance for all of the remaining variance macro names. | 165 | # the mixture components to merge. The first entry on a line is the number |
166 | # The variance output will be the average for all variances. $mrgvfile is | 166 | # of mixture macros specified on the line for merging. This is followed by |
167 | # a file that contains sets of variance macros to merge, where each line | 167 | # text strings of the macro names themselves. Following the macro names are |
168 | # contains the macro names of variances to merge into a single variance. | 168 | # the mean vector and either the variance vector or inverse covariance. The |
169 | # first macro will be used as the macro name and will replace all other macro | ||
170 | # names to be merged. The other macro names will be deleted. | ||
171 | |||
172 | # $hmms->sharemixmean( $shrmufile ) | ||
173 | # This function shares a single mean vector among multiple Gaussian mixture | ||
174 | # components. The shrmufile contains multiple lines specifying mixture | ||
175 | # components and the mean vector that they share. The first entry on a line is | ||
176 | # the number of mixture macros specified on the line which will share the mean | ||
177 | # vector. This is followed by text strings of the mixture macro names | ||
178 | # themselves. Following the macro names is the shared mean vector. The first | ||
179 | # mixture macro encountered will be used to create a mean macro using the | ||
180 | # shared mean vector. Each mixture component will have its mean macro name | ||
181 | # changed to the newly created mean macro. | ||
182 | |||
183 | # $hmms->mergevar( $mrgvfile ) | ||
184 | # This function merges multiple variances. Merging is performed by collecting | ||
185 | # all of the variances specified, and substituting the macro name of the first | ||
186 | # variance for all of the remaining variance macro names. The variance output | ||
187 | # will be the average for all variances. $mrgvfile is a file that contains | ||
188 | # sets of variance macros to merge, where each line contains the macro names | ||
189 | # of variances to merge into a single variance. | ||
169 | 190 | ||
170 | # $hmms->mergemean( $mrgmfile ) | 191 | # $hmms->mergemean( $mrgmfile ) |
171 | # This function merges multiple mean vectors. Merging is performed by | 192 | # This function merges multiple mean vectors. Merging is performed by |
@@ -175,6 +196,8 @@ | |||
175 | # a file that contains sets of mean macros to merge, where each line | 196 | # a file that contains sets of mean macros to merge, where each line |
176 | # contains the macro names of means to merge into a single mean. | 197 | # contains the macro names of means to merge into a single mean. |
177 | 198 | ||
199 | |||
200 | |||
178 | #---------------------------------------------------------------- | 201 | #---------------------------------------------------------------- |
179 | package HTKdata; | 202 | package HTKdata; |
180 | 203 | ||
@@ -1432,6 +1455,9 @@ sub checklist | |||
1432 | 1455 | ||
1433 | # Marks the usage of all substructures of an hmm. Also does checking | 1456 | # Marks the usage of all substructures of an hmm. Also does checking |
1434 | # to see if the hmm structure is consistent. | 1457 | # to see if the hmm structure is consistent. |
1458 | # | ||
1459 | # It will also mark usage for variance floor vectors with names of | ||
1460 | # the form varFloor(\d+) | ||
1435 | 1461 | ||
1436 | #-------------------------------- | 1462 | #-------------------------------- |
1437 | 1463 | ||
@@ -1545,6 +1571,16 @@ sub markusagehmm | |||
1545 | } | 1571 | } |
1546 | } | 1572 | } |
1547 | } | 1573 | } |
1574 | |||
1575 | |||
1576 | # Mark all variance floor vectors as used | ||
1577 | while( ($vname, $vref) = each %{ $this->{VARIANCE} } ) | ||
1578 | { | ||
1579 | if( $vname =~ /^varFloor(\d+)$/ ) | ||
1580 | { | ||
1581 | $vref->{USAGE}++ if( $vref->{USAGE} == 0 ); | ||
1582 | } | ||
1583 | } | ||
1548 | } | 1584 | } |
1549 | 1585 | ||
1550 | #================================================================ | 1586 | #================================================================ |
@@ -2634,10 +2670,9 @@ sub mergevar | |||
2634 | $vbref->{VECTOR}[$vix] += $vfact * $vref->{VECTOR}[$vix]; | 2670 | $vbref->{VECTOR}[$vix] += $vfact * $vref->{VECTOR}[$vix]; |
2635 | } | 2671 | } |
2636 | 2672 | ||
2673 | # remove the variance just merged with the base variance | ||
2674 | delete $this->{VARIANCE}{$vname}; | ||
2637 | } | 2675 | } |
2638 | |||
2639 | # remove the variance just merged with the base variance | ||
2640 | delete $this->{VARIANCE}{$vname}; | ||
2641 | } | 2676 | } |
2642 | 2677 | ||
2643 | close( MRG ); | 2678 | close( MRG ); |
@@ -2751,7 +2786,7 @@ sub mergemean | |||
2751 | # add this mean to the means merged with base mean | 2786 | # add this mean to the means merged with base mean |
2752 | $ubref->{MERGE}{NAME}{$uname} = ++$ubref->{MERGE}{COUNT}; | 2787 | $ubref->{MERGE}{NAME}{$uname} = ++$ubref->{MERGE}{COUNT}; |
2753 | 2788 | ||
2754 | # map this variance to its merged base variance name | 2789 | # map this mean to its merged base mean name |
2755 | $map{$uname} = $ubname; | 2790 | $map{$uname} = $ubname; |
2756 | 2791 | ||
2757 | # reference to data for mean to merge with base mean | 2792 | # reference to data for mean to merge with base mean |
@@ -2764,7 +2799,7 @@ sub mergemean | |||
2764 | die "Merge size conflict for ${ubref} and ${uref}\n"; | 2799 | die "Merge size conflict for ${ubref} and ${uref}\n"; |
2765 | } | 2800 | } |
2766 | 2801 | ||
2767 | # merge variance with base variance | 2802 | # merge mean with base mean |
2768 | 2803 | ||
2769 | $ufact = 1/( $ubref->{MERGE}{COUNT} ); | 2804 | $ufact = 1/( $ubref->{MERGE}{COUNT} ); |
2770 | 2805 | ||
@@ -2776,10 +2811,9 @@ sub mergemean | |||
2776 | $ubref->{VECTOR}[$uix] += $ufact * $uref->{VECTOR}[$uix]; | 2811 | $ubref->{VECTOR}[$uix] += $ufact * $uref->{VECTOR}[$uix]; |
2777 | } | 2812 | } |
2778 | 2813 | ||
2814 | # remove the mean just merged with the base mean | ||
2815 | delete $this->{MEAN}{$uname}; | ||
2779 | } | 2816 | } |
2780 | |||
2781 | # remove the mean just merged with the base mean | ||
2782 | delete $this->{MEAN}{$uname}; | ||
2783 | } | 2817 | } |
2784 | 2818 | ||
2785 | close( MRG ); | 2819 | close( MRG ); |
@@ -2805,3 +2839,509 @@ sub mergemean | |||
2805 | 2839 | ||
2806 | return; | 2840 | return; |
2807 | } | 2841 | } |
2842 | |||
2843 | |||
2844 | |||
#================================================================
#  mergemix
#
#  $hmms->mergemix( $mrgfile )
#
#  Merge sets of Gaussian mixture components into single clustered
#  mixture components, which in effect ties the mixtures together.
#
#  Each non-blank line of $mrgfile has the form
#
#     N  mix_1 ... mix_N  mean values  variance values
#
#  N is the number of mixture macro names that follow.  mix_1 is the
#  base mixture.  The mean values (as many as the SIZE of the mean macro
#  used by mix_1) are written into that mean macro, and the remaining
#  values (which must number exactly the SIZE of the variance macro used
#  by mix_1) are written into that variance macro.  The macros are
#  updated in place, so anything else referring to the same mean or
#  variance macro name sees the new values.  The GCONST of the base
#  mixture is removed since it is no longer valid.
#
#  Every other mixture named on the line is mapped to the base mixture
#  and its MIXPDF entry is deleted.  A name that is not in MIXPDF is
#  reported on STDERR and skipped.
#
#  After the whole file has been read, every state is updated so that
#  it refers to base mixtures, and mixtures of a state stream that have
#  become identical are joined into one entry whose PROB is the sum of
#  the joined entries.  NUMMIXES is updated to the new count.
#
#  Dies if the file can not be opened, if a base mixture does not
#  exist, if the data on a line does not match the variance size, or if
#  a line tries to merge a mixture that has already served as a base.
#
#  NOTE(review): only mixtures that carry a VARIANCE macro are handled
#  here; confirm that inverse covariance mixtures are never passed in.
#--------------------------------
sub mergemix
{
    my( $this, $mrgfile ) = @_;

    my %base;    # names of all mixtures used as a merge base
    my %map;     # mixture name => name of the base mixture replacing it

    # Lexical handle and three argument open, so that no global handle
    # is clobbered and the file name can not be taken as an open mode.
    open( my $mrg, '<', $mrgfile )
        or die "Can not open merge file $mrgfile\n";

    while( my $lin = <$mrg> )
    {
        chomp( $lin );
        my @mixdata = split( " ", $lin );

        # nothing to do for a blank line
        next unless @mixdata;

        # number of mixtures to merge, followed by the mixture names
        my $nmix    = shift( @mixdata );
        my @mixlist = splice( @mixdata, 0, $nmix );
        next unless @mixlist;

        # first mix on the line is the base mix to merge into
        my $mbname = shift( @mixlist );
        exists $this->{MIXPDF}{$mbname}
            or die "Mix pdf $mbname does not exist\n";
        my $mbref = $this->{MIXPDF}{$mbname};

        # keep track of base mixes, can not merge base mixes with each other
        $base{$mbname} = 1;

        # the base maps to itself
        $map{$mbname} = $mbname;

        # initialize merge info if base has not been used for merging yet
        unless( exists $mbref->{MERGE}{NAME}{$mbname} )
        {
            $mbref->{MERGE}{NAME}{$mbname} = 1;
            $mbref->{MERGE}{COUNT} = 1;
        }

        # split the numeric data into the mean and the variance part
        my $uref  = $this->{MEAN}{ $mbref->{MEAN} };
        my $usize = $uref->{SIZE};
        my @mean  = splice( @mixdata, 0, $usize );

        my $vref  = $this->{VARIANCE}{ $mbref->{VARIANCE} };
        my $vsize = $vref->{SIZE};

        # validate before touching either macro, so that a bad line
        # does not leave a half updated mixture behind
        die "Invalid variance size for ${mbname}\n"
            if $vsize != scalar( @mixdata );

        # put the clustered mean and variance in the base mix macros
        @{ $uref->{VECTOR} }[ 0 .. $usize - 1 ] = @mean;
        @{ $vref->{VECTOR} }[ 0 .. $vsize - 1 ] = @mixdata;

        # delete no longer valid GCONST, which is optional anyway
        delete $mbref->{GCONST};

        # all remaining mixes on the line are merged into the base mix
      MIX:
        foreach my $mname ( @mixlist )
        {
            unless( exists $this->{MIXPDF}{$mname} )
            {
                print STDERR "Mix pdf $mname does not exist, not merged with $mbname\n";
                next MIX;
            }

            if( exists $base{$mname} )
            {
                die "Attempt to merge base mix $mname into base mix $mbname\n";
            }

            if( exists $mbref->{MERGE}{NAME}{$mname} )
            {
                print STDERR "Mix $mname multiple merge for $mbname, not merged\n";
                next MIX;
            }

            # add this mix to the mixes merged with base mix
            $mbref->{MERGE}{NAME}{$mname} = ++$mbref->{MERGE}{COUNT};

            # map this mixture to its merged base mix name
            $map{$mname} = $mbname;

            # remove this no longer used mixture component macro
            delete $this->{MIXPDF}{$mname};
        }
    }

    close( $mrg );

    # Update all states to reflect the clustered mixtures and join
    # mixtures of a stream that now refer to the same mixture macro.
    # "values" is used rather than "each" so that the result does not
    # depend on the state of the hash iterator left by other code.
    my $nstream = $this->{STREAMINFO}{SIZE};

    foreach my $sref ( values %{ $this->{STATE} } )
    {
        foreach my $strm ( 0 .. $nstream - 1 )
        {
            my $nsmix = $sref->{NUMMIXES}[$strm];
            next unless $nsmix;

            # take the active mixtures out of the stream ...
            my $mixes = $sref->{STREAMMIX}[$strm];
            my @active = splice( @{$mixes}, 0, $nsmix );

            my %first;    # mixture name => first entry using that name
            my @kept;     # unique entries in their original order

            foreach my $mix ( @active )
            {
                my $mname = $mix->{MIXPDF};
                if( exists $map{$mname} )
                {
                    $mname = $map{$mname};
                    $mix->{MIXPDF} = $mname;
                }

                if( exists $first{$mname} )
                {
                    # duplicate mixture component, merge into the first
                    $first{$mname}{PROB} += $mix->{PROB};
                }
                else
                {
                    $first{$mname} = $mix;
                    push( @kept, $mix );
                }
            }

            # ... and put the unique ones back in their place
            unshift( @{$mixes}, @kept );

            # final number of unique mixture components
            $sref->{NUMMIXES}[$strm] = scalar( @kept );
        }
    }

    # clean up - remove the temporary merge information
    foreach my $mref ( values %{ $this->{MIXPDF} } )
    {
        delete $mref->{MERGE};
    }

    return;
}
3042 | |||
3043 | |||
3044 | |||
#================================================================
#  sharemixmean
#
#  $hmms->sharemixmean( $shrmufile )
#
#  Share a single mean vector among multiple Gaussian mixture
#  components.  Each non-blank line of $shrmufile has the form
#
#     N  mix_1 ... mix_N  shared mean values
#
#  N is the number of mixture macro names that follow.  mix_1 is the
#  base mixture.  The number of mean values must equal the SIZE of the
#  mean macro currently used by mix_1.  A new mean macro is created from
#  these values; its name is the name of the mean macro of mix_1 with
#  "_S<count>" appended, using the first count that gives an unused
#  name.  The new macro takes SIZE, and STREAM and TMFRCLASS if present,
#  from the mean macro of mix_1 and starts with a USAGE of zero.  The
#  MEAN of every mixture on the line is then changed to the new macro.
#  A name that is not in MIXPDF is reported on STDERR and skipped.
#
#  After the whole file has been read, mixtures that share a mean and
#  have become identical (same VARIANCE macro name and same RCLASS, or
#  no RCLASS at all) are collapsed: states are changed to refer to the
#  alphabetically first of the identical mixtures, duplicate entries of
#  a state stream are joined by adding their PROB values, NUMMIXES is
#  updated, and the now unused mixture macros are deleted.
#
#  Dies if the file can not be opened, if a base mixture does not
#  exist, if the number of mean values is wrong, if a mixture is named
#  for sharing more than once across lines, or if the mean macro of a
#  mixture differs in STREAM, TMFRCLASS or SIZE from the shared mean.
#--------------------------------
sub sharemixmean
{
    my( $this, $shrmufile ) = @_;

    my %base;      # names of all mixtures used as a share base
    my %shared;    # mixture name => name of the shared mean macro

    # Lexical handle and three argument open, so that no global handle
    # is clobbered and the file name can not be taken as an open mode.
    open( my $shr, '<', $shrmufile )
        or die "Can not open merge file $shrmufile\n";

    while( my $lin = <$shr> )
    {
        chomp( $lin );
        my @mixdata = split( " ", $lin );

        # nothing to do for a blank line
        next unless @mixdata;

        # number of mixtures sharing, followed by the mixture names;
        # whatever is left in @mixdata is the shared mean vector
        my $nmix    = shift( @mixdata );
        my @mixlist = splice( @mixdata, 0, $nmix );
        next unless @mixlist;

        # first mix on the line is the base mix
        my $mbname = shift( @mixlist );
        exists $this->{MIXPDF}{$mbname}
            or die "Mix pdf $mbname does not exist\n";
        my $mbref = $this->{MIXPDF}{$mbname};

        # mixture can not already have been part of another mean share
        die "Mix pdf $mbname already shared\n" if exists $shared{$mbname};

        # keep track of base mixes, can not share between base mixes
        $base{$mbname} = 1;

        # initialize merge info if base has not been used for sharing yet
        unless( exists $mbref->{MERGE}{NAME}{$mbname} )
        {
            $mbref->{MERGE}{NAME}{$mbname} = 1;
            $mbref->{MERGE}{COUNT} = 1;
        }

        # the mean macro of the base mix is the pattern for the new macro
        my $uname = $mbref->{MEAN};
        my $uref  = $this->{MEAN}{$uname};
        my $usize = $uref->{SIZE};
        die "Invalid mean size for share vector for $uname\n"
            if scalar( @mixdata ) != $usize;

        # ensure the shared mean macro name is unique
        my $uuname = $uname;
        my $uniqct = 0;
        while( exists $this->{MEAN}{$uuname} )
        {
            $uniqct++;
            $uuname = "${uname}_S$uniqct";
        }

        # create the shared mean macro
        my $uuref = {
            USAGE  => 0,
            SIZE   => $usize,
            VECTOR => [ @mixdata ],
        };
        $uuref->{STREAM}    = $uref->{STREAM}    if exists $uref->{STREAM};
        $uuref->{TMFRCLASS} = $uref->{TMFRCLASS} if exists $uref->{TMFRCLASS};
        $this->{MEAN}{$uuname} = $uuref;

        # assign new mean macro to base mixture
        $mbref->{MEAN}   = $uuname;
        $shared{$mbname} = $uuname;

        # all other mixture components which will share the mean vector
      SHAREMIX:
        foreach my $mname ( @mixlist )
        {
            unless( exists $this->{MIXPDF}{$mname} )
            {
                print STDERR "Mix pdf $mname does not exist, no mean sharing with $mbname\n";
                next SHAREMIX;
            }

            if( exists $base{$mname} )
            {
                die "Attempt to share mean for base mix $mname and base mix $mbname\n";
            }

            if( exists $mbref->{MERGE}{NAME}{$mname} )
            {
                print STDERR "Mix $mname multiple sharing for $mbname, not shared again\n";
                next SHAREMIX;
            }

            # check for mean compatibility
            my $mref = $this->{MIXPDF}{$mname};
            my $oref = $this->{MEAN}{ $mref->{MEAN} };

            die "Invalid share stream for $mname and $mbname\n"
                if( exists( $oref->{STREAM} ) && ( $oref->{STREAM} != $uuref->{STREAM} ) );

            die "Invalid share class for $mname and $mbname\n"
                if( exists( $oref->{TMFRCLASS} ) && $oref->{TMFRCLASS} != $uuref->{TMFRCLASS} );

            die "Invalid share size for $mname and $mbname\n"
                if( $oref->{SIZE} != $uuref->{SIZE} );

            # can not already have been part of another share
            die "Mix pdf $mname already shared\n" if exists $shared{$mname};

            # add this mix to the mixes shared with base mix
            $mbref->{MERGE}{NAME}{$mname} = ++$mbref->{MERGE}{COUNT};

            # change the mixture mean component
            $mref->{MEAN}    = $uuname;
            $shared{$mname}  = $uuname;
        }
    }

    close( $shr );

    # Two mixtures that share a mean are identical if they use the same
    # variance macro and agree on RCLASS (both absent, or numerically equal).
    my $identical = sub
    {
        my( $aref, $bref ) = @_;

        return 0 if $aref->{VARIANCE} ne $bref->{VARIANCE};

        if( exists $aref->{RCLASS} )
        {
            return 0 unless exists $bref->{RCLASS};
            return 0 if $aref->{RCLASS} != $bref->{RCLASS};
        }
        elsif( exists $bref->{RCLASS} )
        {
            return 0;
        }

        return 1;
    };

    # After all shared vectors are in place, find the mixtures in each
    # share group that are now identical.  Within a group the names are
    # processed in sorted order, and each name is compared with the
    # names already kept, so every duplicate maps to the first identical
    # mixture of its group no matter how many duplicates there are.
    my %dup;    # duplicate mixture name => name of the mixture kept

    foreach my $gref ( values %{ $this->{MIXPDF} } )
    {
        # continue if no sharing is associated with this mixture
        next unless exists $gref->{MERGE};

        my @kept;

      NAME:
        foreach my $mname ( sort keys %{ $gref->{MERGE}{NAME} } )
        {
            my $mref = $this->{MIXPDF}{$mname};

            foreach my $kname ( @kept )
            {
                if( $identical->( $this->{MIXPDF}{$kname}, $mref ) )
                {
                    $dup{$mname} = $kname;
                    next NAME;
                }
            }

            push( @kept, $mname );
        }
    }

    # Update all states to reflect identical mixtures and join mixtures
    # of a stream that now refer to the same mixture macro.
    my $nstream = $this->{STREAMINFO}{SIZE};

    foreach my $sref ( values %{ $this->{STATE} } )
    {
        foreach my $strm ( 0 .. $nstream - 1 )
        {
            my $nsmix = $sref->{NUMMIXES}[$strm];
            next unless $nsmix;

            # take the active mixtures out of the stream ...
            my $mixes = $sref->{STREAMMIX}[$strm];
            my @active = splice( @{$mixes}, 0, $nsmix );

            my %first;    # mixture name => first entry using that name
            my @uniq;     # unique entries in their original order

            foreach my $mix ( @active )
            {
                my $mname = $mix->{MIXPDF};
                if( exists $dup{$mname} )
                {
                    $mname = $dup{$mname};
                    $mix->{MIXPDF} = $mname;
                }

                if( exists $first{$mname} )
                {
                    # duplicate mixture component, merge into the first
                    $first{$mname}{PROB} += $mix->{PROB};
                }
                else
                {
                    $first{$mname} = $mix;
                    push( @uniq, $mix );
                }
            }

            # ... and put the unique ones back in their place
            unshift( @{$mixes}, @uniq );

            # final number of unique mixture components
            $sref->{NUMMIXES}[$strm] = scalar( @uniq );
        }
    }

    # remove unused mixture components
    delete @{ $this->{MIXPDF} }{ keys %dup };

    # clean up - remove the temporary merge information
    foreach my $mref ( values %{ $this->{MIXPDF} } )
    {
        delete $mref->{MERGE};
    }

    return;
}