# Copyright 2016    Vijayaditya Peddinti
#                   Vimal Manohar
# Apache 2.0.

from __future__ import division
import datetime
import logging
import re

import libs.common as common_lib

logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())


def parse_progress_logs_for_nonlinearity_stats(exp_dir):
    """Parse progress logs for mean and stddev stats of non-linearities.

    e.g. for a line that is parsed from progress.*.log:
    exp/nnet3/lstm_self_repair_ld5_sp/log/progress.9.log:component name=Lstm3_i
    type=SigmoidComponent, dim=1280, self-repair-scale=1e-05, count=1.96e+05,
    value-avg=[percentiles(0,1,2,5 10,20,50,80,90
    95,98,99,100)=(0.05,0.09,0.11,0.15 0.19,0.27,0.50,0.72,0.83
    0.88,0.92,0.94,0.99), mean=0.502, stddev=0.23],
    deriv-avg=[percentiles(0,1,2,5 10,20,50,80,90
    95,98,99,100)=(0.009,0.04,0.05,0.06 0.08,0.10,0.14,0.17,0.18
    0.19,0.20,0.20,0.21), mean=0.134, stddev=0.0397]
    """
    progress_log_files = "%s/log/progress.*.log" % (exp_dir)
    stats_per_component_per_iter = {}

    progress_log_lines = common_lib.run_kaldi_command(
        'grep -e "value-avg.*deriv-avg" {0}'.format(progress_log_files))[0]

    # Raw strings avoid invalid escape sequences under Python 3; the dots in
    # "progress.N.log" are escaped so they match literally.
    parse_regex = re.compile(
        r".*progress\.([0-9]+)\.log:component name=(.+) "
        r"type=(.*)Component,.*"
        r"value-avg=\[.*mean=([0-9\.\-e]+), stddev=([0-9\.e\-]+)\].*"
        r"deriv-avg=\[.*mean=([0-9\.\-e]+), stddev=([0-9\.e\-]+)\]")

    for line in progress_log_lines.split("\n"):
        mat_obj = parse_regex.search(line)
        if mat_obj is None:
            continue
        # groups = ('9', 'Lstm3_i', 'Sigmoid', '0.502', '0.23',
        #           '0.134', '0.0397')
        groups = mat_obj.groups()
        iteration = int(groups[0])
        component_name = groups[1]
        component_type = groups[2]
        value_mean = float(groups[3])
        value_stddev = float(groups[4])
        deriv_mean = float(groups[5])
        deriv_stddev = float(groups[6])
        if component_name not in stats_per_component_per_iter:
            stats_per_component_per_iter[component_name] = {
                'type': component_type, 'stats': {}}
        stats_per_component_per_iter[component_name][
            'stats'][iteration] = [value_mean, value_stddev,
                                   deriv_mean, deriv_stddev]

    return stats_per_component_per_iter
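
# Usage sketch (the experiment directory below is hypothetical; any nnet3
# training directory containing log/progress.*.log would do):
#
#     stats = parse_progress_logs_for_nonlinearity_stats("exp/nnet3/lstm_sp")
#     # stats is indexed by component name, e.g.:
#     # stats['Lstm3_i'] == {'type': 'Sigmoid',
#     #                      'stats': {9: [0.502, 0.23, 0.134, 0.0397], ...}}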


def parse_difference_string(string):
    """Parse a space-separated string of "name:value" pairs into a dict
    mapping each name to its value as a float.
    """
    difference_dict = {}
    for parts in string.split():
        sub_parts = parts.split(":")
        difference_dict[sub_parts[0]] = float(sub_parts[1])
    return difference_dict
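
# A minimal sketch of parse_difference_string() on input shaped like the
# per-layer differences that nnet3-show-progress prints (values made up):
#
#     >>> parse_difference_string("Final_affine:0.0321 Lstm1_W_r:0.0157")
#     {'Final_affine': 0.0321, 'Lstm1_W_r': 0.0157}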


class MalformedClippedProportionLineException(Exception):
    def __init__(self, line):
        Exception.__init__(self,
                           "Malformed line encountered while trying to "
                           "extract clipped-proportions.\n{0}".format(line))


def parse_progress_logs_for_clipped_proportion(exp_dir):
    """Parse progress logs for clipped-proportion stats.

    e.g. for a line that is parsed from progress.*.log:
    exp/chain/cwrnn_trial2_ld5_sp/log/progress.245.log:component
    name=BLstm1_forward_c type=ClipGradientComponent, dim=512,
    norm-based-clipping=true, clipping-threshold=30,
    clipped-proportion=0.000565527,
    self-repair-clipped-proportion-threshold=0.01, self-repair-target=0,
    self-repair-scale=1
    """
    progress_log_files = "%s/log/progress.*.log" % (exp_dir)
    progress_log_lines = common_lib.run_kaldi_command(
        'grep -e "{0}" {1}'.format(
            "clipped-proportion", progress_log_files))[0]
    parse_regex = re.compile(r".*progress\.([0-9]+)\.log:component "
                             r"name=(.*) type=.* "
                             r"clipped-proportion=([0-9\.e\-]+)")

    cp_per_component_per_iter = {}

    max_iteration = 0
    component_names = set([])
    for line in progress_log_lines.split("\n"):
        mat_obj = parse_regex.search(line)
        if mat_obj is None:
            if line.strip() == "":
                continue
            raise MalformedClippedProportionLineException(line)
        groups = mat_obj.groups()
        iteration = int(groups[0])
        max_iteration = max(max_iteration, iteration)
        name = groups[1]
        clipped_proportion = float(groups[2])
        if clipped_proportion > 1:
            raise MalformedClippedProportionLineException(line)
        if iteration not in cp_per_component_per_iter:
            cp_per_component_per_iter[iteration] = {}
        cp_per_component_per_iter[iteration][name] = clipped_proportion
        component_names.add(name)
    component_names = list(component_names)
    component_names.sort()

    # Rearrange the data into a table (one row per iteration) and into a
    # per-component dict of [iteration, clipped-proportion] pairs.
    cp_per_iter_per_component = {}
    for component_name in component_names:
        cp_per_iter_per_component[component_name] = []
    data = []
    data.append(["iteration"] + component_names)
    for iteration in range(max_iteration + 1):
        if iteration not in cp_per_component_per_iter:
            continue
        comp_dict = cp_per_component_per_iter[iteration]
        row = [iteration]
        for component in component_names:
            try:
                row.append(comp_dict[component])
                cp_per_iter_per_component[component].append(
                    [iteration, comp_dict[component]])
            except KeyError:
                # If the clipped proportion is not available for a particular
                # component, it is set to None; this usually happens during
                # layer-wise discriminative training.
                row.append(None)
        data.append(row)

    return {'table': data,
            'cp_per_component_per_iter': cp_per_component_per_iter,
            'cp_per_iter_per_component': cp_per_iter_per_component}
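
# Usage sketch (hypothetical exp dir). The 'table' entry holds one row per
# iteration, with None for components that did not report a value:
#
#     cp = parse_progress_logs_for_clipped_proportion("exp/chain/cwrnn_sp")
#     # cp['table'][0] == ['iteration', 'BLstm1_forward_c', ...]
#     # cp['cp_per_component_per_iter'][245]['BLstm1_forward_c'] == 0.000565527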


def parse_progress_logs_for_param_diff(exp_dir, pattern):
    """Parse progress logs for per-component parameter differences.

    e.g. for a line that is parsed from progress.*.log:
    exp/chain/cwrnn_trial2_ld5_sp/log/progress.245.log:LOG
    (nnet3-show-progress:main():nnet3-show-progress.cc:144) Relative parameter
    differences per layer are [ Cwrnn1_T3_W_r:0.0171537
    Cwrnn1_T3_W_x:1.33338e-07 Cwrnn1_T2_W_r:0.048075 Cwrnn1_T2_W_x:1.34088e-07
    Cwrnn1_T1_W_r:0.0157277 Cwrnn1_T1_W_x:0.0212704 Final_affine:0.0321521
    Cwrnn2_T3_W_r:0.0212082 Cwrnn2_T3_W_x:1.33691e-07 Cwrnn2_T2_W_r:0.0212978
    Cwrnn2_T2_W_x:1.33401e-07 Cwrnn2_T1_W_r:0.014976 Cwrnn2_T1_W_x:0.0233588
    Cwrnn3_T3_W_r:0.0237165 Cwrnn3_T3_W_x:1.33184e-07 Cwrnn3_T2_W_r:0.0239754
    Cwrnn3_T2_W_x:1.3296e-07 Cwrnn3_T1_W_r:0.0194809 Cwrnn3_T1_W_x:0.0271934 ]
    """
    if pattern not in set(["Relative parameter differences",
                           "Parameter differences"]):
        raise Exception("Unknown value for pattern: {0}".format(pattern))

    progress_log_files = "%s/log/progress.*.log" % (exp_dir)
    progress_per_iter = {}
    component_names = set([])
    progress_log_lines = common_lib.run_kaldi_command(
        'grep -e "{0}" {1}'.format(pattern, progress_log_files))[0]
    parse_regex = re.compile(r".*progress\.([0-9]+)\.log:"
                             r"LOG.*{0}.*\[(.*)\]".format(pattern))
    for line in progress_log_lines.split("\n"):
        mat_obj = parse_regex.search(line)
        if mat_obj is None:
            continue
        groups = mat_obj.groups()
        iteration = groups[0]
        differences = parse_difference_string(groups[1])
        component_names = component_names.union(differences.keys())
        progress_per_iter[int(iteration)] = differences

    component_names = list(component_names)
    component_names.sort()
    # Rearrange the parameter differences available per iteration into
    # parameter differences per component.
    progress_per_component = {}
    for cn in component_names:
        progress_per_component[cn] = {}

    max_iter = max(progress_per_iter.keys())
    total_missing_iterations = 0
    gave_user_warning = False
    for iteration in range(max_iter + 1):
        try:
            component_dict = progress_per_iter[iteration]
        except KeyError:
            continue

        for component_name in component_names:
            try:
                progress_per_component[component_name][iteration] = (
                    component_dict[component_name])
            except KeyError:
                total_missing_iterations += 1
                # The component was not found in this iteration, possibly
                # because of layer-wise discriminative training.
                pass
        if (total_missing_iterations / len(component_names) > 20
                and not gave_user_warning and logger is not None):
            logger.warning("There are more than {0} missing iterations per "
                           "component. Something might be wrong.".format(
                                total_missing_iterations
                                / len(component_names)))
            gave_user_warning = True

    return {'progress_per_component': progress_per_component,
            'component_names': component_names,
            'max_iter': max_iter}
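
# Usage sketch (hypothetical exp dir); pattern must be one of the two strings
# validated at the top of the function:
#
#     diffs = parse_progress_logs_for_param_diff(
#         "exp/chain/cwrnn_sp", "Relative parameter differences")
#     # diffs['progress_per_component']['Final_affine'] maps iteration to
#     # difference, e.g. {245: 0.0321521, ...}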


def parse_train_logs(exp_dir):
    """Parse train logs for the wall-clock time taken per iteration,
    returning a dict mapping each iteration to the time of its slowest job.
    """
    train_log_files = "%s/log/train.*.log" % (exp_dir)
    train_log_lines = common_lib.run_kaldi_command(
        'grep -e Accounting {0}'.format(train_log_files))[0]
    parse_regex = re.compile(r".*train\.([0-9]+)\.([0-9]+)\.log:# "
                             r"Accounting: time=([0-9]+) thread.*")

    train_times = {}
    for line in train_log_lines.split('\n'):
        mat_obj = parse_regex.search(line)
        if mat_obj is not None:
            groups = mat_obj.groups()
            try:
                train_times[int(groups[0])][int(groups[1])] = float(groups[2])
            except KeyError:
                train_times[int(groups[0])] = {}
                train_times[int(groups[0])][int(groups[1])] = float(groups[2])
    # Keep only the slowest job's time for each iteration.
    for iteration in train_times.keys():
        train_times[iteration] = max(train_times[iteration].values())
    return train_times
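
# Usage sketch (hypothetical exp dir). Each train.<iter>.<job>.log is expected
# to contain an "# Accounting: time=<seconds> thread..." line:
#
#     times = parse_train_logs("exp/nnet3/lstm_sp")
#     # times == {0: 312.0, 1: 298.0, ...}  (values made up)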


def parse_prob_logs(exp_dir, key='accuracy', output="output"):
    """Parse compute_prob_{train,valid}.*.log for the objective values
    reported for the given key and output, returning (iteration,
    train objective, valid objective) tuples.
    """
    train_prob_files = "%s/log/compute_prob_train.*.log" % (exp_dir)
    valid_prob_files = "%s/log/compute_prob_valid.*.log" % (exp_dir)
    train_prob_strings = common_lib.run_kaldi_command(
        'grep -e {0} {1}'.format(key, train_prob_files), wait=True)[0]
    valid_prob_strings = common_lib.run_kaldi_command(
        'grep -e {0} {1}'.format(key, valid_prob_files))[0]

    # Example lines that the regex below has to match:
    #
    # LOG
    # (nnet3-chain-compute-prob:PrintTotalStats():nnet-chain-diagnostics.cc:149)
    # Overall log-probability for 'output' is -0.399395 + -0.013437 = -0.412832
    # per frame, over 20000 frames.
    #
    # LOG
    # (nnet3-chain-compute-prob:PrintTotalStats():nnet-chain-diagnostics.cc:144)
    # Overall log-probability for 'output' is -0.307255 per frame, over 20000
    # frames.

    parse_regex = re.compile(
        r".*compute_prob_.*\.([0-9]+).log:LOG "
        r".nnet3.*compute-prob:PrintTotalStats..:"
        r"nnet.*diagnostics.cc:[0-9]+. Overall ([a-zA-Z\-]+) for "
        r"'{output}'.*is ([0-9.\-e]+) .*per frame".format(output=output))

    train_loss = {}
    valid_loss = {}

    for line in train_prob_strings.split('\n'):
        mat_obj = parse_regex.search(line)
        if mat_obj is not None:
            groups = mat_obj.groups()
            if groups[1] == key:
                train_loss[int(groups[0])] = groups[2]
    for line in valid_prob_strings.split('\n'):
        mat_obj = parse_regex.search(line)
        if mat_obj is not None:
            groups = mat_obj.groups()
            if groups[1] == key:
                valid_loss[int(groups[0])] = groups[2]
    iters = list(set(valid_loss.keys()).intersection(train_loss.keys()))
    iters.sort()
    # Return a list (not a lazy map object) so callers can iterate the result
    # more than once under Python 3.
    return [(int(x), float(train_loss[x]), float(valid_loss[x]))
            for x in iters]
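
# Usage sketch (hypothetical exp dir). For 'chain' models the logs report
# "log-probability" rather than "accuracy", as in the example lines above:
#
#     for iteration, train, valid in parse_prob_logs(
#             "exp/chain/cwrnn_sp", key="log-probability"):
#         print(iteration, train, valid)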


def generate_accuracy_report(exp_dir, key="accuracy", output="output"):
    times = parse_train_logs(exp_dir)
    data = parse_prob_logs(exp_dir, key, output)
    report = []
    report.append("%Iter\tduration\ttrain_loss\tvalid_loss\tdifference")
    for x in data:
        try:
            report.append("%d\t%s\t%g\t%g\t%g" % (x[0], str(times[x[0]]),
                                                  x[1], x[2], x[2] - x[1]))
        except KeyError:
            continue

    total_time = sum(times.values())
    report.append("Total training time is {0}\n".format(
                    str(datetime.timedelta(seconds=total_time))))
    return ["\n".join(report), times, data]
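
# Usage sketch (hypothetical exp dir): print the tab-separated per-iteration
# report combining training times with train/valid objective values:
#
#     report, times, data = generate_accuracy_report(
#         "exp/chain/cwrnn_sp", key="log-probability")
#     print(report)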