summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 4a0106a)
raw | patch | inline | side by side (parent: 4a0106a)
author | pegahgh <pegahgh@gmail.com> | |
Wed, 9 Aug 2017 22:29:37 +0000 (18:29 -0400) | ||
committer | Daniel Povey <dpovey@gmail.com> | |
Wed, 9 Aug 2017 22:29:37 +0000 (15:29 -0700) |
15 files changed:
index a9c3c47ebbc111fdd5d2784fe1810ddc8d6654cc..546f272e821d58793389a99e0617fe304d68f7ef 100644 (file)
#ifndef KALDI_FEAT_FEATURE_COMMON_INL_H_
#define KALDI_FEAT_FEATURE_COMMON_INL_H_
+#include "feat/resample.h"
// Do not include this file directly. It is included by feat/feature-common.h
namespace kaldi {
+// Computes features for 'wave', which was sampled at 'sample_freq'.
+//  - If 'sample_freq' equals the sampling frequency in the frame-extraction
+//    options, this simply forwards to Compute().
+//  - If 'sample_freq' is higher, the waveform is downsampled first, but only
+//    when the 'allow_downsample' frame option is set; otherwise it is an error.
+//  - If 'sample_freq' is lower, it is an error.
+// 'vtln_warp' is passed through to Compute() unchanged; 'output' must be
+// non-NULL and receives the features.
+template <class F>
+void OfflineFeatureTpl<F>::ComputeFeatures(
+    const VectorBase<BaseFloat> &wave,
+    BaseFloat sample_freq,
+    BaseFloat vtln_warp,
+    Matrix<BaseFloat> *output) {
+  KALDI_ASSERT(output != NULL);
+  BaseFloat new_sample_freq = computer_.GetFrameOptions().samp_freq;
+  if (sample_freq == new_sample_freq) {
+    Compute(wave, vtln_warp, output);
+  } else if (new_sample_freq < sample_freq) {
+    if (!computer_.GetFrameOptions().allow_downsample) {
+      KALDI_ERR << "Waveform and config sample Frequency mismatch: "
+                << sample_freq << " vs. " << new_sample_freq
+                << " (use --allow-downsample=true option to allow "
+                << "downsampling the waveform).";
+    }
+    // Downsample the waveform.  The output vector is left empty here
+    // because the resampler sizes it itself, so there is no need to
+    // initialise it with a copy of the input.
+    Vector<BaseFloat> downsampled_wave;
+    DownsampleWaveForm(sample_freq, wave,
+                       new_sample_freq, &downsampled_wave);
+    Compute(downsampled_wave, vtln_warp, output);
+  } else {
+    // The configured frequency is higher than the waveform's frequency.
+    KALDI_ERR << "The waveform can only be downsampled, not upsampled. "
+              << "New sample Frequency " << new_sample_freq
+              << " is larger than waveform original sampling frequency "
+              << sample_freq;
+  }
+}
+
+// Const version of ComputeFeatures().  It is a wrapper that calls the
+// non-const ComputeFeatures() on a temporary object that is a copy of *this,
+// forwarding all four arguments.  It is not as efficient as the non-const
+// version because of the overhead of copying *this.
+template <class F>
+void OfflineFeatureTpl<F>::ComputeFeatures(
+    const VectorBase<BaseFloat> &wave,
+    BaseFloat sample_freq,
+    BaseFloat vtln_warp,
+    Matrix<BaseFloat> *output) const {
+  OfflineFeatureTpl<F> temp(*this);
+  // 'sample_freq' must be forwarded: ComputeFeatures() has no
+  // three-argument form, and the frequency check happens in the callee.
+  temp.ComputeFeatures(wave, sample_freq, vtln_warp, output);
+}
+
+
template <class F>
void OfflineFeatureTpl<F>::Compute(
const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
- Matrix<BaseFloat> *output,
- Vector<BaseFloat> *deprecated_wave_remainder) {
+ Matrix<BaseFloat> *output) {
KALDI_ASSERT(output != NULL);
int32 rows_out = NumFrames(wave.Dim(), computer_.GetFrameOptions()),
cols_out = computer_.Dim();
if (rows_out == 0) {
output->Resize(0, 0);
- if (deprecated_wave_remainder != NULL)
- *deprecated_wave_remainder = wave;
return;
}
output->Resize(rows_out, cols_out);
- if (deprecated_wave_remainder != NULL)
- ExtractWaveformRemainder(wave, computer_.GetFrameOptions(),
- deprecated_wave_remainder);
Vector<BaseFloat> window; // windowed waveform.
bool use_raw_log_energy = computer_.NeedRawLogEnergy();
for (int32 r = 0; r < rows_out; r++) { // r is frame index.
void OfflineFeatureTpl<F>::Compute(
const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
- Matrix<BaseFloat> *output,
- Vector<BaseFloat> *deprecated_wave_remainder) const {
+ Matrix<BaseFloat> *output) const {
OfflineFeatureTpl<F> temp(*this);
// call the non-const version of Compute() on a temporary copy of this object.
// This is a workaround for const-ness that may sometimes be useful in
// multi-threaded code, although it's not optimally efficient.
- temp.Compute(wave, vtln_warp, output, deprecated_wave_remainder);
+ temp.Compute(wave, vtln_warp, output);
}
} // end namespace kaldi
index 21e8ff34ac07c2f5b7a6bd5c1f43a2d13a6f3125..1c83aed8ea924b061775830d553be8f6858c4363 100644 (file)
computer_(opts),
feature_window_function_(computer_.GetFrameOptions()) { }
- // Computes the features for one file (one sequence of features).
- // Use of the 'deprecatd_wave_remainder' argument is highly deprecated; it is
- // only provided for back-compatibility for code that may have
- // relied on the older interface. It's deprecated because it
- // doesn't support the --snip-edges=false option, and because
- // we plan to eventually remove this argument so that there
- // will be only one way to do online feature extraction.
+ // Internal (and back-compatibility) interface for computing features, which
+ // requires that the user has already checked that the sampling frequency
+ // of the waveform is equal to the sampling frequency specified in
+ // the frame-extraction options.
void Compute(const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
- Matrix<BaseFloat> *output,
- Vector<BaseFloat> *deprecated_wave_remainder = NULL);
+ Matrix<BaseFloat> *output);
// This const version of Compute() is a wrapper that
// calls the non-const version on a temporary object.
// It's less efficient than the non-const version.
void Compute(const VectorBase<BaseFloat> &wave,
BaseFloat vtln_warp,
- Matrix<BaseFloat> *output,
- Vector<BaseFloat> *deprecated_wave_remainder = NULL) const;
+ Matrix<BaseFloat> *output) const;
+
+ /**
+ Computes the features for one file (one sequence of features).
+ This is the newer interface where you specify the sample frequency
+ of the input waveform.
+ @param [in] wave The input waveform
+ @param [in] sample_freq The sampling frequency with which
+ 'wave' was sampled.
+ if sample_freq is higher than the frequency
+ specified in the config, we will downsample
+ the waveform, but if lower, it's an error.
+ @param [in] vtln_warp The VTLN warping factor (will normally
+ be 1.0)
+ @param [out] output The matrix of features, where the row-index
+ is the frame index.
+ */
+ void ComputeFeatures(const VectorBase<BaseFloat> &wave,
+ BaseFloat sample_freq,
+ BaseFloat vtln_warp,
+ Matrix<BaseFloat> *output);
+ /**
+ This const version of ComputeFeatures() is a wrapper that
+ calls the non-const ComputeFeatures() on a temporary object
+ that is a copy of *this. It is not as efficient because of the
+ overhead of copying *this.
+ */
+ void ComputeFeatures(const VectorBase<BaseFloat> &wave,
+ BaseFloat sample_freq,
+ BaseFloat vtln_warp,
+ Matrix<BaseFloat> *output) const;
int32 Dim() const { return computer_.Dim(); }
index e458df00ac03ebe82e5b57c8c2786e4de59c54bd..47b7b1c4244ab6682e6e4bf3b6f94b5736b40f81 100644 (file)
// use default parameters
// compute fbanks.
- fbank.Compute(v, 1.0, &m, NULL);
+ fbank.Compute(v, 1.0, &m);
// possibly dump
// std::cout << "== Output features == \n" << m;
// calculate kaldi features
Matrix<BaseFloat> kaldi_features;
- fbank.Compute(waveform, 1.0, &kaldi_features, NULL);
+ fbank.Compute(waveform, 1.0, &kaldi_features);
std::cout << "<<<=== Compare with HTK features...\n";
// calculate kaldi features
Matrix<BaseFloat> kaldi_features;
- fbank.Compute(waveform, 1.0, &kaldi_features, NULL);
+ fbank.Compute(waveform, 1.0, &kaldi_features);
std::cout << "<<<=== Compare with HTK features...\n";
// calculate kaldi features
Matrix<BaseFloat> kaldi_features;
- fbank.Compute(waveform, vtln_warp, &kaldi_features, NULL);
+ fbank.Compute(waveform, vtln_warp, &kaldi_features);
std::cout << "<<<=== Compare with HTK features...\n";
// calculate kaldi features
Matrix<BaseFloat> kaldi_features;
- fbank.Compute(waveform, vtln_warp, &kaldi_features, NULL);
+ fbank.Compute(waveform, vtln_warp, &kaldi_features);
std::cout << "<<<=== Compare with HTK features...\n";
index bf23c373280d386540f45c89f21000d31c32b214..c436713970753adf4076c3259201d0014ee2a499 100644 (file)
// use default parameters
// compute mfccs.
- mfcc.Compute(v, 1.0, &m, NULL);
+ mfcc.Compute(v, 1.0, &m);
// possibly dump
// std::cout << "== Output features == \n" << m;
// calculate kaldi features
Matrix<BaseFloat> kaldi_raw_features;
- mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
+ mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
DeltaFeaturesOptions delta_opts;
Matrix<BaseFloat> kaldi_features;
// calculate kaldi features
Matrix<BaseFloat> kaldi_raw_features;
- mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
+ mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
DeltaFeaturesOptions delta_opts;
Matrix<BaseFloat> kaldi_features;
// calculate kaldi features
Matrix<BaseFloat> kaldi_raw_features;
- mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
+ mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
DeltaFeaturesOptions delta_opts;
Matrix<BaseFloat> kaldi_features;
// calculate kaldi features
Matrix<BaseFloat> kaldi_raw_features;
- mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
+ mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
DeltaFeaturesOptions delta_opts;
Matrix<BaseFloat> kaldi_features;
// calculate kaldi features
Matrix<BaseFloat> kaldi_raw_features;
- mfcc.Compute(waveform, vtln_warp, &kaldi_raw_features, NULL);
+ mfcc.Compute(waveform, vtln_warp, &kaldi_raw_features);
DeltaFeaturesOptions delta_opts;
Matrix<BaseFloat> kaldi_features;
// calculate kaldi features
Matrix<BaseFloat> kaldi_raw_features;
- mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
+ mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
DeltaFeaturesOptions delta_opts;
Matrix<BaseFloat> kaldi_features;
index a48c939f6f9aa4e10004f70655ae70b58909c7cc..ad872cffcd0232e822ec826f8df318d54129e230 100644 (file)
// use default parameters
// compute mfccs.
- plp.Compute(v, 1.0, &m, NULL);
+ plp.Compute(v, 1.0, &m);
// possibly dump
// std::cout << "== Output features == \n" << m;
// calculate kaldi features
Matrix<BaseFloat> kaldi_raw_features;
- plp.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
+ plp.Compute(waveform, 1.0, &kaldi_raw_features);
DeltaFeaturesOptions delta_opts;
Matrix<BaseFloat> kaldi_features;
index 3ecef148b879344d610aa80ab6afdba22ff1b5f6..4b99c65fef872c2de9339158750673087de956b7 100644 (file)
op.use_energy = false;
Mfcc mfcc(op);
Matrix<BaseFloat> raw_features;
- mfcc.Compute(waveform, 1.0, &raw_features, NULL);
+ mfcc.Compute(waveform, 1.0, &raw_features);
try {
for (int32 window = 1; window < 4; window++) {
index bbb24fd8988dbb9f817999625f30da66813a9b80..88002ae190387fe92c8dc41f6abe9a20b6a6f9f2 100644 (file)
bool round_to_power_of_two;
BaseFloat blackman_coeff;
bool snip_edges;
+ bool allow_downsample;
// May be "hamming", "rectangular", "povey", "hanning", "blackman"
// "povey" is a window I made to be similar to Hamming but to go to zero at the
// edges, it's pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85)
window_type("povey"),
round_to_power_of_two(true),
blackman_coeff(0.42),
- snip_edges(true){ }
+ snip_edges(true),
+ allow_downsample(false) { }
void Register(OptionsItf *opts) {
opts->Register("sample-frequency", &samp_freq,
"completely fit in the file, and the number of frames depends on the "
"frame-length. If false, the number of frames depends only on the "
"frame-shift, and we reflect the data at the ends.");
+ opts->Register("allow-downsample", &allow_downsample,
+ "If true, allow the input waveform to have a higher frequency than "
+ "the specified --sample-frequency (and we'll downsample).");
}
int32 WindowShift() const {
return static_cast<int32>(samp_freq * 0.001 * frame_shift_ms);
index aea89d0099d27e819c18808140e851577583074a..e3a1d5f99f3425fb59fde56262395fcc31925ee3 100644 (file)
// compute mfcc offline
Matrix<BaseFloat> mfcc_feats;
- mfcc.Compute(waveform, 1.0, &mfcc_feats, NULL); // vtln not supported
+ mfcc.Compute(waveform, 1.0, &mfcc_feats); // vtln not supported
// compare
// The test waveform is about 1.44s long, so
// compute plp offline
Matrix<BaseFloat> plp_feats;
- plp.Compute(waveform, 1.0, &plp_feats, NULL); // vtln not supported
+ plp.Compute(waveform, 1.0, &plp_feats); // vtln not supported
// compare
// The test waveform is about 1.44s long, so
// compute mfcc offline
Matrix<BaseFloat> mfcc_feats;
- mfcc.Compute(waveform, 1.0, &mfcc_feats, NULL); // vtln not supported
+ mfcc.Compute(waveform, 1.0, &mfcc_feats); // vtln not supported
// the parametrization object for 2nd stream plp feature
PlpOptions plp_op;
// compute plp offline
Matrix<BaseFloat> plp_feats;
- plp.Compute(waveform, 1.0, &plp_feats, NULL); // vtln not supported
+ plp.Compute(waveform, 1.0, &plp_feats); // vtln not supported
// compare
// The test waveform is about 1.44s long, so
diff --git a/src/feat/resample.cc b/src/feat/resample.cc
index bf14f8e4654ecfe50bf7af5172fc9ac8f76c5f8e..518685d85c8046e608fbca780b54c1a04e9a58a9 100644 (file)
--- a/src/feat/resample.cc
+++ b/src/feat/resample.cc
int32 input_dim = input.Dim();
int64 tot_input_samp = input_sample_offset_ + input_dim,
tot_output_samp = GetNumOutputSamples(tot_input_samp, flush);
-
+
KALDI_ASSERT(tot_output_samp >= output_sample_offset_);
output->Resize(tot_output_samp - output_sample_offset_);
return filter * window;
}
-
+// Downsamples 'wave' from 'orig_freq' to 'new_freq' and writes the result
+// to 'new_wave'.  Requires new_freq < orig_freq.  The low-pass cutoff is
+// 0.99 of half the new frequency and the filter width is 6.
+void DownsampleWaveForm(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
+                        BaseFloat new_freq, Vector<BaseFloat> *new_wave) {
+  KALDI_ASSERT(new_freq < orig_freq);
+  int32 filter_width = 6;
+  BaseFloat cutoff = 0.99 * 0.5 * new_freq;
+  LinearResample resampler(orig_freq, new_freq, cutoff, filter_width);
+  // The whole waveform is supplied in one call, so ask for a flush.
+  bool flush = true;
+  resampler.Resample(wave, flush, new_wave);
+}
} // namespace kaldi
diff --git a/src/feat/resample.h b/src/feat/resample.h
index 4c1f4f7cae97c42e04935551fe395d236ddd17c7..cc3e5064863c3a482d96da53c88bf354fc1f1a1b 100644 (file)
--- a/src/feat/resample.h
+++ b/src/feat/resample.h
limited.] In general we want to bandlimit to lower than S/2, because
we don't have a perfect filter and also because if we want to resample
at a lower frequency than S, we need to bandlimit to below half of that.
- Anyway, suppose we want to bandlimit to C, with 0 < C < C/2. The perfect
+ Anyway, suppose we want to bandlimit to C, with 0 < C < S/2. The perfect
rectangular filter with cutoff C is the sinc function,
\f[ f(t) = 2C sinc(2Ct), \f]
where sinc is the normalized sinc function \f$ sinc(t) = sin(pi t) / (pi t) \f$, with
///< previously seen input signal.
};
-
+/// Downsample a waveform. This is a convenience wrapper for the
+/// class 'LinearResample'.
+/// The low-pass filter cutoff used in 'LinearResample' is 0.99 of half of the
+/// new_freq and num_zeros is 6.
+/// The downsampling result was also checked against the sox resampling toolkit.
+/// Sox's design is inspired by Laurent De Soras' paper,
+/// https://ccrma.stanford.edu/~jos/resample/Implementation.html
+/// It designs the low-pass filter using the pass-band, stop-band, Nyquist freq
+/// and stop-band attenuation.
+/// e.g. the main lobe for a Hanning window is 4pi/M, where the main-lobe width
+/// is equal to (pass-band-freq - stop-band-freq).
+/// Also the cutoff frequency is equal to (pass-band-freq - stop-band-freq).
+void DownsampleWaveForm(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
+                        BaseFloat new_freq, Vector<BaseFloat> *new_wave);
/// @} End of "addtogroup feat"
} // namespace kaldi
index a339d538c4df207abea84a50ae4c4292c9804a5d..41df621d62d50439e95c63d483635a5ba6ea34b8 100644 (file)
"needed if the vtln-map option is used.");
RandomAccessBaseFloatReaderMapped vtln_map_reader(vtln_map_rspecifier,
utt2spk_rspecifier);
-
+
if (output_format == "kaldi") {
if (!kaldi_writer.Open(output_wspecifier))
KALDI_ERR << "Could not initialize output with wspecifier "
} else {
vtln_warp_local = vtln_warp;
}
- if (fbank_opts.frame_opts.samp_freq != wave_data.SampFreq())
- KALDI_ERR << "Sample frequency mismatch: you specified "
- << fbank_opts.frame_opts.samp_freq << " but data has "
- << wave_data.SampFreq() << " (use --sample-frequency "
- << "option). Utterance is " << utt;
SubVector<BaseFloat> waveform(wave_data.Data(), this_chan);
Matrix<BaseFloat> features;
try {
- fbank.Compute(waveform, vtln_warp_local, &features, NULL);
+ fbank.ComputeFeatures(waveform, wave_data.SampFreq(), vtln_warp_local, &features);
} catch (...) {
KALDI_WARN << "Failed to compute features for utterance "
<< utt;
index bdb353f1e4addb6fa5d673fcdbb1660165590e76..09efcd38dd075d581fc13d941e484c0d0c3239e0 100644 (file)
} else {
vtln_warp_local = vtln_warp;
}
- if (mfcc_opts.frame_opts.samp_freq != wave_data.SampFreq())
- KALDI_ERR << "Sample frequency mismatch: you specified "
- << mfcc_opts.frame_opts.samp_freq << " but data has "
- << wave_data.SampFreq() << " (use --sample-frequency "
- << "option). Utterance is " << utt;
SubVector<BaseFloat> waveform(wave_data.Data(), this_chan);
Matrix<BaseFloat> features;
try {
- mfcc.Compute(waveform, vtln_warp_local, &features, NULL);
+ mfcc.ComputeFeatures(waveform, wave_data.SampFreq(), vtln_warp_local, &features);
} catch (...) {
KALDI_WARN << "Failed to compute features for utterance "
<< utt;
index 4da85f4ec70b984ae4501026d55efc14949b477c..3e9fe9d742395c6f85323b88a24f41a881691cac 100644 (file)
} else {
vtln_warp_local = vtln_warp;
}
- if (plp_opts.frame_opts.samp_freq != wave_data.SampFreq())
- KALDI_ERR << "Sample frequency mismatch: you specified "
- << plp_opts.frame_opts.samp_freq << " but data has "
- << wave_data.SampFreq() << " (use --sample-frequency "
- << "option). Utterance is " << utt;
SubVector<BaseFloat> waveform(wave_data.Data(), this_chan);
Matrix<BaseFloat> features;
try {
- plp.Compute(waveform, vtln_warp_local, &features, NULL);
+ plp.ComputeFeatures(waveform, wave_data.SampFreq(), vtln_warp_local, &features);
} catch (...) {
KALDI_WARN << "Failed to compute features for utterance "
<< utt;
index 3a74eb94b2f9f98ac30ec215f0dcfb4fb137c39f..3b40a6fa5c7af2a28a9116d542bcdf67c3b8fadf 100644 (file)
}
}
- if (spec_opts.frame_opts.samp_freq != wave_data.SampFreq())
- KALDI_ERR << "Sample frequency mismatch: you specified "
- << spec_opts.frame_opts.samp_freq << " but data has "
- << wave_data.SampFreq() << " (use --sample-frequency "
- << "option). Utterance is " << utt;
-
SubVector<BaseFloat> waveform(wave_data.Data(), this_chan);
Matrix<BaseFloat> features;
try {
- spec.Compute(waveform, 1.0, &features, NULL);
+ spec.ComputeFeatures(waveform, wave_data.SampFreq(), 1.0, &features);
} catch (...) {
KALDI_WARN << "Failed to compute features for utterance "
<< utt;
index 591d554e88e525661f57cdca962ef785eba9005f..b730a373ac0cfb107e33372d2ecf311a678e6a13 100644 (file)
const int32 frame_size, const int32 frame_shift);
virtual int32 Dim() const { return extractor_->Dim(); }
-
+
virtual bool Compute(Matrix<BaseFloat> *output);
private:
const int32 frame_size_;
const int32 frame_shift_;
Vector<BaseFloat> wave_; // the samples to be passed for extraction
- Vector<BaseFloat> wave_remainder_; // the samples remained from the previous
- // feature batch
KALDI_DISALLOW_COPY_AND_ASSIGN(OnlineFeInput);
};
Vector<BaseFloat> read_samples(samples_req);
bool ans = source_->Read(&read_samples);
-
- Vector<BaseFloat> all_samples(wave_remainder_.Dim() + read_samples.Dim());
- all_samples.Range(0, wave_remainder_.Dim()).CopyFromVec(wave_remainder_);
- all_samples.Range(wave_remainder_.Dim(), read_samples.Dim()).
- CopyFromVec(read_samples);
-
+
// Extract the features
- if (all_samples.Dim() >= frame_size_) {
- extractor_->Compute(all_samples, 1.0, output, &wave_remainder_);
+ if (read_samples.Dim() >= frame_size_) {
+ extractor_->Compute(read_samples, 1.0, output);
} else {
output->Resize(0, 0);
- wave_remainder_ = all_samples;
}
-
+
return ans;
}