index 2d2fd4dcf083b31318f67917ff342e3cc0becdc8..5b98f4b6beb550646a439c2c6726d32cffe8677c 100755 (executable)
fisher_dirs=( $@ )
for f in "$text" "$lexicon"; do
- [ ! -f $x ] && echo "$0: No such file $f" && exit 1;
+ # Quote "$f": if the path is empty, an unquoted $f leaves `[ ! -f ]`, which
+ # evaluates to false, so the missing-file error would be skipped silently.
+ [ ! -f "$f" ] && echo "$0: No such file $f" && exit 1;
done
loc=`which ngram-count`;
heldout_sent=10000
cut -d' ' -f2- $text | gzip -c > $dir/train.all.gz
-cut -d' ' -f2- $text | tail -n +$heldout_sent | gzip -c > $dir/train.gz
+# `tail -n +K` starts output at line K, and the held-out set below is the
+# first $heldout_sent lines, so the training data must start one line later
+# to keep the two sets from overlapping.
+cut -d' ' -f2- "$text" | tail -n +$((heldout_sent + 1)) | gzip -c > "$dir/train.gz"
cut -d' ' -f2- $text | head -n $heldout_sent > $dir/heldout
cut -d' ' -f1 $lexicon > $dir/wordlist