Source code for alex.corpustools.asrscore

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals
if __name__ == '__main__':
    import autopath

import re
import argparse
import sys

from alex.corpustools.wavaskey import load_wavaskey
from alex.components.asr.utterance import Utterance
from alex.utils.text import min_edit_dist, min_edit_ops
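
# Note: min_edit_ops(t, r), as unpacked in score_file below, yields the
# per-utterance counts of insertion, deletion and substitution errors
# between the test word sequence t and the reference word sequence r.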

def score_file(reftext, testtext):
    """
    Computes ASR scores between reference and test word strings.

    :param reftext: a dict mapping utterance keys to reference utterances
    :param testtext: a dict mapping utterance keys to tested (ASR output) utterances
    :return: a tuple with the percentage of correctly recognised words, the
        substitution, deletion and insertion rates, the overall word error
        rate, and the number of reference words
    """
    ii, dd, ss, nn = 0.0, 0.0, 0.0, 0.0

    # Every key of reftext is expected to be present in testtext as well.
    for utt_idx in sorted(reftext):
        # Strip non-speech event tokens such as _noise_ before scoring.
        r = re.sub(r"\b_\w+_\b", "", unicode(reftext[utt_idx]).lower(), flags=re.UNICODE).split()
        t = re.sub(r"\b_\w+_\b", "", unicode(testtext[utt_idx]).lower(), flags=re.UNICODE).split()

        i, d, s = min_edit_ops(t, r)

        ii += i
        dd += d
        ss += s
        nn += len(r)

    return (nn-ss-dd)/nn*100, ss/nn*100, dd/nn*100, ii/nn*100, (ss+dd+ii)/nn*100, nn
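
# A minimal usage sketch (illustrative, not part of the module): score_file
# accepts any two dicts keyed by the same utterance ids, so plain unicode
# strings can stand in for Utterance objects here.
#
#     ref = {'0000001.wav': u'i want chinese food _noise_'}
#     tst = {'0000001.wav': u'i want a chinese food'}
#     corr, sub, dels, ins, wer, nwords = score_file(ref, tst)
#     # the _noise_ token is stripped before scoring, so nwords == 4.0;
#     # the single extra word "a" is one edit error, hence wer == 25.0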
def score(fn_reftext, fn_testtext, outfile=sys.stdout):
    """
    Computes ASR scores for a tested file against a reference file and writes
    a summary table.

    :param fn_reftext: the name of the file with reference transcriptions
    :param fn_testtext: the name of the file with tested (ASR output) transcriptions
    :param outfile: a writable file-like object the report is written to; defaults to sys.stdout
    """
    reftext = load_wavaskey(fn_reftext, Utterance)
    testtext = load_wavaskey(fn_testtext, Utterance)

    corr, sub, dels, ins, wer, nwords = score_file(reftext, testtext)

    m = """
    Please note that the scoring implicitly ignores all non-speech events.

    Ref: {r}
    Tst: {t}
    |==============================================================================================|
    |            | # Sentences  |  # Words  |   Corr   |   Sub    |   Del    |   Ins    |   Err    |
    |----------------------------------------------------------------------------------------------|
    | Sum/Avg    |{num_sents:^14}|{num_words:^11.0f}|{corr:^10.2f}|{sub:^10.2f}|{dels:^10.2f}|{ins:^10.2f}|{wer:^10.2f}|
    |==============================================================================================|
    """.format(r=fn_reftext, t=fn_testtext,
               num_sents=len(reftext), num_words=nwords,
               corr=corr, sub=sub, dels=dels, ins=ins, wer=wer)

    outfile.write(m)
    outfile.write("\n")
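
# Programmatic use (illustrative; the file names are hypothetical): the report
# can be captured instead of printed by passing any writable file-like object.
#
#     import codecs
#     with codecs.open('asr.score', 'w', 'utf-8') as f_out:
#         score('reference.trn', 'asr_output.trn', outfile=f_out)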
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""
        Compute ASR scores for ASR output against reference text.
        The scoring implicitly ignores non-speech events in the comparison.

        The structure of the files must be as follows:
          text_name    => text_content
          ----------------------------------------
          0000001.wav => I want Chinese food
          0000002.wav => Give me the phone number

        The texts from the test file and the reference file are matched
        based on the text_name.
        """)

    parser.add_argument('reftext', action="store",
                        help='a file with reference transcriptions')
    parser.add_argument('testtext', action="store",
                        help='a file with tested (ASR output) transcriptions')
    args = parser.parse_args()

    score(args.reftext, args.testtext)
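
# Example shell invocation (hypothetical file names), which prints the summary
# table to stdout:
#
#     python asrscore.py reference.trn asr_output.trn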