Source code for alex.applications.PublicTransportInfoCS.slu.prepare_hdc_sem_from_trn

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals
if __name__ == '__main__':
    import autopath

from alex.utils.config import as_project_path

from alex.components.asr.utterance import Utterance, UtteranceNBList


[docs]def hdc_slu(fn_input, constructor, fn_output): """ Use for transcription a HDC SLU model. :param fn_model: :param fn_input: :param constructor: :param fn_reference: :return: """ print "="*120 print "HDC SLU: ", fn_input, fn_output print "-"*120 from alex.components.slu.base import CategoryLabelDatabase from alex.applications.PublicTransportInfoCS.preprocessing import PTICSSLUPreprocessing from alex.applications.PublicTransportInfoCS.hdc_slu import PTICSHDCSLU from alex.corpustools.wavaskey import load_wavaskey, save_wavaskey from alex.corpustools.semscore import score cldb = CategoryLabelDatabase('../data/database.py') preprocessing = PTICSSLUPreprocessing(cldb) hdc_slu = PTICSHDCSLU(preprocessing, cfg = {'SLU': {PTICSHDCSLU: {'utt2da': as_project_path("applications/PublicTransportInfoCS/data/utt2da_dict.txt")}}}) test_utterances = load_wavaskey(fn_input, constructor, limit=1000000) parsed_das = {} for utt_key, utt in sorted(test_utterances.iteritems()): if isinstance(utt, Utterance): obs = {'utt': utt} elif isinstance(utt, UtteranceNBList): obs = {'utt_nbl': utt} else: raise BaseException('Unsupported observation type') print '-' * 120 print "Observation:" print utt_key, " ==> " print unicode(utt) da_confnet = hdc_slu.parse(obs, verbose=False) print "Conf net:" print unicode(da_confnet) da_confnet.prune() dah = da_confnet.get_best_da_hyp() print "1 best: " print unicode(dah) parsed_das[utt_key] = dah.da if 'CL_' in str(dah.da): print '*' * 120 print utt print dah.da hdc_slu.parse(obs, verbose=True) save_wavaskey(fn_output, parsed_das, trans = lambda da: '&'.join(sorted(unicode(da).split('&'))))
if __name__ == "__main__":
    # Run the HDC SLU over every data set; each './<name>.trn' input gets a
    # matching './<name>.trn.hdc.sem' output next to it.
    for name in ('all', 'train', 'dev', 'test', 'uniq'):
        fn_trn = './{0}.trn'.format(name)
        hdc_slu(fn_trn, Utterance, fn_trn + '.hdc.sem')