Source code for alex.components.slu.cued_da

#!/usr/bin/env python
# vim: set fileencoding=utf-8
# This code is PEP8-compliant. See http://www.python.org/dev/peps/pep-0008/.

from __future__ import unicode_literals

from alex.components.slu.da import DialogueAct, DialogueActItem
from alex.components.slu.exceptions import CuedDialogueActError
from alex.corpustools.wavaskey import load_wavaskey
from alex.utils.text import split_by, split_by_comma


[docs]def load_das(das_fname, limit=None, encoding='UTF-8'): return load_wavaskey(das_fname, CUEDDialogueAct, limit, encoding)
[docs]class CUEDSlot(object): def __init__(self, slot_str): self.parse(slot_str) def __unicode__(self): equals_sign = '!=' if self.negated else ('=' if self.negated is False else '') equals_val = ('{eq}"{val}"'.format(eq=equals_sign, val=self.value) if self.value else '') return self.name + equals_val
[docs] def parse(self, slot_str): # Try to find the not-equals relation. eq_idx = slot_str.find('!=') if eq_idx == -1: # Try to find the equals relation. eq_idx = slot_str.find('=') if eq_idx == -1: # If no equality is included, value is empty. self.name = slot_str self.negated = None self.value = '' return else: self.negated = False eq_eidx = eq_idx + 1 else: self.negated = True eq_eidx = eq_idx + 2 self.name = slot_str[:eq_idx] self.value = slot_str[eq_eidx:].strip('"') if self.value == 'value': raise ValueError('FIXME: Ignore slots for which no values were ' 'found in the database.')
[docs]class CUEDDialogueAct(DialogueAct): """CUED-style dialogue act"""
[docs] def parse(self, da_str): # Get the dialogue act type. first_par_idx = da_str.index("(") self.dat = da_str[:first_par_idx] if len(split_by_comma(da_str)) != 1: raise ValueError('Too many (or none -- too few) DAs in CUED DA ' 'representation.') slots_str = da_str[first_par_idx:].lower()[1:-1] if not slots_str: # no slots to process self._dais = list() else: # split slots_str slotstr_list = split_by(slots_str, splitter=',', quotes='"') slots = list() for slot_str in slotstr_list: try: slots.append(CUEDSlot(slot_str)) except ValueError: # Skip slots we cannot parse. pass if self.dat == 'inform': for slot in slots: if slot.negated: self._dais.append(DialogueActItem( 'deny', slot.name, slot.value)) else: self._dais.append(DialogueActItem( 'inform', slot.name, slot.value)) elif self.dat == 'request': for slot in slots: if slot.value: if slot.negated: self._dais.append(DialogueActItem( 'deny', slot.name, slot.value)) else: self._dais.append(DialogueActItem( 'inform', slot.name, slot.value)) else: self._dais.append(DialogueActItem( 'request', slot.name, slot.value)) elif self.dat == 'confirm': for slot in slots: if slot.name == 'name': self._dais.append(DialogueActItem( 'inform', slot.name, slot.value)) else: self._dais.append(DialogueActItem( 'confirm', slot.name, slot.value)) elif self.dat == 'select': # XXX We cannot represent DAIS with multiple slots as of now. # Therefore, the select DAT is split into two DAIs here. self._dais.append(DialogueActItem( 'select', slots[0].name, slots[0].value)) self._dais.append(DialogueActItem( 'select', slots[1].name, slots[1].value)) elif self.dat in ('silence', 'thankyou', 'ack', 'bye', 'hangup', 'repeat', 'help', 'restart', 'null'): self._dais.append(DialogueActItem(self.dat)) elif self.dat in ('hello', 'affirm', 'negate', 'reqalts', 'reqmore'): self._dais.append(DialogueActItem(self.dat)) for slot in self._dais: if slot.negated: self._dais.append(DialogueActItem( 'deny', slot.name, slot.value)) else: self._dais.append(DialogueActItem( 'inform', slot.name, slot.value)) elif self.dat == 'deny': self._dais.append(DialogueActItem( 'deny', slots[0].name, slots[0].value)) for slot in slots[1:]: if slot.negated: self._dais.append(DialogueActItem( 'deny', slot.name, slot.value)) else: self._dais.append(DialogueActItem( 'inform', slot.name, slot.value)) else: raise CuedDialogueActError( 'Unknown CUED DA type "{dat}" when parsing "{da_str}".' .format(dat=self.dat, da_str=da_str)) self._dais_sorted = False