Source code for alex.components.nlg.template

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals

import random
import itertools
import copy
import re

from alex.components.slu.da import DialogueAct
from alex.utils.config import load_as_module
from alex.components.nlg.tectotpl.core.run import Scenario
from alex.components.nlg.exceptions import TemplateNLGException
from alex.components.dm.ontology import Ontology


class AbstractTemplateNLG(object):
    """\
    Base abstract class for template-filling generators, providing the
    routines for template loading and selection.

    The generation itself (i.e. template filling) is left to the derived
    classes, which must implement fill_in_template() and backoff().

    Template lookup uses several strategies, tried in this order:

    1) match the input dialogue act exactly against the templates,
    2) if there is no exact match, look for a generic template (one where
       some slot values are replaced by placeholders),
    3) if there is no generic template, compose the utterance from
       templates covering parts of the dialogue act,
    4) if composing raises TemplateNLGException, call backoff().
    """

    def __init__(self, cfg):
        """\
        Constructor; saves a link to the configuration and selects the
        composing strategy.

        If cfg['NLG']['TemplateCompose'] is present, it selects the strategy:
        a value starting with 'greedy' (optionally containing a number, which
        is used as the lookahead) or 'single'. The value is compared
        case-insensitively with surrounding whitespace ignored. Any other
        value keeps the default (greedy, lookahead 5).
        """
        self.cfg = cfg
        # the last utterance is stored so that it can be repeated on request
        self.last_utterance = u""
        # start with empty template tables so that the object is usable
        # (and falls through to composing/backoff) before load_templates()
        self.templates = {}
        self.gtemplates = {}

        # setup the composing strategy (default: greedy, lookahead 5)
        self.compose_utterance = self.compose_utterance_greedy
        self.compose_greedy_lookahead = 5

        if 'NLG' in self.cfg and 'TemplateCompose' in self.cfg['NLG']:
            compose_setting = \
                self.cfg['NLG']['TemplateCompose'].lower().strip()
            if compose_setting.startswith('greedy'):
                self.compose_utterance = self.compose_utterance_greedy
                # the lookahead is optional; keep the default without it
                lookahead = re.search(r'\d+', compose_setting)
                if lookahead is not None:
                    self.compose_greedy_lookahead = int(lookahead.group(0))
            elif compose_setting == 'single':
                self.compose_utterance = self.compose_utterance_single

    def load_templates(self, file_name):
        """\
        Load templates from an external file, which is assumed to be a Python
        source defining the variable 'templates' as a dictionary containing
        stringified dialogue acts as keys and (lists of) templates as values.

        Fills self.templates (keyed by the normalized dialogue act string)
        and self.gtemplates (keyed by the generic form of the dialogue act,
        holding pairs of the original dialogue act and its templates).

        Raises TemplateNLGException if anything goes wrong while loading; in
        that case the previously loaded templates are left untouched.
        """
        try:
            raw_templates = load_as_module(file_name, force=True).templates
            # build into locals first so that a failure half-way through
            # does not leave partially filled tables behind
            templates = {}
            gtemplates = {}
            for key, tpl in raw_templates.iteritems():
                # normalize the key by parsing and re-stringifying it
                da = DialogueAct(key)
                templates[unicode(da)] = tpl
                gtemplates[unicode(self.get_generic_da(da))] = (da, tpl)
        except Exception as e:
            raise TemplateNLGException('No templates loaded from %s -- %s!' %
                                       (file_name, e))
        self.templates = templates
        self.gtemplates = gtemplates

    def get_generic_da(self, da):
        """\
        Return a copy of the given dialogue act in which every value that
        is a placeholder (i.e. starts with '{') is replaced by the canonical
        placeholder '{<slot name>}'.

        The input dialogue act is not modified.
        """
        da = copy.deepcopy(da)
        for dai in da:
            if dai.value and dai.value.startswith('{'):
                # placeholder value: rename it after the slot
                dai.value = "{%s}" % dai.name
        return da

    def get_generic_da_given_svs(self, da, svs):
        """\
        Return a copy of the given dialogue act in which the values of all
        items matching one of the given (slot, value) pairs are replaced by
        the placeholder '{<slot name>}'.

        The input dialogue act is not modified.
        """
        da = copy.deepcopy(da)
        for name, value in svs:
            for dai in da:
                if dai.name == name and dai.value == value:
                    # there is a match, make it generic
                    dai.value = "{%s}" % dai.name
        return da

    def match_generic_templates(self, da, svs):
        """\
        Find a matching template for a dialogue act using substitutions for
        slot values.

        Returns a pair: the selected template and the dialogue act stored
        with it in self.gtemplates (the template's own dialogue act, in which
        some slot values are placeholders).

        Raises TemplateNLGException if no generic template matches.
        """
        # Try to find increasingly generic templates, but limit the
        # complexity of the search: only substitute 1, all-but-one, or all
        # of the slot values.
        num_svs = len(svs)
        if num_svs == 0:
            rng = []
        elif num_svs == 1:
            rng = [1]
        elif num_svs == 2:
            rng = [1, 2]
        else:
            rng = [1, num_svs - 1, num_svs]

        for r in rng:
            for cmb in itertools.combinations(svs, r):
                generic_da = self.get_generic_da_given_svs(da, cmb)
                try:
                    gda, tpls = self.gtemplates[unicode(generic_da)]
                except KeyError:
                    continue
                return self.random_select(tpls), gda

        # nothing was found
        raise TemplateNLGException("No match with generic templates.")

    def random_select(self, tpl):
        """\
        Randomly select among alternative templates for generation.

        The alternatives are modeled by an embedded tuple/list structure:
        a tuple means "select one of the items", a list (allowed only as an
        item of a tuple) means "select from every item and join the results
        with spaces". The leaves must be strings.

        Supported forms:

        (1) { 'hello()': u"Hello", }

            Returns the "Hello" string.

        (2) { 'hello()': (u"Hello", u"Hi", ), }

            Returns one of the "Hello" or "Hi" strings.

        (3) { 'hello()': ( [ (u"Hello.", u"Hi.", ),
                             (u"How are you doing?", u"Welcome.", ),
                             u"Speak!",
                           ],
                           u"Hi my friend.",
                         ),
            }

            Returns one of the following strings:

                "Hello. How are you doing? Speak!"
                "Hi. How are you doing? Speak!"
                "Hello. Welcome. Speak!"
                "Hi. Welcome. Speak!"
                "Hi my friend."

        Raises TemplateNLGException for any other structure.
        """
        if isinstance(tpl, basestring):
            return tpl

        if isinstance(tpl, tuple):
            tpl_rc_or = random.choice(tpl)

            if isinstance(tpl_rc_or, basestring):
                return tpl_rc_or
            if isinstance(tpl_rc_or, list):
                tpl_rc_and = [self.random_select(t) for t in tpl_rc_or]
                # collapse double spaces created by joining the parts
                return u" ".join(tpl_rc_and).replace(u'  ', u' ')
            if isinstance(tpl_rc_or, tuple):
                raise TemplateNLGException("Unsupported generation type. " +
                                           "At this level, the template " +
                                           "cannot be a tuple: template = %s" %
                                           unicode(tpl))
            # neither a string, a list, nor a tuple: report it instead of
            # silently returning None
            raise TemplateNLGException("Unsupported generation type.")

        if isinstance(tpl, list):
            raise TemplateNLGException("Unsupported generation type. " +
                                       "At this level, the template cannot " +
                                       "be a list: template = %s" %
                                       unicode(tpl))

        raise TemplateNLGException("Unsupported generation type.")

    def match_and_fill_generic(self, da, svs):
        """\
        Match a generic template and fill in the proper values for the slots
        which were substituted by a generic value.

        Returns the output text with the proper values filled in if a generic
        template can be found; raises TemplateNLGException otherwise.
        """
        # find a generic template
        tpls, mda = self.match_generic_templates(da, svs)
        # random_select() returns a plain string unchanged, so this is safe
        # on the already selected template
        tpl = self.random_select(tpls)
        svs_mda = mda.get_slots_and_values()

        # Prepare the list of values to be filled in: where the template's
        # dialogue act has a placeholder, its name (without the braces) is
        # the key; otherwise the original slot name is used.
        svsx = []
        for (slot_orig, val_orig), (_, val_generic) in zip(svs, svs_mda):
            if val_generic.startswith('{'):
                svsx.append([val_generic[1:-1], val_orig])
            else:
                svsx.append([slot_orig, val_orig])

        # return with generic values filled in
        return self.fill_in_template(tpl, svsx)

    def generate(self, da):
        """\
        Generate the natural text output for the given dialogue act.

        First, try to find an exact match with no variables to fill in.
        Then try to find a relaxed match of a more generic template and
        fill in the actual values of the variables. If that fails, compose
        the utterance using the configured composing strategy and, as the
        last resort, call backoff().

        The dialogue act 'irepeat()' returns the last stored utterance.
        The result is stored as the last utterance if the dialogue act
        starts with inform, confirm, iconfirm, request or hello.
        """
        da_str = unicode(da)
        utterance = ''
        try:
            if da_str == 'irepeat()':
                # just return the last utterance
                utterance = self.last_utterance
            else:
                # try to return an exact match
                utterance = self.random_select(self.templates[da_str])
        except KeyError:
            # try to find a relaxed match
            svs = da.get_slots_and_values()
            try:
                utterance = self.match_and_fill_generic(da, svs)
            except TemplateNLGException:
                # try to compose the utterance from several templates
                try:
                    utterance = self.compose_utterance(da)
                except TemplateNLGException:
                    # nothing else to do, back off
                    utterance = self.backoff(da)

        if re.match(r'^(inform|i?confirm|request|hello)', da_str):
            self.last_utterance = utterance
        return utterance

    def compose_utterance_single(self, da):
        """\
        Compose an utterance from templates for single dialogue act items.

        Items for which no template is found are rendered by their string
        representation. Returns the composed utterance.
        """
        composed_utt = []
        # try to find a template for each single dialogue act item
        for dai in da:
            try:
                # look for an exact match
                dai_utt = self.random_select(self.templates[unicode(dai)])
            except KeyError:
                # try to find a relaxed match
                dax = DialogueAct()
                dax.append(dai)
                svsx = dax.get_slots_and_values()
                try:
                    dai_utt = self.match_and_fill_generic(dax, svsx)
                except TemplateNLGException:
                    dai_utt = unicode(dai)
            composed_utt.append(dai_utt)
        return ' '.join(composed_utt)

    def compose_utterance_greedy(self, da):
        """\
        Compose an utterance from templates by iteratively looking for
        the longest (up to self.compose_greedy_lookahead) matching
        sub-utterance at the current position in the dialogue act.

        Items for which no template is found are rendered by their string
        representation. Returns the composed utterance.
        """
        composed_utt = []
        sub_start = 0
        # pass through the dialogue act
        while sub_start < len(da):
            dax_utt = None
            dax_len = None
            # Never look past the end of the dialogue act: longer slices
            # would only repeat the lookup of the same remaining items.
            max_len = min(self.compose_greedy_lookahead, len(da) - sub_start)
            # greedily look for the longest template that will cover the
            # next dialogue act items (try longer templates first)
            for sub_len in xrange(max_len, 0, -1):
                dax = DialogueAct()
                dax.extend(da[sub_start:sub_start + sub_len])
                try:
                    # try to find an exact match
                    dax_utt = self.random_select(self.templates[unicode(dax)])
                    dax_len = sub_len
                    break
                except KeyError:
                    # try to find a relaxed match
                    svsx = dax.get_slots_and_values()
                    try:
                        dax_utt = self.match_and_fill_generic(dax, svsx)
                        dax_len = sub_len
                        break
                    except TemplateNLGException:
                        # nothing found: look for shorter templates
                        continue
            if dax_utt is None:
                # dummy backoff: output the item itself and move on
                dax_utt = unicode(da[sub_start])
                dax_len = 1
            composed_utt.append(dax_utt)
            sub_start += dax_len
        return ' '.join(composed_utt)

    def fill_in_template(self, tpl, svs):
        """\
        Fill in the given slot values of a dialogue act into the given
        template. This should be implemented in derived classes.
        """
        raise NotImplementedError()

    def backoff(self, da):
        """\
        Provide an alternative NLG output for a dialogue act which is not
        covered by the templates. This serves as a backoff solution.
        This should be implemented in derived classes.
        """
        raise NotImplementedError()
class TemplateNLG(AbstractTemplateNLG):
    """\
    A simple text-replacement template NLG implementation.

    Reads its settings from cfg['NLG']['Template']: the optional keys
    'model' (templates file), 'ontology' (ontology file), and
    'preprocessing_cls' / 'postprocessing_cls' (classes instantiated for
    pre- and post-processing). backoff() is not overridden in this class.
    """

    def __init__(self, cfg):
        """\
        Load the templates and the ontology and set up the optional
        pre- and post-processing objects according to the configuration.
        """
        super(TemplateNLG, self).__init__(cfg)
        tpl_cfg = self.cfg['NLG']['Template']

        # templates are only loaded if a model file is configured
        if 'model' in tpl_cfg:
            self.load_templates(tpl_cfg['model'])

        # the ontology object always exists, but stays unloaded without
        # an 'ontology' setting
        self.ontology = Ontology()
        if 'ontology' in tpl_cfg:
            self.ontology.load(tpl_cfg['ontology'])

        # pre- and post-processing are optional
        self.preprocessing = None
        self.postprocessing = None
        if 'preprocessing_cls' in tpl_cfg:
            preprocessing_cls = tpl_cfg['preprocessing_cls']
            self.preprocessing = preprocessing_cls(self.ontology)
        if 'postprocessing_cls' in tpl_cfg:
            postprocessing_cls = tpl_cfg['postprocessing_cls']
            self.postprocessing = postprocessing_cls()

    def fill_in_template(self, tpl, svs):
        """\
        Fill the slot values into the template using str.format().

        The (slot, value) pairs are turned into a dictionary; if
        preprocessing is set, it may replace both the template and the
        values before filling. If postprocessing is set, its result for the
        filled text is returned; otherwise the filled text itself.
        """
        values = dict(svs)
        if self.preprocessing is not None:
            tpl, values = self.preprocessing.preprocess(tpl, values)

        filled_text = tpl.format(**values)

        if self.postprocessing is None:
            return filled_text
        return self.postprocessing.postprocess(filled_text)
class TemplateNLGPreprocessing(object):
    """Base class for template NLG preprocessing, handles preprocessing of
    the values to be filled into a template.

    This base class provides no functionality, it just defines an interface
    for derived language-specific and/or domain-specific classes.
    """

    def __init__(self, ontology):
        """Store the ontology for use by derived classes."""
        self.ontology = ontology

    def preprocess(self, template, svs_dict):
        """Preprocess a template and the values to be filled into it.

        The signature follows the way TemplateNLG.fill_in_template() calls
        this method: it passes the template and the slot-value dictionary
        and expects a (template, svs_dict) pair in return.

        This should be implemented in derived classes.
        """
        raise NotImplementedError()
class TemplateNLGPostprocessing(object):
    """Interface for template NLG postprocessing, i.e. for adjusting the
    text that results from filling in a template.

    Nothing is implemented here; language-specific and/or domain-specific
    classes are expected to derive from this class and override
    postprocess().
    """

    def __init__(self):
        """Nothing to set up in the base class."""

    def postprocess(self, nlg_text):
        """Postprocess the generated text; must be overridden in derived
        classes."""
        raise NotImplementedError()
class TectoTemplateNLG(AbstractTemplateNLG):
    """\
    Template generation using tecto-trees and NLG rules.

    Reads its settings from cfg['NLG']['TectoTemplate'], which must contain
    the keys 'model', 'scenario' and 'data_dir'.
    """

    def __init__(self, cfg):
        """\
        Check the configuration, then load the templates and the NLG rules.

        Raises TemplateNLGException if the configuration section or any of
        the required settings is missing.
        """
        super(TectoTemplateNLG, self).__init__(cfg)

        # the configuration section itself must be present
        if 'NLG' not in self.cfg or 'TectoTemplate' not in self.cfg['NLG']:
            raise TemplateNLGException('No configuration found!')
        tecto_cfg = self.cfg['NLG']['TectoTemplate']

        # all of these settings are required
        for required_key in ('model', 'scenario', 'data_dir'):
            if required_key not in tecto_cfg:
                raise TemplateNLGException('NLG scenario, data directory ' +
                                           'and templates must be defined!')

        # templates first, then the NLG system built from the same settings
        self.load_templates(tecto_cfg['model'])
        self.nlg_rules = Scenario(tecto_cfg)
        self.nlg_rules.load_blocks()

    def fill_in_template(self, tpl, svs):
        """\
        Fill the slot values into the tecto-template using str.format() and
        return the result of applying the NLG rules to the filled template.
        """
        tpl_text = unicode(tpl)
        slot_values = dict(svs)
        filled_tpl = tpl_text.format(**slot_values)
        return self.nlg_rules.apply_to(filled_tpl)