Source code for alex.components.dm.dddstate

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import unicode_literals
from collections import defaultdict
from copy import deepcopy

from alex.components.dm.base import DiscreteValue, DialogueState
from alex.components.dm.exceptions import DeterministicDiscriminativeDialogueStateException
from alex.components.slu.da import DialogueAct, DialogueActItem, DialogueActConfusionNetwork


class D3DiscreteValue(DiscreteValue):
    """This is a simple implementation of a probabilistic slot. It serves for the case of simple MDP approach or
    UFAL DSTC 1.0-like dialogue state deterministic update.
    """

    def __init__(self, values={}, name="", desc=""):
        self.name = name
        self.desc = desc

        if values:
            self.values = defaultdict(float, values)
        else:
            self.values = defaultdict(float, {'none': 1.0, })

    def __str__(self):
        """Return the ASCII encoded rendering of the slot.

        Characters that cannot be encoded are substituted by the ``'replace'``
        error handler.
        """
        text = unicode(self)
        return text.encode('ascii', 'replace')

    def __repr__(self):
        return repr(self.values)

    def __unicode__(self):
        """Return the unicode form of the list produced by ``items()``."""
        ranked = self.items()
        return unicode(ranked)

    def __getitem__(self, value):
        return self.values[value]

[docs]    def get(self, value, default_prob):
        return self.values.get(value, default_prob)

    def __iter__(self):
        return self.values.__iter__()

[docs]    def items(self):
        return sorted(self.values.items(), key=lambda x: x[1], reverse=True)

[docs]    def reset(self):
        self.values = defaultdict(float, {'none': 1.0, })

[docs]    def set(self, value, prob=None):
        """This function sets a probability of a specific value.

        *WARNING* This can lead to un-normalised probabilities.
        """
        if isinstance(value, dict) and not prob:
            # rewrite the complete set of values
            self.values = defaultdict(float, value)
        elif isinstance(value, basestring) and isinstance(prob, float):
            self.values[value] = prob
        else:
            raise DeterministicDiscriminativeDialogueStateException('Unsupported D3DiscreteValue set value.')

[docs]    def normalise(self):
        """This function normalises the sum of all probabilities to 1.0"""

        s = sum([v for v in self.values.itervalues()])
        if s < 1e-9:
            # this is a backup solution with unknown consequences
            n = len(self.values)
            for value in self.values:
                self.values[value] = 1.0 / n
        else:
            for value in self.values:
                self.values[value] /= s

[docs]    def scale(self, weight):
        """This function scales each probability by the weigh.t"""

        for value in self.values:
            self.values[value] *= weight

[docs]    def add(self, value, prob):
        """This function adds probability to the given value."""

        self.values[value] += prob

[docs]    def distribute(self, value, dist_prob):
        """This function distributes a portion of probability mass assigned to the ``value`` to other values
         with a weight ``prob``."""

        value_prob = self.values[value]
        non_value_prob = sum([p for v, p in self.values.iteritems() if v != value])

        # first deny the value proportionally to the denied probability
        self.set(value, (1.0 - dist_prob) * value_prob)

        # second redistribute the denied probability mass to to other values proportionally to their own probability
        # if all other values have probability close to zero, then distribute the probability mass uniformly
        for v in self.values:
            if v != value:
                if non_value_prob > 1e-9:
                    self.add(v, dist_prob * value_prob * self.values[v] / non_value_prob)
                else:
                    self.add(v, dist_prob * value_prob * 1.0 / (len(self.values) - 1))

[docs]    def mph(self):
        """The function returns the most probable value and its probability
        in a tuple.
        """

        max_prob = -1.0
        max_value = None
        for value, prob in self.values.iteritems():
            if prob > max_prob or \
               prob == max_prob and (max_value == 'none' or max_value is None):

                max_prob = prob
                max_value = value

        return (max_prob, max_value)

[docs]    def tmphs(self):
        """This function returns two most probable values and their probabilities. If there are
        multiple values with the same probability, it prefers non-'none' values.

        The function returns a tuple consisting of two tuples (probability, value).

        :rtype: tuple
        """

        max_prob1 = -1.0
        max_value1 = None
        max_prob2 = -1.0
        max_value2 = None

        for value, prob in self.values.iteritems():
            if prob > max_prob1:
                max_prob2, max_prob1 = max_prob1, prob
                max_value2, max_value1 = max_value1, value
            elif prob > max_prob2:
                max_prob2 = prob
                max_value2 = value

        return ((max_prob1, max_value1), (max_prob2, max_value2))

[docs]    def test(self, test_value=None, test_prob=None, neg_val=False, neg_prob=False):
        """ Test the most probable value of the slot whether:

        1. the most probable value is equal to test_value and
        2. its probability is larger the test_prob

        Each of the above tests can be negated when neg_* is set True.

        :param test_value:
        :param test_prob:
        :param neg_val:
        :param neg_prob:
        :return:
        """
        prob, value = self.mph()

        if not neg_val:
            if test_value and value != test_value:
                return False
        else:
            if test_value and value == test_value:
                return False

        if not neg_prob:
            if test_prob and prob < test_prob:
                return False
        else:
            if test_prob and prob >= test_prob:
                return False

        return True

[docs]    def explain(self, full=False, linear_prob=True):
        """This function prints the values and their probabilities for this node.
        """
        pass


class DeterministicDiscriminativeDialogueState(DialogueState):
    """This is a trivial implementation of a dialogue state and its update.

    It uses only the best dialogue act from the input.
    Based on this it updates its state.
    """
    slots = None

    def __init__(self, cfg, ontology):
        """Set up the turn bookkeeping, read the configuration and start from an empty state.

        :param cfg: configuration; ``DM/basic/debug``, the ``type`` entry of
            ``DM/DeterministicDiscriminativeDialogueState`` and the two
            ``Logging`` loggers are read from it
        :param ontology: ontology passed on to the parent class
        """
        super(DeterministicDiscriminativeDialogueState, self).__init__(cfg, ontology)

        # turn history
        self.turns = []
        self.turn_number = 0

        # configuration of the state tracker
        self.debug = cfg.getpath('DM/basic/debug', False)
        tracker_cfg = cfg['DM']['DeterministicDiscriminativeDialogueState']
        self.type = tracker_cfg['type']

        # loggers
        logging_cfg = cfg['Logging']
        self.session_logger = logging_cfg['session_logger']
        self.system_logger = logging_cfg['system_logger']

        self.restart()

    def __unicode__(self):
        """Render the content of the dialogue state as human readable text.

        The ``ludait`` slot comes first, followed by the remaining
        :class:`D3DiscreteValue` slots, then the ``lta_``, ``rh_``, ``ch_`` and
        ``sh_`` groups and finally the slots that are not
        :class:`D3DiscreteValue` instances. Groups are separated by empty lines.
        """
        row = "{slot:20} = {value}"
        grouped_prefixes = ('lta_', 'rh_', 'ch_', 'sh_')
        special_prefixes = ('ch_', 'sh_', 'rh_', 'lta_', "ludait")

        lines = ["D3State - Dialogue state content:", ""]
        lines.append(row.format(slot="ludait", value=unicode(self.slots["ludait"])))

        names = sorted(self.slots)

        # ordinary probabilistic slots
        for name in names:
            if name.startswith(special_prefixes):
                continue
            if isinstance(self.slots[name], D3DiscreteValue):
                lines.append(row.format(slot=name, value=unicode(self.slots[name])))
        lines.append("")

        # helper slots, one group per prefix
        for prefix in grouped_prefixes:
            for name in names:
                if name.startswith(prefix):
                    lines.append(row.format(slot=name, value=unicode(self.slots[name])))
            lines.append("")

        # slots which do not hold a probability distribution
        for name in names:
            if not isinstance(self.slots[name], D3DiscreteValue):
                lines.append(row.format(slot=name, value=unicode(self.slots[name])))

        lines.append("")

        return '\n'.join(lines)

    def __getitem__(self, key):
        return self.slots[key]

    def __delitem__(self, key):
        del self.slots[key]

    def __setitem__(self, key, value):
        self.slots[key] = value

    def __contains__(self, key):
        return key in self.slots

    def __iter__(self):
        return iter(self.slots)

[docs]    def log_state(self):
        """Log the state using the the session logger."""

        state = []

        state.append(("ludait", self.slots["ludait"]))

        for name in [sl for sl in sorted(self.slots) if not sl.startswith('ch_') and
                not sl.startswith('sh_') and not sl.startswith('rh_') and
                not sl.startswith("ludait")]:
            state.append((name, self.slots[name]))

        for name in [sl for sl in sorted(self.slots) if sl.startswith('rh_')]:
            state.append((name, self.slots[name]))

        for name in [sl for sl in sorted(self.slots) if sl.startswith('ch_')]:
            state.append((name, self.slots[name]))

        for name in [sl for sl in sorted(self.slots) if sl.startswith('sh_')]:
            state.append((name, self.slots[name]))

        self.session_logger.dialogue_state("system", [state, ])

[docs]    def restart(self):
        """Reinitialise the dialogue state so that the dialogue manager
        can start from scratch.

        Nevertheless, remember the turn history.
        """
        # initialize slots
        self.slots = defaultdict(D3DiscreteValue)
        # initialize other variables
        if 'variables' in self.ontology:
            for var_name in self.ontology['variables']:
                setattr(self, var_name, None)

[docs]    def update(self, user_da, system_da):
        """Interface for the dialogue act update.

        It can process dialogue act, dialogue act N best lists, or dialogue act
        confusion networks.

        :param user_da: Dialogue act to process.
        :type user_da: :class:`~alex.components.slu.da.DialogueAct`,
            :class:`~alex.components.slu.da.DialogueActNBList` or
            :class:`~alex.components.slu.da.DialogueActConfusionNetwork`
        :param system_da: Last system dialogue act.

        """

        if system_da == "silence()":
            # use the last non-silence dialogue act
            # if the system said nothing the last time, lets assume that the
            # user acts in the context of the previous dialogue act
            system_da = self.last_system_da
        else:
            # save the last non-silence dialogue act
            self.last_system_da = system_da

        if not isinstance(user_da, DialogueActConfusionNetwork):
            raise DeterministicDiscriminativeDialogueStateException("Unsupported input for the dialogue manager.")

        if self.debug:
            self.system_logger.debug('D3State Dialogue Act in:\n%s' % user_da)

        user_da = self._resolve_user_da_in_context(user_da, system_da)

        if self.debug:
            self.system_logger.debug('Context Resolution - Dialogue Act: \n%s' % user_da)

        user_da = self._infer_last_talked_about_slots(user_da, system_da)

        if self.debug:
            self.system_logger.debug('Last Talked About Inference - Dialogue Act: \n%s' % user_da)

        # perform the state update
        self._update_state(user_da, system_da)
        self.turn_number += 1

        # store the result
        self.turns.append([deepcopy(user_da), deepcopy(system_da), deepcopy(self.slots)])

        # print the dialogue state if requested
        if self.debug:
            self.system_logger.debug(unicode(self))

    def _resolve_user_da_in_context(self, user_da, system_da):
        """Interpret context dependent user dialogue act items with respect to the system dialogue act.

        For every pair of a system item and a user item, an extra user item is
        derived when:

        * the user affirms / negates a system ``confirm`` -- ``inform`` /
          ``deny`` of the confirmed slot and value,
        * the user answers a system ``request`` by informing about a slot that
          the ``context_resolution`` table of the ontology relates to the
          requested slot -- ``inform`` of the requested slot,
        * the user affirms / negates a ``request`` for a binary slot --
          ``inform`` of ``true`` / ``false``.

        The derived items carry the probability of the user item they come
        from. They are merged (``combine='max'``) into a deep copy of
        ``user_da`` and that copy is returned.
        """
        resolved_da = deepcopy(user_da)
        derived_da = DialogueActConfusionNetwork()

        if isinstance(system_da, DialogueAct):
            for system_dai in system_da:
                for prob, user_dai in user_da:
                    new_user_dai = None
                    system_act = system_dai.dat

                    if system_act == "confirm":
                        if user_dai.dat == "affirm":
                            new_user_dai = DialogueActItem("inform", system_dai.name, system_dai.value)
                        elif user_dai.dat == "negate":
                            new_user_dai = DialogueActItem("deny", system_dai.name, system_dai.value)

                    elif system_act == "request":
                        if user_dai.dat == "inform":
                            resolution = self.ontology['context_resolution']
                            if user_dai.name in resolution and system_dai.name in resolution[user_dai.name]:
                                if user_dai.value == "dontcare":
                                    new_user_dai = DialogueActItem("inform", system_dai.name, system_dai.value)
                                elif self.ontology.slot_has_value(system_dai.name, user_dai.value):
                                    new_user_dai = DialogueActItem("inform", system_dai.name, user_dai.value)

                        elif system_dai.name != "":
                            # a yes/no answer to a request for a binary slot
                            if user_dai.dat == "affirm" and self.ontology.slot_is_binary(system_dai.name):
                                new_user_dai = DialogueActItem("inform", system_dai.name, "true")
                            elif user_dai.dat == "negate" and self.ontology.slot_is_binary(system_dai.name):
                                new_user_dai = DialogueActItem("inform", system_dai.name, "false")

                    if new_user_dai:
                        derived_da.add(prob, new_user_dai)

        resolved_da.merge(derived_da, combine='max')

        return resolved_da

    def _infer_last_talked_about_slots(self, user_da, system_da):
        """Add dialogue act items supporting inference of the last slots the user talked about.

        For every user item the ontology is asked (``last_talked_about``) for
        ``(slot, value)`` pairs; each pair becomes an ``inform`` item carrying
        the probability of the user item. Pairs that compete for the same slot
        are resolved as described in the body. The surviving items are merged
        (``combine='max'``) into a deep copy of ``user_da``, which is returned.

        ``system_da`` is not used by this method.
        """
        old_user_da = deepcopy(user_da)
        new_user_da = DialogueActConfusionNetwork()

        colliding_slots = {}
        done_slots = set()

        for prob, user_dai in user_da:
            lta_tsvs = self.ontology.last_talked_about(user_dai.dat, user_dai.name, user_dai.value)

            new_user_dais = []
            for name, value in lta_tsvs:
                new_user_dais.append(DialogueActItem("inform", name, value))
                if name in done_slots:
                    # the slot was proposed before: remember the competing value
                    colliding_slots.setdefault(name, set()).add(value)
                else:
                    done_slots.add(name)

            for nudai in new_user_dais:
                if nudai not in new_user_da:
                    new_user_da.add(prob, nudai)

        # In case of collisions, prefer the current last talked about values if it is one of the colliding values.
        # If there is a collision and the current last talked about value is not among the colliding values, do not
        # consider the colliding DA's at all.
        invalid_das = set()
        for prob, da in set(new_user_da):
            if da.name not in colliding_slots:
                continue

            # ``mph()`` is used because D3DiscreteValue, the default slot type
            # of this state, defines ``mph()`` but no ``mpv()`` in this module;
            # the second element of the tuple is the most probable value.
            current_value = self[da.name].mph()[1]

            if current_value in colliding_slots[da.name]:
                if not da.value == current_value:
                    invalid_das.add(da)
            else:
                invalid_das.add(da)

        for invalid_da in invalid_das:
            new_user_da.remove(invalid_da)

        old_user_da.merge(new_user_da, combine='max')

        return old_user_da

    def _update_state(self, user_da, system_da):
        """Record the information provided by the system and by the user in the slots.

        System ``inform`` / ``iconfirm`` items mark the ``rh_``, ``ch_`` and
        ``sh_`` companions of the slot as ``system-informed``. User items are
        then applied from the least to the most probable one. In the ``"MDP"``
        mode only items with probability of at least 0.5 are used and they
        overwrite the previous content of the slot; otherwise the previous
        content is scaled by ``1 - prob`` before ``prob`` is added.
        """
        # A state update means the silence time since the last user voice activity is 0.0,
        # unless the update only reports the silence time, which is handled below.
        # - this slot is not probabilistic
        self.slots['silence_time'] = 0.0

        # first process the system dialogue act since it was produced "earlier"
        if isinstance(system_da, DialogueAct):
            for dai in system_da:
                if dai.dat in ("inform", "iconfirm"):
                    # set that the system already informed about the slot
                    for prefix in ("rh_", "ch_", "sh_"):
                        self.slots[prefix + dai.name].set({"system-informed": 1.0, })

        # dialogue act types which are recorded only in the "ludait" slot
        ludait_acts = ("ack", "apology", "bye", "hangup", "hello", "help", "null", "other",
                       "repeat", "reqalts", "reqmore", "restart", "thankyou")

        # now process the user dialogue act
        # processing the low probability DAIs first emphasizes the dialogue acts with high probability
        for prob, dai in sorted(user_da.items()):
            if self.type == "MDP":
                if not prob >= 0.5:
                    continue
                weight = 0.0
            else:
                weight = 1.0 - prob

            act = dai.dat

            if act == "inform":
                if dai.name:
                    slot = self.slots[dai.name]
                    slot.scale(weight)
                    slot.add(dai.value, prob)

            elif act == "deny":
                # handle true and false values because we know their opposite values
                opposite = None
                if dai.value == "true":
                    opposite = 'false'
                elif dai.value == "false":
                    opposite = 'true'

                if opposite is not None and self.ontology.slot_is_binary(dai.name):
                    slot = self.slots[dai.name]
                    slot.scale(weight)
                    slot.add(opposite, prob)
                else:
                    self.slots[dai.name].distribute(dai.value, prob)

            elif act == "request":
                slot = self.slots["rh_" + dai.name]
                slot.scale(weight)
                slot.add("user-requested", prob)

            elif act == "confirm":
                slot = self.slots["ch_" + dai.name]
                slot.scale(weight)
                slot.add(dai.value, prob)

            elif act == "select":
                slot = self.slots["sh_" + dai.name]
                slot.scale(weight)
                slot.add(dai.value, prob)

            elif act in ludait_acts or act == "silence":
                slot = self.slots["ludait"]
                slot.scale(weight)
                slot.add(act, prob)

                if act == "silence" and dai.name == "time":
                    self.slots['silence_time'] = float(dai.value)

        #print "#52", self.slots

[docs]    def get_slots_being_requested(self, req_prob=0.8):
        """Return all slots which are currently being requested by the user along with the correct value."""
        requested_slots = {}

        for slot in self.slots:
            if isinstance(self.slots[slot], D3DiscreteValue) and slot.startswith("rh_"):
                if self.slots[slot]["user-requested"] > req_prob:
                    if slot[3:] in self.slots:
                        requested_slots[slot[3:]] = self.slots[slot[3:]]
                    else:
                        requested_slots[slot[3:]] = "none"

        return requested_slots

[docs]    def get_slots_being_confirmed(self, conf_prob=0.8):
        """Return all slots which are currently being confirmed by the user along with the value being confirmed."""
        confirmed_slots = {}

        for slot in self.slots:
            if isinstance(self.slots[slot], D3DiscreteValue) and slot.startswith("ch_"):
                prob, value = self.slots[slot].mph()
                if value not in ['none', 'system-informed', None] and prob > conf_prob:
                    confirmed_slots[slot[3:]] = self.slots[slot]

        return confirmed_slots

[docs]    def get_slots_being_noninformed(self, noninf_prob=0.8):
        """Return all slots provided by the user and the system has not informed about them yet along with
        the value of the slot.

        This will not detect a change in a goal. For example::

            U: I want a Chinese restaurant.
            S: Ok, you want a Chinese restaurant. What price range you have in mind?
            U: Well, I would rather want an Italian Restaurant.
            S: Ok, no problem. You want an Italian restaurant. What price range you have in mind?

        Because the system informed about the food type and stored "system-informed", then
        we will not notice that we confirmed a different food type.
        """
        noninformed_slots = {}

        for slot in self.slots:
            if any([1 for x in ['rh_', 'ch_', 'sh_', "ludait"] if slot.startswith(x)]):
                continue
            if not isinstance(self.slots[slot], D3DiscreteValue):
                continue

            # test whether the slot is not currently requested
            if "rh_" + slot not in self.slots or self.slots["rh_" + slot]["none"] > 0.999:
                prob, value = self.slots[slot].mph()
                # test that the nin informed value is an interesting value
                if value not in ['none', None] and prob > noninf_prob:
                    noninformed_slots[slot] = self.slots[slot]

        return noninformed_slots

[docs]    def get_accepted_slots(self, acc_prob):
        """Returns all slots which have a probability of a non "none" value larger then some threshold.
        """
        accepted_slots = {}

        for slot in self.slots:
            if any([1 for x in ['rh_', 'ch_', 'sh_', "ludait"] if slot.startswith(x)]):
                continue
            if not isinstance(self.slots[slot], D3DiscreteValue):
                continue

            prob, value = self.slots[slot].mph()
            if value not in ['none', 'system-informed', None] and prob >= acc_prob:
                accepted_slots[slot] = self.slots[slot]

        return accepted_slots

[docs]    def get_slots_tobe_confirmed(self, min_prob, max_prob):
        """Returns all slots which have a probability of a non "none" value larger then some threshold and still not so
        large to be considered as accepted.
        """
        tobe_confirmed_slots = {}

        for slot in self.slots:
            if any([1 for x in ['rh_', 'ch_', 'sh_', "ludait"] if slot.startswith(x)]):
                continue
            if not isinstance(self.slots[slot], D3DiscreteValue):
                continue

            prob, value = self.slots[slot].mph()
            if value not in ['none', 'system-informed', None] and min_prob <= prob and prob < max_prob:
                tobe_confirmed_slots[slot] = self.slots[slot]

        return tobe_confirmed_slots

[docs]    def get_slots_tobe_selected(self, sel_prob):
        """Returns all slots which have a probability of the two most probable non "none" value larger then some threshold.
        """
        tobe_selected_slots = {}

        for slot in self.slots:
            if any([1 for x in ['rh_', 'ch_', 'sh_', "ludait"] if slot.startswith(x)]):
                continue
            if not isinstance(self.slots[slot], D3DiscreteValue):
                continue

            (prob1, value1), (prob2, value2) = self.slots[slot].tmphs()

            if value1 not in ['none', 'system-informed', None] and prob1 > sel_prob and \
                value2 not in ['none', 'system-informed', None] and prob2 > sel_prob:
                tobe_selected_slots[slot] = self.slots[slot]

        return tobe_selected_slots

[docs]    def get_changed_slots(self, cha_prob):
        """Returns all slots that has changed from the previous turn. Because the change is determined by change in
        probability for a particular value, there may be very small changes. Therefore, this will only report changes
        for values with a probability larger than the given threshold.

        :param cha_prob: minimum current probability of the most probable hypothesis to be reported
        :rtype: dict
        """
        changed_slots = {}

        # compare the accepted slots from the previous and the current turn
        if len(self.turns) >= 2:
            cur_slots = self.turns[-1][2]
            prev_slots = self.turns[-2][2]

            for slot in cur_slots:
                if any([1 for x in ['rh_', 'ch_', 'sh_', "ludait"] if slot.startswith(x)]):
                    continue

                if not isinstance(cur_slots[slot], D3DiscreteValue):
                    continue

                cur_prob, cur_value = cur_slots[slot].mph()
                prev_prob, prev_value = prev_slots[slot].mph()

                if cur_value not in ['none', 'system-informed', None] and cur_prob > cha_prob and \
                    prev_value not in ['system-informed', None] and \
                    cur_value != prev_value:
                    #prev_prob > cha_prob and \ # only the current value must be accepted
                    changed_slots[slot] = cur_slots[slot]

            return changed_slots
        elif len(self.turns) == 1:
            # after the first turn all accepted slots are effectively changed
            return self.get_accepted_slots(cha_prob)
        else:
            return {}

[docs]    def has_state_changed(self, cha_prob):
        """Returns a boolean indicating whether the dialogue state changed significantly
        since the last turn. True is returned if at least one slot has at least one value
        whose probability has changed at least by the given threshold since last time.

        :param cha_prob: minimum probability change to be reported
        :rtype: Boolean
        """
        if len(self.turns) >= 2:
            cur_slots = self.turns[-1][2]
            prev_slots = self.turns[-2][2]

            for slot in cur_slots:
                if not isinstance(cur_slots[slot], D3DiscreteValue):
                    continue

                for value, cur_prob in cur_slots[slot].items():
                    if value in ['none', 'system-informed', None]:
                        continue
                    prev_prob = prev_slots[slot].get(value, 0.0)
                    if abs(cur_prob - prev_prob) > cha_prob:
                        return True
        elif len(self.turns) == 1:
            slots = self.turns[-1][2]
            for slot in slots:
                if not isinstance(slots[slot], D3DiscreteValue):
                    continue
                prob, value = slots[slot].mph()
                if value in ['none', 'system-informed', None]:
                    continue
                if prob > cha_prob:
                    return True
            pass
        return False