Source code for alex.applications.PublicTransportInfoEN.preprocessing

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# This code is PEP8-compliant. See http://www.python.org/dev/peps/pep-0008.

from __future__ import unicode_literals

import re

from alex.components.slu.base import SLUPreprocessing
from alex.components.nlg.template import TemplateNLGPreprocessing
from alex.components.nlg.tools.en import word_for_number
import site_preprocessing
import string


[docs]class PTIENSLUPreprocessing(SLUPreprocessing): """ Extends SLUPreprocessing for some transformations: """ def __init__(self, *args, **kwargs): super(PTIENSLUPreprocessing, self).__init__(*args, **kwargs) num_norms = [] for num in xrange(60): num_norms.append(([unicode(num)], word_for_number(num).split())) self.text_normalization_mapping += num_norms # map ordinal numbers for streets and avenues 1st - 200th ord_norms = [] suffixes = ['th', 'st', 'nd', 'rd', 'th', 'th', 'th', 'th', 'th', 'th'] for ord in xrange(1, 200): suffix = suffixes[ord % 10] if ord < 10 or ord > 20 else 'th' ord_form = str(ord) + suffix ord_norms.append(([unicode(ord_form)], word_for_number(ord, ordinary=True).split())) self.text_normalization_mapping += ord_norms self.text_normalization_mapping += [ (["i'm"], ['i am']), (["im"], ['i am']), (["it'll"], ['it will']), (["i'll"], ['i will']), (["that's"], ['that is']), (["don't"], ['do not']), (["doesn't"], ['does not']), ]
[docs] def normalise_utterance(self, utterance): utterance = super(PTIENSLUPreprocessing, self).normalise_utterance(utterance) return utterance
[docs]class PTIENNLGPreprocessing(TemplateNLGPreprocessing): """Template NLG preprocessing routines for English public transport information. This serves for spelling out relative and absolute time expressions, """ def __init__(self, ontology): super(PTIENNLGPreprocessing, self).__init__(ontology) # keep track of relative and absolute time slots self.rel_time_slots = set() self.abs_time_slots = set() # keep track of temperature and temperature interval slots self.temp_slots = set() self.temp_int_slots = set() # keep track of translated slots self.translated_slots = set() self.translations = {} # load their lists from the ontology if 'slot_attributes' in self.ontology: for slot in self.ontology['slot_attributes']: if 'relative_time' in self.ontology['slot_attributes'][slot]: self.rel_time_slots.add(slot) elif 'absolute_time' in self.ontology['slot_attributes'][slot]: self.abs_time_slots.add(slot) elif 'temperature' in self.ontology['slot_attributes'][slot]: self.temp_slots.add(slot) elif 'temperature_int' in self.ontology['slot_attributes'][slot]: self.temp_int_slots.add(slot) # load translations from the ontology if 'value_translation' in self.ontology: self.translations = self.ontology['value_translation'] for slot in self.ontology['value_translation']: self.translated_slots.add(slot)
[docs] def preprocess(self, template, svs_dict): """Preprocess values to be filled into an NLG template. Spells out temperature and time expressions and translates some of the values to English. :param svs_dict: Slot-value dictionary :return: The same dictionary, with modified values """ # regular changes to slot values for slot_id, val in svs_dict.iteritems(): # remove number suffixes from some slot IDs to produce actual slot names slot_name = slot_id[:-1] if slot_id[-1] in string.digits else slot_id # spell out time expressions if slot_name in self.rel_time_slots: svs_dict[slot_id] = self.spell_time_relative(val) elif slot_name in self.abs_time_slots: svs_dict[slot_id] = self.spell_time_absolute(val) # spell out temperature expressions elif slot_name in self.temp_slots: svs_dict[slot_id] = self.spell_temperature(val, interval=False) elif slot_name in self.temp_int_slots: svs_dict[slot_id] = self.spell_temperature(val, interval=True) # translate some slot values (default to untranslated) elif slot_name in self.translated_slots: svs_dict[slot_id] = self.translations[slot_name].get(val, val) elif slot_name in ['from_street', 'to_street', 'from_stop', 'to_stop', 'headsign', ]: svs_dict[slot_id] = site_preprocessing.expand_stop(val.lower()) # reflect changes to slot values stored in the template slot_modif = {} def store_repl(match): slot, modif = match.groups() slot_modif[slot] = modif return '{' + slot + '}' template = re.sub(r'\{([^}/]+)/([^}]+)\}', store_repl, template) for slot, modif in slot_modif.iteritems(): if modif == 'Cap1': svs_dict[slot] = svs_dict[slot][0].upper() + svs_dict[slot][1:] return template, svs_dict
[docs] def spell_time_relative(self, time): """\ Convert a time expression into words. :param time: Numerical time value in a string, e.g. '8:05' :return: time string with all numerals written out as words 0:15 will generate '15 minutes' and not '0 hours and \ 15 minutes'. """ if ':' not in time: # 'now' and similar return time hours, mins = map(int, time.split(':')) time_str = [] if hours is not 0: hr_id = 'hour' if hours is 1 else 'hours' hours = word_for_number(hours) time_str.extend((hours, hr_id)) if mins == 0 and hours != 0: return ' '.join(time_str) if time_str: time_str.append('and') min_id = 'minute' if mins == 1 else 'minutes' mins = word_for_number(mins) return ' '.join(time_str + [mins, min_id])
[docs] def spell_time_absolute(self, time): """\ Convert a time expression into words. :param time: The 12hr numerical time value in a string, e.g. '08:05:pm' :return: time string with all numerals written out as words """ period = "" if len(time.split(':')) > 2: hours, mins, period = time.split(':')[0:3] elif len(time.split(':')) > 1: hours, mins, = time.split(':')[0:2] else: return time # 'now' and similar hours = int(hours) mins = int(mins) # hack for TTS period = "P M" if period == "PM" else period period = "A M" if period == "AM" else period time_str = [word_for_number(hours)] # always start with an hour if mins == 0: return ' '.join(time_str + [period]) if mins < 9: time_str.append('o') # support one-o-one am return ' '.join(time_str + [word_for_number(mins), period])
[docs] def spell_temperature(self, value, interval): """Convert a temperature expression into words (assuming nominative). :param value: Temperature value (whole number in degrees as string), \ e.g. '1' or '-10'. :param interval: Boolean indicating whether to treat this as a start \ of an interval, i.e. omit the degrees word. :return: temperature expression as string """ ret = '' value = int(value) if value < 0: ret += 'minus ' value = abs(value) ret += word_for_number(value) if not interval: ret += ' degrees' return ret