Skip to content

Commit

Permalink
Merge pull request #264 from snipsco/release/0.8.0
Browse files Browse the repository at this point in the history
Release/0.8.0
  • Loading branch information
Adrien Ball authored Jun 1, 2017
2 parents e141ebb + f6a650a commit e580c10
Show file tree
Hide file tree
Showing 27 changed files with 817 additions and 341 deletions.
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,16 @@
version = f.readline().strip()

required = [
"duckling==0.0.18",
"pytest",
"enum34==1.1.6",
"mock",
"numpy==1.12.1",
"scipy==0.19.0",
"scikit-learn==0.18.1",
"sklearn-crfsuite==0.3.5",
"builtin_entities_ontology==0.1.1",
"semantic_version==2.6.0"
"builtin_entities_ontology==0.2.3",
"semantic_version==2.6.0",
"rustling==2.0",
]

setup(name=PACKAGE_NAME,
Expand Down
3 changes: 0 additions & 3 deletions snips_nlu/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,10 @@
import os

import builtin_entities_ontology
from duckling import core

from snips_nlu.resources import load_resources
from snips_nlu.utils import ROOT_PATH, PACKAGE_NAME

core.load()

VERSION_FILE_NAME = "__version__"

with io.open(os.path.join(ROOT_PATH, PACKAGE_NAME, VERSION_FILE_NAME)) as f:
Expand Down
2 changes: 1 addition & 1 deletion snips_nlu/__version__
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.7.0
0.8.0
112 changes: 0 additions & 112 deletions snips_nlu/built_in_entities.py

This file was deleted.

219 changes: 219 additions & 0 deletions snips_nlu/builtin_entities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
from __future__ import unicode_literals

from collections import defaultdict

import rustling
from enum import Enum
from rustling import RustlingParser as _RustlingParser, RustlingError

from snips_nlu.constants import MATCH_RANGE, VALUE, ENTITY, LABEL, \
RUSTLING_DIM_KIND, SUPPORTED_LANGUAGES
from snips_nlu.languages import Language
from utils import LimitedSizeDict, classproperty


class BuiltInEntity(Enum):
    """Builtin entities backed by rustling.

    The value of each member is a dict with three keys: ``LABEL`` (the
    entity label), ``RUSTLING_DIM_KIND`` (the rustling dimension kind the
    entity maps to) and ``SUPPORTED_LANGUAGES`` (the set of languages the
    entity is declared for).
    """

    NUMBER = {
        LABEL: "snips/number",
        RUSTLING_DIM_KIND: "Number",
        SUPPORTED_LANGUAGES: {Language.EN, Language.FR, Language.ES}
    }

    ORDINAL = {
        LABEL: "snips/ordinal",
        RUSTLING_DIM_KIND: "Ordinal",
        SUPPORTED_LANGUAGES: {Language.EN, Language.FR, Language.ES}
    }

    TEMPERATURE = {
        LABEL: "snips/temperature",
        RUSTLING_DIM_KIND: "Temperature",
        SUPPORTED_LANGUAGES: {Language.EN, Language.FR, Language.ES}
    }

    DATETIME = {
        LABEL: "snips/datetime",
        RUSTLING_DIM_KIND: "Time",
        SUPPORTED_LANGUAGES: {Language.EN, Language.FR, Language.ES}
    }

    DURATION = {
        LABEL: "snips/duration",
        RUSTLING_DIM_KIND: "Duration",
        SUPPORTED_LANGUAGES: {Language.EN, Language.FR, Language.ES}
    }

    AMOUNT_OF_MONEY = {
        LABEL: "snips/amountOfMoney",
        RUSTLING_DIM_KIND: "AmountOfMoney",
        SUPPORTED_LANGUAGES: {Language.EN}
    }

    @property
    def label(self):
        """Label of the entity, e.g. ``"snips/number"``."""
        return self.value[LABEL]

    @property
    def rustling_dim_kind(self):
        """Rustling dimension kind of the entity, e.g. ``"Number"``."""
        return self.value[RUSTLING_DIM_KIND]

    @property
    def supported_languages(self):
        """Set of languages the entity is declared for."""
        return self.value[SUPPORTED_LANGUAGES]

    @classproperty
    @classmethod
    def built_in_entity_by_label(cls):
        """Dict mapping each label to its member.

        The dict is built the first time it is requested and then stored on
        the class, so later accesses return the same object.
        """
        try:
            by_label = cls._built_in_entity_by_label
        except AttributeError:
            by_label = {member.label: member for member in cls}
            cls._built_in_entity_by_label = by_label
        return by_label

    @classmethod
    def from_label(cls, label, default=None):
        """Return the member whose label is `label`.

        When the label is unknown, `default` is returned if it is not None;
        otherwise a KeyError is raised.
        """
        try:
            return cls.built_in_entity_by_label[label]
        except KeyError:
            if default is not None:
                return default
            raise KeyError("Unknown entity '%s'" % label)

    @classproperty
    @classmethod
    def built_in_entity_by_rustling_dim_kind(cls):
        """Dict mapping each rustling dimension kind to its member.

        The dict is built the first time it is requested and then stored on
        the class, so later accesses return the same object.
        """
        try:
            by_dim_kind = cls._built_in_entity_by_rustling_dim_kind
        except AttributeError:
            by_dim_kind = {
                member.rustling_dim_kind: member for member in cls}
            cls._built_in_entity_by_rustling_dim_kind = by_dim_kind
        return by_dim_kind

    @classmethod
    def from_rustling_dim_kind(cls, rustling_dim_kind, default=None):
        """Return the member whose dimension kind is `rustling_dim_kind`.

        When the dimension kind is unknown, `default` is returned if it is
        not None; otherwise a KeyError is raised.
        """
        try:
            return cls.built_in_entity_by_rustling_dim_kind[rustling_dim_kind]
        except KeyError:
            if default is not None:
                return default
            raise KeyError(
                "Unknown rustling dim kind '%s'" % rustling_dim_kind)


# Builtin entities that rustling itself reports for each language. The keys
# of rustling.all_configs() are upper-cased and resolved to Language members,
# the values are iterated as rustling dimension kinds.
_RUSTLING_SUPPORTED_BUILTINS_BY_LANGUAGE = {
    Language.from_rustling_code(code.upper()): set(
        BuiltInEntity.from_rustling_dim_kind(dim_kind)
        for dim_kind in dim_kinds)
    for code, dim_kinds in rustling.all_configs().items()
}

# Builtin entities grouped by the languages they declare in BuiltInEntity.
# Each declaration is checked against what rustling reports, so that a
# mismatch fails as soon as the module is imported.
_SUPPORTED_BUILTINS_BY_LANGUAGE = defaultdict(set)
for entity in BuiltInEntity:
    for language in entity.supported_languages:
        # .get() rather than indexing: a language that rustling does not
        # list at all must end up in the descriptive error below instead of
        # a bare KeyError on the lookup itself.
        if entity not in _RUSTLING_SUPPORTED_BUILTINS_BY_LANGUAGE.get(
                language, ()):
            raise KeyError("Found '%s' in supported languages of '%s' but, "
                           "'%s' is not supported in rustling.all_configs()" %
                           (language, entity, language))
        _SUPPORTED_BUILTINS_BY_LANGUAGE[language].add(entity)

# Every builtin entity that rustling supports in at least one language.
RUSTLING_ENTITIES = set(
    kind for kinds in _RUSTLING_SUPPORTED_BUILTINS_BY_LANGUAGE.values()
    for kind in kinds)

# Reverse lookup from a rustling dimension kind to its BuiltInEntity.
_DIM_KIND_TO_ENTITY = {e.rustling_dim_kind: e for e in RUSTLING_ENTITIES}


def scope_to_dim_kinds(scope):
    """Return the list of rustling dimension kinds of the entities in `scope`.

    The result follows the iteration order of `scope`.
    """
    dim_kinds = []
    for builtin_entity in scope:
        dim_kinds.append(builtin_entity.rustling_dim_kind)
    return dim_kinds


class RustlingParser(object):
    """Wrapper around a rustling parser for one language.

    Parsing results are kept in a size-limited cache keyed by the
    lower-cased input text.
    """

    def __init__(self, language):
        """Create the underlying rustling parser for `language`."""
        self.language = language
        self.parser = _RustlingParser(language.rustling_code)
        self._cache = LimitedSizeDict(size_limit=1000)
        declared_entities = _SUPPORTED_BUILTINS_BY_LANGUAGE[self.language]
        # Copy so that the instance owns its own set.
        self.supported_entities = set(declared_entities)

    def parse(self, text):
        """Return the rustling parsing result for `text`.

        The text is lower-cased before parsing. When rustling raises a
        RustlingError the result is an empty list, which is cached as well.
        """
        # Rustling only works with lowercase text
        lowered = text.lower()
        if lowered in self._cache:
            return self._cache[lowered]

        try:
            result = self.parser.parse(lowered, remove_overlap=True)
        except RustlingError:
            result = []
        self._cache[lowered] = result
        return self._cache[lowered]

    def supports_entity(self, entity):
        """Tell whether `entity` is among this parser's supported entities."""
        supported = self.supported_entities
        return entity in supported


# One RustlingParser per language, created when the module is imported.
_RUSTLING_PARSERS = {}
for language in Language:
    try:
        _RUSTLING_PARSERS[language] = RustlingParser(language)
    except RustlingError:
        # No parser could be created for this language: leave it out of the
        # registry.
        continue

# Languages for which a parser was successfully created.
RUSTLING_SUPPORTED_LANGUAGES = set(_RUSTLING_PARSERS)


def get_builtin_entities(text, language, scope=None):
    """Extract the builtin entities found in `text`.

    Args:
        text: the string to parse.
        language: key into the parser registry. When no parser is
            registered for it, an empty list is returned.
        scope: optional iterable of BuiltInEntity to look for. Defaults to
            every entity in RUSTLING_ENTITIES. Entities that the parser does
            not support for this language are ignored.

    Returns:
        A list with one dict per match whose dimension kind is in scope.
        Each dict has the keys MATCH_RANGE (a ``(start, end)`` tuple taken
        from the match's ``char_range``), VALUE and ENTITY (the
        BuiltInEntity of the match). The parser lower-cases the text before
        parsing it.
    """
    parser = _RUSTLING_PARSERS.get(language)
    if parser is None:
        return []

    if scope is None:
        scope = RUSTLING_ENTITIES

    # Don't detect entities that the parser does not support in this language
    dim_kinds_in_scope = set(
        entity.rustling_dim_kind for entity in scope
        if parser.supports_entity(entity))

    parsed_entities = []
    for match in parser.parse(text):
        dim_kind = match["dim"]
        if dim_kind not in dim_kinds_in_scope:
            continue
        char_range = match["char_range"]
        parsed_entities.append({
            MATCH_RANGE: (char_range["start"], char_range["end"]),
            VALUE: match["value"],
            ENTITY: _DIM_KIND_TO_ENTITY[dim_kind]
        })
    return parsed_entities


def is_builtin_entity(entity_label):
    """Tell whether `entity_label` is the label of a BuiltInEntity member."""
    known_labels = BuiltInEntity.built_in_entity_by_label
    return entity_label in known_labels
3 changes: 2 additions & 1 deletion snips_nlu/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,12 @@
BUILTIN_PATH = "builtin_path"
BUILTIN_BINARY = "builtin_binary"
LABEL = "label"
DUCKLING_DIM = "duckling_dim"
RUSTLING_DIM_KIND = "rustling_dim_kind"
NGRAM = "ngram"
TOKEN_INDEXES = "token_indexes"
GAZETTEERS = "gazetteers"
STOP_WORDS = "stop_words"
SUBTITLES = "subtitles"
WORD_CLUSTERS = "word_clusters"
SNIPS_NLU_VERSION = "snips_nlu_version"
SUPPORTED_LANGUAGES = "supported_languages"
2 changes: 1 addition & 1 deletion snips_nlu/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from semantic_version import Version

from snips_nlu.built_in_entities import BuiltInEntity, is_builtin_entity
from snips_nlu.builtin_entities import BuiltInEntity, is_builtin_entity
from snips_nlu.constants import (TEXT, USE_SYNONYMS, SYNONYMS, DATA, INTENTS,
ENTITIES, ENTITY, SLOT_NAME, UTTERANCES,
LANGUAGE, VALUE, AUTOMATICALLY_EXTENSIBLE,
Expand Down
Loading

0 comments on commit e580c10

Please sign in to comment.