Skip to content

Commit

Permalink
Merge pull request #310 from snipsco/release/0.8.8
Browse files Browse the repository at this point in the history
Release 0.8.8
  • Loading branch information
ClemDoum authored Jun 26, 2017
2 parents 987ef87 + 756fed3 commit 26b4f4a
Show file tree
Hide file tree
Showing 12 changed files with 13,706 additions and 49 deletions.
16 changes: 12 additions & 4 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,18 @@ node('jenkins-slave-generic') {
}

stage('Tests') {
sh """
${VENV}
python -m unittest discover
"""
if(branchName.startsWith("release/") || branchName.startsWith("hotfix/") || branchName == "master") {
sh """
${VENV}
python -m unittest discover
python -m unittest discover -p 'integration_test*.py'
"""
} else {
sh """
${VENV}
python -m unittest discover
"""
}
}

stage('Publish') {
Expand Down
20 changes: 4 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Snips NLU (0.8.7)
# Snips NLU (0.8.8)

[![Build Status](https://jenkins2.snips.ai/buildStatus/icon?job=SDK/snips-nlu/master)](https://jenkins2.snips.ai/job/SDK/job/snips-nlu/view/Branches/job/master)

Expand Down Expand Up @@ -77,23 +77,13 @@ engine_as_dict = engine.to_dict()
# }
# "slots": [
# {
# "value": {
# "kind": "Custom",
# "value": "on"
# },
# "raw_value": "on",
# "value": "on"
# "range": [5, 7],
# "entity": "on_off"
# "slot_name": "light_on_off",
# },
# {
# "value": {
# "kind": "Custom",
# "value": "kitchen"
# },
# "raw_value": "kitchen",
# "value": "kitchen"
# "range": [25, 32],
# "entity": "light",
# "slot_name": "light_room",
# }
# ]
Expand All @@ -102,9 +92,7 @@ engine_as_dict = engine.to_dict()

### Training
``` python
engine.fitted # False
engine.fit(dataset)
engine.fitted # True
engine.fit(dataset)
```

where `dataset` is a dictionary which format is described [here](https://github.com/snipsco/snips-nlu/blob/develop/snips_nlu/tests/resources/sample_dataset.json)
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@
required = [
"pytest",
"enum34==1.1.6",
"mock",
"mock==2.0.0",
"numpy==1.12.1",
"scipy==0.19.0",
"scikit-learn==0.18.1",
"sklearn-crfsuite==0.3.5",
"builtin_entities_ontology==0.2.3",
"builtin_entities_ontology==0.3.1",
"semantic_version==2.6.0",
"rustling==2.0",
"rustling==3.0",
]

setup(name=PACKAGE_NAME,
Expand Down
1 change: 1 addition & 0 deletions snips_nlu/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from __future__ import unicode_literals
import io
import os

Expand Down
2 changes: 1 addition & 1 deletion snips_nlu/__version__
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.8.7
0.8.8
12 changes: 8 additions & 4 deletions snips_nlu/builtin_entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ class BuiltInEntity(Enum):
SUPPORTED_LANGUAGES: {
Language.EN,
Language.FR,
Language.ES
Language.ES,
Language.KO
}
}

Expand All @@ -29,7 +30,8 @@ class BuiltInEntity(Enum):
SUPPORTED_LANGUAGES: {
Language.EN,
Language.FR,
Language.ES
Language.ES,
Language.KO
}
}

Expand All @@ -49,7 +51,8 @@ class BuiltInEntity(Enum):
SUPPORTED_LANGUAGES: {
Language.EN,
Language.FR,
Language.ES
Language.ES,
Language.KO
}
}

Expand All @@ -59,7 +62,8 @@ class BuiltInEntity(Enum):
SUPPORTED_LANGUAGES: {
Language.EN,
Language.FR,
Language.ES
Language.ES,
Language.KO
}
}

Expand Down
36 changes: 25 additions & 11 deletions snips_nlu/intent_parser/probabilistic_intent_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
from snips_nlu.slot_filler.crf_tagger import CRFTagger
from snips_nlu.slot_filler.crf_utils import (tags_to_slots,
utterance_to_sample,
positive_tagging)
positive_tagging, OUTSIDE,
tag_name_to_slot_name)
from snips_nlu.slot_filler.data_augmentation import augment_utterances
from snips_nlu.tokenization import tokenize, tokenize_light
from snips_nlu.utils import (namedtuple_with_defaults)
Expand Down Expand Up @@ -152,21 +153,20 @@ def get_slots(self, text, intent=None):
slots = tags_to_slots(text, tokens, tags, tagger.tagging_scheme,
intent_slots_mapping)

# Remove slots corresponding to builtin entities
slots = [s for s in slots if intent_slots_mapping[s.slot_name] not in
BuiltInEntity.built_in_entity_by_label]

builtin_slots = set(s for s in intent_slots_mapping
if intent_slots_mapping[s] in
BuiltInEntity.built_in_entity_by_label)
if len(builtin_slots) == 0:
builtin_slot_names = set(slot_name for (slot_name, entity) in
intent_slots_mapping.iteritems() if entity
in BuiltInEntity.built_in_entity_by_label)
if len(builtin_slot_names) == 0:
return slots

# Replace tags corresponding to builtin entities by outside tags
tags = replace_builtin_tags(tags, builtin_slot_names)

scope = [BuiltInEntity.from_label(intent_slots_mapping[slot])
for slot in builtin_slots]
for slot in builtin_slot_names]
builtin_entities = get_builtin_entities(text, self.language, scope)
slots = augment_slots(text, tokens, tags, tagger, intent_slots_mapping,
builtin_entities, builtin_slots)
builtin_entities, builtin_slot_names)
return slots

@property
Expand Down Expand Up @@ -216,6 +216,20 @@ def from_dict(cls, obj_dict):
)


def replace_builtin_tags(tags, builtin_slot_names):
    """Return a copy of *tags* where every tag pointing at a slot listed in
    *builtin_slot_names* is neutralized to the OUTSIDE tag.

    Tags that are already OUTSIDE, and tags whose slot name is not a builtin
    slot, are kept unchanged. The input list is not mutated.
    """
    def neutralize(tag):
        # Only non-OUTSIDE tags carry a slot name worth inspecting
        if tag != OUTSIDE and tag_name_to_slot_name(tag) in builtin_slot_names:
            return OUTSIDE
        return tag

    return [neutralize(tag) for tag in tags]


def augment_slots(text, tokens, tags, tagger, intent_slots_mapping,
builtin_entities, missing_slots):
augmented_tags = tags
Expand Down
8 changes: 2 additions & 6 deletions snips_nlu/slot_filler/default/default_features_functions.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import numpy as np

from snips_nlu.builtin_entities import BuiltInEntity, \
_SUPPORTED_BUILTINS_BY_LANGUAGE
from snips_nlu.builtin_entities import _SUPPORTED_BUILTINS_BY_LANGUAGE
from snips_nlu.constants import DATA, USE_SYNONYMS, SYNONYMS, VALUE
from snips_nlu.preprocessing import stem
from snips_nlu.slot_filler.crf_utils import TaggingScheme
Expand Down Expand Up @@ -64,10 +63,7 @@ def default_features(language, intent_entities, use_stemming,
]

# Built-ins
for dim in _SUPPORTED_BUILTINS_BY_LANGUAGE[language]:
if dim not in BuiltInEntity.built_in_entity_by_rustling_dim_kind:
continue
entity = BuiltInEntity.from_rustling_dim_kind(dim)
for entity in _SUPPORTED_BUILTINS_BY_LANGUAGE[language]:
features.append(
{
"factory_name": "get_built_in_annotation_fn",
Expand Down
27 changes: 27 additions & 0 deletions snips_nlu/slot_filler/default/test_default_features_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# coding=utf-8
from __future__ import unicode_literals

import unittest

from snips_nlu.dataset import validate_and_format_dataset
from snips_nlu.languages import Language
from snips_nlu.nlu_engine import SnipsNLUEngine
from snips_nlu.tests.utils import BEVERAGE_DATASET


class TestDefaultFeaturesFunction(unittest.TestCase):
    """Checks the default CRF feature set generated for a trained engine."""

    def test_should_include_builtin_features(self):
        """A fitted engine's CRF tagger must carry builtin-entity features."""
        # Given
        formatted_dataset = validate_and_format_dataset(BEVERAGE_DATASET)
        language = Language.EN
        nlu_engine = SnipsNLUEngine(language)
        intent = "MakeCoffee"

        # When
        nlu_engine = nlu_engine.fit(formatted_dataset)

        # Then: at least one feature must reference the snips number builtin
        tagger_features = nlu_engine.probabilistic_parser.crf_taggers[
            intent].features
        builtin_features_count = sum(
            1 for feature in tagger_features
            if "built-in-snips/number" in feature)
        self.assertGreater(builtin_features_count, 0)
84 changes: 84 additions & 0 deletions snips_nlu/tests/integration_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# coding=utf-8
from __future__ import unicode_literals

import io
import json
import os
import unittest
from copy import deepcopy

from snips_nlu.constants import TEXT, DATA, PARSED_INTENT, INTENT_NAME, \
SLOT_NAME, MATCH_RANGE, PARSED_SLOTS, INTENTS, UTTERANCES, CUSTOM_ENGINE, \
ENGINE_TYPE
from snips_nlu.languages import Language
from snips_nlu.nlu_engine import SnipsNLUEngine
from snips_nlu.tests.utils import TEST_PATH


class IntegrationTestSnipsNLUEngine(unittest.TestCase):
    """End-to-end accuracy check of the NLU engine using k-fold
    cross-validation on the bundled performance dataset."""

    def test_engine_performance(self):
        """Train/evaluate on 3 folds and require overall accuracy >= 0.35.

        An utterance scores 1.0 only when the predicted intent matches AND
        every annotated slot chunk is recovered with the same slot name and
        the same character range; any miss scores the utterance 0.0.
        """
        # Given: the reference dataset shipped in the test resources
        dataset_path = os.path.join(TEST_PATH, "resources",
                                    "performance_dataset.json")
        with io.open(dataset_path, encoding="utf8") as f:
            dataset = json.load(f)

        k_fold_batches = create_k_fold_batches(dataset, k=3)
        parsing_results = []  # one 0.0 / 1.0 score per evaluated utterance
        for (train_dataset, test_utterances) in k_fold_batches:
            engine = SnipsNLUEngine(Language.EN).fit(train_dataset)
            for intent_name, utterance in test_utterances:
                # Rebuild the raw query text from its annotated chunks
                text = "".join([chunk[TEXT] for chunk in utterance[DATA]])
                result = engine.parse(text)
                if result[PARSED_INTENT] is None:
                    # No intent detected at all
                    parsing_results.append(0.0)
                else:
                    if result[PARSED_INTENT][INTENT_NAME] != intent_name:
                        # Wrong intent classified
                        parsing_results.append(0.0)
                    else:
                        parsing_result = 1.0
                        # Chunks carrying a SLOT_NAME key are the annotated
                        # slots expected in the parse output
                        slot_chunks = [chunk for chunk in utterance[DATA]
                                       if SLOT_NAME in chunk]
                        for chunk in slot_chunks:
                            chunk_range = [chunk[MATCH_RANGE]["start"],
                                           chunk[MATCH_RANGE]["end"]]
                            # A parsed slot counts only when both its slot
                            # name and its exact character range agree
                            no_matching_slot = all(
                                s[SLOT_NAME] != chunk[SLOT_NAME]
                                or s[MATCH_RANGE] != chunk_range
                                for s in result[PARSED_SLOTS])
                            if no_matching_slot:
                                parsing_result = 0.0
                                break
                        parsing_results.append(parsing_result)
        accuracy = sum(parsing_results) / len(parsing_results)
        self.assertGreaterEqual(accuracy, 0.35)


def create_k_fold_batches(dataset, k):
    """Split *dataset* into *k* cross-validation batches.

    Returns a list of ``(train_dataset, test_utterances)`` pairs:
    ``train_dataset`` is a deep copy of *dataset* whose intents contain only
    the training utterances, and ``test_utterances`` is the held-out list of
    ``(intent_name, utterance)`` pairs for that fold.

    Fix: the original used ``dict.iteritems()``, ``xrange`` and true division
    for ``batch_size`` — all Python-2-only. ``items()``, ``range()`` and
    floor division ``//`` behave identically on py2 and also work on py3.
    """
    # Tag every utterance with its position inside its own intent
    indexed_utterances = [
        (intent_name, utterance, i)
        for intent_name, intent_data in dataset[INTENTS].items()
        for i, utterance in enumerate(intent_data[UTTERANCES])
    ]
    # Stable sort on that per-intent index interleaves the intents (all first
    # utterances, then all second ones, ...) so every fold samples each
    # intent roughly evenly.
    indexed_utterances.sort(key=lambda u: u[2])
    utterances = [(intent_name, utterance)
                  for (intent_name, utterance, _) in indexed_utterances]
    nb_utterances = len(utterances)
    k_fold_batches = []
    # Floor division keeps batch_size an int on both py2 and py3; any
    # remainder utterances land in every training set and no test set.
    batch_size = nb_utterances // k
    for batch_index in range(k):
        test_start = batch_index * batch_size
        test_end = (batch_index + 1) * batch_size
        train_utterances = utterances[0:test_start] + utterances[test_end:]
        test_utterances = utterances[test_start:test_end]
        train_dataset = deepcopy(dataset)
        train_dataset[INTENTS] = dict()
        for intent_name, utterance in train_utterances:
            if intent_name not in train_dataset[INTENTS]:
                train_dataset[INTENTS][intent_name] = {
                    ENGINE_TYPE: CUSTOM_ENGINE,
                    UTTERANCES: []
                }
            train_dataset[INTENTS][intent_name][UTTERANCES].append(
                deepcopy(utterance))
        k_fold_batches.append((train_dataset, test_utterances))
    return k_fold_batches
Loading

0 comments on commit 26b4f4a

Please sign in to comment.