Skip to content

Commit

Permalink
Merge pull request #287 from snipsco/hotfix/feature-extraction
Browse files (browse the repository at this point in the history)
Hotfix/feature extraction
  • Loading branch information
Adrien Ball authored Jun 13, 2017
2 parents a651b16 + 1e04edd commit 2449657
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 8 deletions.
2 changes: 1 addition & 1 deletion snips_nlu/__version__
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
0.8.4
0.8.5
6 changes: 2 additions & 4 deletions snips_nlu/resources.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -88,11 +88,9 @@ def load_clusters():
with io.open(path, encoding="utf8") as f:
_word_clusters[name] = dict()
for l in f:
split = l.rstrip().lower().split("\t")
normalized = " ".join(
[t.value for t in tokenize(split[0])])
split = l.rstrip().split("\t")
if len(split) == 2:
_word_clusters[name][normalized] = split[1]
_word_clusters[name][split[0]] = split[1]


def get_word_clusters(language):
Expand Down
6 changes: 3 additions & 3 deletions snips_nlu/slot_filler/feature_functions.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -100,7 +100,7 @@ def get_ngram_fn(n, use_stemming, language_code=None,
def ngram(tokens, token_index):
max_len = len(tokens)
end = token_index + n
if 0 <= token_index < max_len and 0 < end <= max_len:
if 0 <= token_index < max_len and end <= max_len:
if gazetteer is None:
if use_stemming:
return " ".join(t.stem.lower()
Expand All @@ -113,7 +113,7 @@ def ngram(tokens, token_index):
for t in tokens[token_index:end]:
lowered = t.stem.lower() if use_stemming else \
t.value.lower()
words.append(lowered if t.value.lower() in gazetteer
words.append(lowered if lowered in gazetteer
else "rare_word")
return " ".join(words)
return None
Expand All @@ -128,7 +128,7 @@ def get_shape_ngram_fn(n):
def shape_ngram(tokens, token_index):
max_len = len(tokens)
end = token_index + n
if 0 <= token_index < max_len and 0 <= end < max_len:
if 0 <= token_index < max_len and end <= max_len:
return " ".join(get_shape(t.value)
for t in tokens[token_index:end])
return None
Expand Down

0 comments on commit 2449657

Please sign in to comment.