-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
558 additions
and
98 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,97 +1,2 @@ | ||
import logging | ||
from nebulento.fuzz import MatchStrategy, match_one | ||
from nebulento.bracket_expansion import expand_template, expand_slots | ||
import quebra_frases | ||
|
||
LOG = logging.getLogger('nebulento') | ||
|
||
|
||
class IntentContainer:
    """Fuzzy-matching intent engine.

    Intents and entities are registered as lists of sample templates. A query
    is scored against the samples of every registered intent using the
    configured fuzzy-matching strategy.

    Attributes:
        fuzzy_strategy: ``MatchStrategy`` member handed to ``match_one``.
        ignore_case: When true, samples and queries are lower-cased.
        registered_intents: Maps intent name -> list of expanded samples.
        registered_entities: Maps entity name -> list of expanded samples.
    """

    def __init__(self, fuzzy_strategy=MatchStrategy.DAMERAU_LEVENSHTEIN_SIMILARITY,
                 ignore_case=True):
        self.fuzzy_strategy = fuzzy_strategy
        self.ignore_case = ignore_case
        self.registered_intents = {}
        self.registered_entities = {}

    def _expand_samples(self, lines):
        """Expand every template in ``lines`` and apply case folding.

        Shared by ``add_intent`` and ``add_entity`` so both register samples
        in exactly the same way.
        """
        expanded = []
        for line in lines:
            expanded += expand_template(line)
        if self.ignore_case:
            expanded = [sample.lower() for sample in expanded]
        return expanded

    def match_entities(self, sentence):
        """Find registered entity samples that occur in ``sentence``.

        Args:
            sentence: The utterance to inspect.

        Returns:
            dict mapping every registered entity name to the list of its
            samples found among the chunks produced by
            ``quebra_frases.chunk`` (an empty list when none matched).
        """
        if self.ignore_case:
            sentence = sentence.lower()
        matches = {}
        for entity, samples in self.registered_entities.items():
            chunked = quebra_frases.chunk(sentence, samples)
            matches[entity] = [s for s in samples if s in chunked]
        return matches

    def match_fuzzy(self, sentence):
        """Score ``sentence`` against every registered intent.

        Args:
            sentence: The utterance to match.

        Yields:
            One dict per intent with the keys ``best_match``, ``conf``,
            ``entities``, ``match_strategy``, ``utterance``,
            ``utterance_remainder``, ``utterance_consumed`` and ``name``.
        """
        if self.ignore_case:
            sentence = sentence.lower()
        entities = self.match_entities(sentence)
        # Tokenize the utterance once instead of twice per intent.
        sentence_tokens = quebra_frases.word_tokenize(sentence)

        for intent, samples in self.registered_intents.items():
            if not samples:
                # Nothing to score against.
                # NOTE(review): match_one presumably cannot pick a best match
                # from an empty list - confirm against nebulento.fuzz.
                continue

            sent, score = match_one(sentence, samples,
                                    strategy=self.fuzzy_strategy)
            # Tokenize the winning sample once, not once per utterance word.
            sent_tokens = quebra_frases.word_tokenize(sent)
            remainder = [w for w in sentence_tokens if w not in sent_tokens]
            consumed = [w for w in sentence_tokens if w in sent_tokens]

            tagged_entities = {}
            for ent, found in entities.items():
                # Only entities this intent references through a "{name}"
                # slot count towards it.
                if found and any("{" + ent + "}" in s for s in samples):
                    # Each such entity pulls the score 25% closer to 1.
                    score = 0.25 + score * 0.75
                    tagged_entities[ent] = found
                    consumed += [e for e in found if e not in consumed]
                    remainder = [w for w in remainder if w not in found]

            yield {"best_match": sent,
                   "conf": min(score, 1),
                   "entities": tagged_entities,
                   "match_strategy": self.fuzzy_strategy.name,
                   "utterance": sentence,
                   "utterance_remainder": " ".join(remainder),
                   "utterance_consumed": " ".join(consumed),
                   "name": intent}

    def add_intent(self, name, lines):
        """Register (or replace) intent ``name`` with the given templates."""
        self.registered_intents[name] = self._expand_samples(lines)

    def remove_intent(self, name):
        """Forget intent ``name``; unknown names are ignored."""
        self.registered_intents.pop(name, None)

    def add_entity(self, name, lines):
        """Register (or replace) entity ``name`` with the given templates."""
        self.registered_entities[name] = self._expand_samples(lines)

    def remove_entity(self, name):
        """Forget entity ``name``; unknown names are ignored."""
        self.registered_entities.pop(name, None)

    def calc_intents(self, query):
        """Yield a match dict for every registered intent (see ``match_fuzzy``)."""
        yield from self.match_fuzzy(query)

    def calc_intent(self, query):
        """Return the highest-confidence match for ``query``.

        Args:
            query: The utterance to match.

        Returns:
            The match dict with the largest ``conf``. When nothing could be
            scored, a no-match dict with the same keys as a real match is
            returned (``name`` and ``best_match`` are ``None``, ``conf`` is 0).
        """
        # The fallback mirrors the shape of real matches so callers can read
        # the same keys (and the same strategy-name string) either way.
        no_match = {"best_match": None,
                    "conf": 0,
                    "entities": {},
                    "match_strategy": self.fuzzy_strategy.name,
                    "utterance": query,
                    "utterance_remainder": query,
                    "utterance_consumed": "",
                    "name": None}
        return max(self.calc_intents(query),
                   key=lambda match: match["conf"],
                   default=no_match)
from nebulento.container import IntentContainer | ||
from nebulento.domain_engine import DomainIntentContainer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
import logging | ||
from nebulento.fuzz import MatchStrategy, match_one | ||
from nebulento.bracket_expansion import expand_template, expand_slots | ||
import quebra_frases | ||
|
||
LOG = logging.getLogger('nebulento') | ||
|
||
|
||
class IntentContainer:
    """Fuzzy-matching intent engine.

    Intents and entities are registered as lists of sample templates. A query
    is scored against the samples of every registered intent using the
    configured fuzzy-matching strategy.

    Attributes:
        fuzzy_strategy: ``MatchStrategy`` member handed to ``match_one``.
        ignore_case: When true, samples and queries are lower-cased.
        registered_intents: Maps intent name -> list of expanded samples.
        registered_entities: Maps entity name -> list of expanded samples.
    """

    def __init__(self, fuzzy_strategy=MatchStrategy.DAMERAU_LEVENSHTEIN_SIMILARITY,
                 ignore_case=True):
        self.fuzzy_strategy = fuzzy_strategy
        self.ignore_case = ignore_case
        self.registered_intents = {}
        self.registered_entities = {}

    @property
    def intent_names(self):
        """List of the names of all currently registered intents."""
        return list(self.registered_intents)

    def _expand_samples(self, lines):
        """Expand every template in ``lines`` and apply case folding.

        Shared by ``add_intent`` and ``add_entity`` so both register samples
        in exactly the same way.
        """
        expanded = []
        for line in lines:
            expanded += expand_template(line)
        if self.ignore_case:
            expanded = [sample.lower() for sample in expanded]
        return expanded

    def match_entities(self, sentence):
        """Find registered entity samples that occur in ``sentence``.

        Args:
            sentence: The utterance to inspect.

        Returns:
            dict mapping every registered entity name to the list of its
            samples found among the chunks produced by
            ``quebra_frases.chunk`` (an empty list when none matched).
        """
        if self.ignore_case:
            sentence = sentence.lower()
        matches = {}
        for entity, samples in self.registered_entities.items():
            chunked = quebra_frases.chunk(sentence, samples)
            matches[entity] = [s for s in samples if s in chunked]
        return matches

    def match_fuzzy(self, sentence):
        """Score ``sentence`` against every registered intent.

        Args:
            sentence: The utterance to match.

        Yields:
            One dict per intent with the keys ``best_match``, ``conf``,
            ``entities``, ``match_strategy``, ``utterance``,
            ``utterance_remainder``, ``utterance_consumed`` and ``name``.
        """
        if self.ignore_case:
            sentence = sentence.lower()
        entities = self.match_entities(sentence)
        # Tokenize the utterance once instead of twice per intent.
        sentence_tokens = quebra_frases.word_tokenize(sentence)

        for intent, samples in self.registered_intents.items():
            if not samples:
                # Nothing to score against.
                # NOTE(review): match_one presumably cannot pick a best match
                # from an empty list - confirm against nebulento.fuzz.
                continue

            sent, score = match_one(sentence, samples,
                                    strategy=self.fuzzy_strategy)
            # Tokenize the winning sample once, not once per utterance word.
            sent_tokens = quebra_frases.word_tokenize(sent)
            remainder = [w for w in sentence_tokens if w not in sent_tokens]
            consumed = [w for w in sentence_tokens if w in sent_tokens]

            tagged_entities = {}
            for ent, found in entities.items():
                # Only entities this intent references through a "{name}"
                # slot count towards it.
                if found and any("{" + ent + "}" in s for s in samples):
                    # Each such entity pulls the score 25% closer to 1.
                    score = 0.25 + score * 0.75
                    tagged_entities[ent] = found
                    consumed += [e for e in found if e not in consumed]
                    remainder = [w for w in remainder if w not in found]

            yield {"best_match": sent,
                   "conf": min(score, 1),
                   "entities": tagged_entities,
                   "match_strategy": self.fuzzy_strategy.name,
                   "utterance": sentence,
                   "utterance_remainder": " ".join(remainder),
                   "utterance_consumed": " ".join(consumed),
                   "name": intent}

    def add_intent(self, name, lines):
        """Register (or replace) intent ``name`` with the given templates."""
        self.registered_intents[name] = self._expand_samples(lines)

    def remove_intent(self, name):
        """Forget intent ``name``; unknown names are ignored."""
        self.registered_intents.pop(name, None)

    def add_entity(self, name, lines):
        """Register (or replace) entity ``name`` with the given templates."""
        self.registered_entities[name] = self._expand_samples(lines)

    def remove_entity(self, name):
        """Forget entity ``name``; unknown names are ignored."""
        self.registered_entities.pop(name, None)

    def calc_intents(self, query):
        """Yield a match dict for every registered intent (see ``match_fuzzy``)."""
        yield from self.match_fuzzy(query)

    def calc_intent(self, query):
        """Return the highest-confidence match for ``query``.

        Args:
            query: The utterance to match.

        Returns:
            The match dict with the largest ``conf``. When nothing could be
            scored, a no-match dict with the same keys as a real match is
            returned (``name`` and ``best_match`` are ``None``, ``conf`` is 0).
        """
        # The fallback mirrors the shape of real matches so callers can read
        # the same keys (and the same strategy-name string) either way.
        no_match = {"best_match": None,
                    "conf": 0,
                    "entities": {},
                    "match_strategy": self.fuzzy_strategy.name,
                    "utterance": query,
                    "utterance_remainder": query,
                    "utterance_consumed": "",
                    "name": None}
        return max(self.calc_intents(query),
                   key=lambda match: match["conf"],
                   default=no_match)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
from collections import defaultdict | ||
from typing import Dict, List, Optional | ||
|
||
from nebulento.container import IntentContainer | ||
from nebulento.fuzz import MatchStrategy | ||
|
||
|
||
class DomainIntentContainer:
    """
    A domain-aware intent recognition engine that organizes intents and entities
    into specific domains, providing flexible and hierarchical intent matching.

    A query is first matched against the domains (each domain is registered in
    ``domain_engine`` as one intent built from all of its samples) and then
    against the intents of the selected domain.
    """

    def __init__(self, fuzzy_strategy=MatchStrategy.DAMERAU_LEVENSHTEIN_SIMILARITY,
                 ignore_case=True):
        """
        Initialize the DomainIntentContainer.

        Args:
            fuzzy_strategy: Matching strategy passed to every IntentContainer.
            ignore_case (bool): Passed to every IntentContainer.

        Attributes:
            domain_engine (IntentContainer): A top-level intent container for cross-domain calculations.
            domains (Dict[str, IntentContainer]): A mapping of domain names to their respective intent containers.
            training_data (Dict[str, List[str]]): A mapping of domain names to their associated training samples.
            must_train (bool): True while ``training_data`` has changes that
                have not yet been loaded into ``domain_engine``.
        """
        self.fuzzy_strategy = fuzzy_strategy
        self.ignore_case = ignore_case
        self.domain_engine = IntentContainer(fuzzy_strategy=fuzzy_strategy, ignore_case=ignore_case)
        self.domains: Dict[str, IntentContainer] = {}
        self.training_data: Dict[str, List[str]] = defaultdict(list)
        # Raw samples per domain and intent, so a single intent can be removed
        # or re-registered without leaving stale entries in training_data.
        self._intent_samples: Dict[str, Dict[str, List[str]]] = defaultdict(dict)
        self.must_train = True

    def _get_or_create_domain(self, domain_name: str) -> IntentContainer:
        """Return the container for ``domain_name``, creating it on first use."""
        if domain_name not in self.domains:
            self.domains[domain_name] = IntentContainer(fuzzy_strategy=self.fuzzy_strategy,
                                                        ignore_case=self.ignore_case)
        return self.domains[domain_name]

    def _rebuild_training_data(self, domain_name: str):
        """Recompute the flat sample list of a domain and flag retraining."""
        self.training_data[domain_name] = [
            sample
            for samples in self._intent_samples[domain_name].values()
            for sample in samples
        ]
        self.must_train = True

    def _no_match(self, query: str) -> dict:
        """Build the result returned when no domain/intent could be selected."""
        # Same keys and strategy-name string as a real IntentContainer match.
        return {"best_match": None,
                "conf": 0,
                "entities": {},
                "match_strategy": self.fuzzy_strategy.name,
                "utterance": query,
                "utterance_remainder": query,
                "utterance_consumed": "",
                "name": None}

    def train(self):
        """
        Load the per-domain training samples into the domain engine.

        Each domain with samples becomes one intent (named after the domain)
        in ``domain_engine``; domains left without samples are dropped from it.
        Called automatically by ``calc_domain`` when ``must_train`` is set.
        """
        for domain_name, samples in self.training_data.items():
            if samples:
                self.domain_engine.add_intent(domain_name, samples)
            else:
                self.domain_engine.remove_intent(domain_name)
        self.must_train = False

    def remove_domain(self, domain_name: str):
        """
        Remove a domain and its associated intents and training data.

        Args:
            domain_name (str): The name of the domain to remove.
        """
        self.training_data.pop(domain_name, None)
        self._intent_samples.pop(domain_name, None)
        self.domains.pop(domain_name, None)
        # remove_intent ignores names that were never registered.
        self.domain_engine.remove_intent(domain_name)

    def register_domain_intent(self, domain_name: str, intent_name: str, intent_samples: List[str]):
        """
        Register an intent within a specific domain.

        Registering an intent name that already exists in the domain replaces
        its samples.

        Args:
            domain_name (str): The name of the domain.
            intent_name (str): The name of the intent to register.
            intent_samples (List[str]): A list of sample sentences for the intent.
        """
        self._get_or_create_domain(domain_name).add_intent(intent_name, intent_samples)
        # Copy so later mutation of the caller's list cannot alter our data.
        self._intent_samples[domain_name][intent_name] = list(intent_samples)
        self._rebuild_training_data(domain_name)

    def remove_domain_intent(self, domain_name: str, intent_name: str):
        """
        Remove a specific intent from a domain.

        Args:
            domain_name (str): The name of the domain.
            intent_name (str): The name of the intent to remove.
        """
        if domain_name in self.domains:
            self.domains[domain_name].remove_intent(intent_name)
        if domain_name in self._intent_samples:
            # Drop the intent's samples from the domain classifier as well.
            self._intent_samples[domain_name].pop(intent_name, None)
            self._rebuild_training_data(domain_name)

    def register_domain_entity(self, domain_name: str, entity_name: str, entity_samples: List[str]):
        """
        Register an entity within a specific domain.

        Args:
            domain_name (str): The name of the domain.
            entity_name (str): The name of the entity to register.
            entity_samples (List[str]): A list of sample phrases for the entity.
        """
        self._get_or_create_domain(domain_name).add_entity(entity_name, entity_samples)

    def remove_domain_entity(self, domain_name: str, entity_name: str):
        """
        Remove a specific entity from a domain.

        Args:
            domain_name (str): The name of the domain.
            entity_name (str): The name of the entity to remove.
        """
        if domain_name in self.domains:
            self.domains[domain_name].remove_entity(entity_name)

    def calc_domain(self, query: str):
        """
        Calculate the best matching domain for a query.

        Args:
            query (str): The input query.

        Returns:
            dict: The best match from the domain engine; its ``"name"`` key
            holds the domain name, or None when no domain matched.
        """
        if self.must_train:
            self.train()
        return self.domain_engine.calc_intent(query)

    def calc_intent(self, query: str, domain: Optional[str] = None):
        """
        Calculate the best matching intent for a query within a specific domain.

        Args:
            query (str): The input query.
            domain (Optional[str]): The domain to limit the search to. When
                omitted, the domain is chosen with ``calc_domain``.

        Returns:
            dict: The best matching intent of the selected domain, or a
            no-match dict (``"name"`` is None, ``"conf"`` is 0) when the
            domain is unknown or could not be determined.
        """
        # Match results are plain dicts, so the name is read by key.
        domain = domain or self.calc_domain(query)["name"]
        if domain in self.domains:
            return self.domains[domain].calc_intent(query)
        return self._no_match(query)
Oops, something went wrong.