Skip to content

Commit

Permalink
OPM
Browse files Browse the repository at this point in the history
  • Loading branch information
JarbasAl committed Dec 11, 2024
1 parent 9e29ed1 commit c820814
Show file tree
Hide file tree
Showing 5 changed files with 558 additions and 98 deletions.
99 changes: 2 additions & 97 deletions nebulento/__init__.py
Original file line number Diff line number Diff line change
@@ -1,97 +1,2 @@
import logging
from nebulento.fuzz import MatchStrategy, match_one
from nebulento.bracket_expansion import expand_template, expand_slots
import quebra_frases

LOG = logging.getLogger('nebulento')


class IntentContainer:
    """
    Fuzzy intent matcher built on registered intent and entity samples.

    Intents and entities are registered as lists of template lines, which are
    expanded with ``expand_template`` (and lowercased when ``ignore_case`` is
    set). Queries are then scored against every registered intent with
    ``match_one`` using the configured fuzzy strategy.
    """

    def __init__(self, fuzzy_strategy=MatchStrategy.DAMERAU_LEVENSHTEIN_SIMILARITY,
                 ignore_case=True):
        """
        Initialize an empty container.
        Args:
            fuzzy_strategy: Strategy forwarded to ``match_one`` for every comparison.
            ignore_case (bool): When True, samples and queries are lowercased before matching.
        """
        self.fuzzy_strategy = fuzzy_strategy
        self.ignore_case = ignore_case
        self.registered_intents = {}
        self.registered_entities = {}

    def _expand(self, lines):
        """Expand every template line and lowercase the result when ignoring case."""
        expanded = []
        for line in lines:
            expanded += expand_template(line)
        if self.ignore_case:
            expanded = [sample.lower() for sample in expanded]
        return expanded

    def match_entities(self, sentence):
        """
        Find which registered entity samples occur in a sentence.
        Args:
            sentence (str): The input sentence.
        Returns:
            dict: Entity name -> list of that entity's samples that are present
                among the chunks produced by ``quebra_frases.chunk``.
        """
        if self.ignore_case:
            sentence = sentence.lower()
        matches = {}
        for entity, samples in self.registered_entities.items():
            chunked = quebra_frases.chunk(sentence, samples)
            matches[entity] = [s for s in samples if s in chunked]
        return matches

    def match_fuzzy(self, sentence):
        """
        Score a sentence against every registered intent.
        Args:
            sentence (str): The input sentence.
        Yields:
            dict: One match per registered intent with the keys ``best_match``,
                ``conf``, ``entities``, ``match_strategy``, ``utterance``,
                ``utterance_remainder``, ``utterance_consumed`` and ``name``.
        """
        if self.ignore_case:
            sentence = sentence.lower()
        entities = self.match_entities(sentence)
        # The query tokens do not depend on the intent; tokenize only once.
        sentence_tokens = quebra_frases.word_tokenize(sentence)

        for intent, samples in self.registered_intents.items():
            sent, score = match_one(sentence, samples,
                                    strategy=self.fuzzy_strategy)
            matched_tokens = quebra_frases.word_tokenize(sent)
            remainder = [w for w in sentence_tokens if w not in matched_tokens]
            consumed = [w for w in sentence_tokens if w in matched_tokens]

            tagged_entities = {}
            for ent, values in entities.items():
                slot = "{" + ent + "}"
                if self.ignore_case:
                    # Samples were lowercased at registration time, so the slot
                    # marker must be lowercased too or mixed-case entity names
                    # would never be found in them.
                    slot = slot.lower()
                if values and any(slot in s for s in samples):
                    # Each entity present in the query and used by this intent
                    # pulls the score towards 1.
                    score = 0.25 + score * 0.75
                    tagged_entities[ent] = values
                    consumed += [v for v in values if v not in consumed]
                    remainder = [w for w in remainder if w not in values]

            yield {"best_match": sent,
                   "conf": min(score, 1),
                   "entities": tagged_entities,
                   "match_strategy": self.fuzzy_strategy.name,
                   "utterance": sentence,
                   "utterance_remainder": " ".join(remainder),
                   "utterance_consumed": " ".join(consumed),
                   "name": intent}

    def add_intent(self, name, lines):
        """
        Register (or replace) an intent.
        Args:
            name (str): The intent name.
            lines (List[str]): Template lines; each is expanded with ``expand_template``.
        """
        self.registered_intents[name] = self._expand(lines)

    def remove_intent(self, name):
        """Remove an intent if it is registered; unknown names are ignored."""
        self.registered_intents.pop(name, None)

    def add_entity(self, name, lines):
        """
        Register (or replace) an entity.
        Args:
            name (str): The entity name, referenced in intent samples as ``{name}``.
            lines (List[str]): Template lines; each is expanded with ``expand_template``.
        """
        self.registered_entities[name] = self._expand(lines)

    def remove_entity(self, name):
        """Remove an entity if it is registered; unknown names are ignored."""
        self.registered_entities.pop(name, None)

    def calc_intents(self, query):
        """
        Yield one match dict per registered intent for the query.
        Args:
            query (str): The input query.
        """
        yield from self.match_fuzzy(query)

    def calc_intent(self, query):
        """
        Return the highest-confidence match for a query.
        Args:
            query (str): The input query.
        Returns:
            dict: The match with the largest ``conf``. When no intents are
                registered a fallback dict with ``conf`` 0 and ``name`` None is
                returned; note that this fallback carries the strategy object
                itself under ``match_strategy`` and has no entity/remainder keys.
        """
        no_match = {"best_match": None,
                    "conf": 0,
                    "match_strategy": self.fuzzy_strategy,
                    "utterance": query,
                    "name": None}
        return max(self.calc_intents(query),
                   key=lambda match: match["conf"],
                   default=no_match)
from nebulento.container import IntentContainer
from nebulento.domain_engine import DomainIntentContainer
101 changes: 101 additions & 0 deletions nebulento/container.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import logging
from nebulento.fuzz import MatchStrategy, match_one
from nebulento.bracket_expansion import expand_template, expand_slots
import quebra_frases

LOG = logging.getLogger('nebulento')


class IntentContainer:
    """
    Fuzzy intent matcher built on registered intent and entity samples.

    Intents and entities are registered as lists of template lines, which are
    expanded with ``expand_template`` (and lowercased when ``ignore_case`` is
    set). Queries are then scored against every registered intent with
    ``match_one`` using the configured fuzzy strategy.
    """

    def __init__(self, fuzzy_strategy=MatchStrategy.DAMERAU_LEVENSHTEIN_SIMILARITY,
                 ignore_case=True):
        """
        Initialize an empty container.
        Args:
            fuzzy_strategy: Strategy forwarded to ``match_one`` for every comparison.
            ignore_case (bool): When True, samples and queries are lowercased before matching.
        """
        self.fuzzy_strategy = fuzzy_strategy
        self.ignore_case = ignore_case
        self.registered_intents = {}
        self.registered_entities = {}

    @property
    def intent_names(self):
        """List[str]: Names of all currently registered intents."""
        return list(self.registered_intents)

    def _expand(self, lines):
        """Expand every template line and lowercase the result when ignoring case."""
        expanded = []
        for line in lines:
            expanded += expand_template(line)
        if self.ignore_case:
            expanded = [sample.lower() for sample in expanded]
        return expanded

    def match_entities(self, sentence):
        """
        Find which registered entity samples occur in a sentence.
        Args:
            sentence (str): The input sentence.
        Returns:
            dict: Entity name -> list of that entity's samples that are present
                among the chunks produced by ``quebra_frases.chunk``.
        """
        if self.ignore_case:
            sentence = sentence.lower()
        matches = {}
        for entity, samples in self.registered_entities.items():
            chunked = quebra_frases.chunk(sentence, samples)
            matches[entity] = [s for s in samples if s in chunked]
        return matches

    def match_fuzzy(self, sentence):
        """
        Score a sentence against every registered intent.
        Args:
            sentence (str): The input sentence.
        Yields:
            dict: One match per registered intent with the keys ``best_match``,
                ``conf``, ``entities``, ``match_strategy``, ``utterance``,
                ``utterance_remainder``, ``utterance_consumed`` and ``name``.
        """
        if self.ignore_case:
            sentence = sentence.lower()
        entities = self.match_entities(sentence)
        # The query tokens do not depend on the intent; tokenize only once.
        sentence_tokens = quebra_frases.word_tokenize(sentence)

        for intent, samples in self.registered_intents.items():
            sent, score = match_one(sentence, samples,
                                    strategy=self.fuzzy_strategy)
            matched_tokens = quebra_frases.word_tokenize(sent)
            remainder = [w for w in sentence_tokens if w not in matched_tokens]
            consumed = [w for w in sentence_tokens if w in matched_tokens]

            tagged_entities = {}
            for ent, values in entities.items():
                slot = "{" + ent + "}"
                if self.ignore_case:
                    # Samples were lowercased at registration time, so the slot
                    # marker must be lowercased too or mixed-case entity names
                    # would never be found in them.
                    slot = slot.lower()
                if values and any(slot in s for s in samples):
                    # Each entity present in the query and used by this intent
                    # pulls the score towards 1.
                    score = 0.25 + score * 0.75
                    tagged_entities[ent] = values
                    consumed += [v for v in values if v not in consumed]
                    remainder = [w for w in remainder if w not in values]

            yield {"best_match": sent,
                   "conf": min(score, 1),
                   "entities": tagged_entities,
                   "match_strategy": self.fuzzy_strategy.name,
                   "utterance": sentence,
                   "utterance_remainder": " ".join(remainder),
                   "utterance_consumed": " ".join(consumed),
                   "name": intent}

    def add_intent(self, name, lines):
        """
        Register (or replace) an intent.
        Args:
            name (str): The intent name.
            lines (List[str]): Template lines; each is expanded with ``expand_template``.
        """
        self.registered_intents[name] = self._expand(lines)

    def remove_intent(self, name):
        """Remove an intent if it is registered; unknown names are ignored."""
        self.registered_intents.pop(name, None)

    def add_entity(self, name, lines):
        """
        Register (or replace) an entity.
        Args:
            name (str): The entity name, referenced in intent samples as ``{name}``.
            lines (List[str]): Template lines; each is expanded with ``expand_template``.
        """
        self.registered_entities[name] = self._expand(lines)

    def remove_entity(self, name):
        """Remove an entity if it is registered; unknown names are ignored."""
        self.registered_entities.pop(name, None)

    def calc_intents(self, query):
        """
        Yield one match dict per registered intent for the query.
        Args:
            query (str): The input query.
        """
        yield from self.match_fuzzy(query)

    def calc_intent(self, query):
        """
        Return the highest-confidence match for a query.
        Args:
            query (str): The input query.
        Returns:
            dict: The match with the largest ``conf``. When no intents are
                registered a fallback dict with ``conf`` 0 and ``name`` None is
                returned; note that this fallback carries the strategy object
                itself under ``match_strategy`` and has no entity/remainder keys.
        """
        no_match = {"best_match": None,
                    "conf": 0,
                    "match_strategy": self.fuzzy_strategy,
                    "utterance": query,
                    "name": None}
        return max(self.calc_intents(query),
                   key=lambda match: match["conf"],
                   default=no_match)
127 changes: 127 additions & 0 deletions nebulento/domain_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
from collections import defaultdict
from typing import Dict, List, Optional

from nebulento.container import IntentContainer
from nebulento.fuzz import MatchStrategy


class DomainIntentContainer:
    """
    A domain-aware intent recognition engine that organizes intents and entities
    into specific domains, providing flexible and hierarchical intent matching.

    A query is first classified into a domain by ``domain_engine`` (one
    pseudo-intent per domain, built from all samples registered in that domain)
    and then matched against the intents of that domain only.
    """

    def __init__(self, fuzzy_strategy=MatchStrategy.DAMERAU_LEVENSHTEIN_SIMILARITY,
                 ignore_case=True):
        """
        Initialize the DomainIntentContainer.
        Args:
            fuzzy_strategy: Strategy passed to every underlying IntentContainer.
            ignore_case (bool): Case-insensitivity flag passed to every underlying IntentContainer.
        Attributes:
            domain_engine (IntentContainer): A top-level intent container for cross-domain calculations.
            domains (Dict[str, IntentContainer]): A mapping of domain names to their respective intent containers.
            training_data (Dict[str, List[str]]): A mapping of domain names to their associated training samples.
            must_train (bool): True while ``training_data`` has changes not yet loaded into ``domain_engine``.
        """
        self.fuzzy_strategy = fuzzy_strategy
        self.ignore_case = ignore_case
        self.domain_engine = IntentContainer(fuzzy_strategy=fuzzy_strategy, ignore_case=ignore_case)
        self.domains: Dict[str, IntentContainer] = {}
        self.training_data: Dict[str, List[str]] = defaultdict(list)
        self.must_train = True

    def _get_or_create_domain(self, domain_name: str) -> IntentContainer:
        """Return the container for a domain, creating an empty one on first use."""
        if domain_name not in self.domains:
            self.domains[domain_name] = IntentContainer(fuzzy_strategy=self.fuzzy_strategy,
                                                        ignore_case=self.ignore_case)
        return self.domains[domain_name]

    def train(self):
        """
        Load the accumulated per-domain samples into the domain classifier.

        Each domain is registered in ``domain_engine`` as one intent named after
        the domain. ``add_intent`` replaces any previous registration, so calling
        this repeatedly is safe.
        """
        for domain_name, samples in self.training_data.items():
            self.domain_engine.add_intent(domain_name, samples)
        self.must_train = False

    def remove_domain(self, domain_name: str):
        """
        Remove a domain and its associated intents and training data.
        Args:
            domain_name (str): The name of the domain to remove.
        """
        self.training_data.pop(domain_name, None)
        self.domains.pop(domain_name, None)
        # remove_intent ignores names that were never registered.
        self.domain_engine.remove_intent(domain_name)

    def register_domain_intent(self, domain_name: str, intent_name: str, intent_samples: List[str]):
        """
        Register an intent within a specific domain.
        Args:
            domain_name (str): The name of the domain.
            intent_name (str): The name of the intent to register.
            intent_samples (List[str]): A list of sample sentences for the intent.
        """
        self._get_or_create_domain(domain_name).add_intent(intent_name, intent_samples)
        self.training_data[domain_name] += intent_samples
        # The domain classifier is rebuilt lazily on the next domain lookup.
        self.must_train = True

    def remove_domain_intent(self, domain_name: str, intent_name: str):
        """
        Remove a specific intent from a domain.
        Args:
            domain_name (str): The name of the domain.
            intent_name (str): The name of the intent to remove.
        """
        # NOTE(review): the intent's samples stay in training_data because that
        # mapping is a flat per-domain list with no record of which intent
        # contributed which sample.
        if domain_name in self.domains:
            self.domains[domain_name].remove_intent(intent_name)

    def register_domain_entity(self, domain_name: str, entity_name: str, entity_samples: List[str]):
        """
        Register an entity within a specific domain.
        Args:
            domain_name (str): The name of the domain.
            entity_name (str): The name of the entity to register.
            entity_samples (List[str]): A list of sample phrases for the entity.
        """
        self._get_or_create_domain(domain_name).add_entity(entity_name, entity_samples)

    def remove_domain_entity(self, domain_name: str, entity_name: str):
        """
        Remove a specific entity from a domain.
        Args:
            domain_name (str): The name of the domain.
            entity_name (str): The name of the entity to remove.
        """
        if domain_name in self.domains:
            self.domains[domain_name].remove_entity(entity_name)

    def calc_domain(self, query: str):
        """
        Calculate the best matching domain for a query.
        Args:
            query (str): The input query.
        Returns:
            dict: The best match from the domain classifier; its ``name`` key holds
                the domain name, or None when nothing is registered.
        """
        if self.must_train:
            # Without this the classifier would never see any registered samples.
            self.train()
        return self.domain_engine.calc_intent(query)

    def calc_intent(self, query: str, domain: Optional[str] = None):
        """
        Calculate the best matching intent for a query within a specific domain.
        Args:
            query (str): The input query.
            domain (Optional[str]): The domain to limit the search to. When omitted,
                the domain is predicted with ``calc_domain``. Defaults to None.
        Returns:
            dict: The best matching intent of the domain, or a fallback dict with
                ``conf`` 0 and ``name`` None when the domain is unknown.
        """
        # calc_domain returns a dict, so the name must be read by key.
        domain = domain or self.calc_domain(query)["name"]
        if domain in self.domains:
            return self.domains[domain].calc_intent(query)
        return {"best_match": None,
                "conf": 0,
                "match_strategy": self.fuzzy_strategy,
                "utterance": query,
                "name": None}
Loading

0 comments on commit c820814

Please sign in to comment.