OPM

TigreGotico · Dec 11, 2024 · c820814 · c820814
1 parent 9e29ed1
commit c820814
Show file tree

Hide file tree

Showing 5 changed files with 558 additions and 98 deletions.
diff --git a/nebulento/__init__.py b/nebulento/__init__.py
@@ -1,97 +1,2 @@
-import logging
-from nebulento.fuzz import MatchStrategy, match_one
-from nebulento.bracket_expansion import expand_template, expand_slots
-import quebra_frases
-
-LOG = logging.getLogger('nebulento')
-
-
-class IntentContainer:
-    def __init__(self, fuzzy_strategy=MatchStrategy.DAMERAU_LEVENSHTEIN_SIMILARITY,
-                 ignore_case=True):
-        self.fuzzy_strategy = fuzzy_strategy
-        self.ignore_case = ignore_case
-        self.registered_intents = {}
-        self.registered_entities = {}
-
-    def match_entities(self, sentence):
-        if self.ignore_case:
-            sentence = sentence.lower()
-        matches = {}
-        for entity, samples in self.registered_entities.items():
-            chunked = quebra_frases.chunk(sentence, samples)
-            matches[entity] = [s for s in samples if s in chunked]
-        return matches
-
-    def match_fuzzy(self, sentence):
-        if self.ignore_case:
-            sentence = sentence.lower()
-        entities = self.match_entities(sentence)
-        for intent, samples in self.registered_intents.items():
-            samples = self.registered_intents[intent]
-
-            sent, score = match_one(sentence, samples,
-                                    strategy=self.fuzzy_strategy)
-            remainder = [
-                w for w in quebra_frases.word_tokenize(sentence)
-                if w not in quebra_frases.word_tokenize(sent)]
-            consumed = [
-                w for w in quebra_frases.word_tokenize(sentence)
-                if w in quebra_frases.word_tokenize(sent)]
-
-            tagged_entities = {}
-            for ent, v in entities.items():
-                if v and any("{" + ent + "}" in s for s in samples):
-                    score = 0.25 + score * 0.75
-                    tagged_entities[ent] = v
-                    consumed += [_ for _ in v if _ not in consumed]
-                    remainder = [_ for _ in remainder if _ not in v]
-            remainder = " ".join(remainder)
-            consumed = " ".join(consumed)
-            yield {"best_match": sent,
-                   "conf": min(score, 1),
-                   "entities": tagged_entities,
-                   "match_strategy": self.fuzzy_strategy.name,
-                   "utterance": sentence,
-                   "utterance_remainder": remainder,
-                   "utterance_consumed": consumed,
-                   "name": intent}
-
-    def add_intent(self, name, lines):
-        expanded = []
-        for l in lines:
-            expanded += expand_template(l)
-        if self.ignore_case:
-            expanded = [l.lower() for l in expanded]
-        self.registered_intents[name] = expanded
-
-    def remove_intent(self, name):
-        if name in self.registered_intents:
-            del self.registered_intents[name]
-
-    def add_entity(self, name, lines):
-        expanded = []
-        for l in lines:
-            expanded += expand_template(l)
-        if self.ignore_case:
-            expanded = [l.lower() for l in expanded]
-        self.registered_entities[name] = expanded
-
-    def remove_entity(self, name):
-        if name in self.registered_entities:
-            del self.registered_entities[name]
-
-    def calc_intents(self, query):
-        for intent in self.match_fuzzy(query):
-            yield intent
-
-    def calc_intent(self, query):
-        return max(
-            self.calc_intents(query),
-            key=lambda x: x["conf"],
-            default={"best_match": None,
-                     "conf": 0,
-                     "match_strategy": self.fuzzy_strategy,
-                     "utterance": query,
-                     "name": None}
-        )
+from nebulento.container import IntentContainer
+from nebulento.domain_engine import DomainIntentContainer
diff --git a/nebulento/container.py b/nebulento/container.py
@@ -0,0 +1,101 @@
+import logging
+from nebulento.fuzz import MatchStrategy, match_one
+from nebulento.bracket_expansion import expand_template, expand_slots
+import quebra_frases
+
+LOG = logging.getLogger('nebulento')
+
+
+class IntentContainer:
+    """
+    Fuzzy-matching intent engine.
+
+    Intents and entities are registered as lists of sample strings (templates
+    are expanded with ``expand_template``). A query is scored against every
+    registered intent with ``match_one`` using the configured strategy.
+    """
+
+    def __init__(self, fuzzy_strategy=MatchStrategy.DAMERAU_LEVENSHTEIN_SIMILARITY,
+                 ignore_case=True):
+        """
+        Args:
+            fuzzy_strategy (MatchStrategy): strategy passed to ``match_one``.
+            ignore_case (bool): when True, samples and queries are lowercased
+                before they are stored / compared.
+        """
+        self.fuzzy_strategy = fuzzy_strategy
+        self.ignore_case = ignore_case
+        self.registered_intents = {}  # intent name -> expanded samples
+        self.registered_entities = {}  # entity name -> expanded samples
+
+    @property
+    def intent_names(self):
+        """Names of all currently registered intents, as a new list."""
+        return list(self.registered_intents)
+
+    def _expand(self, lines):
+        """Expand every template in ``lines`` and lowercase if ignore_case is set."""
+        expanded = []
+        for line in lines:
+            expanded += expand_template(line)
+        if self.ignore_case:
+            expanded = [sample.lower() for sample in expanded]
+        return expanded
+
+    def match_entities(self, sentence):
+        """
+        Find registered entity samples present in ``sentence``.
+
+        Args:
+            sentence (str): the utterance to inspect.
+
+        Returns:
+            dict: entity name -> list of that entity's samples that are among
+            the chunks produced by ``quebra_frases.chunk(sentence, samples)``.
+            Every registered entity has a key, possibly with an empty list.
+        """
+        if self.ignore_case:
+            sentence = sentence.lower()
+        matches = {}
+        for entity, samples in self.registered_entities.items():
+            chunked = quebra_frases.chunk(sentence, samples)
+            matches[entity] = [s for s in samples if s in chunked]
+        return matches
+
+    def match_fuzzy(self, sentence):
+        """
+        Score ``sentence`` against every registered intent.
+
+        Args:
+            sentence (str): the utterance to match.
+
+        Yields:
+            dict: one result per registered intent with the keys
+            ``best_match``, ``conf``, ``entities``, ``match_strategy``,
+            ``utterance``, ``utterance_remainder``, ``utterance_consumed``
+            and ``name``.
+        """
+        if self.ignore_case:
+            sentence = sentence.lower()
+        entities = self.match_entities(sentence)
+        # The utterance is the same for every intent: tokenize it only once.
+        sentence_tokens = quebra_frases.word_tokenize(sentence)
+        for intent, samples in self.registered_intents.items():
+            sent, score = match_one(sentence, samples,
+                                    strategy=self.fuzzy_strategy)
+            sample_tokens = quebra_frases.word_tokenize(sent)
+            remainder = [w for w in sentence_tokens if w not in sample_tokens]
+            consumed = [w for w in sentence_tokens if w in sample_tokens]
+
+            tagged_entities = {}
+            for ent, values in entities.items():
+                # Only entities that matched AND are referenced as {ent} in
+                # one of this intent's samples count towards this intent.
+                if values and any("{" + ent + "}" in s for s in samples):
+                    # Each such entity boosts the score: 0.25 + 0.75 * score.
+                    score = 0.25 + score * 0.75
+                    tagged_entities[ent] = values
+                    consumed += [v for v in values if v not in consumed]
+                    remainder = [w for w in remainder if w not in values]
+            yield {"best_match": sent,
+                   "conf": min(score, 1),
+                   "entities": tagged_entities,
+                   "match_strategy": self.fuzzy_strategy.name,
+                   "utterance": sentence,
+                   "utterance_remainder": " ".join(remainder),
+                   "utterance_consumed": " ".join(consumed),
+                   "name": intent}
+
+    def add_intent(self, name, lines):
+        """Register (or replace) intent ``name`` with the expanded ``lines``."""
+        self.registered_intents[name] = self._expand(lines)
+
+    def remove_intent(self, name):
+        """Unregister intent ``name``; unknown names are ignored."""
+        self.registered_intents.pop(name, None)
+
+    def add_entity(self, name, lines):
+        """Register (or replace) entity ``name`` with the expanded ``lines``."""
+        self.registered_entities[name] = self._expand(lines)
+
+    def remove_entity(self, name):
+        """Unregister entity ``name``; unknown names are ignored."""
+        self.registered_entities.pop(name, None)
+
+    def calc_intents(self, query):
+        """Yield one match result per registered intent (see ``match_fuzzy``)."""
+        yield from self.match_fuzzy(query)
+
+    def calc_intent(self, query):
+        """
+        Return the highest-confidence match for ``query``.
+
+        Args:
+            query (str): the utterance to match.
+
+        Returns:
+            dict: the ``match_fuzzy`` result with the largest ``conf``. When no
+            intent is registered, a result with ``name`` None and ``conf`` 0
+            is returned; it carries the same keys as a real match so callers
+            can read any field without checking for a match first.
+        """
+        no_match = {"best_match": None,
+                    "conf": 0,
+                    "entities": {},
+                    # .name keeps the type identical to real match results
+                    "match_strategy": self.fuzzy_strategy.name,
+                    "utterance": query,
+                    "utterance_remainder": query,
+                    "utterance_consumed": "",
+                    "name": None}
+        return max(self.calc_intents(query),
+                   key=lambda match: match["conf"],
+                   default=no_match)
diff --git a/nebulento/domain_engine.py b/nebulento/domain_engine.py
@@ -0,0 +1,127 @@
+from collections import defaultdict
+from typing import Dict, List, Optional
+
+from nebulento.container import IntentContainer
+from nebulento.fuzz import MatchStrategy
+
+
+class DomainIntentContainer:
+    """
+    A domain-aware intent recognition engine that organizes intents and entities
+    into specific domains, providing flexible and hierarchical intent matching.
+
+    A query is first classified into a domain by ``domain_engine`` (which holds
+    one pseudo-intent per domain, built from every intent sample registered in
+    that domain) and is then matched against that domain's own intents.
+    """
+
+    def __init__(self, fuzzy_strategy=MatchStrategy.DAMERAU_LEVENSHTEIN_SIMILARITY,
+                 ignore_case=True):
+        """
+        Initialize the DomainIntentContainer.
+
+        Args:
+            fuzzy_strategy (MatchStrategy): strategy given to every IntentContainer created here.
+            ignore_case (bool): ignore_case flag given to every IntentContainer created here.
+
+        Attributes:
+            domain_engine (IntentContainer): A top-level intent container for cross-domain calculations.
+            domains (Dict[str, IntentContainer]): A mapping of domain names to their respective intent containers.
+            training_data (Dict[str, List[str]]): A mapping of domain names to their associated training samples.
+            must_train (bool): True while training_data has changes not yet loaded into domain_engine.
+        """
+        self.fuzzy_strategy = fuzzy_strategy
+        self.ignore_case = ignore_case
+        self.domain_engine = IntentContainer(fuzzy_strategy=fuzzy_strategy, ignore_case=ignore_case)
+        self.domains: Dict[str, IntentContainer] = {}
+        self.training_data: Dict[str, List[str]] = defaultdict(list)
+        # domain -> intent -> samples; needed to retract a single intent's
+        # samples from training_data when that intent is removed or replaced.
+        self._intent_samples: Dict[str, Dict[str, List[str]]] = {}
+        self.must_train = True
+
+    def _get_or_create_domain(self, domain_name: str) -> IntentContainer:
+        """Return the container of ``domain_name``, creating it on first use."""
+        if domain_name not in self.domains:
+            self.domains[domain_name] = IntentContainer(fuzzy_strategy=self.fuzzy_strategy,
+                                                        ignore_case=self.ignore_case)
+        return self.domains[domain_name]
+
+    def _rebuild_training_data(self, domain_name: str):
+        """Recompute training_data[domain_name] from the per-intent samples."""
+        intents = self._intent_samples.get(domain_name, {})
+        self.training_data[domain_name] = [sample
+                                           for samples in intents.values()
+                                           for sample in samples]
+        self.must_train = True
+
+    def _train(self):
+        """Load training_data into domain_engine, one pseudo-intent per domain."""
+        for domain_name, samples in list(self.training_data.items()):
+            if samples:
+                self.domain_engine.add_intent(domain_name, samples)
+            else:
+                # A domain without samples must not take part in matching.
+                self.domain_engine.remove_intent(domain_name)
+        self.must_train = False
+
+    def remove_domain(self, domain_name: str):
+        """
+        Remove a domain and its associated intents and training data.
+
+        Args:
+            domain_name (str): The name of the domain to remove.
+        """
+        self.training_data.pop(domain_name, None)
+        self._intent_samples.pop(domain_name, None)
+        self.domains.pop(domain_name, None)
+        if domain_name in self.domain_engine.intent_names:
+            self.domain_engine.remove_intent(domain_name)
+
+    def register_domain_intent(self, domain_name: str, intent_name: str, intent_samples: List[str]):
+        """
+        Register an intent within a specific domain.
+
+        Registering an already known intent name replaces its samples, both in
+        the domain container and in the domain's training data.
+
+        Args:
+            domain_name (str): The name of the domain.
+            intent_name (str): The name of the intent to register.
+            intent_samples (List[str]): A list of sample sentences for the intent.
+        """
+        self._get_or_create_domain(domain_name).add_intent(intent_name, intent_samples)
+        # Copy so later mutation of the caller's list cannot alter our data.
+        self._intent_samples.setdefault(domain_name, {})[intent_name] = list(intent_samples)
+        self._rebuild_training_data(domain_name)
+
+    def remove_domain_intent(self, domain_name: str, intent_name: str):
+        """
+        Remove a specific intent from a domain.
+
+        The intent's samples are also dropped from the domain's training data
+        so they no longer influence domain classification.
+
+        Args:
+            domain_name (str): The name of the domain.
+            intent_name (str): The name of the intent to remove.
+        """
+        if domain_name in self.domains:
+            self.domains[domain_name].remove_intent(intent_name)
+        intents = self._intent_samples.get(domain_name, {})
+        if intent_name in intents:
+            del intents[intent_name]
+            self._rebuild_training_data(domain_name)
+
+    def register_domain_entity(self, domain_name: str, entity_name: str, entity_samples: List[str]):
+        """
+        Register an entity within a specific domain.
+
+        Args:
+            domain_name (str): The name of the domain.
+            entity_name (str): The name of the entity to register.
+            entity_samples (List[str]): A list of sample phrases for the entity.
+        """
+        self._get_or_create_domain(domain_name).add_entity(entity_name, entity_samples)
+
+    def remove_domain_entity(self, domain_name: str, entity_name: str):
+        """
+        Remove a specific entity from a domain.
+
+        Args:
+            domain_name (str): The name of the domain.
+            entity_name (str): The name of the entity to remove.
+        """
+        if domain_name in self.domains:
+            self.domains[domain_name].remove_entity(entity_name)
+
+    def calc_domain(self, query: str):
+        """
+        Calculate the best matching domain for a query.
+
+        Args:
+            query (str): The input query.
+
+        Returns:
+            dict: The match result of the domain engine; its "name" entry is
+            the best matching domain, or None when nothing is registered.
+        """
+        if self.must_train:
+            # Lazily sync the domain engine with the registered samples.
+            self._train()
+        return self.domain_engine.calc_intent(query)
+
+    def calc_intent(self, query: str, domain: Optional[str] = None):
+        """
+        Calculate the best matching intent for a query within a specific domain.
+
+        Args:
+            query (str): The input query.
+            domain (Optional[str]): The domain to limit the search to. When
+                omitted, the domain is chosen with calc_domain. Defaults to None.
+
+        Returns:
+            dict: The best matching intent of the domain, or a result with
+            "name" None and "conf" 0 when the domain is unknown.
+        """
+        # calc_intent/calc_domain return dicts, so the name is read by key.
+        domain = domain or self.calc_domain(query)["name"]
+        if domain in self.domains:
+            return self.domains[domain].calc_intent(query)
+        return {"best_match": None,
+                "conf": 0,
+                "entities": {},
+                "match_strategy": self.fuzzy_strategy.name,
+                "utterance": query,
+                "utterance_remainder": query,
+                "utterance_consumed": "",
+                "name": None}