docstring changes for vdataframe/text

vertica · Nov 6, 2023 · 345fdbc · 345fdbc
1 parent 12ba3a9
commit 345fdbc
Showing 1 changed file with 307 additions and 0 deletions.
diff --git a/verticapy/core/vdataframe/_text.py b/verticapy/core/vdataframe/_text.py
@@ -101,6 +101,87 @@ def regexp(
         -------
         vDataFrame
             self
+
+        Examples
+        ---------
+
+        Let's begin by importing `VerticaPy`.
+
+        .. ipython:: python
+
+            import verticapy as vp
+
+
+        Let's generate a small dataset using the following data:
+
+        .. ipython:: python
+
+            data = vp.vDataFrame(
+                            {
+                                "rollno": ['1', '2', '3', '4'],
+                                "subjects": ['English, Math', 'English, Math, Computer', 'Math, Computer, Science', 'Math, Science'],
+                            })
+
+        Let's retrieve the second subject.
+
+        .. code-block:: python
+
+            data.regexp(column = "subjects",
+                                 pattern = "[^,]+",
+                                 method = "substr",
+                                 occurrence = 2,
+                                 name = "subject_2").select(["subjects",
+                                                          "subject_2"])
+
+
+        .. ipython:: python
+            :suppress:
+
+            res = data.regexp(column = "subjects",
+                                 pattern = "[^,]+",
+                                 method = "substr",
+                                 occurrence = 2,
+                                 name = "subject_2").select(["subjects",
+                                                          "subject_2"])
+            html_file = open("figures/core_vDataFrame_text_regex1.html", "w")
+            html_file.write(res._repr_html_())
+            html_file.close()
+
+        .. raw:: html
+            :file: ../../../figures/core_vDataFrame_text_regex1.html
+
+        Let's count the number of subjects.
+
+        .. code-block:: python
+
+            data.regexp(column = "subjects",
+                                 pattern = ",",
+                                 method = "count",
+                                 name = "nb_subjects")
+            data["nb_subjects"].add(1)
+            data.select(["subjects", "nb_subjects"])
+
+
+        .. ipython:: python
+            :suppress:
+
+            data.regexp(column = "subjects",
+                                 pattern = ",",
+                                 method = "count",
+                                 name = "nb_subjects")
+            data["nb_subjects"].add(1)
+            res = data.select(["subjects", "nb_subjects"])
+            html_file = open("figures/core_vDataFrame_text_regex2.html", "w")
+            html_file.write(res._repr_html_())
+            html_file.close()
+
+        .. raw:: html
+            :file: ../../../figures/core_vDataFrame_text_regex2.html
+
+        .. seealso::
+
+            | :py:meth:`verticapy.vDataFrame.eval`
+
         """
         column = self.format_colnames(column)
         pattern_str = pattern.replace("'", "''")
@@ -135,6 +216,52 @@ def str_contains(self, pat: str) -> "vDataFrame":
         -------
         vDataFrame
             self._parent
+
+        Examples
+        ---------
+
+        Let's begin by importing `VerticaPy`.
+
+        .. ipython:: python
+
+            import verticapy as vp
+
+
+        Let's generate a small dataset using the following data:
+
+        .. ipython:: python
+
+            data = vp.vDataFrame(
+                            {
+                                "rollno": ['1', '2', '3', '4'],
+                                "subjects": ['English, Math', 'English, Math, Computer', 'Math, Computer, Science', 'Math, Science'],
+                            })
+
+        Let's check which rows contain "English" in the "subjects" vDataColumn.
+
+        .. code-block:: python
+
+            data["subjects"].str_contains(pat = "English").select(["rollno",
+                                              "subjects as has_english"])
+
+        .. ipython:: python
+            :suppress:
+
+            res = data["subjects"].str_contains(pat = "English").select(["rollno",
+                                              "subjects as has_english"])
+            html_file = open("figures/core_vDataFrame_text_str_contains.html", "w")
+            html_file.write(res._repr_html_())
+            html_file.close()
+
+        .. raw:: html
+            :file: ../../../figures/core_vDataFrame_text_str_contains.html
+
+        .. seealso::
+
+            | :py:meth:`verticapy.vDataColumn.str_count`
+            | :py:meth:`verticapy.vDataColumn.str_extract`
+            | :py:meth:`verticapy.vDataColumn.str_replace`
+            | :py:meth:`verticapy.vDataColumn.str_slice`
         """
         pat = pat.replace("'", "''")
         return self.apply(func=f"REGEXP_COUNT({{}}, '{pat}') > 0")
@@ -155,6 +282,52 @@ def str_count(self, pat: str) -> "vDataFrame":
         -------
         vDataFrame
             self._parent
+
+        Examples
+        ---------
+
+        Let's begin by importing `VerticaPy`.
+
+        .. ipython:: python
+
+            import verticapy as vp
+
+
+        Let's generate a small dataset using the following data:
+
+        .. ipython:: python
+
+            data = vp.vDataFrame(
+                            {
+                                "rollno": ['1', '2', '3', '4'],
+                                "subjects": ['English, Math', 'English, Math, Computer', 'Math, Computer, Science', 'Math, Science'],
+                            })
+
+        Let's count the number of times "English" appears in the "subjects" vDataColumn.
+
+        .. code-block:: python
+
+            data["subjects"].str_count(pat = "English").select(["rollno",
+                                              "subjects as english_count"])
+
+        .. ipython:: python
+            :suppress:
+
+            res = data["subjects"].str_count(pat = "English").select(["rollno",
+                                              "subjects as english_count"])
+            html_file = open("figures/core_vDataFrame_text_str_count.html", "w")
+            html_file.write(res._repr_html_())
+            html_file.close()
+
+        .. raw:: html
+            :file: ../../../figures/core_vDataFrame_text_str_count.html
+
+        .. seealso::
+
+            | :py:meth:`verticapy.vDataColumn.str_contains`
+            | :py:meth:`verticapy.vDataColumn.str_extract`
+            | :py:meth:`verticapy.vDataColumn.str_replace`
+            | :py:meth:`verticapy.vDataColumn.str_slice`
         """
         pat = pat.replace("'", "''")
         return self.apply(func=f"REGEXP_COUNT({{}}, '{pat}')")
@@ -174,6 +347,50 @@ def str_extract(self, pat: str) -> "vDataFrame":
         -------
         vDataFrame
             self._parent
+
+        Examples
+        ---------
+
+        Let's begin by importing `VerticaPy`.
+
+        .. ipython:: python
+
+            import verticapy as vp
+
+
+        Let's generate a small dataset using the following data:
+
+        .. ipython:: python
+
+            data = vp.vDataFrame(
+                            {
+                                "name": ['Mr. Steve Smith', 'Mr. Charlie Dickens', 'Mrs. Helen Ross', 'Dr. Jack Smith']
+                            }
+                        )
+
+        Let's extract the name prefix.
+
+        .. code-block:: python
+
+            data["name"].str_extract(pat = "([A-Za-z])+\.")
+
+        .. ipython:: python
+            :suppress:
+
+            res = data["name"].str_extract(pat = "([A-Za-z])+\.")
+            html_file = open("figures/core_vDataFrame_text_str_extract.html", "w")
+            html_file.write(res._repr_html_())
+            html_file.close()
+
+        .. raw:: html
+            :file: ../../../figures/core_vDataFrame_text_str_extract.html
+
+        .. seealso::
+
+            | :py:meth:`verticapy.vDataColumn.str_contains`
+            | :py:meth:`verticapy.vDataColumn.str_count`
+            | :py:meth:`verticapy.vDataColumn.str_replace`
+            | :py:meth:`verticapy.vDataColumn.str_slice`
         """
         pat = pat.replace("'", "''")
         return self.apply(func=f"REGEXP_SUBSTR({{}}, '{pat}')")
@@ -196,6 +413,52 @@ def str_replace(self, to_replace: str, value: Optional[str] = None) -> "vDataFra
         -------
         vDataFrame
             self._parent
+
+        Examples
+        ---------
+
+        Let's begin by importing `VerticaPy`.
+
+        .. ipython:: python
+
+            import verticapy as vp
+
+
+        Let's generate a small dataset using the following data:
+
+        .. ipython:: python
+
+            data = vp.vDataFrame(
+                            {
+                                "name": ['Mr. Steve Smith', 'Mr. Charlie Dickens', 'Mrs. Helen Ross', 'Dr. Jack Smith']
+                            }
+                        )
+
+        Let's replace the name prefix with static text "[Name_Prefix]".
+
+        .. code-block:: python
+
+            data["name"].str_replace(to_replace  = "([A-Za-z])+\.",
+                                    value = "[Name_Prefix]")
+
+        .. ipython:: python
+            :suppress:
+
+            res = data["name"].str_replace(to_replace  = "([A-Za-z])+\.",
+                                           value = "[Name_Prefix]")
+            html_file = open("figures/core_vDataFrame_text_str_replace.html", "w")
+            html_file.write(res._repr_html_())
+            html_file.close()
+
+        .. raw:: html
+            :file: ../../../figures/core_vDataFrame_text_str_replace.html
+
+        .. seealso::
+
+            | :py:meth:`verticapy.vDataColumn.str_contains`
+            | :py:meth:`verticapy.vDataColumn.str_count`
+            | :py:meth:`verticapy.vDataColumn.str_extract`
+            | :py:meth:`verticapy.vDataColumn.str_slice`
         """
         to_replace = to_replace.replace("'", "''")
         value = value.replace("'", "''")
@@ -217,5 +480,49 @@ def str_slice(self, start: int, step: int) -> "vDataFrame":
         -------
         vDataFrame
             self._parent
+
+        Examples
+        ---------
+
+        Let's begin by importing `VerticaPy`.
+
+        .. ipython:: python
+
+            import verticapy as vp
+
+
+        Let's generate a small dataset using the following data:
+
+        .. ipython:: python
+
+            data = vp.vDataFrame(
+                            {
+                                "name": ['Mr. Steve Smith', 'Mr. Charlie Dickens', 'Mrs. Helen Ross', 'Dr. Jack Smith']
+                            }
+                        )
+
+        Let's extract the first 3 characters of the name.
+
+        .. code-block:: python
+
+            data["name"].str_slice(start = 0, step =3)
+
+        .. ipython:: python
+            :suppress:
+
+            res = data["name"].str_slice(start = 0, step =3)
+            html_file = open("figures/core_vDataFrame_text_str_slice.html", "w")
+            html_file.write(res._repr_html_())
+            html_file.close()
+
+        .. raw:: html
+            :file: ../../../figures/core_vDataFrame_text_str_slice.html
+
+        .. seealso::
+
+            | :py:meth:`verticapy.vDataColumn.str_contains`
+            | :py:meth:`verticapy.vDataColumn.str_count`
+            | :py:meth:`verticapy.vDataColumn.str_replace`
+            | :py:meth:`verticapy.vDataColumn.str_extract`
         """
         return self.apply(func=f"SUBSTR({{}}, {start}, {step})")