Skip to content

Commit

Permalink
docstring changes for vdataframe/text
Browse files Browse the repository at this point in the history
  • Loading branch information
abhsharma2 committed Nov 6, 2023
1 parent 12ba3a9 commit 345fdbc
Showing 1 changed file with 307 additions and 0 deletions.
307 changes: 307 additions & 0 deletions verticapy/core/vdataframe/_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,87 @@ def regexp(
-------
vDataFrame
self
Examples
---------
Let's begin by importing `VerticaPy`.
.. ipython:: python
import verticapy as vp
Let's generate a small dataset using the following data:
.. ipython:: python
data = vp.vDataFrame(
{
"rollno": ['1', '2', '3', '4'],
"subjects": ['English, Math', 'English, Math, Computer', 'Math, Computer, Science', 'Math, Science'],
})
Let's retrieve the second subject.
.. code-block:: python
data.regexp(column = "subjects",
pattern = "[^,]+",
method = "substr",
occurrence = 2,
name = "subject_2").select(["subjects",
"subject_2"])
.. ipython:: python
:suppress:
res = data.regexp(column = "subjects",
pattern = "[^,]+",
method = "substr",
occurrence = 2,
name = "subject_2").select(["subjects",
"subject_2"])
html_file = open("figures/core_vDataFrame_text_regex1.html", "w")
html_file.write(res._repr_html_())
html_file.close()
.. raw:: html
:file: ../../../figures/core_vDataFrame_text_regex1.html
Let's count the number of subjects.
.. code-block:: python
data.regexp(column = "subjects",
pattern = ",",
method = "count",
name = "nb_subjects")
data["nb_subjects"].add(1)
data.select(["subjects", "nb_subjects"])
.. ipython:: python
:suppress:
data.regexp(column = "subjects",
pattern = ",",
method = "count",
name = "nb_subjects")
data["nb_subjects"].add(1)
res = data.select(["subjects", "nb_subjects"])
html_file = open("figures/core_vDataFrame_text_regex2.html", "w")
html_file.write(res._repr_html_())
html_file.close()
.. raw:: html
:file: ../../../figures/core_vDataFrame_text_regex2.html
.. seealso::
| :py:meth:`verticapy.vDataFrame.eval`
"""
column = self.format_colnames(column)
pattern_str = pattern.replace("'", "''")
Expand Down Expand Up @@ -135,6 +216,52 @@ def str_contains(self, pat: str) -> "vDataFrame":
-------
vDataFrame
self._parent
Examples
---------
Let's begin by importing `VerticaPy`.
.. ipython:: python
import verticapy as vp
Let's generate a small dataset using the following data:
.. ipython:: python
data = vp.vDataFrame(
{
"rollno": ['1', '2', '3', '4'],
"subjects": ['English, Math', 'English, Math, Computer', 'Math, Computer, Science', 'Math, Science'],
})
Let's check whether each student's subjects include "English".
.. code-block:: python
data["subjects"].str_contains(pat = "English").select(["rollno",
"subjects as has_english"])
.. ipython:: python
:suppress:
res = data["subjects"].str_contains(pat = "English").select(["rollno",
"subjects as has_english"])
html_file = open("figures/core_vDataFrame_text_str_contains.html", "w")
html_file.write(res._repr_html_())
html_file.close()
.. raw:: html
:file: ../../../figures/core_vDataFrame_text_str_contains.html
.. seealso::
| :py:meth:`verticapy.vDataFrame.str_count`
| :py:meth:`verticapy.vDataFrame.str_extract`
| :py:meth:`verticapy.vDataFrame.str_replace`
| :py:meth:`verticapy.vDataFrame.str_slice`
"""
pat = pat.replace("'", "''")
return self.apply(func=f"REGEXP_COUNT({{}}, '{pat}') > 0")
Expand All @@ -155,6 +282,52 @@ def str_count(self, pat: str) -> "vDataFrame":
-------
vDataFrame
self._parent
Examples
---------
Let's begin by importing `VerticaPy`.
.. ipython:: python
import verticapy as vp
Let's generate a small dataset using the following data:
.. ipython:: python
data = vp.vDataFrame(
{
"rollno": ['1', '2', '3', '4'],
"subjects": ['English, Math', 'English, Math, Computer', 'Math, Computer, Science', 'Math, Science'],
})
Let's count the number of times "English" appears in the "subjects" vDataColumn.
.. code-block:: python
data["subjects"].str_count(pat = "English").select(["rollno",
"subjects as english_count"])
.. ipython:: python
:suppress:
res = data["subjects"].str_count(pat = "English").select(["rollno",
"subjects as english_count"])
html_file = open("figures/core_vDataFrame_text_str_count.html", "w")
html_file.write(res._repr_html_())
html_file.close()
.. raw:: html
:file: ../../../figures/core_vDataFrame_text_str_count.html
.. seealso::
| :py:meth:`verticapy.vDataFrame.str_contains`
| :py:meth:`verticapy.vDataFrame.str_extract`
| :py:meth:`verticapy.vDataFrame.str_replace`
| :py:meth:`verticapy.vDataFrame.str_slice`
"""
pat = pat.replace("'", "''")
return self.apply(func=f"REGEXP_COUNT({{}}, '{pat}')")
Expand All @@ -174,6 +347,50 @@ def str_extract(self, pat: str) -> "vDataFrame":
-------
vDataFrame
self._parent
Examples
---------
Let's begin by importing `VerticaPy`.
.. ipython:: python
import verticapy as vp
Let's generate a small dataset using the following data:
.. ipython:: python
data = vp.vDataFrame(
{
"name": ['Mr. Steve Smith', 'Mr. Charlie Dickens', 'Mrs. Helen Ross', 'Dr. Jack Smith']
}
)
Let's extract the name prefix.
.. code-block:: python
data["name"].str_extract(pat = "([A-Za-z])+\.")
.. ipython:: python
:suppress:
res = data["name"].str_extract(pat = "([A-Za-z])+\.")
html_file = open("figures/core_vDataFrame_text_str_extract.html", "w")
html_file.write(res._repr_html_())
html_file.close()
.. raw:: html
:file: ../../../figures/core_vDataFrame_text_str_extract.html
.. seealso::
| :py:meth:`verticapy.vDataFrame.str_contains`
| :py:meth:`verticapy.vDataFrame.str_count`
| :py:meth:`verticapy.vDataFrame.str_replace`
| :py:meth:`verticapy.vDataFrame.str_slice`
"""
pat = pat.replace("'", "''")
return self.apply(func=f"REGEXP_SUBSTR({{}}, '{pat}')")
Expand All @@ -196,6 +413,52 @@ def str_replace(self, to_replace: str, value: Optional[str] = None) -> "vDataFra
-------
vDataFrame
self._parent
Examples
---------
Let's begin by importing `VerticaPy`.
.. ipython:: python
import verticapy as vp
Let's generate a small dataset using the following data:
.. ipython:: python
data = vp.vDataFrame(
{
"name": ['Mr. Steve Smith', 'Mr. Charlie Dickens', 'Mrs. Helen Ross', 'Dr. Jack Smith']
}
)
Let's replace the name prefix with the static text "[Name_Prefix]".
.. code-block:: python
data["name"].str_replace(to_replace = "([A-Za-z])+\.",
value = "[Name_Prefix]")
.. ipython:: python
:suppress:
res = data["name"].str_replace(to_replace = "([A-Za-z])+\.",
value = "[Name_Prefix]")
html_file = open("figures/core_vDataFrame_text_str_replace.html", "w")
html_file.write(res._repr_html_())
html_file.close()
.. raw:: html
:file: ../../../figures/core_vDataFrame_text_str_replace.html
.. seealso::
| :py:meth:`verticapy.vDataFrame.str_contains`
| :py:meth:`verticapy.vDataFrame.str_count`
| :py:meth:`verticapy.vDataFrame.str_extract`
| :py:meth:`verticapy.vDataFrame.str_slice`
"""
to_replace = to_replace.replace("'", "''")
value = value.replace("'", "''")
Expand All @@ -217,5 +480,49 @@ def str_slice(self, start: int, step: int) -> "vDataFrame":
-------
vDataFrame
self._parent
Examples
---------
Let's begin by importing `VerticaPy`.
.. ipython:: python
import verticapy as vp
Let's generate a small dataset using the following data:
.. ipython:: python
data = vp.vDataFrame(
{
"name": ['Mr. Steve Smith', 'Mr. Charlie Dickens', 'Mrs. Helen Ross', 'Dr. Jack Smith']
}
)
Let's extract the first 3 characters of each name.
.. code-block:: python
data["name"].str_slice(start = 0, step =3)
.. ipython:: python
:suppress:
res = data["name"].str_slice(start = 0, step =3)
html_file = open("figures/core_vDataFrame_text_str_slice.html", "w")
html_file.write(res._repr_html_())
html_file.close()
.. raw:: html
:file: ../../../figures/core_vDataFrame_text_str_slice.html
.. seealso::
| :py:meth:`verticapy.vDataFrame.str_contains`
| :py:meth:`verticapy.vDataFrame.str_count`
| :py:meth:`verticapy.vDataFrame.str_replace`
| :py:meth:`verticapy.vDataFrame.str_extract`
"""
return self.apply(func=f"SUBSTR({{}}, {start}, {step})")

0 comments on commit 345fdbc

Please sign in to comment.