Skip to content

Commit

Permalink
Backport PR #54815 on branch 2.1.x (DOC: added docstring for `storage_options` in `read_html`) (#54852)
Browse files Browse the repository at this point in the history

Backport PR #54815: DOC: added docstring for `storage_options` in `read_html`

Co-authored-by: Rajat Subhra Mukherjee <[email protected]>
  • Loading branch information
meeseeksmachine and rsm-23 authored Aug 29, 2023
1 parent 11424f8 commit f7f2057
Showing 1 changed file with 17 additions and 10 deletions.
27 changes: 17 additions & 10 deletions pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
AbstractMethodError,
EmptyDataError,
)
from pandas.util._decorators import doc
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import check_dtype_backend

Expand All @@ -32,6 +33,7 @@
from pandas.core.indexes.base import Index
from pandas.core.indexes.multi import MultiIndex
from pandas.core.series import Series
from pandas.core.shared_docs import _shared_docs

from pandas.io.common import (
file_exists,
Expand Down Expand Up @@ -363,13 +365,13 @@ def _parse_tfoot_tr(self, table):
"""
raise AbstractMethodError(self)

def _parse_tables(self, doc, match, attrs):
def _parse_tables(self, document, match, attrs):
"""
Return all tables from the parsed DOM.
Parameters
----------
doc : the DOM from which to parse the table element.
document : the DOM from which to parse the table element.
match : str or regular expression
The text to search for in the DOM tree.
Expand Down Expand Up @@ -594,9 +596,9 @@ def __init__(self, *args, **kwargs) -> None:

self._strainer = SoupStrainer("table")

def _parse_tables(self, doc, match, attrs):
def _parse_tables(self, document, match, attrs):
element_name = self._strainer.name
tables = doc.find_all(element_name, attrs=attrs)
tables = document.find_all(element_name, attrs=attrs)
if not tables:
raise ValueError("No tables found")

Expand Down Expand Up @@ -726,7 +728,7 @@ def _parse_td(self, row):
# <thead> or <tfoot> (see _parse_thead_tr).
return row.xpath("./td|./th")

def _parse_tables(self, doc, match, kwargs):
def _parse_tables(self, document, match, kwargs):
pattern = match.pattern

# 1. check all descendants for the given pattern and only search tables
Expand All @@ -738,7 +740,7 @@ def _parse_tables(self, doc, match, kwargs):
if kwargs:
xpath_expr += _build_xpath_expr(kwargs)

tables = doc.xpath(xpath_expr, namespaces=_re_namespace)
tables = document.xpath(xpath_expr, namespaces=_re_namespace)

tables = self._handle_hidden_tables(tables, "attrib")
if self.displayed_only:
Expand Down Expand Up @@ -1026,6 +1028,7 @@ def _parse(
return ret


@doc(storage_options=_shared_docs["storage_options"])
def read_html(
io: FilePath | ReadBuffer[str],
*,
Expand Down Expand Up @@ -1096,13 +1099,13 @@ def read_html(
passed to lxml or Beautiful Soup. However, these attributes must be
valid HTML table attributes to work correctly. For example, ::
attrs = {'id': 'table'}
attrs = {{'id': 'table'}}
is a valid attribute dictionary because the 'id' HTML tag attribute is
a valid HTML attribute for *any* HTML tag as per `this document
<https://html.spec.whatwg.org/multipage/dom.html#global-attributes>`__. ::
attrs = {'asdf': 'table'}
attrs = {{'asdf': 'table'}}
is *not* a valid attribute dictionary because 'asdf' is not a valid
HTML attribute even if it is a valid XML attribute. Valid HTML 4.01
Expand Down Expand Up @@ -1144,13 +1147,13 @@ def read_html(
displayed_only : bool, default True
Whether elements with "display: none" should be parsed.
extract_links : {None, "all", "header", "body", "footer"}
extract_links : {{None, "all", "header", "body", "footer"}}
Table elements in the specified section(s) with <a> tags will have their
href extracted.
.. versionadded:: 1.5.0
dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable'
dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable'
Back-end data type applied to the resultant :class:`DataFrame`
(still experimental). Behaviour is as follows:
Expand All @@ -1161,6 +1164,10 @@ def read_html(
.. versionadded:: 2.0
{storage_options}
.. versionadded:: 2.1.0
Returns
-------
dfs
Expand Down

0 comments on commit f7f2057

Please sign in to comment.