Skip to content

Commit

Permalink
Fix parsing of the GitHub README dataset table
Browse files Browse the repository at this point in the history
  • Loading branch information
vss-2 committed Apr 23, 2024
1 parent bda5a6a commit f6fd5af
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 5 deletions.
10 changes: 7 additions & 3 deletions python-package/geobr/list_geobr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pandas as pd
from io import StringIO
from urllib.error import HTTPError

import re

def list_geobr():
"""Prints available functions, according to latest README.md file
Expand All @@ -19,8 +19,12 @@ def list_geobr():

try:
html_data = get("https://github.com/ipeaGIT/geobr/blob/master/README.md").text

df = pd.read_html(StringIO(html_data))[1]
find_emoji = html_data.index("👉")
html_data = html_data[find_emoji:]
escaped_data = html_data.replace("\\u003c", "<").replace("\\u003e", ">")
tables = re.findall("<table>(.+?)</table>", escaped_data)
available_datasets = "<table>" + tables[0].replace("\\n", "") + "</table>"
df = pd.DataFrame(pd.read_html(StringIO(available_datasets))[0])

except HTTPError:
print(
Expand Down
2 changes: 0 additions & 2 deletions python-package/tests/test_list_geobr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@


def test_list_geobr(capsys):
    """Check that calling list_geobr() writes a sizeable listing to stdout."""
    list_geobr()

    # The function reports its result by printing, so inspect what pytest
    # captured on stdout rather than a return value.
    printed = capsys.readouterr().out
    assert len(printed) > 200

0 comments on commit f6fd5af

Please sign in to comment.