From 4d51da7d1d8a42cff9976fc9dc5783e11b1ae890 Mon Sep 17 00:00:00 2001 From: Sean F Quinn Date: Tue, 18 Apr 2023 07:47:48 -0400 Subject: [PATCH 01/11] Fixed shape test for test_get_program_ratings after patch to remove extraneous rows --- tests/test_misc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_misc.py b/tests/test_misc.py index 253d95e..b82a90d 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -78,7 +78,7 @@ def test_get_gameattribs(browser): def test_get_program_ratings(browser): - expected = (358, 17) + expected = (356, 17) df = kpmisc.get_program_ratings(browser) assert df.shape == expected \ No newline at end of file From 1c67574476bffa8451468a8e0d865ec46def02ba Mon Sep 17 00:00:00 2001 From: Sean F Quinn Date: Sat, 27 May 2023 10:06:51 -0400 Subject: [PATCH 02/11] Fixed typo'd reference in test_team.py --- tests/test_team.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_team.py b/tests/test_team.py index b5c6993..54652db 100644 --- a/tests/test_team.py +++ b/tests/test_team.py @@ -21,7 +21,7 @@ def test_get_valid_teams(browser): assert len(teams_2003) == 327 valid_2003_teams = ['Kentucky', 'Kansas', 'Georgetown', 'Dayton', 'South Carolina', 'Fresno St.', 'Iowa', 'SMU', 'TCU', 'North Carolina A&T'] - for team in teams_2003: + for team in valid_2003_teams: assert team in teams_2003 invalid_2003_teams = ['Loyola Marymnt University', 'YRU', 'Praget', 'Invalid U', 'SRTU', 'Kennesaw St.'] From 0e1d6d4c0b09b5f0cb4f767e786d88a60d91c4cd Mon Sep 17 00:00:00 2001 From: Sean F Quinn Date: Fri, 14 Jul 2023 13:02:06 -0400 Subject: [PATCH 03/11] updated test_get_program_ratings to use dynamic verification of df shape to preclude annual updates to tuple literal --- tests/test_misc.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_misc.py b/tests/test_misc.py index b82a90d..a987fcf 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -78,7 +78,6 @@ def test_get_gameattribs(browser): def test_get_program_ratings(browser): - expected = (356, 17) - df = kpmisc.get_program_ratings(browser) + expected = (len(browser.page.select("tr:not(:has(th))")), 17) assert df.shape == expected \ No newline at end of file From 58cb6ad24f1a77584e916cab8cecbdf6f61e3752 Mon Sep 17 00:00:00 2001 From: Sean F Quinn Date: Thu, 19 Oct 2023 16:48:11 -0400 Subject: [PATCH 04/11] fix FutureWarnings in conference.py (#55) --- kenpompy/conference.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kenpompy/conference.py b/kenpompy/conference.py index dfa6a55..4d14898 100644 --- a/kenpompy/conference.py +++ b/kenpompy/conference.py @@ -8,7 +8,7 @@ import re from bs4 import BeautifulSoup import datetime - +from io import StringIO def get_valid_conferences(browser, season=None): """ @@ -60,10 +60,10 @@ def get_aggregate_stats(browser, conf=None, season=None): confs = browser.get_current_page() #get first table table = confs.find_all('table')[-3] - conf_df = pd.read_html(str(table))[0] + conf_df = pd.read_html(StringIO(str(table)))[0] #get second table table = confs.find_all('table')[-2] - conf2_df = pd.read_html(str(table))[0] + conf2_df = pd.read_html(StringIO(str(table)))[0] conf2_df['Value'] = conf2_df['Value'].str.replace('%', '').astype(float) conf_df = pd.concat([conf_df, conf2_df]) #clean table @@ -80,7 +80,7 @@ def get_aggregate_stats(browser, conf=None, season=None): confs = browser.get_current_page() #get table table = confs.find_all('table')[0] - conf_df = pd.read_html(str(table))[0] + conf_df = pd.read_html(StringIO(str(table)))[0] # Clean table conf_df = conf_df.set_index('Conf') conf_df.columns = [stat[:-1] + 'Rank' if '.1' in stat else stat for stat in conf_df.columns] @@ -107,7 +107,7 @@ def get_standings(browser, conf, season=None): browser.open(url) confs = browser.get_current_page() table = confs.find_all('table')[0] - conf_df = pd.read_html(str(table))[0] + conf_df = pd.read_html(StringIO(str(table)))[0] # Parse out seed conf_df['Seed'] = conf_df['Team'].str.extract('([0-9]+)') conf_df['Team'] = conf_df['Team'].str.replace('([0-9]+)', '', regex=True).str.rstrip() @@ -139,7 +139,7 @@ def get_offense(browser, conf, season=None): browser.open(url) confs = browser.get_current_page() table = confs.find_all('table')[1] - conf_df = pd.read_html(str(table))[0] + conf_df = pd.read_html(StringIO(str(table)))[0] # Rename Rank headers conf_df.columns = [stat[:-1] + 'Rank' if '.1' in stat else stat for stat in conf_df.columns] @@ -168,7 +168,7 @@ def get_defense(browser, conf, season=None): browser.open(url) confs = browser.get_current_page() table = confs.find_all('table')[2] - conf_df = pd.read_html(str(table))[0] + conf_df = pd.read_html(StringIO(str(table)))[0] # Rename Rank headers conf_df.columns = [stat[:-1] + 'Rank' if '.1' in stat else stat for stat in conf_df.columns] From 400320beb7865c7115592d844001a0e60049d53b Mon Sep 17 00:00:00 2001 From: Sean F Quinn Date: Thu, 19 Oct 2023 16:50:27 -0400 Subject: [PATCH 05/11] fix FutureWarnings in misc.py (#55) --- kenpompy/misc.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/kenpompy/misc.py b/kenpompy/misc.py index 15cd9e8..c4721a8 100644 --- a/kenpompy/misc.py +++ b/kenpompy/misc.py @@ -6,6 +6,7 @@ import mechanicalsoup import pandas as pd from bs4 import BeautifulSoup +from io import StringIO def get_pomeroy_ratings(browser, season=None): """ @@ -28,7 +29,7 @@ def get_pomeroy_ratings(browser, season=None): browser.open(url) page = browser.get_current_page() table = page.find_all('table')[0] - ratings_df = pd.read_html(str(table)) + ratings_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. ratings_df = ratings_df[0] ratings_df.columns = ratings_df.columns.map(lambda x: x[1]) @@ -66,7 +67,7 @@ def get_trends(browser): browser.open(url) trends = browser.get_current_page() table = trends.find_all('table')[0] - trends_df = pd.read_html(str(table)) + trends_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. trends_df = trends_df[0] @@ -103,7 +104,7 @@ def get_refs(browser, season=None): browser.open(url) refs = browser.get_current_page() table = refs.find_all('table')[0] - refs_df = pd.read_html(str(table)) + refs_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. refs_df = refs_df[0] @@ -132,7 +133,7 @@ def get_hca(browser): browser.open(url) hca = browser.get_current_page() table = hca.find_all('table')[0] - hca_df = pd.read_html(str(table)) + hca_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. hca_df = hca_df[0] @@ -171,7 +172,7 @@ def get_arenas(browser, season=None): browser.open(url) arenas = browser.get_current_page() table = arenas.find_all('table')[0] - arenas_df = pd.read_html(str(table)) + arenas_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. arenas_df = arenas_df[0] @@ -234,7 +235,7 @@ def get_gameattribs(browser, season=None, metric='Excitement'): playerstats = browser.get_current_page() table = playerstats.find_all('table')[0] - ga_df = pd.read_html(str(table)) + ga_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. ga_df = ga_df[0] @@ -263,7 +264,7 @@ def get_program_ratings(browser): browser.open(url) programs = browser.get_current_page() table = programs.find_all('table')[0] - programs_df = pd.read_html(str(table)) + programs_df = pd.read_html(StringIO(str(table))) programs_df = programs_df[0] programs_df.columns = ['Rank', 'Team', 'Rating', 'kenpom.Best.Rank', 'kenpom.Best.Season', 'kenpom.Worst.Rank', From c64e2e103a2755344f8b25aaa6e4a454e628a4d2 Mon Sep 17 00:00:00 2001 From: Sean F Quinn Date: Thu, 19 Oct 2023 16:55:47 -0400 Subject: [PATCH 06/11] fix FutureWarnings in summary.py (#55) --- kenpompy/summary.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/kenpompy/summary.py b/kenpompy/summary.py index f4c678b..af1cc01 100644 --- a/kenpompy/summary.py +++ b/kenpompy/summary.py @@ -7,7 +7,7 @@ import pandas as pd import re from bs4 import BeautifulSoup - +from io import StringIO def get_efficiency(browser, season=None): """ @@ -37,7 +37,7 @@ def get_efficiency(browser, season=None): browser.open(url) eff = browser.get_current_page() table = eff.find_all('table')[0] - eff_df = pd.read_html(str(table)) + eff_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. eff_df = eff_df[0] @@ -95,7 +95,7 @@ def get_fourfactors(browser, season=None): browser.open(url) ff = browser.get_current_page() table = ff.find_all('table')[0] - ff_df = pd.read_html(str(table)) + ff_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. ff_df = ff_df[0] @@ -153,7 +153,7 @@ def get_teamstats(browser, defense=False, season=None): browser.open(url) ts = browser.get_current_page() table = ts.find_all('table')[0] - ts_df = pd.read_html(str(table)) + ts_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. ts_df = ts_df[0] @@ -201,7 +201,7 @@ def get_pointdist(browser, season=None): browser.open(url) dist = browser.get_current_page() table = dist.find_all('table')[0] - dist_df = pd.read_html(str(table)) + dist_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. dist_df = dist_df[0] @@ -247,7 +247,7 @@ def get_height(browser, season=None): browser.open(url) height = browser.get_current_page() table = height.find_all('table')[0] - h_df = pd.read_html(str(table)) + h_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. h_df = h_df[0] @@ -346,7 +346,7 @@ def get_playerstats(browser, season=None, metric='EFG', conf=None, conf_only=Fal ps_dfs = [] tables = playerstats.find_all('table') for t in tables: - ps_df = pd.read_html(str(t)) + ps_df = pd.read_html(StringIO(str(t))) ps_df = ps_df[0] # Split ortg column. @@ -364,7 +364,7 @@ def get_playerstats(browser, season=None, metric='EFG', conf=None, conf_only=Fal if metric.upper() in perc_mets: metric = metric + '%' table = playerstats.find_all('table')[0] - ps_df = pd.read_html(str(table)) + ps_df = pd.read_html(StringIO(str(table))) # Dataframe tidying. ps_df = ps_df[0] @@ -415,7 +415,7 @@ def get_kpoy(browser, season=None): browser.open(url) kpoy = browser.get_current_page() table = kpoy.find_all('table')[0] - df = pd.read_html(str(table)) + df = pd.read_html(StringIO(str(table))) kpoy_df = df[0] kpoy_df.columns = ['Rank', 'Player', 'KPOY Rating'] @@ -431,7 +431,7 @@ def get_kpoy(browser, season=None): # Now the MVP table. if int(season) >= 2013: table = kpoy.find_all('table')[-1] - df = pd.read_html(str(table)) + df = pd.read_html(StringIO(str(table))) mvp_df = df[0] mvp_df.columns = ['Rank', 'Player', 'Game MVPs'] From 77c1939d349999f20e8b64cecc64f7186b177625 Mon Sep 17 00:00:00 2001 From: Sean F Quinn Date: Thu, 19 Oct 2023 16:57:33 -0400 Subject: [PATCH 07/11] fix FutureWarnings in team.py (#55) --- kenpompy/team.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kenpompy/team.py b/kenpompy/team.py index d2addc8..54e7f55 100644 --- a/kenpompy/team.py +++ b/kenpompy/team.py @@ -5,7 +5,7 @@ import pandas as pd import datetime - +from io import StringIO def get_valid_teams(browser, season=None): """ @@ -26,7 +26,7 @@ def get_valid_teams(browser, season=None): browser.open(url) teams = browser.get_current_page() table = teams.find_all('table')[0] - team_df = pd.read_html(str(table)) + team_df = pd.read_html(StringIO(str(table))) # Get only the team column. team_df = team_df[0].iloc[:, 1] # Remove NCAA tourny seeds for previous seasons. @@ -89,7 +89,7 @@ def get_schedule(browser, team=None, season=None): browser.open(url) schedule = browser.get_current_page() table = schedule.find_all('table')[1] - schedule_df = pd.read_html(str(table)) + schedule_df = pd.read_html(StringIO(str(table))) # Dataframe Tidying schedule_df = schedule_df[0] From 5cdc0fa610503b99d7d7b8484f2c152fdc5e3955 Mon Sep 17 00:00:00 2001 From: Sean F Quinn Date: Thu, 19 Oct 2023 16:58:30 -0400 Subject: [PATCH 08/11] fix FutureWarnings in FanMatch.py (#55) --- kenpompy/FanMatch.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kenpompy/FanMatch.py b/kenpompy/FanMatch.py index e2ab36b..5893dc7 100644 --- a/kenpompy/FanMatch.py +++ b/kenpompy/FanMatch.py @@ -5,6 +5,7 @@ import mechanicalsoup import pandas as pd from bs4 import BeautifulSoup +from io import StringIO class FanMatch: """Object to hold FanMatch page scraping results. @@ -52,7 +53,7 @@ def __init__(self, browser, date = None): browser.open(self.url) fm = browser.get_current_page() table = fm.find_all("table")[0] - fm_df = pd.read_html(str(table)) + fm_df = pd.read_html(StringIO(str(table))) fm_df = fm_df[0] fm_df = fm_df.rename(columns={"Thrill Score": "ThrillScore", "Come back": "Comeback", "Excite ment": "Excitement"}) fm_df.ThrillScore = fm_df.ThrillScore.astype("str") From cdb99376ecaff166c3276773cd0e11475d0e820a Mon Sep 17 00:00:00 2001 From: Sean F Quinn Date: Thu, 19 Oct 2023 19:35:02 -0400 Subject: [PATCH 09/11] added Python 3.12 to ci/cd (closes #57) --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2181c2d..3848ff9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -9,7 +9,7 @@ jobs: max-parallel: 1 matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] runs-on: ${{ matrix.os }} From 540f41897d29bc09d5b09474d43ddae8779ce88f Mon Sep 17 00:00:00 2001 From: Sean F Quinn Date: Thu, 19 Oct 2023 20:03:09 -0400 Subject: [PATCH 10/11] add concurrency config to prevent simultaneous jobs --- .github/workflows/tests.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3848ff9..7f7b133 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,6 +1,8 @@ name: Python package - on: [push] +concurrency: + group: ${{ github.repository }} + cancel-in-progress: true jobs: pytest: From 76e17f7ae055f2b43e600bd745571eaaa4c39c73 Mon Sep 17 00:00:00 2001 From: Sean F Quinn Date: Thu, 19 Oct 2023 20:03:43 -0400 Subject: [PATCH 11/11] run ci/cd only on pull requests for all branches except master --- .github/workflows/tests.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7f7b133..ecad73e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,5 +1,12 @@ name: Python package -on: [push] +on: + push: + branches: + - master + pull_request: + branches: + - master + concurrency: group: ${{ github.repository }} cancel-in-progress: true