Merge remote-tracking branch 'origin/master' into enhancement/team-scouting-report
esqew committed Oct 20, 2023
2 parents 3b8ddd7 + 378b5e2 commit 32f33ec
Showing 8 changed files with 43 additions and 32 deletions.
13 changes: 11 additions & 2 deletions .github/workflows/tests.yml
@@ -1,6 +1,15 @@
name: Python package
+ on:
+   push:
+     branches:
+       - master
+   pull_request:
+     branches:
+       - master

- on: [push]
+ concurrency:
+   group: ${{ github.repository }}
+   cancel-in-progress: true

jobs:
pytest:
@@ -9,7 +18,7 @@ jobs:
max-parallel: 1
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]

runs-on: ${{ matrix.os }}

3 changes: 2 additions & 1 deletion kenpompy/FanMatch.py
@@ -5,6 +5,7 @@
import mechanicalsoup
import pandas as pd
from bs4 import BeautifulSoup
+ from io import StringIO

class FanMatch:
"""Object to hold FanMatch page scraping results.
@@ -52,7 +53,7 @@ def __init__(self, browser, date = None):
browser.open(self.url)
fm = browser.get_current_page()
table = fm.find_all("table")[0]
- fm_df = pd.read_html(str(table))
+ fm_df = pd.read_html(StringIO(str(table)))
fm_df = fm_df[0]
fm_df = fm_df.rename(columns={"Thrill Score": "ThrillScore", "Come back": "Comeback", "Excite ment": "Excitement"})
fm_df.ThrillScore = fm_df.ThrillScore.astype("str")
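The change repeated throughout this commit wraps the table markup in io.StringIO before handing it to pandas.read_html. Recent pandas releases (2.1 and later) deprecate passing literal HTML as a plain string and emit a FutureWarning for it, so the file-like wrapper keeps the scrapers warning-free without changing the parsed result. A minimal sketch of the pattern, using illustrative markup rather than a real KenPom page (read_html also needs an HTML parser such as lxml installed):

    from io import StringIO

    import pandas as pd

    # Illustrative markup; kenpompy passes the table it pulls out of the scraped page.
    html = "<table><tr><th>Team</th><th>Rank</th></tr><tr><td>Kansas</td><td>1</td></tr></table>"

    # read_html(html) still works but warns on pandas >= 2.1; wrapping the string
    # in StringIO presents it as a file-like buffer and avoids the deprecated path.
    df = pd.read_html(StringIO(html))[0]
    print(df)

The same wrapper is applied to every read_html call in the files below.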
14 changes: 7 additions & 7 deletions kenpompy/conference.py
@@ -8,7 +8,7 @@
import re
from bs4 import BeautifulSoup
import datetime

+ from io import StringIO

def get_valid_conferences(browser, season=None):
"""
@@ -60,10 +60,10 @@ def get_aggregate_stats(browser, conf=None, season=None):
confs = browser.get_current_page()
#get first table
table = confs.find_all('table')[-3]
- conf_df = pd.read_html(str(table))[0]
+ conf_df = pd.read_html(StringIO(str(table)))[0]
#get second table
table = confs.find_all('table')[-2]
- conf2_df = pd.read_html(str(table))[0]
+ conf2_df = pd.read_html(StringIO(str(table)))[0]
conf2_df['Value'] = conf2_df['Value'].str.replace('%', '').astype(float)
conf_df = pd.concat([conf_df, conf2_df])
#clean table
@@ -80,7 +80,7 @@ def get_aggregate_stats(browser, conf=None, season=None):
confs = browser.get_current_page()
#get table
table = confs.find_all('table')[0]
- conf_df = pd.read_html(str(table))[0]
+ conf_df = pd.read_html(StringIO(str(table)))[0]
# Clean table
conf_df = conf_df.set_index('Conf')
conf_df.columns = [stat[:-1] + 'Rank' if '.1' in stat else stat for stat in conf_df.columns]
@@ -107,7 +107,7 @@ def get_standings(browser, conf, season=None):
browser.open(url)
confs = browser.get_current_page()
table = confs.find_all('table')[0]
- conf_df = pd.read_html(str(table))[0]
+ conf_df = pd.read_html(StringIO(str(table)))[0]
# Parse out seed
conf_df['Seed'] = conf_df['Team'].str.extract('([0-9]+)')
conf_df['Team'] = conf_df['Team'].str.replace('([0-9]+)', '', regex=True).str.rstrip()
@@ -139,7 +139,7 @@ def get_offense(browser, conf, season=None):
browser.open(url)
confs = browser.get_current_page()
table = confs.find_all('table')[1]
- conf_df = pd.read_html(str(table))[0]
+ conf_df = pd.read_html(StringIO(str(table)))[0]

# Rename Rank headers
conf_df.columns = [stat[:-1] + 'Rank' if '.1' in stat else stat for stat in conf_df.columns]
@@ -168,7 +168,7 @@ def get_defense(browser, conf, season=None):
browser.open(url)
confs = browser.get_current_page()
table = confs.find_all('table')[2]
- conf_df = pd.read_html(str(table))[0]
+ conf_df = pd.read_html(StringIO(str(table)))[0]

# Rename Rank headers
conf_df.columns = [stat[:-1] + 'Rank' if '.1' in stat else stat for stat in conf_df.columns]
15 changes: 8 additions & 7 deletions kenpompy/misc.py
@@ -6,6 +6,7 @@
import mechanicalsoup
import pandas as pd
from bs4 import BeautifulSoup
+ from io import StringIO

def get_pomeroy_ratings(browser, season=None):
"""
@@ -28,7 +29,7 @@ def get_pomeroy_ratings(browser, season=None):
browser.open(url)
page = browser.get_current_page()
table = page.find_all('table')[0]
- ratings_df = pd.read_html(str(table))
+ ratings_df = pd.read_html(StringIO(str(table)))
# Dataframe tidying.
ratings_df = ratings_df[0]
ratings_df.columns = ratings_df.columns.map(lambda x: x[1])
@@ -66,7 +67,7 @@ def get_trends(browser):
browser.open(url)
trends = browser.get_current_page()
table = trends.find_all('table')[0]
- trends_df = pd.read_html(str(table))
+ trends_df = pd.read_html(StringIO(str(table)))

# Dataframe tidying.
trends_df = trends_df[0]
@@ -103,7 +104,7 @@ def get_refs(browser, season=None):
browser.open(url)
refs = browser.get_current_page()
table = refs.find_all('table')[0]
- refs_df = pd.read_html(str(table))
+ refs_df = pd.read_html(StringIO(str(table)))

# Dataframe tidying.
refs_df = refs_df[0]
@@ -132,7 +133,7 @@ def get_hca(browser):
browser.open(url)
hca = browser.get_current_page()
table = hca.find_all('table')[0]
- hca_df = pd.read_html(str(table))
+ hca_df = pd.read_html(StringIO(str(table)))

# Dataframe tidying.
hca_df = hca_df[0]
@@ -171,7 +172,7 @@ def get_arenas(browser, season=None):
browser.open(url)
arenas = browser.get_current_page()
table = arenas.find_all('table')[0]
- arenas_df = pd.read_html(str(table))
+ arenas_df = pd.read_html(StringIO(str(table)))

# Dataframe tidying.
arenas_df = arenas_df[0]
@@ -234,7 +235,7 @@ def get_gameattribs(browser, season=None, metric='Excitement'):
playerstats = browser.get_current_page()

table = playerstats.find_all('table')[0]
- ga_df = pd.read_html(str(table))
+ ga_df = pd.read_html(StringIO(str(table)))

# Dataframe tidying.
ga_df = ga_df[0]
@@ -263,7 +264,7 @@ def get_program_ratings(browser):
browser.open(url)
programs = browser.get_current_page()
table = programs.find_all('table')[0]
- programs_df = pd.read_html(str(table))
+ programs_df = pd.read_html(StringIO(str(table)))
programs_df = programs_df[0]

programs_df.columns = ['Rank', 'Team', 'Rating', 'kenpom.Best.Rank', 'kenpom.Best.Season', 'kenpom.Worst.Rank',
20 changes: 10 additions & 10 deletions kenpompy/summary.py
@@ -7,7 +7,7 @@
import pandas as pd
import re
from bs4 import BeautifulSoup

+ from io import StringIO

def get_efficiency(browser, season=None):
"""
@@ -37,7 +37,7 @@ def get_efficiency(browser, season=None):
browser.open(url)
eff = browser.get_current_page()
table = eff.find_all('table')[0]
- eff_df = pd.read_html(str(table))
+ eff_df = pd.read_html(StringIO(str(table)))

# Dataframe tidying.
eff_df = eff_df[0]
@@ -95,7 +95,7 @@ def get_fourfactors(browser, season=None):
browser.open(url)
ff = browser.get_current_page()
table = ff.find_all('table')[0]
- ff_df = pd.read_html(str(table))
+ ff_df = pd.read_html(StringIO(str(table)))

# Dataframe tidying.
ff_df = ff_df[0]
@@ -153,7 +153,7 @@ def get_teamstats(browser, defense=False, season=None):
browser.open(url)
ts = browser.get_current_page()
table = ts.find_all('table')[0]
- ts_df = pd.read_html(str(table))
+ ts_df = pd.read_html(StringIO(str(table)))

# Dataframe tidying.
ts_df = ts_df[0]
@@ -201,7 +201,7 @@ def get_pointdist(browser, season=None):
browser.open(url)
dist = browser.get_current_page()
table = dist.find_all('table')[0]
- dist_df = pd.read_html(str(table))
+ dist_df = pd.read_html(StringIO(str(table)))

# Dataframe tidying.
dist_df = dist_df[0]
@@ -247,7 +247,7 @@ def get_height(browser, season=None):
browser.open(url)
height = browser.get_current_page()
table = height.find_all('table')[0]
- h_df = pd.read_html(str(table))
+ h_df = pd.read_html(StringIO(str(table)))

# Dataframe tidying.
h_df = h_df[0]
@@ -346,7 +346,7 @@ def get_playerstats(browser, season=None, metric='EFG', conf=None, conf_only=Fal
ps_dfs = []
tables = playerstats.find_all('table')
for t in tables:
- ps_df = pd.read_html(str(t))
+ ps_df = pd.read_html(StringIO(str(t)))
ps_df = ps_df[0]

# Split ortg column.
@@ -364,7 +364,7 @@ def get_playerstats(browser, season=None, metric='EFG', conf=None, conf_only=Fal
if metric.upper() in perc_mets:
metric = metric + '%'
table = playerstats.find_all('table')[0]
- ps_df = pd.read_html(str(table))
+ ps_df = pd.read_html(StringIO(str(table)))

# Dataframe tidying.
ps_df = ps_df[0]
@@ -415,7 +415,7 @@ def get_kpoy(browser, season=None):
browser.open(url)
kpoy = browser.get_current_page()
table = kpoy.find_all('table')[0]
- df = pd.read_html(str(table))
+ df = pd.read_html(StringIO(str(table)))

kpoy_df = df[0]
kpoy_df.columns = ['Rank', 'Player', 'KPOY Rating']
@@ -431,7 +431,7 @@
# Now the MVP table.
if int(season) >= 2013:
table = kpoy.find_all('table')[-1]
- df = pd.read_html(str(table))
+ df = pd.read_html(StringIO(str(table)))

mvp_df = df[0]
mvp_df.columns = ['Rank', 'Player', 'Game MVPs']
5 changes: 3 additions & 2 deletions kenpompy/team.py
@@ -5,6 +5,7 @@

import pandas as pd
import datetime
+ from io import StringIO
import re
from bs4 import BeautifulSoup
from codecs import encode, decode
@@ -28,7 +29,7 @@ def get_valid_teams(browser, season=None):
browser.open(url)
teams = browser.get_current_page()
table = teams.find_all('table')[0]
- team_df = pd.read_html(str(table))
+ team_df = pd.read_html(StringIO(str(table)))
# Get only the team column.
team_df = team_df[0].iloc[:, 1]
# Remove NCAA tourny seeds for previous seasons.
@@ -88,7 +89,7 @@ def get_schedule(browser, team=None, season=None):
browser.open(url)
schedule = browser.get_current_page()
table = schedule.find_all('table')[1]
- schedule_df = pd.read_html(str(table))
+ schedule_df = pd.read_html(StringIO(str(table)))

# Dataframe Tidying
schedule_df = schedule_df[0]
3 changes: 1 addition & 2 deletions tests/test_misc.py
@@ -78,7 +78,6 @@ def test_get_gameattribs(browser):


def test_get_program_ratings(browser):
- expected = (358, 17)

df = kpmisc.get_program_ratings(browser)
+ expected = (len(browser.page.select("tr:not(:has(th))")), 17)
assert df.shape == expected
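Instead of hard-coding 358 rows, the test now derives the expected row count from the page it just scraped, so it keeps passing when KenPom's list of programs grows or shrinks. The CSS selector keeps only rows with no header cells; a small sketch of its behavior on illustrative markup (BeautifulSoup's .select delegates :not() and :has() to soupsieve):

    from bs4 import BeautifulSoup

    # Illustrative markup; the real input is the program ratings table on kenpom.com.
    html = """
    <table>
      <tr><th>Rank</th><th>Team</th></tr>
      <tr><td>1</td><td>Kansas</td></tr>
      <tr><td>2</td><td>Kentucky</td></tr>
    </table>
    """

    soup = BeautifulSoup(html, "html.parser")
    # "tr:not(:has(th))" excludes any row that contains a <th>, leaving only data rows.
    data_rows = soup.select("tr:not(:has(th))")
    print(len(data_rows))  # 2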
2 changes: 1 addition & 1 deletion tests/test_team.py
@@ -21,7 +21,7 @@ def test_get_valid_teams(browser):
assert len(teams_2003) == 327

valid_2003_teams = ['Kentucky', 'Kansas', 'Georgetown', 'Dayton', 'South Carolina', 'Fresno St.', 'Iowa', 'SMU', 'TCU', 'North Carolina A&T']
- for team in teams_2003:
+ for team in valid_2003_teams:
assert team in teams_2003

invalid_2003_teams = ['Loyola Marymnt University', 'YRU', 'Praget', 'Invalid U', 'SRTU', 'Kennesaw St.']
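The old assertion was a tautology: it checked every scraped team against the very list it came from, so it could never fail. Looping over the hand-picked valid_2003_teams instead spot-checks the scrape against names known to belong to the 2003 season. Roughly the difference:

    # Before: compares the scraped list against itself, so it always passes.
    for team in teams_2003:
        assert team in teams_2003

    # After: verifies that known 2003 teams actually show up in the scrape.
    for team in valid_2003_teams:
        assert team in teams_2003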
