Skip to content

Commit

Permalink
Merge pull request #67 from j-andrews7/fix/64-current-year
Browse files Browse the repository at this point in the history
Fix for calculation of current season/year (fixes #64)
  • Loading branch information
esqew authored Nov 1, 2023
2 parents cb1752f + c6f3a49 commit a30590b
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 11 deletions.
17 changes: 17 additions & 0 deletions kenpompy/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,23 @@
import pandas as pd
from bs4 import BeautifulSoup
from io import StringIO
import re

def get_current_season(browser):
    """
    Scrapes the KenPom homepage to get the latest season year that has data published.

    Args:
        browser (mechanicalsoup StatefulBrowser): Authenticated browser with full access to kenpom.com generated
            by the `login` function.

    Returns:
        current_season (int): Number corresponding to the last season year that has data published.

    Raises:
        ValueError: If the homepage header cannot be located, or its text does not begin with a
            four-digit year.
    """
    YEAR_PATTERN = r'^(\d{4})'

    browser.open('https://kenpom.com/index.php')

    # The season is read from the leading four digits of the homepage header title.
    page = browser.page
    header = page.select_one('#content-header h2') if page is not None else None
    if header is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            'Unable to determine the current season - the "#content-header h2" element '
            'was not found on the kenpom.com homepage.')

    # Tolerate surrounding whitespace in the header text before matching the year.
    page_title = header.text.strip()
    year_match = re.match(YEAR_PATTERN, page_title)
    if year_match is None:
        raise ValueError(
            'Unable to determine the current season - the homepage title {!r} does not '
            'start with a four-digit year.'.format(page_title))

    return int(year_match.group(1))

def get_pomeroy_ratings(browser, season=None):
"""
Expand Down
13 changes: 4 additions & 9 deletions kenpompy/team.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
"""

import pandas as pd
import datetime
from io import StringIO
from .misc import get_current_season

def get_valid_teams(browser, season=None):
"""
Expand Down Expand Up @@ -39,9 +39,6 @@ def get_valid_teams(browser, season=None):

return team_list




def get_schedule(browser, team=None, season=None):
"""
Scrapes a team's schedule from (https://kenpom.com/team.php) into a dataframe.
Expand All @@ -62,19 +59,17 @@ def get_schedule(browser, team=None, season=None):
"""

url = 'https://kenpom.com/team.php'

date = datetime.date.today()
currentYear = date.strftime("%Y")
current_season = get_current_season(browser)

if season:
if int(season) < 2002:
raise ValueError(
'season cannot be less than 2002, as data only goes back that far.')
if int(season) > int(currentYear):
if int(season) > int(current_season):
raise ValueError(
'season cannot be greater than the current year.')
else:
season = int(currentYear)
season = current_season

if team==None or team not in get_valid_teams(browser, season):
raise ValueError(
Expand Down
2 changes: 1 addition & 1 deletion kenpompy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,4 @@ def login(email, password):
raise Exception(
'Logging in to kenpom.com failed - account subscription is expired')

return browser
return browser
9 changes: 9 additions & 0 deletions tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
from kenpompy.FanMatch import FanMatch
import pandas as pd

def test_get_current_season(browser):
    """Check that `get_current_season` reports the latest season published on kenpom.com."""
    latest_season = kpmisc.get_current_season(browser)
    season_url_prefix = 'https://kenpom.com/?y='

    # The reported season must be published: its page is expected to answer 200 without redirecting.
    published_response = browser.request(
        method='GET',
        url=season_url_prefix + str(latest_season),
        allow_redirects=False,
    )
    assert published_response.status_code == 200

    # There must not be a season beyond the reported one: the following year is expected to answer 302.
    following_response = browser.request(
        method='GET',
        url=season_url_prefix + str(latest_season + 1),
        allow_redirects=False,
    )
    assert following_response.status_code == 302

def test_get_pomeroy_ratings(browser):
expected = ['1', 'Virginia', 'ACC', '35-3', '+34.22', '123.4', '2', '89.2', '5', '59.4', '353', '+.050', '62', '+11.18', '22', '109.2', '34', '98.1', '14', '-3.24', '255', '1']
df = kpmisc.get_pomeroy_ratings(browser, season=2019)
Expand Down
3 changes: 2 additions & 1 deletion tests/test_team.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest
import datetime
import kenpompy.team as kpteam
import kenpompy.misc as kpmisc
import pandas as pd

def test_get_valid_teams(browser):
Expand Down Expand Up @@ -37,7 +38,7 @@ def test_get_schedule(browser):
assert df.shape == (34, 9)

date = datetime.date.today()
currentYear = date.strftime("%Y")
currentYear = kpmisc.get_current_season(browser)
nextYear = str(int(currentYear)+1)

with pytest.raises(ValueError):
Expand Down

0 comments on commit a30590b

Please sign in to comment.