Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rhub bug fixes: handling for missing links and skipping lastName:NREL #20

Merged
merged 1 commit into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 57 additions & 9 deletions elm/web/rhub.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,34 @@ def clean_text(html_text):

return clean

@property
def first_name(self):
    """Get the first name of this researcher.

    Returns
    -------
    first : str | None
        First name of researcher, read from the ``firstName`` entry of
        this record's ``name`` field. None if the record has no ``name``
        field or the field has no ``firstName`` entry.
    """
    # A record without a 'name' entry yields None from .get(); fall back
    # to an empty dict so the lookup below returns None instead of
    # raising AttributeError.
    names = self.get('name') or {}

    return names.get('firstName')

@property
def last_name(self):
    """Get the last name of this researcher.

    Returns
    -------
    last : str | None
        Last name of researcher, read from the ``lastName`` entry of
        this record's ``name`` field. None if the record has no ``name``
        field or the field has no ``lastName`` entry.
    """
    # A record without a 'name' entry yields None from .get(); fall back
    # to an empty dict so callers comparing the result against a string
    # get None rather than an AttributeError.
    names = self.get('name') or {}

    return names.get('lastName')

@property
def title(self):
"""Get the full name of this researcher.
Expand Down Expand Up @@ -329,10 +357,9 @@ def __init__(self, url, n_pages=1):
self._n_pages = 0
self._iter = 0

records = self._get_first()
for page in self._get_pages(n_pages=n_pages):
records += page
records = self._get_all(n_pages)
records = [ProfilesRecord(single) for single in records]
records = [prof for prof in records if prof.last_name != 'NREL']
super().__init__(records)

def _get_first(self):
Expand Down Expand Up @@ -398,6 +425,27 @@ def _get_pages(self, n_pages):
else:
break

def _get_all(self, n_pages):
    """Get all pages of profiles up to n_pages.

    Parameters
    ----------
    n_pages : int
        Number of pages to retrieve; passed through to ``_get_pages``.

    Returns
    -------
    records : list
        Records from the first page followed by the records of every
        page produced by ``_get_pages``, in retrieval order.
    """
    # Copy so that extending below never mutates the list object handed
    # back by _get_first().
    records = list(self._get_first())

    for page in self._get_pages(n_pages):
        records.extend(page)

    return records

def meta(self):
"""Get a meta dataframe with details on all of the profiles.

Expand Down Expand Up @@ -583,12 +631,12 @@ def links(self):

doi = None
pdf_url = None

for link in ev:
if link.get('doi'):
doi = link.get('doi')
if link.get('link'):
pdf_url = link.get('link')
if ev:
for link in ev:
if link.get('doi'):
doi = link.get('doi')
if link.get('link'):
pdf_url = link.get('link')

return doi, pdf_url

Expand Down
4 changes: 2 additions & 2 deletions elm/wizard.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def engineer_query(self, query, token_budget=None, new_info_threshold=0.7,
used_index = np.array(used_index)
references = self.make_ref_list(used_index)

return message, references
return message, references, used_index

@abstractmethod
def make_ref_list(self, idx):
Expand Down Expand Up @@ -200,7 +200,7 @@ def chat(self, query,
out = self.engineer_query(query, token_budget=token_budget,
new_info_threshold=new_info_threshold,
convo=convo)
query, references = out
query, references, _ = out

messages = [{"role": "system", "content": self.MODEL_ROLE},
{"role": "user", "content": query}]
Expand Down
Loading