diff --git a/elm/web/rhub.py b/elm/web/rhub.py index 41f9b63..12104fc 100644 --- a/elm/web/rhub.py +++ b/elm/web/rhub.py @@ -51,6 +51,34 @@ def clean_text(html_text): return clean + @property + def first_name(self): + """Get the first name of this researcher. + + Returns + ------- + first : str + First name of researcher. + """ + names = self.get('name') + first = names.get('firstName') + + return first + + @property + def last_name(self): + """Get the last name of this researcher. + + Returns + ------- + last : str + Last name of researcher. + """ + names = self.get('name') + last = names.get('lastName') + + return last + @property def title(self): """Get the full name of this researcher. @@ -329,10 +357,9 @@ def __init__(self, url, n_pages=1): self._n_pages = 0 self._iter = 0 - records = self._get_first() - for page in self._get_pages(n_pages=n_pages): - records += page + records = self._get_all(n_pages) records = [ProfilesRecord(single) for single in records] + records = [prof for prof in records if prof.last_name != 'NREL'] super().__init__(records) def _get_first(self): @@ -398,6 +425,27 @@ def _get_pages(self, n_pages): else: break + def _get_all(self, n_pages): + """Get all pages of profiles up to n_pages. + + Parameters + ---------- + n_pages : int + Number of pages to retrieve + + Returns + ------- + all_records : list + List of all profile records. + """ + first_page = self._get_first() + records = first_page + + for page in self._get_pages(n_pages): + records.extend(page) + + return records + def meta(self): """Get a meta dataframe with details on all of the profiles. 
@@ -583,12 +631,12 @@ def links(self): doi = None pdf_url = None - - for link in ev: - if link.get('doi'): - doi = link.get('doi') - if link.get('link'): - pdf_url = link.get('link') + if ev: + for link in ev: + if link.get('doi'): + doi = link.get('doi') + if link.get('link'): + pdf_url = link.get('link') return doi, pdf_url diff --git a/elm/wizard.py b/elm/wizard.py index f26e3e2..963ed21 100644 --- a/elm/wizard.py +++ b/elm/wizard.py @@ -126,7 +126,7 @@ def engineer_query(self, query, token_budget=None, new_info_threshold=0.7, used_index = np.array(used_index) references = self.make_ref_list(used_index) - return message, references + return message, references, used_index @abstractmethod def make_ref_list(self, idx): @@ -200,7 +200,7 @@ def chat(self, query, out = self.engineer_query(query, token_budget=token_budget, new_info_threshold=new_info_threshold, convo=convo) - query, references = out + query, references, _ = out messages = [{"role": "system", "content": self.MODEL_ROLE}, {"role": "user", "content": query}]