From 2fe4e7b16779d560e852e01ca8b272a7c3d00dc1 Mon Sep 17 00:00:00 2001 From: HONGYI001 Date: Wed, 24 Apr 2024 20:31:30 +0800 Subject: [PATCH 1/6] implement dblp search --- src/agentscope/service/web/search.py | 370 ++++++++++++++++++++++++++- 1 file changed, 368 insertions(+), 2 deletions(-) diff --git a/src/agentscope/service/web/search.py b/src/agentscope/service/web/search.py index fd72b7536..6973eb14f 100644 --- a/src/agentscope/service/web/search.py +++ b/src/agentscope/service/web/search.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- """Search question in the web""" -from typing import Any - +from typing import Any, Literal from agentscope.service.service_response import ServiceResponse from agentscope.utils.common import requests_get from agentscope.service.service_status import ServiceExecStatus @@ -194,3 +193,370 @@ def google_search( for result in results ], ) + + +def dblp_search( + search_type: Literal["publication", "author", "venue"], + question: str, + num_results: int = 30, + first_hit: int = 0, + num_completion: int = 10, +) -> ServiceResponse: + """ + Search DBLP database based on the type specified. + + Args: + search_type (`Literal["publication", "author", "venue"]`): + Type of search to perform, options are + "publication", "author", or "venue". + question (`str`): + The search query string. + num_results (`int`, defaults to `30`): + The total number of search results to fetch. + firts_hit (`int`, defaults to `0`): + The first hit in the numbered sequence of search results to return + num_completion (`int`, defaults to `10`): + The number of completions to generate for the search query. + + Returns: + `ServiceResponse`: Depending on the type, + the response structure will vary. + The detailed documentation will adjust based on the type parameter. 
+ """ + mapping = { + "publication": dblp_search_publications, + "author": dblp_search_authors, + "venue": dblp_search_venues, + } + if search_type not in mapping: + raise ValueError( + f"Invalid type: {type}. Must be one of {list(mapping.keys())}.", + ) + selected_function = mapping[search_type] + dblp_search.__doc__ = selected_function.__doc__ + return selected_function( + question, + num_results, + first_hit, + num_completion, + ) + + +def dblp_search_publications( + question: str, + num_results: int = 30, + first_hit: int = 0, + num_completion: int = 10, +) -> ServiceResponse: + """ + Search publications in the DBLP database + via its public API and return structured + publication data. + + Args: + question (`str`): + The search query string to look up + in the DBLP database. + num_results (`int`, defaults to `30`): + The number of search results to fetch. + firts_hit (`int`, defaults to `0`): + The first hit in the numbered sequence + of search results to return + num_completion (`int`, defaults to `10`): + The number of completions to generate + for the search query. + + Returns: + `ServiceResponse`: A dictionary containing `status` and `content`. + The `status` attribute is from the ServiceExecStatus enum, + indicating success or error. + The `content` is a list of parsed publication data if successful, + or an error message if failed. + Each item in the list contains publication information + includes title, authors, venue, pages, year, type, DOI, and URL. + + Example: + .. code-block:: python + search_results = dblp_search_publications(question="Extreme + Learning Machine", + num_results=3, + results_per_page=1, + num_completion=1) + print(search_results) + + It returns the following structure: + + .. code-block:: python + + { + 'status': , + 'content': [ + { + 'title': 'Power transformers fault diagnosis + based on a meta-learning approach to kernel + extreme learning machine with opposition-based + learning sparrow search algorithm.', + 'venue': 'J. 
Intell. Fuzzy Syst.', + 'pages': '455-466', + 'year': '2023', + 'type': 'Journal Articles', + 'doi': '10.3233/JIFS-211862', + 'url': 'https://dblp.org/rec/journals/jifs/YuTZTCH23', + 'authors': 'Song Yu, Weimin Tan, Chengming Zhang, + Chao Tang, Lihong Cai, Dong Hu' + }, + { + 'title': 'Performance comparison of Extreme Learning + Machinesand other machine learning methods + on WBCD data set.', + 'venue': 'SIU', + 'pages': '1-4', + 'year': '2021', + 'type': 'Conference and Workshop Papers', + 'doi': '10.1109/SIU53274.2021.9477984', + 'url': 'https://dblp.org/rec/conf/siu/KeskinDAY21', + 'authors': 'Ömer Selim Keskin, Akif Durdu, + Muhammet Fatih Aslan, Abdullah Yusefi' + } + ] + } + """ + + url = "https://dblp.org/search/publ/api" + params = { + "q": question, + "format": "json", + "h": num_results, + "f": first_hit, + "c": num_completion, + } + search_results = requests_get(url, params) + + if isinstance(search_results, str): + return ServiceResponse(ServiceExecStatus.ERROR, search_results) + + hits = search_results.get("result", {}).get("hits", {}).get("hit", []) + parsed_data = [] + for hit in hits: + info = hit.get("info", {}) + title = info.get("title", "No title available") + venue = info.get("venue", "No venue available") + pages = info.get("pages", "No page information") + year = info.get("year", "Year not specified") + pub_type = info.get("type", "Type not specified") + doi = info.get("doi", "No DOI available") + url = info.get("url", "No URL available") + authors = info.get("authors", {}).get("author", []) + authors_info = info.get("authors", {}).get("author", []) + if isinstance( + authors_info, + dict, + ): # Check if there's only one author in a dict format + authors_info = [authors_info] + authors = ", ".join( + [author["text"] for author in authors_info if "text" in author], + ) + data = { + "title": title, + "venue": venue, + "pages": pages, + "year": year, + "type": pub_type, + "doi": doi, + "url": url, + "authors": authors, + } + 
parsed_data.append(data) + return ServiceResponse(ServiceExecStatus.SUCCESS, parsed_data) + + +def dblp_search_authors( + question: str, + num_results: int = 30, + first_hit: int = 0, + num_completion: int = 10, +) -> ServiceResponse: + """ + Search for author information in the DBLP database + via its public API and return structured author data. + + Args: + question (`str`): + The search query string to look up + authors in the DBLP database. + num_results (`int`, defaults to `30`): + The total number of search results to fetch. + firts_hit (`int`, defaults to `0`): + The first hit in the numbered sequence + of search results to return + num_completion (`int`, defaults to `10`): + The number of completions to generate + for the search query. + + Returns: + `ServiceResponse`: A dictionary containing `status` and `content`. + The `status` attribute is from the + ServiceExecStatus enum, indicating the success or error of the search. + The `content` is a list of parsed author + data if successful, or an error message if failed. + Each item in the list contains author information + including their name, URL, and affiliations. + + Example: + .. code-block:: python + + search_results = dblp_search_authors(question="Liu ZiWei", + num_results=3, + results_per_page=1, + num_completion=1) + print(search_results) + + It returns the following structure: + + .. 
code-block:: python + + { + 'status': , + 'content': [ + { + 'author': 'Ziwei Liu 0001', + 'url': 'https://dblp.org/pid/05/6300-1', + 'affiliations': 'Advantech Singapore Pte Ltd, + Singapore; + National University of Singapore, + Department of Computer Science, Singapore' + }, + { + 'author': 'Ziwei Liu 0002', + 'url': 'https://dblp.org/pid/05/6300-2', + 'affiliations': 'Nanyang Technological University, + S-Lab, Singapore; + Chinese University of Hong Kong, + Department of Information Engineering, + Hong Kong' + } + ] + } + """ + url = "https://dblp.org/search/author/api" + params = { + "q": question, + "format": "json", + "h": num_results, + "f": first_hit, + "c": num_completion, + } + search_results = requests_get(url, params) + if isinstance(search_results, str): + return ServiceResponse(ServiceExecStatus.ERROR, search_results) + hits = search_results.get("result", {}).get("hits", {}).get("hit", []) + parsed_data = [] + for hit in hits: + author = hit["info"]["author"] + author_url = hit["info"]["url"] + affiliations = [] + notes = hit["info"].get("notes", {}) + note_entries = notes.get("note", []) + if isinstance(note_entries, dict): + note_entries = [note_entries] + for note in note_entries: + if note["@type"] == "affiliation": + affiliations.append(note["text"]) + affiliations = "; ".join(affiliations) + entry_dict = { + "author": author, + "url": author_url, + "affiliations": affiliations, + } + parsed_data.append(entry_dict) + return ServiceResponse(ServiceExecStatus.SUCCESS, parsed_data) + + +def dblp_search_venues( + question: str, + num_results: int = 30, + first_hit: int = 0, + num_completion: int = 10, +) -> ServiceResponse: + """ + Search for venue information in the DBLP database + via its public API and return structured venue data. + + Args: + question (`str`): + The search query string to look up venues in the DBLP database. + num_results (`int`, defaults to `30`): + The total number of search results to fetch. 
+ firts_hit (`int`, defaults to `0`): + The first hit in the numbered sequence of search results to return + num_completion (`int`, defaults to `10`): + The number of completions to generate for the search query. + + Returns: + `ServiceResponse`: A dictionary containing `status` and `content`. + The `status` attribute is from the ServiceExecStatus enum, + indicating the success or error of the search. + The `content` is a list of parsed venue data if successful, + or an error message if failed. + Each item in the list contains venue information including + its name, acronym, type, and URL. + + Example: + .. code-block:: python + + search_results = dblp_search_venues(question="AAAI", + num_results=1, + results_per_page=1, + num_completion=1) + print(search_results) + + It returns the following structure: + + .. code-block:: python + + { + 'status': , + 'content': [ + { + 'venue': 'AAAI Conference on Artificial Intelligence + (AAAI)', + 'acronym': 'AAAI', + 'type': 'Conference or Workshop', + 'url': 'https://dblp.org/db/conf/aaai/' + }, + { + 'venue': ''AAAI Fall Symposium Series', + 'acronym': 'No acronym available', + 'type': 'Conference or Workshop', + 'url': 'https://dblp.org/db/conf/aaaifs/' + } + ] + } + """ + url = "https://dblp.org/search/venue/api" + params = { + "q": question, + "format": "json", + "h": num_results, + "f": first_hit, + "c": num_completion, + } + search_results = requests_get(url, params) + if isinstance(search_results, str): + return ServiceResponse(ServiceExecStatus.ERROR, search_results) + + hits = search_results.get("result", {}).get("hits", {}).get("hit", []) + parsed_data = [] + for hit in hits: + venue = hit["info"]["venue"] + acronym = hit["info"].get("acronym", "No acronym available") + venue_type = hit["info"].get("type", "Type not specified") + url = hit["info"]["url"] + entry_dict = { + "venue": venue, + "acronym": acronym, + "type": venue_type, + "url": url, + } + parsed_data.append(entry_dict) + return 
ServiceResponse(ServiceExecStatus.SUCCESS, parsed_data) From b7c8f646792d84185c5f646008c0c603af78b071 Mon Sep 17 00:00:00 2001 From: HONGYI001 Date: Mon, 29 Apr 2024 18:37:51 +0800 Subject: [PATCH 2/6] move dblp_search function to a separate file, also add it to the __init__.py --- src/agentscope/service/__init__.py | 8 + src/agentscope/service/web/dblp.py | 330 ++++++++++++++++++++++++ src/agentscope/service/web/search.py | 369 +-------------------------- 3 files changed, 339 insertions(+), 368 deletions(-) create mode 100644 src/agentscope/service/web/dblp.py diff --git a/src/agentscope/service/__init__.py b/src/agentscope/service/__init__.py index b52023514..d5ed90cd1 100644 --- a/src/agentscope/service/__init__.py +++ b/src/agentscope/service/__init__.py @@ -21,6 +21,11 @@ from .sql_query.mongodb import query_mongodb from .web.search import bing_search, google_search from .web.arxiv import arxiv_search +from .web.dblp import ( + dblp_search_publications, + dblp_search_authors, + dblp_search_venues, +) from .service_response import ServiceResponse from .service_factory import ServiceFactory from .retrieval.similarity import cos_sim @@ -69,4 +74,7 @@ def get_help() -> None: "load_web", "parse_html_to_text", "download_from_url", + "dblp_search_publications", + "dblp_search_authors", + "dblp_search_venues", ] diff --git a/src/agentscope/service/web/dblp.py b/src/agentscope/service/web/dblp.py new file mode 100644 index 000000000..770803a46 --- /dev/null +++ b/src/agentscope/service/web/dblp.py @@ -0,0 +1,330 @@ +# -*- coding: utf-8 -*- +""" Search papers, authors and venues in DBLP API.
+For detail usage of the DBLP API +please refer to https://dblp.org/faq/How+can+I+fetch+DBLP+data.html +""" +from agentscope.service.service_response import ( + ServiceResponse, + ServiceExecStatus, +) +from agentscope.utils.common import requests_get + + +def dblp_search_publications( + question: str, + num_results: int = 30, + first_hit: int = 0, + num_completion: int = 10, +) -> ServiceResponse: + """ + Search publications in the DBLP database + via its public API and return structured + publication data. + + Args: + question (`str`): + The search query string to look up + in the DBLP database. + num_results (`int`, defaults to `30`): + The number of search results to fetch. + firts_hit (`int`, defaults to `0`): + The first hit in the numbered sequence + of search results to return + num_completion (`int`, defaults to `10`): + The number of completions to generate + for the search query. + + Returns: + `ServiceResponse`: A dictionary containing `status` and `content`. + The `status` attribute is from the ServiceExecStatus enum, + indicating success or error. + The `content` is a list of parsed publication data if successful, + or an error message if failed. + Each item in the list contains publication information + includes title, authors, venue, pages, year, type, DOI, and URL. + + Example: + .. code-block:: python + search_results = dblp_search_publications(question="Extreme + Learning Machine", + num_results=3, + results_per_page=1, + num_completion=1) + print(search_results) + + It returns the following structure: + + .. code-block:: python + + { + 'status': , + 'content': [ + { + 'title': 'Power transformers fault diagnosis + based on a meta-learning approach to kernel + extreme learning machine with opposition-based + learning sparrow search algorithm.', + 'venue': 'J. Intell. 
Fuzzy Syst.', + 'pages': '455-466', + 'year': '2023', + 'type': 'Journal Articles', + 'doi': '10.3233/JIFS-211862', + 'url': 'https://dblp.org/rec/journals/jifs/YuTZTCH23', + 'authors': 'Song Yu, Weimin Tan, Chengming Zhang, + Chao Tang, Lihong Cai, Dong Hu' + }, + { + 'title': 'Performance comparison of Extreme Learning + Machinesand other machine learning methods + on WBCD data set.', + 'venue': 'SIU', + 'pages': '1-4', + 'year': '2021', + 'type': 'Conference and Workshop Papers', + 'doi': '10.1109/SIU53274.2021.9477984', + 'url': 'https://dblp.org/rec/conf/siu/KeskinDAY21', + 'authors': 'Ömer Selim Keskin, Akif Durdu, + Muhammet Fatih Aslan, Abdullah Yusefi' + } + ] + } + """ + + url = "https://dblp.org/search/publ/api" + params = { + "q": question, + "format": "json", + "h": num_results, + "f": first_hit, + "c": num_completion, + } + search_results = requests_get(url, params) + + if isinstance(search_results, str): + return ServiceResponse(ServiceExecStatus.ERROR, search_results) + + hits = search_results.get("result", {}).get("hits", {}).get("hit", []) + parsed_data = [] + for hit in hits: + info = hit.get("info", {}) + title = info.get("title", "No title available") + venue = info.get("venue", "No venue available") + pages = info.get("pages", "No page information") + year = info.get("year", "Year not specified") + pub_type = info.get("type", "Type not specified") + doi = info.get("doi", "No DOI available") + url = info.get("url", "No URL available") + authors = info.get("authors", {}).get("author", []) + authors_info = info.get("authors", {}).get("author", []) + if isinstance( + authors_info, + dict, + ): # Check if there's only one author in a dict format + authors_info = [authors_info] + authors = ", ".join( + [author["text"] for author in authors_info if "text" in author], + ) + data = { + "title": title, + "venue": venue, + "pages": pages, + "year": year, + "type": pub_type, + "doi": doi, + "url": url, + "authors": authors, + } + parsed_data.append(data) 
+ return ServiceResponse(ServiceExecStatus.SUCCESS, parsed_data) + + +def dblp_search_authors( + question: str, + num_results: int = 30, + first_hit: int = 0, + num_completion: int = 10, +) -> ServiceResponse: + """ + Search for author information in the DBLP database + via its public API and return structured author data. + + Args: + question (`str`): + The search query string to look up + authors in the DBLP database. + num_results (`int`, defaults to `30`): + The total number of search results to fetch. + firts_hit (`int`, defaults to `0`): + The first hit in the numbered sequence + of search results to return + num_completion (`int`, defaults to `10`): + The number of completions to generate + for the search query. + + Returns: + `ServiceResponse`: A dictionary containing `status` and `content`. + The `status` attribute is from the + ServiceExecStatus enum, indicating the success or error of the search. + The `content` is a list of parsed author + data if successful, or an error message if failed. + Each item in the list contains author information + including their name, URL, and affiliations. + + Example: + .. code-block:: python + + search_results = dblp_search_authors(question="Liu ZiWei", + num_results=3, + results_per_page=1, + num_completion=1) + print(search_results) + + It returns the following structure: + + .. 
code-block:: python + + { + 'status': , + 'content': [ + { + 'author': 'Ziwei Liu 0001', + 'url': 'https://dblp.org/pid/05/6300-1', + 'affiliations': 'Advantech Singapore Pte Ltd, + Singapore; + National University of Singapore, + Department of Computer Science, Singapore' + }, + { + 'author': 'Ziwei Liu 0002', + 'url': 'https://dblp.org/pid/05/6300-2', + 'affiliations': 'Nanyang Technological University, + S-Lab, Singapore; + Chinese University of Hong Kong, + Department of Information Engineering, + Hong Kong' + } + ] + } + """ + url = "https://dblp.org/search/author/api" + params = { + "q": question, + "format": "json", + "h": num_results, + "f": first_hit, + "c": num_completion, + } + search_results = requests_get(url, params) + if isinstance(search_results, str): + return ServiceResponse(ServiceExecStatus.ERROR, search_results) + hits = search_results.get("result", {}).get("hits", {}).get("hit", []) + parsed_data = [] + for hit in hits: + author = hit["info"]["author"] + author_url = hit["info"]["url"] + affiliations = [] + notes = hit["info"].get("notes", {}) + note_entries = notes.get("note", []) + if isinstance(note_entries, dict): + note_entries = [note_entries] + for note in note_entries: + if note["@type"] == "affiliation": + affiliations.append(note["text"]) + affiliations = "; ".join(affiliations) + entry_dict = { + "author": author, + "url": author_url, + "affiliations": affiliations, + } + parsed_data.append(entry_dict) + return ServiceResponse(ServiceExecStatus.SUCCESS, parsed_data) + + +def dblp_search_venues( + question: str, + num_results: int = 30, + first_hit: int = 0, + num_completion: int = 10, +) -> ServiceResponse: + """ + Search for venue information in the DBLP database + via its public API and return structured venue data. + + Args: + question (`str`): + The search query string to look up venues in the DBLP database. + num_results (`int`, defaults to `30`): + The total number of search results to fetch. 
+ firts_hit (`int`, defaults to `0`): + The first hit in the numbered sequence of search results to return + num_completion (`int`, defaults to `10`): + The number of completions to generate for the search query. + + Returns: + `ServiceResponse`: A dictionary containing `status` and `content`. + The `status` attribute is from the ServiceExecStatus enum, + indicating the success or error of the search. + The `content` is a list of parsed venue data if successful, + or an error message if failed. + Each item in the list contains venue information including + its name, acronym, type, and URL. + + Example: + .. code-block:: python + + search_results = dblp_search_venues(question="AAAI", + num_results=1, + results_per_page=1, + num_completion=1) + print(search_results) + + It returns the following structure: + + .. code-block:: python + + { + 'status': , + 'content': [ + { + 'venue': 'AAAI Conference on Artificial Intelligence + (AAAI)', + 'acronym': 'AAAI', + 'type': 'Conference or Workshop', + 'url': 'https://dblp.org/db/conf/aaai/' + }, + { + 'venue': 'AAAI Fall Symposium Series', + 'acronym': 'No acronym available', + 'type': 'Conference or Workshop', + 'url': 'https://dblp.org/db/conf/aaaifs/' + } + ] + } + """ + url = "https://dblp.org/search/venue/api" + params = { + "q": question, + "format": "json", + "h": num_results, + "f": first_hit, + "c": num_completion, + } + search_results = requests_get(url, params) + if isinstance(search_results, str): + return ServiceResponse(ServiceExecStatus.ERROR, search_results) + + hits = search_results.get("result", {}).get("hits", {}).get("hit", []) + parsed_data = [] + for hit in hits: + venue = hit["info"]["venue"] + acronym = hit["info"].get("acronym", "No acronym available") + venue_type = hit["info"].get("type", "Type not specified") + url = hit["info"]["url"] + entry_dict = { + "venue": venue, + "acronym": acronym, + "type": venue_type, + "url": url, + } + parsed_data.append(entry_dict) + return
ServiceResponse(ServiceExecStatus.SUCCESS, parsed_data) diff --git a/src/agentscope/service/web/search.py b/src/agentscope/service/web/search.py index 6973eb14f..b5ff7e59f 100644 --- a/src/agentscope/service/web/search.py +++ b/src/agentscope/service/web/search.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- """Search question in the web""" -from typing import Any, Literal +from typing import Any from agentscope.service.service_response import ServiceResponse from agentscope.utils.common import requests_get from agentscope.service.service_status import ServiceExecStatus @@ -193,370 +193,3 @@ def google_search( for result in results ], ) - - -def dblp_search( - search_type: Literal["publication", "author", "venue"], - question: str, - num_results: int = 30, - first_hit: int = 0, - num_completion: int = 10, -) -> ServiceResponse: - """ - Search DBLP database based on the type specified. - - Args: - search_type (`Literal["publication", "author", "venue"]`): - Type of search to perform, options are - "publication", "author", or "venue". - question (`str`): - The search query string. - num_results (`int`, defaults to `30`): - The total number of search results to fetch. - firts_hit (`int`, defaults to `0`): - The first hit in the numbered sequence of search results to return - num_completion (`int`, defaults to `10`): - The number of completions to generate for the search query. - - Returns: - `ServiceResponse`: Depending on the type, - the response structure will vary. - The detailed documentation will adjust based on the type parameter. - """ - mapping = { - "publication": dblp_search_publications, - "author": dblp_search_authors, - "venue": dblp_search_venues, - } - if search_type not in mapping: - raise ValueError( - f"Invalid type: {type}. 
Must be one of {list(mapping.keys())}.", - ) - selected_function = mapping[search_type] - dblp_search.__doc__ = selected_function.__doc__ - return selected_function( - question, - num_results, - first_hit, - num_completion, - ) - - -def dblp_search_publications( - question: str, - num_results: int = 30, - first_hit: int = 0, - num_completion: int = 10, -) -> ServiceResponse: - """ - Search publications in the DBLP database - via its public API and return structured - publication data. - - Args: - question (`str`): - The search query string to look up - in the DBLP database. - num_results (`int`, defaults to `30`): - The number of search results to fetch. - firts_hit (`int`, defaults to `0`): - The first hit in the numbered sequence - of search results to return - num_completion (`int`, defaults to `10`): - The number of completions to generate - for the search query. - - Returns: - `ServiceResponse`: A dictionary containing `status` and `content`. - The `status` attribute is from the ServiceExecStatus enum, - indicating success or error. - The `content` is a list of parsed publication data if successful, - or an error message if failed. - Each item in the list contains publication information - includes title, authors, venue, pages, year, type, DOI, and URL. - - Example: - .. code-block:: python - search_results = dblp_search_publications(question="Extreme - Learning Machine", - num_results=3, - results_per_page=1, - num_completion=1) - print(search_results) - - It returns the following structure: - - .. code-block:: python - - { - 'status': , - 'content': [ - { - 'title': 'Power transformers fault diagnosis - based on a meta-learning approach to kernel - extreme learning machine with opposition-based - learning sparrow search algorithm.', - 'venue': 'J. Intell. 
Fuzzy Syst.', - 'pages': '455-466', - 'year': '2023', - 'type': 'Journal Articles', - 'doi': '10.3233/JIFS-211862', - 'url': 'https://dblp.org/rec/journals/jifs/YuTZTCH23', - 'authors': 'Song Yu, Weimin Tan, Chengming Zhang, - Chao Tang, Lihong Cai, Dong Hu' - }, - { - 'title': 'Performance comparison of Extreme Learning - Machinesand other machine learning methods - on WBCD data set.', - 'venue': 'SIU', - 'pages': '1-4', - 'year': '2021', - 'type': 'Conference and Workshop Papers', - 'doi': '10.1109/SIU53274.2021.9477984', - 'url': 'https://dblp.org/rec/conf/siu/KeskinDAY21', - 'authors': 'Ömer Selim Keskin, Akif Durdu, - Muhammet Fatih Aslan, Abdullah Yusefi' - } - ] - } - """ - - url = "https://dblp.org/search/publ/api" - params = { - "q": question, - "format": "json", - "h": num_results, - "f": first_hit, - "c": num_completion, - } - search_results = requests_get(url, params) - - if isinstance(search_results, str): - return ServiceResponse(ServiceExecStatus.ERROR, search_results) - - hits = search_results.get("result", {}).get("hits", {}).get("hit", []) - parsed_data = [] - for hit in hits: - info = hit.get("info", {}) - title = info.get("title", "No title available") - venue = info.get("venue", "No venue available") - pages = info.get("pages", "No page information") - year = info.get("year", "Year not specified") - pub_type = info.get("type", "Type not specified") - doi = info.get("doi", "No DOI available") - url = info.get("url", "No URL available") - authors = info.get("authors", {}).get("author", []) - authors_info = info.get("authors", {}).get("author", []) - if isinstance( - authors_info, - dict, - ): # Check if there's only one author in a dict format - authors_info = [authors_info] - authors = ", ".join( - [author["text"] for author in authors_info if "text" in author], - ) - data = { - "title": title, - "venue": venue, - "pages": pages, - "year": year, - "type": pub_type, - "doi": doi, - "url": url, - "authors": authors, - } - parsed_data.append(data) 
- return ServiceResponse(ServiceExecStatus.SUCCESS, parsed_data) - - -def dblp_search_authors( - question: str, - num_results: int = 30, - first_hit: int = 0, - num_completion: int = 10, -) -> ServiceResponse: - """ - Search for author information in the DBLP database - via its public API and return structured author data. - - Args: - question (`str`): - The search query string to look up - authors in the DBLP database. - num_results (`int`, defaults to `30`): - The total number of search results to fetch. - firts_hit (`int`, defaults to `0`): - The first hit in the numbered sequence - of search results to return - num_completion (`int`, defaults to `10`): - The number of completions to generate - for the search query. - - Returns: - `ServiceResponse`: A dictionary containing `status` and `content`. - The `status` attribute is from the - ServiceExecStatus enum, indicating the success or error of the search. - The `content` is a list of parsed author - data if successful, or an error message if failed. - Each item in the list contains author information - including their name, URL, and affiliations. - - Example: - .. code-block:: python - - search_results = dblp_search_authors(question="Liu ZiWei", - num_results=3, - results_per_page=1, - num_completion=1) - print(search_results) - - It returns the following structure: - - .. 
code-block:: python - - { - 'status': , - 'content': [ - { - 'author': 'Ziwei Liu 0001', - 'url': 'https://dblp.org/pid/05/6300-1', - 'affiliations': 'Advantech Singapore Pte Ltd, - Singapore; - National University of Singapore, - Department of Computer Science, Singapore' - }, - { - 'author': 'Ziwei Liu 0002', - 'url': 'https://dblp.org/pid/05/6300-2', - 'affiliations': 'Nanyang Technological University, - S-Lab, Singapore; - Chinese University of Hong Kong, - Department of Information Engineering, - Hong Kong' - } - ] - } - """ - url = "https://dblp.org/search/author/api" - params = { - "q": question, - "format": "json", - "h": num_results, - "f": first_hit, - "c": num_completion, - } - search_results = requests_get(url, params) - if isinstance(search_results, str): - return ServiceResponse(ServiceExecStatus.ERROR, search_results) - hits = search_results.get("result", {}).get("hits", {}).get("hit", []) - parsed_data = [] - for hit in hits: - author = hit["info"]["author"] - author_url = hit["info"]["url"] - affiliations = [] - notes = hit["info"].get("notes", {}) - note_entries = notes.get("note", []) - if isinstance(note_entries, dict): - note_entries = [note_entries] - for note in note_entries: - if note["@type"] == "affiliation": - affiliations.append(note["text"]) - affiliations = "; ".join(affiliations) - entry_dict = { - "author": author, - "url": author_url, - "affiliations": affiliations, - } - parsed_data.append(entry_dict) - return ServiceResponse(ServiceExecStatus.SUCCESS, parsed_data) - - -def dblp_search_venues( - question: str, - num_results: int = 30, - first_hit: int = 0, - num_completion: int = 10, -) -> ServiceResponse: - """ - Search for venue information in the DBLP database - via its public API and return structured venue data. - - Args: - question (`str`): - The search query string to look up venues in the DBLP database. - num_results (`int`, defaults to `30`): - The total number of search results to fetch. 
- firts_hit (`int`, defaults to `0`): - The first hit in the numbered sequence of search results to return - num_completion (`int`, defaults to `10`): - The number of completions to generate for the search query. - - Returns: - `ServiceResponse`: A dictionary containing `status` and `content`. - The `status` attribute is from the ServiceExecStatus enum, - indicating the success or error of the search. - The `content` is a list of parsed venue data if successful, - or an error message if failed. - Each item in the list contains venue information including - its name, acronym, type, and URL. - - Example: - .. code-block:: python - - search_results = dblp_search_venues(question="AAAI", - num_results=1, - results_per_page=1, - num_completion=1) - print(search_results) - - It returns the following structure: - - .. code-block:: python - - { - 'status': , - 'content': [ - { - 'venue': 'AAAI Conference on Artificial Intelligence - (AAAI)', - 'acronym': 'AAAI', - 'type': 'Conference or Workshop', - 'url': 'https://dblp.org/db/conf/aaai/' - }, - { - 'venue': ''AAAI Fall Symposium Series', - 'acronym': 'No acronym available', - 'type': 'Conference or Workshop', - 'url': 'https://dblp.org/db/conf/aaaifs/' - } - ] - } - """ - url = "https://dblp.org/search/venue/api" - params = { - "q": question, - "format": "json", - "h": num_results, - "f": first_hit, - "c": num_completion, - } - search_results = requests_get(url, params) - if isinstance(search_results, str): - return ServiceResponse(ServiceExecStatus.ERROR, search_results) - - hits = search_results.get("result", {}).get("hits", {}).get("hit", []) - parsed_data = [] - for hit in hits: - venue = hit["info"]["venue"] - acronym = hit["info"].get("acronym", "No acronym available") - venue_type = hit["info"].get("type", "Type not specified") - url = hit["info"]["url"] - entry_dict = { - "venue": venue, - "acronym": acronym, - "type": venue_type, - "url": url, - } - parsed_data.append(entry_dict) - return 
ServiceResponse(ServiceExecStatus.SUCCESS, parsed_data) From 49265c2f79554884569dcec39d4167856d6a5656 Mon Sep 17 00:00:00 2001 From: HONGYI001 Date: Wed, 8 May 2024 14:12:23 +0800 Subject: [PATCH 3/6] small modifications were added to the dblp search documentation in order to pass the test. --- src/agentscope/service/web/dblp.py | 62 ++++++++++++------------------ 1 file changed, 25 insertions(+), 37 deletions(-) diff --git a/src/agentscope/service/web/dblp.py b/src/agentscope/service/web/dblp.py index 770803a46..7d6ab9c1c 100644 --- a/src/agentscope/service/web/dblp.py +++ b/src/agentscope/service/web/dblp.py @@ -13,26 +13,20 @@ def dblp_search_publications( question: str, num_results: int = 30, - first_hit: int = 0, + start: int = 0, num_completion: int = 10, ) -> ServiceResponse: - """ - Search publications in the DBLP database - via its public API and return structured - publication data. + """Search publications in the DBLP database. Args: question (`str`): - The search query string to look up - in the DBLP database. + The search query string. num_results (`int`, defaults to `30`): - The number of search results to fetch. - firts_hit (`int`, defaults to `0`): - The first hit in the numbered sequence - of search results to return + The number of search results to return. + start (`int`, defaults to `0`): + The index of the first search result to return. num_completion (`int`, defaults to `10`): - The number of completions to generate - for the search query. + The number of completions to generate. Returns: `ServiceResponse`: A dictionary containing `status` and `content`. 
@@ -95,7 +89,7 @@ def dblp_search_publications( "q": question, "format": "json", "h": num_results, - "f": first_hit, + "f": start, "c": num_completion, } search_results = requests_get(url, params) @@ -141,25 +135,21 @@ def dblp_search_publications( def dblp_search_authors( question: str, num_results: int = 30, - first_hit: int = 0, + start: int = 0, num_completion: int = 10, ) -> ServiceResponse: - """ - Search for author information in the DBLP database - via its public API and return structured author data. + """Search for author information in the DBLP database. Args: question (`str`): - The search query string to look up - authors in the DBLP database. + The search query string. num_results (`int`, defaults to `30`): - The total number of search results to fetch. - firts_hit (`int`, defaults to `0`): - The first hit in the numbered sequence - of search results to return + The number of search results to return. + start (`int`, defaults to `0`): + The index of the first search result to return. num_completion (`int`, defaults to `10`): - The number of completions to generate - for the search query. + The number of completions to generate. + Returns: `ServiceResponse`: A dictionary containing `status` and `content`. @@ -211,7 +201,7 @@ def dblp_search_authors( "q": question, "format": "json", "h": num_results, - "f": first_hit, + "f": start, "c": num_completion, } search_results = requests_get(url, params) @@ -243,22 +233,20 @@ def dblp_search_authors( def dblp_search_venues( question: str, num_results: int = 30, - first_hit: int = 0, + start: int = 0, num_completion: int = 10, ) -> ServiceResponse: - """ - Search for venue information in the DBLP database - via its public API and return structured venue data. + """Search for venue information in the DBLP database. Args: question (`str`): - The search query string to look up venues in the DBLP database. + The search query string. 
num_results (`int`, defaults to `30`): - The total number of search results to fetch. - firts_hit (`int`, defaults to `0`): - The first hit in the numbered sequence of search results to return + The number of search results to return. + start (`int`, defaults to `0`): + The index of the first search result to return. num_completion (`int`, defaults to `10`): - The number of completions to generate for the search query. + The number of completions to generate. Returns: `ServiceResponse`: A dictionary containing `status` and `content`. @@ -306,7 +294,7 @@ def dblp_search_venues( "q": question, "format": "json", "h": num_results, - "f": first_hit, + "f": start, "c": num_completion, } search_results = requests_get(url, params) From f9304425f9e490346a3c64e568acf15e65352d4d Mon Sep 17 00:00:00 2001 From: HONGYI001 Date: Thu, 9 May 2024 14:13:26 +0800 Subject: [PATCH 4/6] modified documentation (both zh-cn and en) --- docs/sphinx_doc/en/source/tutorial/204-service.md | 5 ++++- docs/sphinx_doc/zh_CN/source/tutorial/204-service.md | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/sphinx_doc/en/source/tutorial/204-service.md b/docs/sphinx_doc/en/source/tutorial/204-service.md index 826b1fec6..78b70a1d6 100644 --- a/docs/sphinx_doc/en/source/tutorial/204-service.md +++ b/docs/sphinx_doc/en/source/tutorial/204-service.md @@ -26,7 +26,10 @@ The following table outlines the various Service functions by type. These functi | | `arxiv_search` | Perform arXiv search | | | `download_from_url` | Download file from given URL. | | | `load_web` | Load and parse the web page of the specified url (currently only supports HTML). | -| | `digest_webpage` | Digest the content of a already loaded web page (currently only supports HTML). | +| | `digest_webpage` | Digest the content of an already loaded web page (currently only supports HTML). |
+| | `dblp_search_publications` | Search publications in the DBLP database | +| | `dblp_search_authors` | Search for author information in the DBLP database | +| | `dblp_search_venues` | Search for venue information in the DBLP database | | File | `create_file` | Create a new file at a specified path, optionally with initial content. | | | `delete_file` | Delete a file specified by a file path. | | | `move_file` | Move or rename a file from one path to another. | diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md b/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md index e3938d1db..e9b5e973a 100644 --- a/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md +++ b/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md @@ -23,7 +23,10 @@ | | `arxiv_search` | 使用arxiv搜索。 | | | `download_from_url` | 从指定的 URL 下载文件。 | | | `load_web` | 爬取并解析指定的网页链接 (目前仅支持爬取 HTML 页面) | -| | `digest_webpage` | 对已经爬取好的网页生成摘要信息(目前仅支持 HTML 页面) | +| | `digest_webpage` | 对已经爬取好的网页生成摘要信息(目前仅支持 HTML 页面) | +| | `dblp_search_publications` | 在dblp数据库里搜索文献。 | +| | `dblp_search_authors` | 在dblp数据库里搜索作者。 | +| | `dblp_search_venues` | 在dblp数据库里搜索期刊,会议及研讨会。 | | 文件处理 | `create_file` | 在指定路径创建一个新文件,并可选择添加初始内容。 | | | `delete_file` | 删除由文件路径指定的文件。 | | | `move_file` | 将文件从一个路径移动或重命名到另一个路径。 | From be06ce1a8e104b862e2d828ec21976aa3f328712 Mon Sep 17 00:00:00 2001 From: HONGYI001 Date: Thu, 9 May 2024 14:16:34 +0800 Subject: [PATCH 5/6] resolve conflicts in service/__init__.py --- src/agentscope/service/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/agentscope/service/__init__.py b/src/agentscope/service/__init__.py index d5ed90cd1..c306bd243 100644 --- a/src/agentscope/service/__init__.py +++ b/src/agentscope/service/__init__.py @@ -77,4 +77,6 @@ def get_help() -> None: "dblp_search_publications", "dblp_search_authors", "dblp_search_venues", + # to be deprecated + "ServiceFactory", ] From 3a3b17fc1fbaa526c0eea87fd336dfbd66612c28 Mon Sep 17 00:00:00 2001 From: HONGYI001 Date: Thu, 9 May
2024 14:26:44 +0800 Subject: [PATCH 6/6] black format --- src/agentscope/service/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/agentscope/service/__init__.py b/src/agentscope/service/__init__.py index 3f808bcfc..67cb7ce73 100644 --- a/src/agentscope/service/__init__.py +++ b/src/agentscope/service/__init__.py @@ -75,7 +75,6 @@ def get_help() -> None: "load_web", "parse_html_to_text", "download_from_url", - "dblp_search_publications", "dblp_search_authors", "dblp_search_venues",