From e8d445ace68b5c10a05589b8a6a4c762d42b3544 Mon Sep 17 00:00:00 2001 From: PengfeiHePower Date: Wed, 26 Jun 2024 17:26:26 -0400 Subject: [PATCH 01/16] add wiki retriever --- src/agentscope/service/web/wiki.py | 483 +++++++++++++++++++++++++++++ tests/wiki_test.py | 430 +++++++++++++++++++++++++ 2 files changed, 913 insertions(+) create mode 100644 src/agentscope/service/web/wiki.py create mode 100644 tests/wiki_test.py diff --git a/src/agentscope/service/web/wiki.py b/src/agentscope/service/web/wiki.py new file mode 100644 index 000000000..24b6198b9 --- /dev/null +++ b/src/agentscope/service/web/wiki.py @@ -0,0 +1,483 @@ +"""Search contents from WikiPedia, including texts, categories, infotable, table,...""" +import requests +import json +from bs4 import BeautifulSoup +import re + +from agentscope.service.service_response import ( + ServiceResponse, + ServiceExecStatus, +) +from agentscope.utils.common import requests_get + +def get_category_members( + entity: str, + max_members: int=1000, + limit_per_request: int=500 + ) -> ServiceResponse: + """Function to retrieve category members from Wikipedia:Category pages + + Args: + entity (str): searching keywords + max_members (int): maximum number of members to output + limit_per_request (int): number of members retrieved per quest + + Returns: + `ServiceResponse`: A dictionary containing `status` and `content`. + The `status` attribute is from the ServiceExecStatus enum, + indicating success or error. + If the entity does not exist, `status`=ERROR and return top-5 similar entities in `content`. + If the entity exists, `status`=SUCCESS, and return `content` as a list of dicts. + Keys of each dict: + + "pageid": unique page ID for the member + + "ns": namespace for the member, indicating if the corresponding page is Article/User/... See https://en.wikipedia.org/wiki/Wikipedia:Namespace for details. + + "title": title of the member + + Example: + + .. code-block:: python + members = get_category_members("Machine_learning", max_members=10) + print(members) + + It returns contents: + + .. 
code-block:: python + { + 'status': , + 'content': [{'pageid': 67911196, 'ns': 0, 'title': 'Bayesian learning mechanisms'}, + {'pageid': 233488, 'ns': 0, 'title': 'Machine learning'}, + {'pageid': 53587467, 'ns': 0, 'title': 'Outline of machine learning'}, + {'pageid': 64439717, 'ns': 0, 'title': '80 Million Tiny Images'}, + {'pageid': 75530149, 'ns': 0, 'title': 'Accelerated Linear Algebra'}] + + } + + """ + url = "https://en.wikipedia.org/w/api.php" + params = { + 'action': 'query', + 'list': 'categorymembers', + 'cmtitle': f'Category:{entity}', + 'cmlimit': limit_per_request, # Maximum number of results per request + 'format': 'json' + } + + members = [] + total_fetched = 0 + + while total_fetched < max_members: + data = requests_get(url, params=params) + batch_members = data['query']['categorymembers'] + members.extend(batch_members) + total_fetched += len(batch_members) + + # Check if there is a continuation token + if 'continue' in data and total_fetched < max_members: + params['cmcontinue'] = data['continue']['cmcontinue'] + else: + break + + # If more members were fetched than max_members, trim the list + if len(members) > max_members: + members = members[:max_members] + + if len(members) > 0: + return ServiceResponse(ServiceExecStatus.SUCCESS, members) + else: + return ServiceResponse(ServiceExecStatus.ERROR, members) + + +def get_infobox( + entity: str + ) -> ServiceResponse: + """ + Function to retrieve InfoBox from the WikiPedia page + + Args: + entity (str): searching keywords + + Returns: + `ServiceResponse`: A dictionary containing `status` and `content`. + The `status` attribute is from the ServiceExecStatus enum, + indicating success or error. + If the entity does not exist, `status`=ERROR and return top-5 similar entities in `content`. + If the entity exists, `status`=SUCCESS, and return `content` as a dict containing information in the InfoBox. + + Example: + + .. code-block:: python + infobox_data = get_infobox(entity="Python (programming language)") + print(infobox_data) + + It returns content: + + .. code-block:: python + { + 'status': , + 'content': {'Paradigm': 'Multi-paradigm : object-oriented , [1] procedural ( imperative ), functional , structured , reflective', + 'Designed\xa0by': 'Guido van Rossum', + 'Developer': 'Python Software Foundation', + 'First\xa0appeared': '20\xa0February 1991 ; 33 years ago ( 1991-02-20 ) [2]', + 'Stable release': '3.12.4 / 6 June 2024 ; 14 days ago ( 6 June 2024 )', + 'Typing discipline': 'duck , dynamic , strong ; [3] optional type annotations (since 3.5, but those hints are ignored, except with unofficial tools) [4]', + 'OS': 'Tier 1 : 64-bit Linux , macOS ; 64- and 32-bit Windows 10+ [5] Tier 2 : E.g. 32-bit WebAssembly (WASI) Tier 3 : 64-bit FreeBSD , iOS ; e.g. Raspberry Pi OS Unofficial (or has been known to work): Other Unix-like / BSD variants and e.g. 
Android 5.0+ (official from Python 3.13 planned [6] ) and a few other platforms [7] [8] [9]', + 'License': 'Python Software Foundation License', + 'Filename extensions': '.py, .pyw, .pyz, [10] .pyi, .pyc, .pyd', + 'Website': 'python.org'} + } + """ + + url = "https://en.wikipedia.org/w/api.php" + + # Step 1: Check if the entity exists + search_params = { + 'action': 'query', + 'list': 'search', + 'srsearch': entity, + 'format': 'json' + } + + search_data = requests_get(url, params=search_params) + + if 'query' in search_data and search_data['query']['search']: + # Check if the exact title exists + exact_match = None + for result in search_data['query']['search']: + if result['title'].lower() == entity.lower(): + exact_match = result['title'] + break + if not exact_match: + similar_entities = [result['title'] for result in search_data['query']['search'][:5]] + return ServiceResponse(ServiceExecStatus.ERROR, f"Entity not found. Here are similar entities:{similar_entities}") + + entity = exact_match + + # Step 2: Fetch the infobox content if the entity exists + parse_params = { + 'action': 'parse', + 'page': entity, + 'prop': 'text', + 'format': 'json' + } + + parse_data = requests_get(url, params=parse_params) + + if 'parse' in parse_data: + raw_html = parse_data['parse']['text']['*'] + soup = BeautifulSoup(raw_html, 'html.parser') + infobox = soup.find('table', {'class': 'infobox'}) + + if not infobox: + return ServiceResponse(ServiceExecStatus.ERROR, None) + + infobox_data = {} + for row in infobox.find_all('tr'): + header = row.find('th') + value = row.find('td') + if header and value: + key = header.get_text(" ", strip=True) + val = value.get_text(" ", strip=True) + infobox_data[key] = val + + return ServiceResponse(ServiceExecStatus.SUCCESS, infobox_data) + else: + error_message = parse_data.get('error', {}).get('info', 'Unknown error occurred') + return ServiceResponse(ServiceExecStatus.ERROR, {"error": error_message}) + else: + return ServiceResponse(ServiceExecStatus.ERROR, {"error": "Entity not found"}) + + +def get_page_content_by_paragraph( + entity: str, + max_paragraphs: int=1 + ) -> ServiceResponse: + """ + Retrieve content from a Wikipedia page and split it into paragraphs, + excluding section headers. + + Args: + entity (str): search word. + max_paragraphs (int, optional): The maximum number of paragraphs to retrieve. Default is None (retrieve all paragraphs). + + Returns: + `ServiceResponse`: A dictionary containing `status` and `content`. + The `status` attribute is from the ServiceExecStatus enum, + indicating success or error. + If the entity does not exist, `status`=ERROR and return top-5 similar entities in `content`. + If the entity exists, `status`=SUCCESS, and return `content` as a list of paragraphs from the Wikipedia page. + + Example: + + .. code-block:: python + wiki_paragraph = get_page_content_by_paragraph(entity="Python (programming language)", max_paragraphs=1) + print(wiki_paragraph) + + It will return content: + .. 
code-block:: python + { + 'status': , + 'content': ['Python is a high-level, general-purpose programming...'] + } + + """ + url = "https://en.wikipedia.org/w/api.php" + + # Step 1: Check if the entity exists + search_params = { + 'action': 'query', + 'list': 'search', + 'srsearch': entity, + 'format': 'json' + } + + search_data = requests_get(url, params=search_params) + + if 'query' in search_data and search_data['query']['search']: + # Check if the exact title exists + exact_match = None + for result in search_data['query']['search']: + if result['title'].lower() == entity.lower(): + exact_match = result['title'] + break + if not exact_match: + similar_entities = [result['title'] for result in search_data['query']['search'][:5]] + return ServiceResponse(ServiceExecStatus.ERROR, f"Entity not found. Here are similar entities: {similar_entities}") + + entity = exact_match + + # Step 2: Fetch the page content if the entity exists + params = { + 'action': 'query', + 'prop': 'extracts', + 'explaintext': True, + 'titles': entity, + 'format': 'json' + } + + data = requests_get(url, params=params) + page = next(iter(data['query']['pages'].values())) + content = page.get('extract', 'No content found.') + if content == 'No content found.': + return ServiceResponse(ServiceExecStatus.ERROR, content) + + # Split content into paragraphs and filter out headers + paragraphs = [para.strip() for para in content.split('\n\n') if not re.match(r'^\s*==.*==\s*$', para) and para.strip() != ''] + + # Return the specified number of paragraphs + if max_paragraphs: + paragraphs = paragraphs[:max_paragraphs] + + return ServiceResponse(ServiceExecStatus.SUCCESS, paragraphs) + else: + return ServiceResponse(ServiceExecStatus.ERROR, {"error": "Entity not found"}) + +def get_all_wikipedia_tables( + entity: str + ) -> ServiceResponse: + """ + Retrieve tables on the Wikipedia page + + Args: + entity (str): search word. + + Returns: + `ServiceResponse`: A dictionary containing `status` and `content`. + The `status` attribute is from the ServiceExecStatus enum, + indicating success or error. + If the entity does not exist, `status`=ERROR and return top-5 similar entities in `content`. + If the entity exists, `status`=SUCCESS, and return `content` as a list of tables from the Wikipedia page. + Each table is presented as a dict, where key is the column name and value is the values for each column. + + Example: + + .. code-block:: python + wiki_table = get_all_wikipedia_tables(entity="Python (programming language)") + print(wiki_table) + + It will return content: + .. code-block:: python + { + 'status': , + 'content': [ + { + 'Type': ['bool','bytearray','bytes','complex',...], + 'Mutability': ['immutable','mutable','immutable','immutable',...], + ... + } + ] + } + + """ + url = "https://en.wikipedia.org/w/api.php" + + # Step 1: Check if the entity exists + search_params = { + 'action': 'query', + 'list': 'search', + 'srsearch': entity, + 'format': 'json' + } + + search_response = requests_get(url, params=search_params) + search_data = search_response + + if 'query' in search_data and search_data['query']['search']: + # Check if the exact title exists + exact_match = None + for result in search_data['query']['search']: + if result['title'].lower() == entity.lower(): + exact_match = result['title'] + break + if not exact_match: + similar_entities = [result['title'] for result in search_data['query']['search'][:5]] + return ServiceResponse(ServiceExecStatus.ERROR, f"Entity not found. 
Here are similar entities:{similar_entities}") + + entity = exact_match + + # Step 2: Fetch the page content if the entity exists + params = { + 'action': 'parse', + 'page': entity, + 'prop': 'text', + 'format': 'json' + } + + data = requests_get(url, params=params) + raw_html = data['parse']['text']['*'] + + soup = BeautifulSoup(raw_html, 'html.parser') + tables = soup.find_all('table', {'class': 'wikitable'}) + + if not tables: + return ServiceResponse(ServiceExecStatus.ERROR, None) + + all_tables_data = [] + for table_index, table in enumerate(tables): + headers = [header.get_text(strip=True) for header in table.find_all('th')] + table_dict = {header: [] for header in headers} + + for row in table.find_all('tr')[1:]: # Skip the header row + cells = row.find_all(['td', 'th']) + if len(cells) == len(headers): # Ensure the row has the correct number of cells + for i, cell in enumerate(cells): + table_dict[headers[i]].append(cell.get_text(strip=True)) + + all_tables_data.append(table_dict) + + return ServiceResponse(ServiceExecStatus.SUCCESS, all_tables_data) + else: + return ServiceResponse(ServiceExecStatus.ERROR, {"error": "Entity not found"}) + + +def get_page_images_with_captions( + entity: str + ) -> ServiceResponse: + """ + Function to retrive images and details on the Wikipedia page + + Args: + entity (str): search word. + + Returns: + `ServiceResponse`: A dictionary containing `status` and `content`. + The `status` attribute is from the ServiceExecStatus enum, + indicating success or error. + If the entity does not exist, `status`=ERROR and return top-5 similar entities in `content`. + If the entity exists, `status`=SUCCESS and return the `content` as a list of dict from the Wikipedia page. + + Each dict has: + 'title': title of the image + 'url': link to the image + 'caption': caption of the image + + Example: + .. code-block:: python + wiki_images = get_page_images_with_captions(entity="Python (programming language)") + print(wiki_images) + + It will return: + + .. code-block:: python + { + 'status': , + 'content': [{ + 'title': 'File:Commons-logo.svg', + 'url': 'https://upload.wikimedia.org/wikipedia/en/4/4a/Commons-logo.svg', + 'caption': 'The Wikimedia Commons logo, SVG version.'}, + ... 
+ ] + } + """ + + url = "https://en.wikipedia.org/w/api.php" + + # Step 1: Check if the entity exists + search_params = { + 'action': 'query', + 'list': 'search', + 'srsearch': entity, + 'format': 'json' + } + + search_response = requests_get(url, params=search_params) + search_data = search_response + + if 'query' in search_data and search_data['query']['search']: + # Check if the exact title exists + exact_match = None + for result in search_data['query']['search']: + if result['title'].lower() == entity.lower(): + exact_match = result['title'] + break + if not exact_match: + similar_entities = [result['title'] for result in search_data['query']['search'][:5]] + return ServiceResponse(ServiceExecStatus.ERROR, {"similar_entities": similar_entities}) + + entity = exact_match + + # Step 2: Get the list of images + params = { + 'action': 'query', + 'prop': 'images', + 'titles': entity, + 'format': 'json' + } + data = requests_get(url, params=params) + page = next(iter(data['query']['pages'].values())) + images = page.get('images', []) + if len(images) == 0: + return ServiceResponse(ServiceExecStatus.ERROR, None) + + # Step 3: Get details for each image + image_details = [] + for image in images: + image_title = image['title'] + params = { + 'action': 'query', + 'titles': image_title, + 'prop': 'imageinfo', + 'iiprop': 'url|extmetadata', + 'format': 'json' + } + response = requests.get(url, params=params) + data = response.json() + image_page = next(iter(data['query']['pages'].values())) + if 'imageinfo' in image_page: + image_info = image_page['imageinfo'][0] + image_url = image_info.get('url', '') + extmetadata = image_info.get('extmetadata', {}) + caption = extmetadata.get('ImageDescription', {}).get('value', 'No caption available') + image_details.append({ + 'title': image_title, + 'url': image_url, + 'caption': caption + }) + + return ServiceResponse(ServiceExecStatus.SUCCESS, image_details) + else: + return ServiceResponse(ServiceExecStatus.ERROR, {"error": "Entity not found"}) + + diff --git a/tests/wiki_test.py b/tests/wiki_test.py new file mode 100644 index 000000000..9481d45c2 --- /dev/null +++ b/tests/wiki_test.py @@ -0,0 +1,430 @@ +"""Wiki retriever test.""" +import unittest +from unittest.mock import Mock, patch, MagicMock + +from agentscope.service import ServiceResponse +from agentscope.service import get_category_members, get_infobox, get_page_content_by_paragraph, get_all_wikipedia_tables, get_page_images_with_captions +from agentscope.service.service_status import ServiceExecStatus + +class TestWiki(unittest.TestCase): + """ExampleTest for a unit test.""" + + @patch("agentscope.utils.common.requests.get") + def test_get_category_members(self, mock_get: MagicMock) -> None: + """Test test_get_category_members""" + mock_response = Mock() + mock_dict = { + 'query': { + 'categorymembers': [{ + 'pageid': 20, + 'ns': 0, + 'title': 'This is a test' + }] + } + } + + expected_result = ServiceResponse( + status=ServiceExecStatus.SUCCESS, + content=[ + {'pageid': 20, + 'ns': 0, + 'title': 'This is a test' + } + ] + ) + + mock_response.json.return_value = mock_dict + mock_get.return_value = mock_response + + test_entity = "Test" + max_members=1 + limit_per_request=100 + params = { + 'action': 'query', + 'list': 'categorymembers', + 'cmtitle': f'Category:{test_entity}', + 'cmlimit': limit_per_request, # Maximum number of results per request + 'format': 'json' + } + + results = get_category_members( + entity=test_entity, + max_members=max_members, + limit_per_request=limit_per_request + ) 
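+        # Verify exactly one GET was issued to the MediaWiki endpoint,
+        # with the query parameters built above.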
+ mock_get.assert_called_once_with( + "https://en.wikipedia.org/w/api.php", + params=params, + ) + + self.assertEqual( + results, + expected_result, + ) + + @patch("agentscope.utils.common.requests.get") + def test_get_infobox(self, mock_get: MagicMock) -> None: + """Test get_infobox with different parameters and responses""" + + # Mock responses for search query + mock_response_search = Mock() + mock_dict_search = { + 'query': { + 'search': [ + {'title': 'Test'} + ] + } + } + + # Mock responses for parse query + mock_response_parse = Mock() + mock_dict_parse = { + 'parse': { + 'title': 'Test', + 'pageid': 20, + 'text': { '*':""" + + + + + + + + + +
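+<!-- Minimal infobox fixture; the retriever locates it via class="infobox" -->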
+<table class="infobox">
+<tr><th>Column1</th><td>Data1</td></tr>
+<tr><th>Column2</th><td>Data2</td></tr>
+</table>
+ """ + } + } + } + + expected_result = ServiceResponse( + status=ServiceExecStatus.SUCCESS, + content={ + 'Column1': 'Data1', + 'Column2': 'Data2' + } + ) + + # Set the side effect of the mock_get to return different responses in sequence + mock_response_search.json.return_value = mock_dict_search + mock_response_parse.json.return_value = mock_dict_parse + mock_get.side_effect = [mock_response_search, mock_response_parse] + + test_entity = "Test" + + results = get_infobox(entity=test_entity) + + # Define expected calls + calls = [ + unittest.mock.call("https://en.wikipedia.org/w/api.php", params={ + 'action': 'query', + 'list': 'search', + 'srsearch': test_entity, + 'format': 'json' + }), + unittest.mock.call("https://en.wikipedia.org/w/api.php", params={ + 'action': 'parse', + 'page': test_entity, + 'prop': 'text', + 'format': 'json' + }) + ] + + mock_get.assert_has_calls(calls, any_order=True) + + self.assertEqual(results, expected_result) + + + @patch("agentscope.utils.common.requests.get") + def test_get_page_content_by_paragraph(self, mock_get: MagicMock) -> None: + """Test get_page_content_by_paragraph with different parameters and responses""" + + # Mock responses for search query + mock_response_search = Mock() + mock_dict_search = { + 'query': { + 'search': [ + {'title': 'Test'} + ] + } + } + + # Mock responses for extract query + mock_response_extract = Mock() + mock_dict_extract = { + 'query': { + 'pages': { + '20': { + 'pageid': 20, + 'title': 'Test', + 'extract': """ + This is the first paragraph. + + This is the second paragraph. + + == Section Header == + + This is the third paragraph under a section header. + """ + } + } + } + } + + expected_result = ServiceResponse( + status=ServiceExecStatus.SUCCESS, + content=[ + "This is the first paragraph.", + "This is the second paragraph." + ] + ) + + # Set the side effect of the mock_get to return different responses in sequence + mock_response_search.json.return_value = mock_dict_search + mock_response_extract.json.return_value = mock_dict_extract + mock_get.side_effect = [mock_response_search, mock_response_extract] + + test_entity = "Test" + + results = get_page_content_by_paragraph(entity=test_entity, max_paragraphs=2) + + # Define expected calls + params1 = { + 'action': 'query', + 'list': 'search', + 'srsearch': test_entity, + 'format': 'json' + } + params2 = { + 'action': 'query', + 'prop': 'extracts', + 'explaintext': True, + 'titles': test_entity, + 'format': 'json' + } + + calls = [ + unittest.mock.call("https://en.wikipedia.org/w/api.php", params=params1), + unittest.mock.call("https://en.wikipedia.org/w/api.php", params=params2) + ] + + mock_get.assert_has_calls(calls, any_order=True) + + self.assertEqual(results, expected_result) + + @patch("agentscope.utils.common.requests.get") + def test_get_all_wikipedia_tables(self, mock_get: MagicMock) -> None: + """Test get_all_wikipedia_tables with different parameters and responses""" + + # Mock responses for search query + mock_response_search = Mock() + mock_dict_search = { + 'query': { + 'search': [ + {'title': 'Test'} + ] + } + } + + # Mock responses for parse query + mock_response_parse = Mock() + mock_dict_parse = { + 'parse': { + 'title': 'Test', + 'pageid': 20, + 'text': { '*':""" + + + + + + + + + + + + + +
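+<!-- Minimal wikitable fixture; its rows drive the expected_result below -->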
+<table class="wikitable">
+<tr><th>Header1</th><th>Header2</th></tr>
+<tr><td>Row1Col1</td><td>Row1Col2</td></tr>
+<tr><td>Row2Col1</td><td>Row2Col2</td></tr>
+</table>
+ """ + } + } + } + + expected_result = ServiceResponse( + status=ServiceExecStatus.SUCCESS, + content=[{ + 'Header1': ['Row1Col1', 'Row2Col1'], + 'Header2': ['Row1Col2', 'Row2Col2'] + }] + ) + + # Set the side effect of the mock_get to return different responses in sequence + mock_response_search.json.return_value = mock_dict_search + mock_response_parse.json.return_value = mock_dict_parse + mock_get.side_effect = [mock_response_search, mock_response_parse] + + test_entity = "Test" + + results = get_all_wikipedia_tables(entity=test_entity) + + # Define expected calls + params1 = { + 'action': 'query', + 'list': 'search', + 'srsearch': test_entity, + 'format': 'json' + } + params2 = { + 'action': 'parse', + 'page': test_entity, + 'prop': 'text', + 'format': 'json' + } + + calls = [ + unittest.mock.call("https://en.wikipedia.org/w/api.php", params=params1), + unittest.mock.call("https://en.wikipedia.org/w/api.php", params=params2) + ] + + mock_get.assert_has_calls(calls, any_order=True) + + self.assertEqual(results, expected_result) + + + @patch("agentscope.utils.common.requests.get") + def test_get_page_images_with_captions(self, mock_get: MagicMock) -> None: + """Test get_page_images_with_captions with different parameters and responses""" + + # Mock responses for search query + mock_response_search = Mock() + mock_dict_search = { + 'query': { + 'search': [ + {'title': 'Test'} + ] + } + } + + # Mock responses for images query + mock_response_images = Mock() + mock_dict_images = { + 'query': { + 'pages': { + '20': { + 'pageid': 20, + 'title': 'Test', + 'images': [ + {'title': 'Image1'}, + {'title': 'Image2'} + ] + } + } + } + } + + # Mock responses for image details query + mock_response_image1 = Mock() + mock_dict_image1 = { + 'query': { + 'pages': { + '30': { + 'pageid': 30, + 'imageinfo': [{ + 'url': 'http://example.com/image1.jpg', + 'extmetadata': { + 'ImageDescription': {'value': 'Caption for image 1'} + } + }] + } + } + } + } + + mock_response_image2 = Mock() + mock_dict_image2 = { + 'query': { + 'pages': { + '31': { + 'pageid': 31, + 'imageinfo': [{ + 'url': 'http://example.com/image2.jpg', + 'extmetadata': { + 'ImageDescription': {'value': 'Caption for image 2'} + } + }] + } + } + } + } + + expected_result = ServiceResponse( + status=ServiceExecStatus.SUCCESS, + content=[ + { + 'title': 'Image1', + 'url': 'http://example.com/image1.jpg', + 'caption': 'Caption for image 1' + }, + { + 'title': 'Image2', + 'url': 'http://example.com/image2.jpg', + 'caption': 'Caption for image 2' + } + ] + ) + + # Set the side effect of the mock_get to return different responses in sequence + mock_response_search.json.return_value = mock_dict_search + mock_response_images.json.return_value = mock_dict_images + mock_response_image1.json.return_value = mock_dict_image1 + mock_response_image2.json.return_value = mock_dict_image2 + mock_get.side_effect = [mock_response_search, mock_response_images, mock_response_image1, mock_response_image2] + + test_entity = "Test" + + results = get_page_images_with_captions(entity=test_entity) + + # Define expected calls + params1 = { + 'action': 'query', + 'list': 'search', + 'srsearch': test_entity, + 'format': 'json' + } + params2 = { + 'action': 'query', + 'prop': 'images', + 'titles': test_entity, + 'format': 'json' + } + params3_image1 = { + 'action': 'query', + 'titles': 'Image1', + 'prop': 'imageinfo', + 'iiprop': 'url|extmetadata', + 'format': 'json' + } + params4_image2 = { + 'action': 'query', + 'titles': 'Image2', + 'prop': 'imageinfo', + 'iiprop': 
'url|extmetadata', + 'format': 'json' + } + + calls = [ + unittest.mock.call("https://en.wikipedia.org/w/api.php", params=params1), + unittest.mock.call("https://en.wikipedia.org/w/api.php", params=params2), + unittest.mock.call("https://en.wikipedia.org/w/api.php", params=params3_image1), + unittest.mock.call("https://en.wikipedia.org/w/api.php", params=params4_image2) + ] + + mock_get.assert_has_calls(calls, any_order=True) + + self.assertEqual(results, expected_result) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From fad1ba6994067eb338cbff79336a0175534f6edb Mon Sep 17 00:00:00 2001 From: PengfeiHePower Date: Wed, 26 Jun 2024 17:34:12 -0400 Subject: [PATCH 02/16] modify readme --- README.md | 1 + README_ZH.md | 1 + 2 files changed, 2 insertions(+) diff --git a/README.md b/README.md index 25efd9909..337a9e950 100644 --- a/README.md +++ b/README.md @@ -134,6 +134,7 @@ the following libraries. - File Operation - Text Processing - Multi Modality +- Wikipedia search and retrieval **Example Applications** diff --git a/README_ZH.md b/README_ZH.md index 1c13b1e55..4575c3f33 100644 --- a/README_ZH.md +++ b/README_ZH.md @@ -120,6 +120,7 @@ AgentScope支持使用以下库快速部署本地模型服务。 - 文件操作 - 文本处理 - 多模态生成 +- 维基百科搜索 **样例应用** From 4150fb97b8b971b7e696e1dc4c34b781257acb57 Mon Sep 17 00:00:00 2001 From: PengfeiHePower Date: Wed, 26 Jun 2024 17:47:42 -0400 Subject: [PATCH 03/16] modify --- src/agentscope/service/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/agentscope/service/__init__.py b/src/agentscope/service/__init__.py index 3561734f8..b47627daf 100644 --- a/src/agentscope/service/__init__.py +++ b/src/agentscope/service/__init__.py @@ -86,6 +86,11 @@ def get_help() -> None: "dashscope_image_to_text", "dashscope_text_to_image", "dashscope_text_to_audio", + "get_category_members", + "get_infobox", + "get_page_content_by_paragraph", + "get_all_wikipedia_tables", + "get_page_images_with_captions", # to be deprecated "ServiceFactory", ] From 77d04d523f6ca2bc8ee61804d56de3745929df4a Mon Sep 17 00:00:00 2001 From: PengfeiHePower Date: Wed, 26 Jun 2024 19:10:41 -0400 Subject: [PATCH 04/16] modify --- src/agentscope/service/__init__.py | 18 +- src/agentscope/service/web/wiki.py | 630 ++++++++++++++--------------- tests/wiki_test.py | 519 ++++++++++++++---------- 3 files changed, 618 insertions(+), 549 deletions(-) diff --git a/src/agentscope/service/__init__.py b/src/agentscope/service/__init__.py index b47627daf..0de13f91d 100644 --- a/src/agentscope/service/__init__.py +++ b/src/agentscope/service/__init__.py @@ -41,6 +41,14 @@ from .web.web_digest import digest_webpage, load_web, parse_html_to_text from .web.download import download_from_url +from .web.wiki import ( + wiki_get_category_members, + wiki_get_infobox, + wiki_get_page_content_by_paragraph, + wiki_get_all_wikipedia_tables, + wiki_get_page_images_with_captions, +) + def get_help() -> None: """Get help message.""" @@ -86,11 +94,11 @@ def get_help() -> None: "dashscope_image_to_text", "dashscope_text_to_image", "dashscope_text_to_audio", - "get_category_members", - "get_infobox", - "get_page_content_by_paragraph", - "get_all_wikipedia_tables", - "get_page_images_with_captions", + "wiki_get_category_members", + "wiki_get_infobox", + "wiki_get_page_content_by_paragraph", + "wiki_get_all_wikipedia_tables", + "wiki_get_page_images_with_captions", # to be deprecated "ServiceFactory", ] diff --git a/src/agentscope/service/web/wiki.py b/src/agentscope/service/web/wiki.py index 24b6198b9..a138c36c4 100644 --- 
a/src/agentscope/service/web/wiki.py +++ b/src/agentscope/service/web/wiki.py @@ -1,8 +1,12 @@ -"""Search contents from WikiPedia, including texts, categories, infotable, table,...""" +# -*- coding: utf-8 -*- +""" +Search contents from WikiPedia, +including texts, categories, infotable, table,... +""" + +import re import requests -import json from bs4 import BeautifulSoup -import re from agentscope.service.service_response import ( ServiceResponse, @@ -10,59 +14,102 @@ ) from agentscope.utils.common import requests_get -def get_category_members( - entity: str, - max_members: int=1000, - limit_per_request: int=500 - ) -> ServiceResponse: + +def _check_entity_existence(entity: str) -> ServiceResponse: + url = "https://en.wikipedia.org/w/api.php" + search_params = { + "action": "query", + "list": "search", + "srsearch": entity, + "format": "json", + } + + search_data = requests_get(url, params=search_params) + + if "query" in search_data and search_data["query"]["search"]: + exact_match = None + for result in search_data["query"]["search"]: + if result["title"].lower() == entity.lower(): + exact_match = result["title"] + break + if not exact_match: + similar_entities = [ + result["title"] + for result in search_data["query"]["search"][:5] + ] + return ServiceResponse( + ServiceExecStatus.ERROR, + {"similar_entities": similar_entities}, + ) + return ServiceResponse( + ServiceExecStatus.SUCCESS, + {"entity": exact_match}, + ) + else: + return ServiceResponse( + ServiceExecStatus.ERROR, + {"error": "Entity not found"}, + ) + + +def wiki_get_category_members( + entity: str, + max_members: int = 1000, + limit_per_request: int = 500, +) -> ServiceResponse: """Function to retrieve category members from Wikipedia:Category pages - + Args: - entity (str): searching keywords + entity (str): searching keywords max_members (int): maximum number of members to output limit_per_request (int): number of members retrieved per quest - + Returns: `ServiceResponse`: A dictionary containing `status` and `content`. The `status` attribute is from the ServiceExecStatus enum, indicating success or error. - If the entity does not exist, `status`=ERROR and return top-5 similar entities in `content`. - If the entity exists, `status`=SUCCESS, and return `content` as a list of dicts. + If the entity does not exist, `status`=ERROR + and return top-5 similar entities in `content`. + If the entity exists, `status`=SUCCESS, + and return `content` as a list of dicts. Keys of each dict: - + "pageid": unique page ID for the member - - "ns": namespace for the member, indicating if the corresponding page is Article/User/... See https://en.wikipedia.org/wiki/Wikipedia:Namespace for details. - + + "ns": namespace for the member, + indicating if the corresponding page is Article/User/... + "title": title of the member - + Example: - + .. code-block:: python - members = get_category_members("Machine_learning", max_members=10) + members = wiki_get_category_members( + "Machine_learning", + max_members=10 + ) print(members) - + It returns contents: .. 
code-block:: python { 'status': , - 'content': [{'pageid': 67911196, 'ns': 0, 'title': 'Bayesian learning mechanisms'}, - {'pageid': 233488, 'ns': 0, 'title': 'Machine learning'}, - {'pageid': 53587467, 'ns': 0, 'title': 'Outline of machine learning'}, - {'pageid': 64439717, 'ns': 0, 'title': '80 Million Tiny Images'}, - {'pageid': 75530149, 'ns': 0, 'title': 'Accelerated Linear Algebra'}] - + 'content': [ + {'pageid': 67911196, 'ns': 0, 'title': 'Bayesian learning mechanisms'}, + {'pageid': 233488, 'ns': 0, 'title': 'Machine learning'}, + ...] + } - + """ url = "https://en.wikipedia.org/w/api.php" params = { - 'action': 'query', - 'list': 'categorymembers', - 'cmtitle': f'Category:{entity}', - 'cmlimit': limit_per_request, # Maximum number of results per request - 'format': 'json' + "action": "query", + "list": "categorymembers", + "cmtitle": f"Category:{entity}", + "cmlimit": limit_per_request, # Maximum number of results per request + "format": "json", } members = [] @@ -70,414 +117,357 @@ def get_category_members( while total_fetched < max_members: data = requests_get(url, params=params) - batch_members = data['query']['categorymembers'] + batch_members = data["query"]["categorymembers"] members.extend(batch_members) total_fetched += len(batch_members) # Check if there is a continuation token - if 'continue' in data and total_fetched < max_members: - params['cmcontinue'] = data['continue']['cmcontinue'] + if "continue" in data and total_fetched < max_members: + params["cmcontinue"] = data["continue"]["cmcontinue"] else: break - + # If more members were fetched than max_members, trim the list if len(members) > max_members: members = members[:max_members] - + if len(members) > 0: return ServiceResponse(ServiceExecStatus.SUCCESS, members) else: return ServiceResponse(ServiceExecStatus.ERROR, members) - -def get_infobox( - entity: str - ) -> ServiceResponse: + +def wiki_get_infobox( + entity: str, +) -> ServiceResponse: """ Function to retrieve InfoBox from the WikiPedia page - + Args: - entity (str): searching keywords - + entity (str): searching keywords + Returns: `ServiceResponse`: A dictionary containing `status` and `content`. The `status` attribute is from the ServiceExecStatus enum, indicating success or error. - If the entity does not exist, `status`=ERROR and return top-5 similar entities in `content`. - If the entity exists, `status`=SUCCESS, and return `content` as a dict containing information in the InfoBox. - + If the entity does not exist, `status`=ERROR, + and return top-5 similar entities in `content`. + If the entity exists, `status`=SUCCESS, + and return `content` as a dict containing information in the InfoBox. + Example: - + .. code-block:: python - infobox_data = get_infobox(entity="Python (programming language)") + infobox_data = wiki_get_infobox(entity="Python (programming language)") print(infobox_data) - + It returns content: - + .. 
code-block:: python { 'status': , - 'content': {'Paradigm': 'Multi-paradigm : object-oriented , [1] procedural ( imperative ), functional , structured , reflective', - 'Designed\xa0by': 'Guido van Rossum', - 'Developer': 'Python Software Foundation', - 'First\xa0appeared': '20\xa0February 1991 ; 33 years ago ( 1991-02-20 ) [2]', - 'Stable release': '3.12.4 / 6 June 2024 ; 14 days ago ( 6 June 2024 )', - 'Typing discipline': 'duck , dynamic , strong ; [3] optional type annotations (since 3.5, but those hints are ignored, except with unofficial tools) [4]', - 'OS': 'Tier 1 : 64-bit Linux , macOS ; 64- and 32-bit Windows 10+ [5] Tier 2 : E.g. 32-bit WebAssembly (WASI) Tier 3 : 64-bit FreeBSD , iOS ; e.g. Raspberry Pi OS Unofficial (or has been known to work): Other Unix-like / BSD variants and e.g. Android 5.0+ (official from Python 3.13 planned [6] ) and a few other platforms [7] [8] [9]', - 'License': 'Python Software Foundation License', - 'Filename extensions': '.py, .pyw, .pyz, [10] .pyi, .pyc, .pyd', + 'content': {'Paradigm': 'Multi-paradigm : object-oriented ...', + 'Designed\xa0by': 'Guido van Rossum', + 'Developer': 'Python Software Foundation', + 'First\xa0appeared': '20\xa0February 1991 ...', + 'Stable release': '3.12.4 / 6 June 2024 ; ...', + 'Typing discipline': 'duck , dynamic , strong ; ...', + 'OS': 'Tier 1 : 64-bit Linux , macOS ; 。。。', + 'License': 'Python Software Foundation License', + 'Filename extensions': '.py, .pyw, .pyz, [10] .pyi, ...', 'Website': 'python.org'} } """ - + + existence_response = _check_entity_existence(entity) + if existence_response.status == ServiceExecStatus.ERROR: + return existence_response + url = "https://en.wikipedia.org/w/api.php" - - # Step 1: Check if the entity exists - search_params = { - 'action': 'query', - 'list': 'search', - 'srsearch': entity, - 'format': 'json' + parse_params = { + "action": "parse", + "page": entity, + "prop": "text", + "format": "json", } - - search_data = requests_get(url, params=search_params) - - if 'query' in search_data and search_data['query']['search']: - # Check if the exact title exists - exact_match = None - for result in search_data['query']['search']: - if result['title'].lower() == entity.lower(): - exact_match = result['title'] - break - if not exact_match: - similar_entities = [result['title'] for result in search_data['query']['search'][:5]] - return ServiceResponse(ServiceExecStatus.ERROR, f"Entity not found. 
Here are similar entities:{similar_entities}") - entity = exact_match + parse_data = requests_get(url, params=parse_params) - # Step 2: Fetch the infobox content if the entity exists - parse_params = { - 'action': 'parse', - 'page': entity, - 'prop': 'text', - 'format': 'json' - } - - parse_data = requests_get(url, params=parse_params) - - if 'parse' in parse_data: - raw_html = parse_data['parse']['text']['*'] - soup = BeautifulSoup(raw_html, 'html.parser') - infobox = soup.find('table', {'class': 'infobox'}) - - if not infobox: - return ServiceResponse(ServiceExecStatus.ERROR, None) - - infobox_data = {} - for row in infobox.find_all('tr'): - header = row.find('th') - value = row.find('td') - if header and value: - key = header.get_text(" ", strip=True) - val = value.get_text(" ", strip=True) - infobox_data[key] = val - - return ServiceResponse(ServiceExecStatus.SUCCESS, infobox_data) - else: - error_message = parse_data.get('error', {}).get('info', 'Unknown error occurred') - return ServiceResponse(ServiceExecStatus.ERROR, {"error": error_message}) - else: - return ServiceResponse(ServiceExecStatus.ERROR, {"error": "Entity not found"}) + if "parse" in parse_data: + raw_html = parse_data["parse"]["text"]["*"] + soup = BeautifulSoup(raw_html, "html.parser") + infobox = soup.find("table", {"class": "infobox"}) + if not infobox: + return ServiceResponse(ServiceExecStatus.ERROR, None) -def get_page_content_by_paragraph( - entity: str, - max_paragraphs: int=1 - ) -> ServiceResponse: + infobox_data = {} + for row in infobox.find_all("tr"): + header = row.find("th") + value = row.find("td") + if header and value: + key = header.get_text(" ", strip=True) + val = value.get_text(" ", strip=True) + infobox_data[key] = val + + return ServiceResponse(ServiceExecStatus.SUCCESS, infobox_data) + else: + error_message = parse_data.get("error", {}).get( + "info", + "Unknown error occurred", + ) + return ServiceResponse( + ServiceExecStatus.ERROR, + {"error": error_message}, + ) + + +def wiki_get_page_content_by_paragraph( + entity: str, + max_paragraphs: int = 1, +) -> ServiceResponse: """ Retrieve content from a Wikipedia page and split it into paragraphs, excluding section headers. Args: entity (str): search word. - max_paragraphs (int, optional): The maximum number of paragraphs to retrieve. Default is None (retrieve all paragraphs). + max_paragraphs (int, optional): + The maximum number of paragraphs to retrieve. + Default is None (retrieve all paragraphs). Returns: `ServiceResponse`: A dictionary containing `status` and `content`. The `status` attribute is from the ServiceExecStatus enum, indicating success or error. - If the entity does not exist, `status`=ERROR and return top-5 similar entities in `content`. - If the entity exists, `status`=SUCCESS, and return `content` as a list of paragraphs from the Wikipedia page. - + If the entity does not exist, `status`=ERROR, + and return top-5 similar entities in `content`. + If the entity exists, `status`=SUCCESS, + and return `content` as a list of paragraphs from the Wikipedia page. + Example: - + .. code-block:: python - wiki_paragraph = get_page_content_by_paragraph(entity="Python (programming language)", max_paragraphs=1) + wiki_paragraph = wiki_get_page_content_by_paragraph( + entity="Python (programming language)", + max_paragraphs=1) print(wiki_paragraph) - + It will return content: .. 
code-block:: python { - 'status': , - 'content': ['Python is a high-level, general-purpose programming...'] + 'status': , + 'content': ['Python is a high-level...'] } """ + existence_response = _check_entity_existence(entity) + if existence_response.status == ServiceExecStatus.ERROR: + return existence_response + url = "https://en.wikipedia.org/w/api.php" - - # Step 1: Check if the entity exists - search_params = { - 'action': 'query', - 'list': 'search', - 'srsearch': entity, - 'format': 'json' + params = { + "action": "query", + "prop": "extracts", + "explaintext": True, + "titles": entity, + "format": "json", } - - search_data = requests_get(url, params=search_params) - - if 'query' in search_data and search_data['query']['search']: - # Check if the exact title exists - exact_match = None - for result in search_data['query']['search']: - if result['title'].lower() == entity.lower(): - exact_match = result['title'] - break - if not exact_match: - similar_entities = [result['title'] for result in search_data['query']['search'][:5]] - return ServiceResponse(ServiceExecStatus.ERROR, f"Entity not found. Here are similar entities: {similar_entities}") - entity = exact_match + data = requests_get(url, params=params) + page = next(iter(data["query"]["pages"].values())) + content = page.get("extract", "No content found.") + if content == "No content found.": + return ServiceResponse(ServiceExecStatus.ERROR, content) - # Step 2: Fetch the page content if the entity exists - params = { - 'action': 'query', - 'prop': 'extracts', - 'explaintext': True, - 'titles': entity, - 'format': 'json' - } + # Split content into paragraphs and filter out headers + paragraphs = [ + para.strip() + for para in content.split("\n\n") + if not re.match(r"^\s*==.*==\s*$", para) and para.strip() != "" + ] + + # Return the specified number of paragraphs + if max_paragraphs: + paragraphs = paragraphs[:max_paragraphs] + + return ServiceResponse(ServiceExecStatus.SUCCESS, paragraphs) - data = requests_get(url, params=params) - page = next(iter(data['query']['pages'].values())) - content = page.get('extract', 'No content found.') - if content == 'No content found.': - return ServiceResponse(ServiceExecStatus.ERROR, content) - - # Split content into paragraphs and filter out headers - paragraphs = [para.strip() for para in content.split('\n\n') if not re.match(r'^\s*==.*==\s*$', para) and para.strip() != ''] - - # Return the specified number of paragraphs - if max_paragraphs: - paragraphs = paragraphs[:max_paragraphs] - - return ServiceResponse(ServiceExecStatus.SUCCESS, paragraphs) - else: - return ServiceResponse(ServiceExecStatus.ERROR, {"error": "Entity not found"}) -def get_all_wikipedia_tables( - entity: str - ) -> ServiceResponse: +def wiki_get_all_wikipedia_tables( + entity: str, +) -> ServiceResponse: """ Retrieve tables on the Wikipedia page - + Args: entity (str): search word. - + Returns: `ServiceResponse`: A dictionary containing `status` and `content`. The `status` attribute is from the ServiceExecStatus enum, indicating success or error. - If the entity does not exist, `status`=ERROR and return top-5 similar entities in `content`. - If the entity exists, `status`=SUCCESS, and return `content` as a list of tables from the Wikipedia page. - Each table is presented as a dict, where key is the column name and value is the values for each column. - + If the entity does not exist, `status`=ERROR, + and return top-5 similar entities in `content`. 
+ If the entity exists, `status`=SUCCESS, + and return `content` as a list of tables from the Wikipedia page. + Each table is presented as a dict, + where key is the column name and value is the values for each column. + Example: - + .. code-block:: python - wiki_table = get_all_wikipedia_tables(entity="Python (programming language)") + wiki_table = wiki_get_all_wikipedia_tables( + entity="Python (programming language)" + ) print(wiki_table) - + It will return content: .. code-block:: python { 'status': , 'content': [ { - 'Type': ['bool','bytearray','bytes','complex',...], - 'Mutability': ['immutable','mutable','immutable','immutable',...], + 'Type': ['bool','bytearray',...], + 'Mutability': ['immutable','mutable',...], ... } ] } """ + existence_response = _check_entity_existence(entity) + if existence_response.status == ServiceExecStatus.ERROR: + return existence_response + url = "https://en.wikipedia.org/w/api.php" - - # Step 1: Check if the entity exists - search_params = { - 'action': 'query', - 'list': 'search', - 'srsearch': entity, - 'format': 'json' + params = { + "action": "parse", + "page": entity, + "prop": "text", + "format": "json", } - - search_response = requests_get(url, params=search_params) - search_data = search_response - - if 'query' in search_data and search_data['query']['search']: - # Check if the exact title exists - exact_match = None - for result in search_data['query']['search']: - if result['title'].lower() == entity.lower(): - exact_match = result['title'] - break - if not exact_match: - similar_entities = [result['title'] for result in search_data['query']['search'][:5]] - return ServiceResponse(ServiceExecStatus.ERROR, f"Entity not found. Here are similar entities:{similar_entities}") - entity = exact_match + data = requests_get(url, params=params) + raw_html = data["parse"]["text"]["*"] - # Step 2: Fetch the page content if the entity exists - params = { - 'action': 'parse', - 'page': entity, - 'prop': 'text', - 'format': 'json' - } + soup = BeautifulSoup(raw_html, "html.parser") + tables = soup.find_all("table", {"class": "wikitable"}) - data = requests_get(url, params=params) - raw_html = data['parse']['text']['*'] + if not tables: + return ServiceResponse(ServiceExecStatus.ERROR, None) - soup = BeautifulSoup(raw_html, 'html.parser') - tables = soup.find_all('table', {'class': 'wikitable'}) - - if not tables: - return ServiceResponse(ServiceExecStatus.ERROR, None) + all_tables_data = [] + for table_index, table in enumerate(tables): + headers = [ + header.get_text(strip=True) for header in table.find_all("th") + ] + table_dict = {header: [] for header in headers} - all_tables_data = [] - for table_index, table in enumerate(tables): - headers = [header.get_text(strip=True) for header in table.find_all('th')] - table_dict = {header: [] for header in headers} + for row in table.find_all("tr")[1:]: # Skip the header row + cells = row.find_all(["td", "th"]) + if len(cells) == len( + headers, + ): # Ensure the row has the correct number of cells + for i, cell in enumerate(cells): + table_dict[headers[i]].append( + cell.get_text(strip=True), + ) - for row in table.find_all('tr')[1:]: # Skip the header row - cells = row.find_all(['td', 'th']) - if len(cells) == len(headers): # Ensure the row has the correct number of cells - for i, cell in enumerate(cells): - table_dict[headers[i]].append(cell.get_text(strip=True)) - - all_tables_data.append(table_dict) + all_tables_data.append(table_dict) - return ServiceResponse(ServiceExecStatus.SUCCESS, all_tables_data) - 
else: - return ServiceResponse(ServiceExecStatus.ERROR, {"error": "Entity not found"}) + return ServiceResponse(ServiceExecStatus.SUCCESS, all_tables_data) -def get_page_images_with_captions( - entity: str - ) -> ServiceResponse: +def wiki_get_page_images_with_captions( + entity: str, +) -> ServiceResponse: """ Function to retrive images and details on the Wikipedia page - + Args: entity (str): search word. - + Returns: `ServiceResponse`: A dictionary containing `status` and `content`. The `status` attribute is from the ServiceExecStatus enum, indicating success or error. - If the entity does not exist, `status`=ERROR and return top-5 similar entities in `content`. - If the entity exists, `status`=SUCCESS and return the `content` as a list of dict from the Wikipedia page. - + If the entity does not exist, `status`=ERROR, + and return top-5 similar entities in `content`. + If the entity exists, `status`=SUCCESS, + and return the `content` as a list of dict from the Wikipedia page. + Each dict has: 'title': title of the image 'url': link to the image 'caption': caption of the image - + Example: .. code-block:: python - wiki_images = get_page_images_with_captions(entity="Python (programming language)") + wiki_images = wiki_get_page_images_with_captions( + entity="Python (programming language)" + ) print(wiki_images) - + It will return: - + .. code-block:: python { 'status': , 'content': [{ - 'title': 'File:Commons-logo.svg', - 'url': 'https://upload.wikimedia.org/wikipedia/en/4/4a/Commons-logo.svg', - 'caption': 'The Wikimedia Commons logo, SVG version.'}, - ... + 'title': 'File:Commons-logo.svg', + 'url': 'https://upload.wikimedia.org...', + 'caption': 'The Wikimedia Commons logo,...'}, + ... ] } """ - - url = "https://en.wikipedia.org/w/api.php" - - # Step 1: Check if the entity exists - search_params = { - 'action': 'query', - 'list': 'search', - 'srsearch': entity, - 'format': 'json' - } - - search_response = requests_get(url, params=search_params) - search_data = search_response - - if 'query' in search_data and search_data['query']['search']: - # Check if the exact title exists - exact_match = None - for result in search_data['query']['search']: - if result['title'].lower() == entity.lower(): - exact_match = result['title'] - break - if not exact_match: - similar_entities = [result['title'] for result in search_data['query']['search'][:5]] - return ServiceResponse(ServiceExecStatus.ERROR, {"similar_entities": similar_entities}) - entity = exact_match + existence_response = _check_entity_existence(entity) + if existence_response.status == ServiceExecStatus.ERROR: + return existence_response + + url = "https://en.wikipedia.org/w/api.php" - # Step 2: Get the list of images + params = { + "action": "query", + "prop": "images", + "titles": entity, + "format": "json", + } + data = requests_get(url, params=params) + page = next(iter(data["query"]["pages"].values())) + images = page.get("images", []) + if len(images) == 0: + return ServiceResponse(ServiceExecStatus.ERROR, None) + + image_details = [] + for image in images: + image_title = image["title"] params = { - 'action': 'query', - 'prop': 'images', - 'titles': entity, - 'format': 'json' + "action": "query", + "titles": image_title, + "prop": "imageinfo", + "iiprop": "url|extmetadata", + "format": "json", } - data = requests_get(url, params=params) - page = next(iter(data['query']['pages'].values())) - images = page.get('images', []) - if len(images) == 0: - return ServiceResponse(ServiceExecStatus.ERROR, None) - - # Step 3: Get details 
for each image - image_details = [] - for image in images: - image_title = image['title'] - params = { - 'action': 'query', - 'titles': image_title, - 'prop': 'imageinfo', - 'iiprop': 'url|extmetadata', - 'format': 'json' - } - response = requests.get(url, params=params) - data = response.json() - image_page = next(iter(data['query']['pages'].values())) - if 'imageinfo' in image_page: - image_info = image_page['imageinfo'][0] - image_url = image_info.get('url', '') - extmetadata = image_info.get('extmetadata', {}) - caption = extmetadata.get('ImageDescription', {}).get('value', 'No caption available') - image_details.append({ - 'title': image_title, - 'url': image_url, - 'caption': caption - }) - - return ServiceResponse(ServiceExecStatus.SUCCESS, image_details) - else: - return ServiceResponse(ServiceExecStatus.ERROR, {"error": "Entity not found"}) - - + response = requests.get(url, params=params) + data = response.json() + image_page = next(iter(data["query"]["pages"].values())) + if "imageinfo" in image_page: + image_info = image_page["imageinfo"][0] + image_url = image_info.get("url", "") + extmetadata = image_info.get("extmetadata", {}) + caption = extmetadata.get("ImageDescription", {}).get( + "value", + "No caption available", + ) + image_details.append( + { + "title": image_title, + "url": image_url, + "caption": caption, + }, + ) + + return ServiceResponse(ServiceExecStatus.SUCCESS, image_details) diff --git a/tests/wiki_test.py b/tests/wiki_test.py index 9481d45c2..1279e436f 100644 --- a/tests/wiki_test.py +++ b/tests/wiki_test.py @@ -1,88 +1,106 @@ +# -*- coding: utf-8 -*- """Wiki retriever test.""" import unittest from unittest.mock import Mock, patch, MagicMock from agentscope.service import ServiceResponse -from agentscope.service import get_category_members, get_infobox, get_page_content_by_paragraph, get_all_wikipedia_tables, get_page_images_with_captions +from agentscope.service import ( + wiki_get_category_members, + wiki_get_infobox, + wiki_get_page_content_by_paragraph, + wiki_get_all_wikipedia_tables, + wiki_get_page_images_with_captions, +) from agentscope.service.service_status import ServiceExecStatus + class TestWiki(unittest.TestCase): """ExampleTest for a unit test.""" - + @patch("agentscope.utils.common.requests.get") - def test_get_category_members(self, mock_get: MagicMock) -> None: + def test_wiki_get_category_members( + self, + mock_get: MagicMock, + ) -> None: """Test test_get_category_members""" mock_response = Mock() mock_dict = { - 'query': { - 'categorymembers': [{ - 'pageid': 20, - 'ns': 0, - 'title': 'This is a test' - }] - } - } - + "query": { + "categorymembers": [ + { + "pageid": 20, + "ns": 0, + "title": "This is a test", + }, + ], + }, + } + expected_result = ServiceResponse( status=ServiceExecStatus.SUCCESS, content=[ - {'pageid': 20, - 'ns': 0, - 'title': 'This is a test' - } - ] + { + "pageid": 20, + "ns": 0, + "title": "This is a test", + }, + ], ) - + mock_response.json.return_value = mock_dict mock_get.return_value = mock_response - + test_entity = "Test" - max_members=1 - limit_per_request=100 + max_members = 1 + limit_per_request = 100 params = { - 'action': 'query', - 'list': 'categorymembers', - 'cmtitle': f'Category:{test_entity}', - 'cmlimit': limit_per_request, # Maximum number of results per request - 'format': 'json' - } - - results = get_category_members( - entity=test_entity, + "action": "query", + "list": "categorymembers", + "cmtitle": f"Category:{test_entity}", + "cmlimit": limit_per_request, + "format": "json", + } + + 
results = wiki_get_category_members( + entity=test_entity, max_members=max_members, - limit_per_request=limit_per_request - ) + limit_per_request=limit_per_request, + ) mock_get.assert_called_once_with( "https://en.wikipedia.org/w/api.php", params=params, ) - + self.assertEqual( results, expected_result, ) - + @patch("agentscope.utils.common.requests.get") - def test_get_infobox(self, mock_get: MagicMock) -> None: + def test_wiki_get_infobox( + self, + mock_get: MagicMock, + ) -> None: """Test get_infobox with different parameters and responses""" - + # Mock responses for search query mock_response_search = Mock() mock_dict_search = { - 'query': { - 'search': [ - {'title': 'Test'} - ] - } + "query": { + "search": [ + {"title": "Test"}, + ], + }, } - + # Mock responses for parse query mock_response_parse = Mock() mock_dict_parse = { - 'parse': { - 'title': 'Test', - 'pageid': 20, - 'text': { '*':""" + "parse": { + "title": "Test", + "pageid": 20, + "text": { + "*": """ @@ -93,72 +111,79 @@ def test_get_infobox(self, mock_get: MagicMock) -> None:
 <table class="infobox">
 <tr><th>Column1</th><td>Data1</td></tr>
 <tr><th>Column2</th><td>Data2</td></tr>
 </table>
- """ - } - } + """, + }, + }, } expected_result = ServiceResponse( status=ServiceExecStatus.SUCCESS, content={ - 'Column1': 'Data1', - 'Column2': 'Data2' - } + "Column1": "Data1", + "Column2": "Data2", + }, ) - # Set the side effect of the mock_get to return different responses in sequence mock_response_search.json.return_value = mock_dict_search mock_response_parse.json.return_value = mock_dict_parse mock_get.side_effect = [mock_response_search, mock_response_parse] test_entity = "Test" - results = get_infobox(entity=test_entity) + results = wiki_get_infobox(entity=test_entity) # Define expected calls calls = [ - unittest.mock.call("https://en.wikipedia.org/w/api.php", params={ - 'action': 'query', - 'list': 'search', - 'srsearch': test_entity, - 'format': 'json' - }), - unittest.mock.call("https://en.wikipedia.org/w/api.php", params={ - 'action': 'parse', - 'page': test_entity, - 'prop': 'text', - 'format': 'json' - }) + unittest.mock.call( + "https://en.wikipedia.org/w/api.php", + params={ + "action": "query", + "list": "search", + "srsearch": test_entity, + "format": "json", + }, + ), + unittest.mock.call( + "https://en.wikipedia.org/w/api.php", + params={ + "action": "parse", + "page": test_entity, + "prop": "text", + "format": "json", + }, + ), ] - + mock_get.assert_has_calls(calls, any_order=True) self.assertEqual(results, expected_result) - - + @patch("agentscope.utils.common.requests.get") - def test_get_page_content_by_paragraph(self, mock_get: MagicMock) -> None: - """Test get_page_content_by_paragraph with different parameters and responses""" - + def test_wiki_get_page_content_by_paragraph( + self, + mock_get: MagicMock, + ) -> None: + """Test get_page_content_by_paragraph""" + # Mock responses for search query mock_response_search = Mock() mock_dict_search = { - 'query': { - 'search': [ - {'title': 'Test'} - ] - } + "query": { + "search": [ + {"title": "Test"}, + ], + }, } - + # Mock responses for extract query mock_response_extract = Mock() mock_dict_extract = { - 'query': { - 'pages': { - '20': { - 'pageid': 20, - 'title': 'Test', - 'extract': """ + "query": { + "pages": { + "20": { + "pageid": 20, + "title": "Test", + "extract": """ This is the first paragraph. This is the second paragraph. @@ -166,74 +191,86 @@ def test_get_page_content_by_paragraph(self, mock_get: MagicMock) -> None: == Section Header == This is the third paragraph under a section header. - """ - } - } - } + """, + }, + }, + }, } expected_result = ServiceResponse( status=ServiceExecStatus.SUCCESS, content=[ "This is the first paragraph.", - "This is the second paragraph." 
- ] + "This is the second paragraph.", + ], ) - # Set the side effect of the mock_get to return different responses in sequence mock_response_search.json.return_value = mock_dict_search mock_response_extract.json.return_value = mock_dict_extract mock_get.side_effect = [mock_response_search, mock_response_extract] test_entity = "Test" - results = get_page_content_by_paragraph(entity=test_entity, max_paragraphs=2) + results = wiki_get_page_content_by_paragraph( + entity=test_entity, + max_paragraphs=2, + ) # Define expected calls params1 = { - 'action': 'query', - 'list': 'search', - 'srsearch': test_entity, - 'format': 'json' + "action": "query", + "list": "search", + "srsearch": test_entity, + "format": "json", } params2 = { - 'action': 'query', - 'prop': 'extracts', - 'explaintext': True, - 'titles': test_entity, - 'format': 'json' + "action": "query", + "prop": "extracts", + "explaintext": True, + "titles": test_entity, + "format": "json", } calls = [ - unittest.mock.call("https://en.wikipedia.org/w/api.php", params=params1), - unittest.mock.call("https://en.wikipedia.org/w/api.php", params=params2) + unittest.mock.call( + "https://en.wikipedia.org/w/api.php", + params=params1, + ), + unittest.mock.call( + "https://en.wikipedia.org/w/api.php", + params=params2, + ), ] - + mock_get.assert_has_calls(calls, any_order=True) self.assertEqual(results, expected_result) - + @patch("agentscope.utils.common.requests.get") - def test_get_all_wikipedia_tables(self, mock_get: MagicMock) -> None: - """Test get_all_wikipedia_tables with different parameters and responses""" - + def test_wiki_get_all_wikipedia_tables( + self, + mock_get: MagicMock, + ) -> None: + """Test get_all_wikipedia_tables""" + # Mock responses for search query mock_response_search = Mock() mock_dict_search = { - 'query': { - 'search': [ - {'title': 'Test'} - ] - } + "query": { + "search": [ + {"title": "Test"}, + ], + }, } - + # Mock responses for parse query mock_response_parse = Mock() mock_dict_parse = { - 'parse': { - 'title': 'Test', - 'pageid': 20, - 'text': { '*':""" + "parse": { + "title": "Test", + "pageid": 20, + "text": { + "*": """ @@ -248,183 +285,217 @@ def test_get_all_wikipedia_tables(self, mock_get: MagicMock) -> None:
                    <table class="wikitable">
                        <tr><th>Header1</th><th>Header2</th></tr>
                        <tr><td>Row1Col1</td><td>Row1Col2</td></tr>
                        <tr><td>Row2Col1</td><td>Row2Col2</td></tr>
                    </table>
- """ - } - } + """, + }, + }, } expected_result = ServiceResponse( status=ServiceExecStatus.SUCCESS, - content=[{ - 'Header1': ['Row1Col1', 'Row2Col1'], - 'Header2': ['Row1Col2', 'Row2Col2'] - }] + content=[ + { + "Header1": ["Row1Col1", "Row2Col1"], + "Header2": ["Row1Col2", "Row2Col2"], + }, + ], ) - # Set the side effect of the mock_get to return different responses in sequence mock_response_search.json.return_value = mock_dict_search mock_response_parse.json.return_value = mock_dict_parse mock_get.side_effect = [mock_response_search, mock_response_parse] test_entity = "Test" - results = get_all_wikipedia_tables(entity=test_entity) + results = wiki_get_all_wikipedia_tables(entity=test_entity) # Define expected calls params1 = { - 'action': 'query', - 'list': 'search', - 'srsearch': test_entity, - 'format': 'json' + "action": "query", + "list": "search", + "srsearch": test_entity, + "format": "json", } params2 = { - 'action': 'parse', - 'page': test_entity, - 'prop': 'text', - 'format': 'json' + "action": "parse", + "page": test_entity, + "prop": "text", + "format": "json", } calls = [ - unittest.mock.call("https://en.wikipedia.org/w/api.php", params=params1), - unittest.mock.call("https://en.wikipedia.org/w/api.php", params=params2) + unittest.mock.call( + "https://en.wikipedia.org/w/api.php", + params=params1, + ), + unittest.mock.call( + "https://en.wikipedia.org/w/api.php", + params=params2, + ), ] - + mock_get.assert_has_calls(calls, any_order=True) self.assertEqual(results, expected_result) - - + @patch("agentscope.utils.common.requests.get") - def test_get_page_images_with_captions(self, mock_get: MagicMock) -> None: - """Test get_page_images_with_captions with different parameters and responses""" - + def test_get_page_images_with_captions( + self, + mock_get: MagicMock, + ) -> None: + """Test get_page_images_with_captions""" + # Mock responses for search query mock_response_search = Mock() mock_dict_search = { - 'query': { - 'search': [ - {'title': 'Test'} - ] - } + "query": { + "search": [ + {"title": "Test"}, + ], + }, } - + # Mock responses for images query mock_response_images = Mock() mock_dict_images = { - 'query': { - 'pages': { - '20': { - 'pageid': 20, - 'title': 'Test', - 'images': [ - {'title': 'Image1'}, - {'title': 'Image2'} - ] - } - } - } + "query": { + "pages": { + "20": { + "pageid": 20, + "title": "Test", + "images": [ + {"title": "Image1"}, + {"title": "Image2"}, + ], + }, + }, + }, } - + # Mock responses for image details query mock_response_image1 = Mock() mock_dict_image1 = { - 'query': { - 'pages': { - '30': { - 'pageid': 30, - 'imageinfo': [{ - 'url': 'http://example.com/image1.jpg', - 'extmetadata': { - 'ImageDescription': {'value': 'Caption for image 1'} - } - }] - } - } - } + "query": { + "pages": { + "30": { + "pageid": 30, + "imageinfo": [ + { + "url": "http://example.com/image1.jpg", + "extmetadata": { + "ImageDescription": { + "value": "Caption for image 1", + }, + }, + }, + ], + }, + }, + }, } - + mock_response_image2 = Mock() mock_dict_image2 = { - 'query': { - 'pages': { - '31': { - 'pageid': 31, - 'imageinfo': [{ - 'url': 'http://example.com/image2.jpg', - 'extmetadata': { - 'ImageDescription': {'value': 'Caption for image 2'} - } - }] - } - } - } + "query": { + "pages": { + "31": { + "pageid": 31, + "imageinfo": [ + { + "url": "http://example.com/image2.jpg", + "extmetadata": { + "ImageDescription": { + "value": "Caption for image 2", + }, + }, + }, + ], + }, + }, + }, } expected_result = ServiceResponse( status=ServiceExecStatus.SUCCESS, 
content=[ { - 'title': 'Image1', - 'url': 'http://example.com/image1.jpg', - 'caption': 'Caption for image 1' + "title": "Image1", + "url": "http://example.com/image1.jpg", + "caption": "Caption for image 1", }, { - 'title': 'Image2', - 'url': 'http://example.com/image2.jpg', - 'caption': 'Caption for image 2' - } - ] + "title": "Image2", + "url": "http://example.com/image2.jpg", + "caption": "Caption for image 2", + }, + ], ) - # Set the side effect of the mock_get to return different responses in sequence mock_response_search.json.return_value = mock_dict_search mock_response_images.json.return_value = mock_dict_images mock_response_image1.json.return_value = mock_dict_image1 mock_response_image2.json.return_value = mock_dict_image2 - mock_get.side_effect = [mock_response_search, mock_response_images, mock_response_image1, mock_response_image2] + mock_get.side_effect = [ + mock_response_search, + mock_response_images, + mock_response_image1, + mock_response_image2, + ] test_entity = "Test" - results = get_page_images_with_captions(entity=test_entity) + results = wiki_get_page_images_with_captions(entity=test_entity) # Define expected calls params1 = { - 'action': 'query', - 'list': 'search', - 'srsearch': test_entity, - 'format': 'json' + "action": "query", + "list": "search", + "srsearch": test_entity, + "format": "json", } params2 = { - 'action': 'query', - 'prop': 'images', - 'titles': test_entity, - 'format': 'json' + "action": "query", + "prop": "images", + "titles": test_entity, + "format": "json", } params3_image1 = { - 'action': 'query', - 'titles': 'Image1', - 'prop': 'imageinfo', - 'iiprop': 'url|extmetadata', - 'format': 'json' + "action": "query", + "titles": "Image1", + "prop": "imageinfo", + "iiprop": "url|extmetadata", + "format": "json", } params4_image2 = { - 'action': 'query', - 'titles': 'Image2', - 'prop': 'imageinfo', - 'iiprop': 'url|extmetadata', - 'format': 'json' + "action": "query", + "titles": "Image2", + "prop": "imageinfo", + "iiprop": "url|extmetadata", + "format": "json", } calls = [ - unittest.mock.call("https://en.wikipedia.org/w/api.php", params=params1), - unittest.mock.call("https://en.wikipedia.org/w/api.php", params=params2), - unittest.mock.call("https://en.wikipedia.org/w/api.php", params=params3_image1), - unittest.mock.call("https://en.wikipedia.org/w/api.php", params=params4_image2) + unittest.mock.call( + "https://en.wikipedia.org/w/api.php", + params=params1, + ), + unittest.mock.call( + "https://en.wikipedia.org/w/api.php", + params=params2, + ), + unittest.mock.call( + "https://en.wikipedia.org/w/api.php", + params=params3_image1, + ), + unittest.mock.call( + "https://en.wikipedia.org/w/api.php", + params=params4_image2, + ), ] - + mock_get.assert_has_calls(calls, any_order=True) self.assertEqual(results, expected_result) + if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() From cb8706765447402bd7aa2769af14901c60a54635 Mon Sep 17 00:00:00 2001 From: PengfeiHePower Date: Fri, 28 Jun 2024 13:50:21 -0400 Subject: [PATCH 05/16] fix format --- src/agentscope/service/web/wiki.py | 41 +++++++++++++++--------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/src/agentscope/service/web/wiki.py b/src/agentscope/service/web/wiki.py index a138c36c4..c13f097b6 100644 --- a/src/agentscope/service/web/wiki.py +++ b/src/agentscope/service/web/wiki.py @@ -5,7 +5,8 @@ """ import re -import requests + +# import requests from bs4 import BeautifulSoup from agentscope.service.service_response import ( @@ 
-45,11 +46,10 @@ def _check_entity_existence(entity: str) -> ServiceResponse: ServiceExecStatus.SUCCESS, {"entity": exact_match}, ) - else: - return ServiceResponse( - ServiceExecStatus.ERROR, - {"error": "Entity not found"}, - ) + return ServiceResponse( + ServiceExecStatus.ERROR, + {"error": "Entity not found"}, + ) def wiki_get_category_members( @@ -96,7 +96,8 @@ def wiki_get_category_members( { 'status': , 'content': [ - {'pageid': 67911196, 'ns': 0, 'title': 'Bayesian learning mechanisms'}, + {'pageid': 67911196, 'ns': 0, + 'title': 'Bayesian learning mechanisms'}, {'pageid': 233488, 'ns': 0, 'title': 'Machine learning'}, ...] @@ -133,8 +134,8 @@ def wiki_get_category_members( if len(members) > 0: return ServiceResponse(ServiceExecStatus.SUCCESS, members) - else: - return ServiceResponse(ServiceExecStatus.ERROR, members) + + return ServiceResponse(ServiceExecStatus.ERROR, members) def wiki_get_infobox( @@ -211,15 +212,14 @@ def wiki_get_infobox( infobox_data[key] = val return ServiceResponse(ServiceExecStatus.SUCCESS, infobox_data) - else: - error_message = parse_data.get("error", {}).get( - "info", - "Unknown error occurred", - ) - return ServiceResponse( - ServiceExecStatus.ERROR, - {"error": error_message}, - ) + error_message = parse_data.get("error", {}).get( + "info", + "Unknown error occurred", + ) + return ServiceResponse( + ServiceExecStatus.ERROR, + {"error": error_message}, + ) def wiki_get_page_content_by_paragraph( @@ -358,7 +358,7 @@ def wiki_get_all_wikipedia_tables( return ServiceResponse(ServiceExecStatus.ERROR, None) all_tables_data = [] - for table_index, table in enumerate(tables): + for _, table in enumerate(tables): headers = [ header.get_text(strip=True) for header in table.find_all("th") ] @@ -451,8 +451,7 @@ def wiki_get_page_images_with_captions( "iiprop": "url|extmetadata", "format": "json", } - response = requests.get(url, params=params) - data = response.json() + data = requests_get(url, params=params) image_page = next(iter(data["query"]["pages"].values())) if "imageinfo" in image_page: image_info = image_page["imageinfo"][0] From ebb2af839d3cae600034832d1a5cce3818ffee4e Mon Sep 17 00:00:00 2001 From: PengfeiHePower Date: Wed, 3 Jul 2024 13:44:30 -0400 Subject: [PATCH 06/16] modify url --- src/agentscope/service/web/wiki.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/agentscope/service/web/wiki.py b/src/agentscope/service/web/wiki.py index c13f097b6..17df81c37 100644 --- a/src/agentscope/service/web/wiki.py +++ b/src/agentscope/service/web/wiki.py @@ -16,8 +16,12 @@ from agentscope.utils.common import requests_get -def _check_entity_existence(entity: str) -> ServiceResponse: +def wiki_api(params): url = "https://en.wikipedia.org/w/api.php" + return requests_get(url, params=params) + + +def _check_entity_existence(entity: str) -> ServiceResponse: search_params = { "action": "query", "list": "search", @@ -25,7 +29,7 @@ def _check_entity_existence(entity: str) -> ServiceResponse: "format": "json", } - search_data = requests_get(url, params=search_params) + search_data = wiki_api(search_params) if "query" in search_data and search_data["query"]["search"]: exact_match = None @@ -104,7 +108,6 @@ def wiki_get_category_members( } """ - url = "https://en.wikipedia.org/w/api.php" params = { "action": "query", "list": "categorymembers", @@ -117,7 +120,7 @@ def wiki_get_category_members( total_fetched = 0 while total_fetched < max_members: - data = requests_get(url, params=params) + data = wiki_api(params) 
batch_members = data["query"]["categorymembers"] members.extend(batch_members) total_fetched += len(batch_members) @@ -184,7 +187,6 @@ def wiki_get_infobox( if existence_response.status == ServiceExecStatus.ERROR: return existence_response - url = "https://en.wikipedia.org/w/api.php" parse_params = { "action": "parse", "page": entity, @@ -192,7 +194,7 @@ def wiki_get_infobox( "format": "json", } - parse_data = requests_get(url, params=parse_params) + parse_data = wiki_api(parse_params) if "parse" in parse_data: raw_html = parse_data["parse"]["text"]["*"] @@ -265,7 +267,6 @@ def wiki_get_page_content_by_paragraph( if existence_response.status == ServiceExecStatus.ERROR: return existence_response - url = "https://en.wikipedia.org/w/api.php" params = { "action": "query", "prop": "extracts", @@ -274,7 +275,7 @@ def wiki_get_page_content_by_paragraph( "format": "json", } - data = requests_get(url, params=params) + data = wiki_api(params) page = next(iter(data["query"]["pages"].values())) content = page.get("extract", "No content found.") if content == "No content found.": @@ -340,7 +341,6 @@ def wiki_get_all_wikipedia_tables( if existence_response.status == ServiceExecStatus.ERROR: return existence_response - url = "https://en.wikipedia.org/w/api.php" params = { "action": "parse", "page": entity, @@ -348,7 +348,7 @@ def wiki_get_all_wikipedia_tables( "format": "json", } - data = requests_get(url, params=params) + data = wiki_api(params) raw_html = data["parse"]["text"]["*"] soup = BeautifulSoup(raw_html, "html.parser") @@ -427,15 +427,13 @@ def wiki_get_page_images_with_captions( if existence_response.status == ServiceExecStatus.ERROR: return existence_response - url = "https://en.wikipedia.org/w/api.php" - params = { "action": "query", "prop": "images", "titles": entity, "format": "json", } - data = requests_get(url, params=params) + data = wiki_api(params) page = next(iter(data["query"]["pages"].values())) images = page.get("images", []) if len(images) == 0: @@ -451,7 +449,7 @@ def wiki_get_page_images_with_captions( "iiprop": "url|extmetadata", "format": "json", } - data = requests_get(url, params=params) + data = wiki_api(params) image_page = next(iter(data["query"]["pages"].values())) if "imageinfo" in image_page: image_info = image_page["imageinfo"][0] From 44ec6c3f7241a48095ee42f34b1ae78fb97179a7 Mon Sep 17 00:00:00 2001 From: PengfeiHePower Date: Wed, 3 Jul 2024 14:09:49 -0400 Subject: [PATCH 07/16] add bs4 --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 8055476f2..c87755f06 100644 --- a/setup.py +++ b/setup.py @@ -83,6 +83,7 @@ "litellm", "psutil", "scipy", + "bs4", ] distribute_requires = minimal_requires + rpc_requires From 62b31b58974ccc8169cab28083ed58e8c8338f42 Mon Sep 17 00:00:00 2001 From: PengfeiHePower Date: Wed, 3 Jul 2024 14:11:24 -0400 Subject: [PATCH 08/16] modify minimal requirements --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index c87755f06..b319de417 100644 --- a/setup.py +++ b/setup.py @@ -29,8 +29,6 @@ "docker", "pymongo", "pymysql", - "bs4", - "beautifulsoup4", "feedparser", ] @@ -84,6 +82,7 @@ "psutil", "scipy", "bs4", + "beautifulsoup4", ] distribute_requires = minimal_requires + rpc_requires From 3bc4039761559559b4cff5fd1b2dc9c4cb6f0309 Mon Sep 17 00:00:00 2001 From: PengfeiHePower Date: Wed, 3 Jul 2024 17:30:16 -0400 Subject: [PATCH 09/16] correct formats --- src/agentscope/service/web/wiki.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/src/agentscope/service/web/wiki.py b/src/agentscope/service/web/wiki.py index 17df81c37..05e3ef3e0 100644 --- a/src/agentscope/service/web/wiki.py +++ b/src/agentscope/service/web/wiki.py @@ -16,7 +16,8 @@ from agentscope.utils.common import requests_get -def wiki_api(params): +def wiki_api(params: dict) -> dict: + """Scratch information via Wiki API""" url = "https://en.wikipedia.org/w/api.php" return requests_get(url, params=params) From 925d856ea3f79047c4e28667e308fca9edb5df42 Mon Sep 17 00:00:00 2001 From: PengfeiHePower Date: Wed, 3 Jul 2024 19:22:52 -0400 Subject: [PATCH 10/16] modify _check_entity_existence --- src/agentscope/service/web/wiki.py | 35 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/agentscope/service/web/wiki.py b/src/agentscope/service/web/wiki.py index 05e3ef3e0..4d75f3103 100644 --- a/src/agentscope/service/web/wiki.py +++ b/src/agentscope/service/web/wiki.py @@ -32,25 +32,26 @@ def _check_entity_existence(entity: str) -> ServiceResponse: search_data = wiki_api(search_params) - if "query" in search_data and search_data["query"]["search"]: - exact_match = None - for result in search_data["query"]["search"]: - if result["title"].lower() == entity.lower(): - exact_match = result["title"] - break - if not exact_match: - similar_entities = [ - result["title"] - for result in search_data["query"]["search"][:5] - ] + if "query" in search_data and "search" in search_data["query"]: + if search_data["query"]["search"]: + exact_match = None + for result in search_data["query"]["search"]: + if result["title"].lower() == entity.lower(): + exact_match = result["title"] + break + if not exact_match: + similar_entities = [ + result["title"] + for result in search_data["query"]["search"][:5] + ] + return ServiceResponse( + ServiceExecStatus.ERROR, + {"similar_entities": similar_entities}, + ) return ServiceResponse( - ServiceExecStatus.ERROR, - {"similar_entities": similar_entities}, + ServiceExecStatus.SUCCESS, + {"entity": exact_match}, ) - return ServiceResponse( - ServiceExecStatus.SUCCESS, - {"entity": exact_match}, - ) return ServiceResponse( ServiceExecStatus.ERROR, {"error": "Entity not found"}, From e9d540c220d2c43ab424db615ccde4f1b5bd74d8 Mon Sep 17 00:00:00 2001 From: PengfeiHePower Date: Fri, 26 Jul 2024 02:28:18 -0400 Subject: [PATCH 11/16] fix comments --- src/agentscope/service/web/wiki.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/agentscope/service/web/wiki.py b/src/agentscope/service/web/wiki.py index 4d75f3103..eba9e33c9 100644 --- a/src/agentscope/service/web/wiki.py +++ b/src/agentscope/service/web/wiki.py @@ -6,7 +6,7 @@ import re -# import requests + from bs4 import BeautifulSoup from agentscope.service.service_response import ( @@ -16,13 +16,24 @@ from agentscope.utils.common import requests_get -def wiki_api(params: dict) -> dict: +def _wiki_api(params: dict) -> dict: """Scratch information via Wiki API""" url = "https://en.wikipedia.org/w/api.php" return requests_get(url, params=params) def _check_entity_existence(entity: str) -> ServiceResponse: + """ + Function to check if the eneity exists in Wikipedia + If yes, continue searching; + if not, return top 5 similar entities + + Args: + entity (str): searching keywords + + Returns: + + """ search_params = { "action": "query", "list": "search", @@ -30,7 +41,7 @@ def _check_entity_existence(entity: str) -> ServiceResponse: "format": "json", } - search_data = wiki_api(search_params) + search_data = 
_wiki_api(search_params) if "query" in search_data and "search" in search_data["query"]: if search_data["query"]["search"]: @@ -68,7 +79,7 @@ def wiki_get_category_members( Args: entity (str): searching keywords max_members (int): maximum number of members to output - limit_per_request (int): number of members retrieved per quest + limit_per_request (int): number of members retrieved per request Returns: `ServiceResponse`: A dictionary containing `status` and `content`. @@ -122,7 +133,7 @@ def wiki_get_category_members( total_fetched = 0 while total_fetched < max_members: - data = wiki_api(params) + data = _wiki_api(params) batch_members = data["query"]["categorymembers"] members.extend(batch_members) total_fetched += len(batch_members) @@ -277,7 +288,7 @@ def wiki_get_page_content_by_paragraph( "format": "json", } - data = wiki_api(params) + data = _wiki_api(params) page = next(iter(data["query"]["pages"].values())) content = page.get("extract", "No content found.") if content == "No content found.": @@ -350,7 +361,7 @@ def wiki_get_all_wikipedia_tables( "format": "json", } - data = wiki_api(params) + data = _wiki_api(params) raw_html = data["parse"]["text"]["*"] soup = BeautifulSoup(raw_html, "html.parser") @@ -435,7 +446,7 @@ def wiki_get_page_images_with_captions( "titles": entity, "format": "json", } - data = wiki_api(params) + data = _wiki_api(params) page = next(iter(data["query"]["pages"].values())) images = page.get("images", []) if len(images) == 0: @@ -451,7 +462,7 @@ def wiki_get_page_images_with_captions( "iiprop": "url|extmetadata", "format": "json", } - data = wiki_api(params) + data = _wiki_api(params) image_page = next(iter(data["query"]["pages"].values())) if "imageinfo" in image_page: image_info = image_page["imageinfo"][0] From 732db08fe4dca648d090e37d65fe5b8e1d44b92d Mon Sep 17 00:00:00 2001 From: PengfeiHePower Date: Mon, 29 Jul 2024 01:42:53 -0400 Subject: [PATCH 12/16] fix comments --- src/agentscope/service/__init__.py | 1 + src/agentscope/service/web/wiki.py | 162 ++++++++++++++++++++++++++++- tests/wiki_test.py | 2 + 3 files changed, 160 insertions(+), 5 deletions(-) diff --git a/src/agentscope/service/__init__.py b/src/agentscope/service/__init__.py index 0de13f91d..f4af8e325 100644 --- a/src/agentscope/service/__init__.py +++ b/src/agentscope/service/__init__.py @@ -47,6 +47,7 @@ wiki_get_page_content_by_paragraph, wiki_get_all_wikipedia_tables, wiki_get_page_images_with_captions, + wiki_page_retrieval ) diff --git a/src/agentscope/service/web/wiki.py b/src/agentscope/service/web/wiki.py index eba9e33c9..9dc9f10c8 100644 --- a/src/agentscope/service/web/wiki.py +++ b/src/agentscope/service/web/wiki.py @@ -32,7 +32,49 @@ def _check_entity_existence(entity: str) -> ServiceResponse: entity (str): searching keywords Returns: - + `ServiceResponse`: A dictionary containing `status` and `content`. + The `status` attribute is from the ServiceExecStatus enum, + indicating success or error. + If the entity does not exist, `status`=ERROR + and return top-5 similar entities in `content`. + If entity exists, `status`=SUCCESS, return the original entity in `content`. + + Example 1 (entity exists): + .. code-block:: python + + _check_entity_existence('Hello') + + It returns: + .. code-block:: python + + { + 'status': , + 'content': { + 'entity': 'Hello' + } + } + + Example 2 (entity does not exist): + .. code-block:: python + + _check_entity_existence('nihao') + + It returns: + .. 
code-block:: python + + { + 'status': , + 'content': { + 'similar_entities': [ + 'Ni Hao', + 'Ranma ½', + 'Ni Hao, Kai-Lan', + 'List of Ranma ½ episodes', + 'Studio Deen' + ] + } + } + """ search_params = { "action": "query", @@ -101,6 +143,7 @@ def wiki_get_category_members( Example: .. code-block:: python + members = wiki_get_category_members( "Machine_learning", max_members=10 @@ -110,6 +153,7 @@ def wiki_get_category_members( It returns contents: .. code-block:: python + { 'status': , 'content': [ @@ -175,12 +219,14 @@ def wiki_get_infobox( Example: .. code-block:: python + infobox_data = wiki_get_infobox(entity="Python (programming language)") print(infobox_data) It returns content: .. code-block:: python + { 'status': , 'content': {'Paradigm': 'Multi-paradigm : object-oriented ...', @@ -207,7 +253,7 @@ def wiki_get_infobox( "format": "json", } - parse_data = wiki_api(parse_params) + parse_data = _wiki_api(parse_params) if "parse" in parse_data: raw_html = parse_data["parse"]["text"]["*"] @@ -249,7 +295,7 @@ def wiki_get_page_content_by_paragraph( entity (str): search word. max_paragraphs (int, optional): The maximum number of paragraphs to retrieve. - Default is None (retrieve all paragraphs). + Default is 1 (retrieve the first paragraph). Returns: `ServiceResponse`: A dictionary containing `status` and `content`. @@ -263,6 +309,7 @@ def wiki_get_page_content_by_paragraph( Example: .. code-block:: python + wiki_paragraph = wiki_get_page_content_by_paragraph( entity="Python (programming language)", max_paragraphs=1) @@ -270,6 +317,7 @@ def wiki_get_page_content_by_paragraph( It will return content: .. code-block:: python + { 'status': , 'content': ['Python is a high-level...'] @@ -294,11 +342,11 @@ def wiki_get_page_content_by_paragraph( if content == "No content found.": return ServiceResponse(ServiceExecStatus.ERROR, content) - # Split content into paragraphs and filter out headers + # Split content into paragraphs, including headers paragraphs = [ para.strip() for para in content.split("\n\n") - if not re.match(r"^\s*==.*==\s*$", para) and para.strip() != "" + if para.strip() != "" ] # Return the specified number of paragraphs @@ -331,6 +379,7 @@ def wiki_get_all_wikipedia_tables( Example: .. code-block:: python + wiki_table = wiki_get_all_wikipedia_tables( entity="Python (programming language)" ) @@ -338,6 +387,7 @@ def wiki_get_all_wikipedia_tables( It will return content: .. code-block:: python + { 'status': , 'content': [ @@ -417,6 +467,7 @@ def wiki_get_page_images_with_captions( Example: .. code-block:: python + wiki_images = wiki_get_page_images_with_captions( entity="Python (programming language)" ) @@ -425,6 +476,7 @@ def wiki_get_page_images_with_captions( It will return: .. code-block:: python + { 'status': , 'content': [{ @@ -481,3 +533,103 @@ def wiki_get_page_images_with_captions( ) return ServiceResponse(ServiceExecStatus.SUCCESS, image_details) + + +def wiki_page_retrieval( + entity: str, + max_paragraphs: int = 1, +)-> ServiceResponse: + """ + Function to retrive different format + (infobox, paragraphs, tables, images) + of information on the Wikipedia page + + Args: + entity (str): search word. + max_paragraphs (int, optional): + The maximum number of paragraphs to retrieve. + Default is 1 (retrieve the first paragraph). + + Returns: + A dictionary contains retrieved information of different format. + Keys are four formats: `infobox`, `paragraph`, `table`, `image`. + The value for each key is a `ServiceResponse` object containing + `status` and `content`. 
+ The `status` attribute is from the ServiceExecStatus enum, + indicating success or error. + If the entity does not exist, `status`=ERROR, + otherwise `status`=SUCCESS. + The `content` attribute is the retrieved contents + if `status`=SUCCESS. Contents are different for each format. + `infobox`: Information in the InfoBox. + `paragraph`: A list of paragraphs from the Wikipedia page. The number + of paragraphs is determined by arg `max_paragraphs`. + `table`: A list of tables from the Wikipedia page. Each table + is presented as a dict, where key is the + column name and value is the values for each column. + `image`: A list of dict from the Wikipedia page. + Each dict has: + 'title': title of the image + 'url': link to the image + 'caption': caption of the image + + Example: + .. code-block:: python + + wiki_page_retrieval(entity='Hello', max_paragraphs=1) + + It will return: + + .. code-block:: python + + { + 'infobox': { + 'status': , + 'content': None + }, + 'paragraph': { + 'status': , + 'content': ['Hello is a salutation or greeting in the English language. It is first attested in writing from 1826.'] + }, + 'table': { + 'status': , + 'content': None + }, + 'image': { + 'status': , + 'content': [ + { + 'title': 'File:Semi-protection-shackle.svg', + 'url': 'https://upload.wikimedia.org/wikipedia/en/1/1b/Semi-protection-shackle.svg', + 'caption': '
English: Semi-protection lock with grey shackle\n
' + }, + { + 'title': 'File:TelephoneHelloNellie.jpg', + 'url': 'https://upload.wikimedia.org/wikipedia/commons/b/b3/TelephoneHelloNellie.jpg', + 'caption': 'No caption available' + }, + { + 'title': 'File:Wiktionary-logo-en-v2.svg', + 'url': 'https://upload.wikimedia.org/wikipedia/commons/9/99/Wiktionary-logo-en-v2.svg', + 'caption': 'A logo derived from ...' + } + ] + } + } + + """ + + infobox_retrieval = wiki_get_infobox(entity=entity) + paragraph_retrieval = wiki_get_page_content_by_paragraph( + entity=entity, max_paragraphs=max_paragraphs) + table_retrieval = wiki_get_all_wikipedia_tables(entity=entity) + image_retrieval = wiki_get_page_images_with_captions(entity=entity) + + total_retrieval = { + 'infobox': infobox_retrieval, + 'paragraph': paragraph_retrieval, + 'table': table_retrieval, + 'image': image_retrieval + } + + return total_retrieval diff --git a/tests/wiki_test.py b/tests/wiki_test.py index 1279e436f..25390176f 100644 --- a/tests/wiki_test.py +++ b/tests/wiki_test.py @@ -10,6 +10,7 @@ wiki_get_page_content_by_paragraph, wiki_get_all_wikipedia_tables, wiki_get_page_images_with_captions, + wiki_page_retrieval ) from agentscope.service.service_status import ServiceExecStatus @@ -495,6 +496,7 @@ def test_get_page_images_with_captions( mock_get.assert_has_calls(calls, any_order=True) self.assertEqual(results, expected_result) + if __name__ == "__main__": From 8985cd4a2a63d0149b7706fe277bc10e201ec8b3 Mon Sep 17 00:00:00 2001 From: PengfeiHePower Date: Wed, 7 Aug 2024 16:12:31 -0400 Subject: [PATCH 13/16] modify libraries --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index b319de417..4d6c3f169 100644 --- a/setup.py +++ b/setup.py @@ -30,6 +30,8 @@ "pymongo", "pymysql", "feedparser", + "bs4", + "beautifulsoup4", ] doc_requires = [ @@ -81,8 +83,6 @@ "litellm", "psutil", "scipy", - "bs4", - "beautifulsoup4", ] distribute_requires = minimal_requires + rpc_requires From 42ad7cc2fb5e71c1d53505114bb0fa292b2c61cb Mon Sep 17 00:00:00 2001 From: DavdGao Date: Tue, 13 Aug 2024 16:06:42 +0800 Subject: [PATCH 14/16] Rewrite wikipedia search; Modify unittests --- .../en/source/tutorial/204-service.md | 66 +- .../zh_CN/source/tutorial/204-service.md | 4 +- src/agentscope/service/__init__.py | 17 +- src/agentscope/service/web/wiki.py | 635 ------------------ src/agentscope/service/web/wikipedia.py | 161 +++++ tests/wiki_test.py | 443 +----------- 6 files changed, 229 insertions(+), 1097 deletions(-) delete mode 100644 src/agentscope/service/web/wiki.py create mode 100644 src/agentscope/service/web/wikipedia.py diff --git a/docs/sphinx_doc/en/source/tutorial/204-service.md b/docs/sphinx_doc/en/source/tutorial/204-service.md index dad6fa3d9..5c9456dee 100644 --- a/docs/sphinx_doc/en/source/tutorial/204-service.md +++ b/docs/sphinx_doc/en/source/tutorial/204-service.md @@ -12,38 +12,40 @@ AgentScope and how to use them to enhance the capabilities of your agents. The following table outlines the various Service functions by type. These functions can be called using `agentscope.service.{function_name}`. -| Service Scene | Service Function Name | Description | -|-----------------------------|----------------------------|----------------------------------------------------------------------------------------------------------------| -| Code | `execute_python_code` | Execute a piece of Python code, optionally inside a Docker container. | -| Retrieval | `retrieve_from_list` | Retrieve a specific item from a list based on given criteria. 
| -| | `cos_sim` | Compute the cosine similarity between two different embeddings. | -| SQL Query | `query_mysql` | Execute SQL queries on a MySQL database and return results. | -| | `query_sqlite` | Execute SQL queries on a SQLite database and return results. | -| | `query_mongodb` | Perform queries or operations on a MongoDB collection. | -| Text Processing | `summarization` | Summarize a piece of text using a large language model to highlight its main points. | -| Web | `bing_search` | Perform bing search | -| | `google_search` | Perform google search | -| | `arxiv_search` | Perform arXiv search | -| | `download_from_url` | Download file from given URL. | -| | `load_web` | Load and parse the web page of the specified url (currently only supports HTML). | -| | `digest_webpage` | Digest the content of a already loaded web page (currently only supports HTML). -| | `dblp_search_publications` | Search publications in the DBLP database -| | `dblp_search_authors` | Search for author information in the DBLP database | -| | `dblp_search_venues` | Search for venue information in the DBLP database | -| File | `create_file` | Create a new file at a specified path, optionally with initial content. | -| | `delete_file` | Delete a file specified by a file path. | -| | `move_file` | Move or rename a file from one path to another. | -| | `create_directory` | Create a new directory at a specified path. | -| | `delete_directory` | Delete a directory and all its contents. | -| | `move_directory` | Move or rename a directory from one path to another. | -| | `read_text_file` | Read and return the content of a text file. | -| | `write_text_file` | Write text content to a file at a specified path. | -| | `read_json_file` | Read and parse the content of a JSON file. | -| | `write_json_file` | Serialize a Python object to JSON and write to a file. | -| Multi Modality | `dashscope_text_to_image` | Convert text to image using Dashscope API. | -| | `dashscope_image_to_text` | Convert image to text using Dashscope API. | -| | `dashscope_text_to_audio` | Convert text to audio using Dashscope API. | -| *More services coming soon* | | More service functions are in development and will be added to AgentScope to further enhance its capabilities. | +| Service Scene | Service Function Name | Description | +|-----------------------------|--------------------------------|----------------------------------------------------------------------------------------------------------------| +| Code | `execute_python_code` | Execute a piece of Python code, optionally inside a Docker container. | +| Retrieval | `retrieve_from_list` | Retrieve a specific item from a list based on given criteria. | +| | `cos_sim` | Compute the cosine similarity between two different embeddings. | +| SQL Query | `query_mysql` | Execute SQL queries on a MySQL database and return results. | +| | `query_sqlite` | Execute SQL queries on a SQLite database and return results. | +| | `query_mongodb` | Perform queries or operations on a MongoDB collection. | +| Text Processing | `summarization` | Summarize a piece of text using a large language model to highlight its main points. | +| Web | `bing_search` | Perform bing search | +| | `google_search` | Perform google search | +| | `arxiv_search` | Perform arXiv search | +| | `download_from_url` | Download file from given URL. | +| | `load_web` | Load and parse the web page of the specified url (currently only supports HTML). 
| +| | `digest_webpage` | Digest the content of a already loaded web page (currently only supports HTML). +| | `dblp_search_publications` | Search publications in the DBLP database +| | `dblp_search_authors` | Search for author information in the DBLP database | +| | `dblp_search_venues` | Search for venue information in the DBLP database | +| | `wikipedia_search` | Search for the given query in Wikipedia API | +| | `wikipedia_search_categories` | Search categories for the given query in Wikipedia:Category pages. | +| File | `create_file` | Create a new file at a specified path, optionally with initial content. | +| | `delete_file` | Delete a file specified by a file path. | +| | `move_file` | Move or rename a file from one path to another. | +| | `create_directory` | Create a new directory at a specified path. | +| | `delete_directory` | Delete a directory and all its contents. | +| | `move_directory` | Move or rename a directory from one path to another. | +| | `read_text_file` | Read and return the content of a text file. | +| | `write_text_file` | Write text content to a file at a specified path. | +| | `read_json_file` | Read and parse the content of a JSON file. | +| | `write_json_file` | Serialize a Python object to JSON and write to a file. | +| Multi Modality | `dashscope_text_to_image` | Convert text to image using Dashscope API. | +| | `dashscope_image_to_text` | Convert image to text using Dashscope API. | +| | `dashscope_text_to_audio` | Convert text to audio using Dashscope API. | +| *More services coming soon* | | More service functions are in development and will be added to AgentScope to further enhance its capabilities. | About each service function, you can find detailed information in the [API document](https://modelscope.github.io/agentscope/). 
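
For quick reference, a minimal usage sketch of the two new Wikipedia services listed in the table above (function names, signatures, and the `ServiceExecStatus` export are taken from this patch; the printed fields assume the `status`/`content` layout of `ServiceResponse` used throughout this PR):

```python
from agentscope.service import (
    ServiceExecStatus,
    wikipedia_search,
    wikipedia_search_categories,
)

# Full-text search: on success, `content` holds the plain-text page extract.
res = wikipedia_search("Python (programming language)")
if res.status == ServiceExecStatus.SUCCESS:
    print(res.content[:200])

# Category search: on success, `content` is a list of
# {"pageid", "ns", "title"} member dicts.
cats = wikipedia_search_categories("Machine_learning", max_members=10)
if cats.status == ServiceExecStatus.SUCCESS:
    for member in cats.content:
        print(member["title"])
```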
diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md b/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md index 788d2bdad..85e4174fb 100644 --- a/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md +++ b/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md @@ -27,6 +27,8 @@ | | `dblp_search_publications` | 在dblp数据库里搜索文献。 | | `dblp_search_authors` | 在dblp数据库里搜索作者。 | | | `dblp_search_venues` | 在dblp数据库里搜索期刊,会议及研讨会。 | +| | `wikipedia_search` | 在Wikipedia中进行搜索。 | +| | `wikipedia_search_categories` | 在Wikipedia的Category中搜索相关的category。 | | 文件处理 | `create_file` | 在指定路径创建一个新文件,并可选择添加初始内容。 | | | `delete_file` | 删除由文件路径指定的文件。 | | | `move_file` | 将文件从一个路径移动或重命名到另一个路径。 | @@ -39,7 +41,7 @@ | | `write_json_file` | 将 Python 对象序列化为 JSON 并写入到文件。 | | 多模态 | `dashscope_text_to_image` | 使用 DashScope API 将文本生成图片。 | | | `dashscope_image_to_text` | 使用 DashScope API 根据图片生成文字。 | -| | `dashscope_text_to_audio` | 使用 DashScope API 根据文本生成音频。 | +| | `dashscope_text_to_audio` | 使用 DashScope API 根据文本生成音频。 | | *更多服务即将推出* | | 正在开发更多服务功能,并将添加到 AgentScope 以进一步增强其能力。 | 关于详细的参数、预期输入格式、返回类型,请参阅[API文档](https://modelscope.github.io/agentscope/)。 diff --git a/src/agentscope/service/__init__.py b/src/agentscope/service/__init__.py index f4af8e325..185da8a13 100644 --- a/src/agentscope/service/__init__.py +++ b/src/agentscope/service/__init__.py @@ -41,13 +41,9 @@ from .web.web_digest import digest_webpage, load_web, parse_html_to_text from .web.download import download_from_url -from .web.wiki import ( - wiki_get_category_members, - wiki_get_infobox, - wiki_get_page_content_by_paragraph, - wiki_get_all_wikipedia_tables, - wiki_get_page_images_with_captions, - wiki_page_retrieval +from .web.wikipedia import ( + wikipedia_search, + wikipedia_search_categories, ) @@ -95,11 +91,8 @@ def get_help() -> None: "dashscope_image_to_text", "dashscope_text_to_image", "dashscope_text_to_audio", - "wiki_get_category_members", - "wiki_get_infobox", - "wiki_get_page_content_by_paragraph", - "wiki_get_all_wikipedia_tables", - "wiki_get_page_images_with_captions", + "wikipedia_search", + "wikipedia_search_categories", # to be deprecated "ServiceFactory", ] diff --git a/src/agentscope/service/web/wiki.py b/src/agentscope/service/web/wiki.py deleted file mode 100644 index 9dc9f10c8..000000000 --- a/src/agentscope/service/web/wiki.py +++ /dev/null @@ -1,635 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Search contents from WikiPedia, -including texts, categories, infotable, table,... -""" - -import re - - -from bs4 import BeautifulSoup - -from agentscope.service.service_response import ( - ServiceResponse, - ServiceExecStatus, -) -from agentscope.utils.common import requests_get - - -def _wiki_api(params: dict) -> dict: - """Scratch information via Wiki API""" - url = "https://en.wikipedia.org/w/api.php" - return requests_get(url, params=params) - - -def _check_entity_existence(entity: str) -> ServiceResponse: - """ - Function to check if the eneity exists in Wikipedia - If yes, continue searching; - if not, return top 5 similar entities - - Args: - entity (str): searching keywords - - Returns: - `ServiceResponse`: A dictionary containing `status` and `content`. - The `status` attribute is from the ServiceExecStatus enum, - indicating success or error. - If the entity does not exist, `status`=ERROR - and return top-5 similar entities in `content`. - If entity exists, `status`=SUCCESS, return the original entity in `content`. - - Example 1 (entity exists): - .. code-block:: python - - _check_entity_existence('Hello') - - It returns: - .. 
code-block:: python - - { - 'status': , - 'content': { - 'entity': 'Hello' - } - } - - Example 2 (entity does not exist): - .. code-block:: python - - _check_entity_existence('nihao') - - It returns: - .. code-block:: python - - { - 'status': , - 'content': { - 'similar_entities': [ - 'Ni Hao', - 'Ranma ½', - 'Ni Hao, Kai-Lan', - 'List of Ranma ½ episodes', - 'Studio Deen' - ] - } - } - - """ - search_params = { - "action": "query", - "list": "search", - "srsearch": entity, - "format": "json", - } - - search_data = _wiki_api(search_params) - - if "query" in search_data and "search" in search_data["query"]: - if search_data["query"]["search"]: - exact_match = None - for result in search_data["query"]["search"]: - if result["title"].lower() == entity.lower(): - exact_match = result["title"] - break - if not exact_match: - similar_entities = [ - result["title"] - for result in search_data["query"]["search"][:5] - ] - return ServiceResponse( - ServiceExecStatus.ERROR, - {"similar_entities": similar_entities}, - ) - return ServiceResponse( - ServiceExecStatus.SUCCESS, - {"entity": exact_match}, - ) - return ServiceResponse( - ServiceExecStatus.ERROR, - {"error": "Entity not found"}, - ) - - -def wiki_get_category_members( - entity: str, - max_members: int = 1000, - limit_per_request: int = 500, -) -> ServiceResponse: - """Function to retrieve category members from Wikipedia:Category pages - - Args: - entity (str): searching keywords - max_members (int): maximum number of members to output - limit_per_request (int): number of members retrieved per request - - Returns: - `ServiceResponse`: A dictionary containing `status` and `content`. - The `status` attribute is from the ServiceExecStatus enum, - indicating success or error. - If the entity does not exist, `status`=ERROR - and return top-5 similar entities in `content`. - If the entity exists, `status`=SUCCESS, - and return `content` as a list of dicts. - Keys of each dict: - - "pageid": unique page ID for the member - - "ns": namespace for the member, - indicating if the corresponding page is Article/User/... - - "title": title of the member - - Example: - - .. code-block:: python - - members = wiki_get_category_members( - "Machine_learning", - max_members=10 - ) - print(members) - - It returns contents: - - .. code-block:: python - - { - 'status': , - 'content': [ - {'pageid': 67911196, 'ns': 0, - 'title': 'Bayesian learning mechanisms'}, - {'pageid': 233488, 'ns': 0, 'title': 'Machine learning'}, - ...] 
- - } - - """ - params = { - "action": "query", - "list": "categorymembers", - "cmtitle": f"Category:{entity}", - "cmlimit": limit_per_request, # Maximum number of results per request - "format": "json", - } - - members = [] - total_fetched = 0 - - while total_fetched < max_members: - data = _wiki_api(params) - batch_members = data["query"]["categorymembers"] - members.extend(batch_members) - total_fetched += len(batch_members) - - # Check if there is a continuation token - if "continue" in data and total_fetched < max_members: - params["cmcontinue"] = data["continue"]["cmcontinue"] - else: - break - - # If more members were fetched than max_members, trim the list - if len(members) > max_members: - members = members[:max_members] - - if len(members) > 0: - return ServiceResponse(ServiceExecStatus.SUCCESS, members) - - return ServiceResponse(ServiceExecStatus.ERROR, members) - - -def wiki_get_infobox( - entity: str, -) -> ServiceResponse: - """ - Function to retrieve InfoBox from the WikiPedia page - - Args: - entity (str): searching keywords - - Returns: - `ServiceResponse`: A dictionary containing `status` and `content`. - The `status` attribute is from the ServiceExecStatus enum, - indicating success or error. - If the entity does not exist, `status`=ERROR, - and return top-5 similar entities in `content`. - If the entity exists, `status`=SUCCESS, - and return `content` as a dict containing information in the InfoBox. - - Example: - - .. code-block:: python - - infobox_data = wiki_get_infobox(entity="Python (programming language)") - print(infobox_data) - - It returns content: - - .. code-block:: python - - { - 'status': , - 'content': {'Paradigm': 'Multi-paradigm : object-oriented ...', - 'Designed\xa0by': 'Guido van Rossum', - 'Developer': 'Python Software Foundation', - 'First\xa0appeared': '20\xa0February 1991 ...', - 'Stable release': '3.12.4 / 6 June 2024 ; ...', - 'Typing discipline': 'duck , dynamic , strong ; ...', - 'OS': 'Tier 1 : 64-bit Linux , macOS ; 。。。', - 'License': 'Python Software Foundation License', - 'Filename extensions': '.py, .pyw, .pyz, [10] .pyi, ...', - 'Website': 'python.org'} - } - """ - - existence_response = _check_entity_existence(entity) - if existence_response.status == ServiceExecStatus.ERROR: - return existence_response - - parse_params = { - "action": "parse", - "page": entity, - "prop": "text", - "format": "json", - } - - parse_data = _wiki_api(parse_params) - - if "parse" in parse_data: - raw_html = parse_data["parse"]["text"]["*"] - soup = BeautifulSoup(raw_html, "html.parser") - infobox = soup.find("table", {"class": "infobox"}) - - if not infobox: - return ServiceResponse(ServiceExecStatus.ERROR, None) - - infobox_data = {} - for row in infobox.find_all("tr"): - header = row.find("th") - value = row.find("td") - if header and value: - key = header.get_text(" ", strip=True) - val = value.get_text(" ", strip=True) - infobox_data[key] = val - - return ServiceResponse(ServiceExecStatus.SUCCESS, infobox_data) - error_message = parse_data.get("error", {}).get( - "info", - "Unknown error occurred", - ) - return ServiceResponse( - ServiceExecStatus.ERROR, - {"error": error_message}, - ) - - -def wiki_get_page_content_by_paragraph( - entity: str, - max_paragraphs: int = 1, -) -> ServiceResponse: - """ - Retrieve content from a Wikipedia page and split it into paragraphs, - excluding section headers. - - Args: - entity (str): search word. - max_paragraphs (int, optional): - The maximum number of paragraphs to retrieve. 
- Default is 1 (retrieve the first paragraph). - - Returns: - `ServiceResponse`: A dictionary containing `status` and `content`. - The `status` attribute is from the ServiceExecStatus enum, - indicating success or error. - If the entity does not exist, `status`=ERROR, - and return top-5 similar entities in `content`. - If the entity exists, `status`=SUCCESS, - and return `content` as a list of paragraphs from the Wikipedia page. - - Example: - - .. code-block:: python - - wiki_paragraph = wiki_get_page_content_by_paragraph( - entity="Python (programming language)", - max_paragraphs=1) - print(wiki_paragraph) - - It will return content: - .. code-block:: python - - { - 'status': , - 'content': ['Python is a high-level...'] - } - - """ - existence_response = _check_entity_existence(entity) - if existence_response.status == ServiceExecStatus.ERROR: - return existence_response - - params = { - "action": "query", - "prop": "extracts", - "explaintext": True, - "titles": entity, - "format": "json", - } - - data = _wiki_api(params) - page = next(iter(data["query"]["pages"].values())) - content = page.get("extract", "No content found.") - if content == "No content found.": - return ServiceResponse(ServiceExecStatus.ERROR, content) - - # Split content into paragraphs, including headers - paragraphs = [ - para.strip() - for para in content.split("\n\n") - if para.strip() != "" - ] - - # Return the specified number of paragraphs - if max_paragraphs: - paragraphs = paragraphs[:max_paragraphs] - - return ServiceResponse(ServiceExecStatus.SUCCESS, paragraphs) - - -def wiki_get_all_wikipedia_tables( - entity: str, -) -> ServiceResponse: - """ - Retrieve tables on the Wikipedia page - - Args: - entity (str): search word. - - Returns: - `ServiceResponse`: A dictionary containing `status` and `content`. - The `status` attribute is from the ServiceExecStatus enum, - indicating success or error. - If the entity does not exist, `status`=ERROR, - and return top-5 similar entities in `content`. - If the entity exists, `status`=SUCCESS, - and return `content` as a list of tables from the Wikipedia page. - Each table is presented as a dict, - where key is the column name and value is the values for each column. - - Example: - - .. code-block:: python - - wiki_table = wiki_get_all_wikipedia_tables( - entity="Python (programming language)" - ) - print(wiki_table) - - It will return content: - .. code-block:: python - - { - 'status': , - 'content': [ - { - 'Type': ['bool','bytearray',...], - 'Mutability': ['immutable','mutable',...], - ... 
- } - ] - } - - """ - existence_response = _check_entity_existence(entity) - if existence_response.status == ServiceExecStatus.ERROR: - return existence_response - - params = { - "action": "parse", - "page": entity, - "prop": "text", - "format": "json", - } - - data = _wiki_api(params) - raw_html = data["parse"]["text"]["*"] - - soup = BeautifulSoup(raw_html, "html.parser") - tables = soup.find_all("table", {"class": "wikitable"}) - - if not tables: - return ServiceResponse(ServiceExecStatus.ERROR, None) - - all_tables_data = [] - for _, table in enumerate(tables): - headers = [ - header.get_text(strip=True) for header in table.find_all("th") - ] - table_dict = {header: [] for header in headers} - - for row in table.find_all("tr")[1:]: # Skip the header row - cells = row.find_all(["td", "th"]) - if len(cells) == len( - headers, - ): # Ensure the row has the correct number of cells - for i, cell in enumerate(cells): - table_dict[headers[i]].append( - cell.get_text(strip=True), - ) - - all_tables_data.append(table_dict) - - return ServiceResponse(ServiceExecStatus.SUCCESS, all_tables_data) - - -def wiki_get_page_images_with_captions( - entity: str, -) -> ServiceResponse: - """ - Function to retrive images and details on the Wikipedia page - - Args: - entity (str): search word. - - Returns: - `ServiceResponse`: A dictionary containing `status` and `content`. - The `status` attribute is from the ServiceExecStatus enum, - indicating success or error. - If the entity does not exist, `status`=ERROR, - and return top-5 similar entities in `content`. - If the entity exists, `status`=SUCCESS, - and return the `content` as a list of dict from the Wikipedia page. - - Each dict has: - 'title': title of the image - 'url': link to the image - 'caption': caption of the image - - Example: - .. code-block:: python - - wiki_images = wiki_get_page_images_with_captions( - entity="Python (programming language)" - ) - print(wiki_images) - - It will return: - - .. code-block:: python - - { - 'status': , - 'content': [{ - 'title': 'File:Commons-logo.svg', - 'url': 'https://upload.wikimedia.org...', - 'caption': 'The Wikimedia Commons logo,...'}, - ... 
- ] - } - """ - - existence_response = _check_entity_existence(entity) - if existence_response.status == ServiceExecStatus.ERROR: - return existence_response - - params = { - "action": "query", - "prop": "images", - "titles": entity, - "format": "json", - } - data = _wiki_api(params) - page = next(iter(data["query"]["pages"].values())) - images = page.get("images", []) - if len(images) == 0: - return ServiceResponse(ServiceExecStatus.ERROR, None) - - image_details = [] - for image in images: - image_title = image["title"] - params = { - "action": "query", - "titles": image_title, - "prop": "imageinfo", - "iiprop": "url|extmetadata", - "format": "json", - } - data = _wiki_api(params) - image_page = next(iter(data["query"]["pages"].values())) - if "imageinfo" in image_page: - image_info = image_page["imageinfo"][0] - image_url = image_info.get("url", "") - extmetadata = image_info.get("extmetadata", {}) - caption = extmetadata.get("ImageDescription", {}).get( - "value", - "No caption available", - ) - image_details.append( - { - "title": image_title, - "url": image_url, - "caption": caption, - }, - ) - - return ServiceResponse(ServiceExecStatus.SUCCESS, image_details) - - -def wiki_page_retrieval( - entity: str, - max_paragraphs: int = 1, -)-> ServiceResponse: - """ - Function to retrive different format - (infobox, paragraphs, tables, images) - of information on the Wikipedia page - - Args: - entity (str): search word. - max_paragraphs (int, optional): - The maximum number of paragraphs to retrieve. - Default is 1 (retrieve the first paragraph). - - Returns: - A dictionary contains retrieved information of different format. - Keys are four formats: `infobox`, `paragraph`, `table`, `image`. - The value for each key is a `ServiceResponse` object containing - `status` and `content`. - The `status` attribute is from the ServiceExecStatus enum, - indicating success or error. - If the entity does not exist, `status`=ERROR, - otherwise `status`=SUCCESS. - The `content` attribute is the retrieved contents - if `status`=SUCCESS. Contents are different for each format. - `infobox`: Information in the InfoBox. - `paragraph`: A list of paragraphs from the Wikipedia page. The number - of paragraphs is determined by arg `max_paragraphs`. - `table`: A list of tables from the Wikipedia page. Each table - is presented as a dict, where key is the - column name and value is the values for each column. - `image`: A list of dict from the Wikipedia page. - Each dict has: - 'title': title of the image - 'url': link to the image - 'caption': caption of the image - - Example: - .. code-block:: python - - wiki_page_retrieval(entity='Hello', max_paragraphs=1) - - It will return: - - .. code-block:: python - - { - 'infobox': { - 'status': , - 'content': None - }, - 'paragraph': { - 'status': , - 'content': ['Hello is a salutation or greeting in the English language. It is first attested in writing from 1826.'] - }, - 'table': { - 'status': , - 'content': None - }, - 'image': { - 'status': , - 'content': [ - { - 'title': 'File:Semi-protection-shackle.svg', - 'url': 'https://upload.wikimedia.org/wikipedia/en/1/1b/Semi-protection-shackle.svg', - 'caption': '
English: Semi-protection lock with grey shackle\n
' - }, - { - 'title': 'File:TelephoneHelloNellie.jpg', - 'url': 'https://upload.wikimedia.org/wikipedia/commons/b/b3/TelephoneHelloNellie.jpg', - 'caption': 'No caption available' - }, - { - 'title': 'File:Wiktionary-logo-en-v2.svg', - 'url': 'https://upload.wikimedia.org/wikipedia/commons/9/99/Wiktionary-logo-en-v2.svg', - 'caption': 'A logo derived from ...' - } - ] - } - } - - """ - - infobox_retrieval = wiki_get_infobox(entity=entity) - paragraph_retrieval = wiki_get_page_content_by_paragraph( - entity=entity, max_paragraphs=max_paragraphs) - table_retrieval = wiki_get_all_wikipedia_tables(entity=entity) - image_retrieval = wiki_get_page_images_with_captions(entity=entity) - - total_retrieval = { - 'infobox': infobox_retrieval, - 'paragraph': paragraph_retrieval, - 'table': table_retrieval, - 'image': image_retrieval - } - - return total_retrieval diff --git a/src/agentscope/service/web/wikipedia.py b/src/agentscope/service/web/wikipedia.py new file mode 100644 index 000000000..ea10a8f18 --- /dev/null +++ b/src/agentscope/service/web/wikipedia.py @@ -0,0 +1,161 @@ +# -*- coding: utf-8 -*- +""" +Search contents from WikiPedia +""" +import requests + +from ..service_response import ( + ServiceResponse, + ServiceExecStatus, +) + + +def wikipedia_search_categories( + query: str, + max_members: int = 1000, +) -> ServiceResponse: + """Retrieve categories from Wikipedia:Category pages. + + Args: + query (str): + The given searching keywords + max_members (int): + The maximum number of members to output + + Returns: + `ServiceResponse`: A response that contains the execution status and + returned content. In the returned content, the meanings of keys: + - "pageid": unique page ID for the member + - "ns": namespace for the member + - "title": title of the member + + Example: + + .. code-block:: python + + members = wiki_get_category_members( + "Machine_learning", + max_members=10 + ) + print(members) + + It returns contents: + + .. code-block:: python + + { + 'status': , + 'content': [ + { + 'pageid': 67911196, + 'ns': 0, + 'title': 'Bayesian learning mechanisms' + }, + { + 'pageid': 233488, + 'ns': 0, + 'title': 'Machine learning' + }, + # ... + ] + } + + """ + url = "https://en.wikipedia.org/w/api.php" + limit_per_request: int = 500 + params = { + "action": "query", + "list": "categorymembers", + "cmtitle": f"Category:{query}", + "cmlimit": limit_per_request, # Maximum number of results per request + "format": "json", + } + + members = [] + total_fetched = 0 + + try: + while total_fetched < max_members: + response = requests.get(url, params=params, timeout=20) + response.raise_for_status() + + data = response.json() + + batch_members = data["query"]["categorymembers"] + members.extend(batch_members) + total_fetched += len(batch_members) + + # Check if there is a continuation token + if "continue" in data and total_fetched < max_members: + params["cmcontinue"] = data["continue"]["cmcontinue"] + else: + break + + except Exception as e: + return ServiceResponse( + status=ServiceExecStatus.ERROR, + content=str(e), + ) + + # If more members were fetched than max_members, trim the list + if len(members) > max_members: + members = members[:max_members] + + if len(members) > 0: + return ServiceResponse(ServiceExecStatus.SUCCESS, members) + + return ServiceResponse(ServiceExecStatus.ERROR, members) + + +def wikipedia_search( # pylint: disable=C0301 + query: str, +) -> ServiceResponse: + """Search the given query in Wikipedia. 
Note the returned text maybe related entities, which means you should adjust your query as needed and search again. + + Note the returned text maybe too long for some llm, it's recommended to + summarize the returned text first. + + Args: + query (`str`): + The searched query in wikipedia. + + Return: + `ServiceResponse`: A response that contains the execution status and + returned content. + """ # noqa + + url = "https://en.wikipedia.org/w/api.php" + params = { + "action": "query", + "titles": query, + "prop": "extracts", + "explaintext": True, + "format": "json", + } + try: + response = requests.get(url, params=params, timeout=20) + response.raise_for_status() + data = response.json() + + # Combine into a text + text = [] + for page in data["query"]["pages"].values(): + if "extract" in page: + text.append(page["extract"]) + else: + return ServiceResponse( + status=ServiceExecStatus.ERROR, + content="No content found", + ) + + content = "\n".join(text) + return ServiceResponse( + status=ServiceExecStatus.SUCCESS, + content=content, + ) + + except Exception as e: + return ServiceResponse( + status=ServiceExecStatus.ERROR, + content=str(e), + ) diff --git a/tests/wiki_test.py b/tests/wiki_test.py index 25390176f..1ed4fe375 100644 --- a/tests/wiki_test.py +++ b/tests/wiki_test.py @@ -3,23 +3,19 @@ import unittest from unittest.mock import Mock, patch, MagicMock -from agentscope.service import ServiceResponse from agentscope.service import ( - wiki_get_category_members, - wiki_get_infobox, - wiki_get_page_content_by_paragraph, - wiki_get_all_wikipedia_tables, - wiki_get_page_images_with_captions, - wiki_page_retrieval + wikipedia_search, + wikipedia_search_categories, + ServiceResponse, + ServiceExecStatus, ) -from agentscope.service.service_status import ServiceExecStatus -class TestWiki(unittest.TestCase): +class TestWikipedia(unittest.TestCase): """ExampleTest for a unit test.""" @patch("agentscope.utils.common.requests.get") - def test_wiki_get_category_members( + def test_wikipedia_search_categories( self, mock_get: MagicMock, ) -> None: @@ -52,8 +48,7 @@ def test_wiki_get_category_members( mock_get.return_value = mock_response test_entity = "Test" - max_members = 1 - limit_per_request = 100 + limit_per_request = 500 params = { "action": "query", "list": "categorymembers", @@ -62,14 +57,12 @@ def test_wiki_get_category_members( "format": "json", } - results = wiki_get_category_members( - entity=test_entity, - max_members=max_members, - limit_per_request=limit_per_request, - ) + results = wikipedia_search_categories(query=test_entity) + mock_get.assert_called_once_with( "https://en.wikipedia.org/w/api.php", params=params, + timeout=20, ) self.assertEqual( @@ -78,426 +71,42 @@ def test_wiki_get_category_members( ) @patch("agentscope.utils.common.requests.get") - def test_wiki_get_infobox( - self, - mock_get: MagicMock, - ) -> None: - """Test get_infobox with different parameters and responses""" - - # Mock responses for search query - mock_response_search = Mock() - mock_dict_search = { - "query": { - "search": [ - {"title": "Test"}, - ], - }, - } - - # Mock responses for parse query - mock_response_parse = Mock() - mock_dict_parse = { - "parse": { - "title": "Test", - "pageid": 20, - "text": { - "*": """ - - - - - - - - - -
<tr><th>Column1</th><td>Data1</td></tr>
<tr><th>Column2</th><td>Data2</td></tr>
- """, - }, - }, - } - - expected_result = ServiceResponse( - status=ServiceExecStatus.SUCCESS, - content={ - "Column1": "Data1", - "Column2": "Data2", - }, - ) - - mock_response_search.json.return_value = mock_dict_search - mock_response_parse.json.return_value = mock_dict_parse - mock_get.side_effect = [mock_response_search, mock_response_parse] - - test_entity = "Test" - - results = wiki_get_infobox(entity=test_entity) - - # Define expected calls - calls = [ - unittest.mock.call( - "https://en.wikipedia.org/w/api.php", - params={ - "action": "query", - "list": "search", - "srsearch": test_entity, - "format": "json", - }, - ), - unittest.mock.call( - "https://en.wikipedia.org/w/api.php", - params={ - "action": "parse", - "page": test_entity, - "prop": "text", - "format": "json", - }, - ), - ] - - mock_get.assert_has_calls(calls, any_order=True) - - self.assertEqual(results, expected_result) - - @patch("agentscope.utils.common.requests.get") - def test_wiki_get_page_content_by_paragraph( + def test_wikipedia_search( self, mock_get: MagicMock, ) -> None: """Test get_page_content_by_paragraph""" - # Mock responses for search query - mock_response_search = Mock() - mock_dict_search = { - "query": { - "search": [ - {"title": "Test"}, - ], - }, - } - # Mock responses for extract query - mock_response_extract = Mock() - mock_dict_extract = { - "query": { - "pages": { - "20": { - "pageid": 20, - "title": "Test", - "extract": """ - This is the first paragraph. - - This is the second paragraph. - - == Section Header == - - This is the third paragraph under a section header. - """, - }, - }, - }, - } - - expected_result = ServiceResponse( - status=ServiceExecStatus.SUCCESS, - content=[ - "This is the first paragraph.", - "This is the second paragraph.", - ], - ) - - mock_response_search.json.return_value = mock_dict_search - mock_response_extract.json.return_value = mock_dict_extract - mock_get.side_effect = [mock_response_search, mock_response_extract] - - test_entity = "Test" - - results = wiki_get_page_content_by_paragraph( - entity=test_entity, - max_paragraphs=2, - ) - - # Define expected calls - params1 = { - "action": "query", - "list": "search", - "srsearch": test_entity, - "format": "json", - } - params2 = { - "action": "query", - "prop": "extracts", - "explaintext": True, - "titles": test_entity, - "format": "json", - } - - calls = [ - unittest.mock.call( - "https://en.wikipedia.org/w/api.php", - params=params1, - ), - unittest.mock.call( - "https://en.wikipedia.org/w/api.php", - params=params2, - ), - ] - - mock_get.assert_has_calls(calls, any_order=True) - - self.assertEqual(results, expected_result) - - @patch("agentscope.utils.common.requests.get") - def test_wiki_get_all_wikipedia_tables( - self, - mock_get: MagicMock, - ) -> None: - """Test get_all_wikipedia_tables""" - - # Mock responses for search query - mock_response_search = Mock() - mock_dict_search = { - "query": { - "search": [ - {"title": "Test"}, - ], - }, - } - - # Mock responses for parse query - mock_response_parse = Mock() - mock_dict_parse = { - "parse": { - "title": "Test", - "pageid": 20, - "text": { - "*": """ - - - - - - - - - - - - - -
<tr><th>Header1</th><th>Header2</th></tr>
<tr><td>Row1Col1</td><td>Row1Col2</td></tr>
<tr><td>Row2Col1</td><td>Row2Col2</td></tr>
- """, - }, - }, - } - - expected_result = ServiceResponse( - status=ServiceExecStatus.SUCCESS, - content=[ - { - "Header1": ["Row1Col1", "Row2Col1"], - "Header2": ["Row1Col2", "Row2Col2"], - }, - ], - ) - - mock_response_search.json.return_value = mock_dict_search - mock_response_parse.json.return_value = mock_dict_parse - mock_get.side_effect = [mock_response_search, mock_response_parse] - - test_entity = "Test" - - results = wiki_get_all_wikipedia_tables(entity=test_entity) - - # Define expected calls - params1 = { - "action": "query", - "list": "search", - "srsearch": test_entity, - "format": "json", - } - params2 = { - "action": "parse", - "page": test_entity, - "prop": "text", - "format": "json", - } - - calls = [ - unittest.mock.call( - "https://en.wikipedia.org/w/api.php", - params=params1, - ), - unittest.mock.call( - "https://en.wikipedia.org/w/api.php", - params=params2, - ), - ] - - mock_get.assert_has_calls(calls, any_order=True) - - self.assertEqual(results, expected_result) - - @patch("agentscope.utils.common.requests.get") - def test_get_page_images_with_captions( - self, - mock_get: MagicMock, - ) -> None: - """Test get_page_images_with_captions""" - - # Mock responses for search query - mock_response_search = Mock() - mock_dict_search = { - "query": { - "search": [ - {"title": "Test"}, - ], - }, - } - - # Mock responses for images query - mock_response_images = Mock() - mock_dict_images = { + mock_response = Mock() + mock_dict = { "query": { "pages": { "20": { "pageid": 20, "title": "Test", - "images": [ - {"title": "Image1"}, - {"title": "Image2"}, - ], + "extract": "This is the first paragraph.", }, - }, - }, - } - - # Mock responses for image details query - mock_response_image1 = Mock() - mock_dict_image1 = { - "query": { - "pages": { - "30": { + "21": { "pageid": 30, - "imageinfo": [ - { - "url": "http://example.com/image1.jpg", - "extmetadata": { - "ImageDescription": { - "value": "Caption for image 1", - }, - }, - }, - ], + "title": "Test", + "extract": "This is the second paragraph.", }, }, }, } - mock_response_image2 = Mock() - mock_dict_image2 = { - "query": { - "pages": { - "31": { - "pageid": 31, - "imageinfo": [ - { - "url": "http://example.com/image2.jpg", - "extmetadata": { - "ImageDescription": { - "value": "Caption for image 2", - }, - }, - }, - ], - }, - }, - }, - } + mock_response.json.return_value = mock_dict + mock_get.return_value = mock_response - expected_result = ServiceResponse( + expected_response = ServiceResponse( status=ServiceExecStatus.SUCCESS, - content=[ - { - "title": "Image1", - "url": "http://example.com/image1.jpg", - "caption": "Caption for image 1", - }, - { - "title": "Image2", - "url": "http://example.com/image2.jpg", - "caption": "Caption for image 2", - }, - ], - ) - - mock_response_search.json.return_value = mock_dict_search - mock_response_images.json.return_value = mock_dict_images - mock_response_image1.json.return_value = mock_dict_image1 - mock_response_image2.json.return_value = mock_dict_image2 - mock_get.side_effect = [ - mock_response_search, - mock_response_images, - mock_response_image1, - mock_response_image2, - ] - - test_entity = "Test" - - results = wiki_get_page_images_with_captions(entity=test_entity) - - # Define expected calls - params1 = { - "action": "query", - "list": "search", - "srsearch": test_entity, - "format": "json", - } - params2 = { - "action": "query", - "prop": "images", - "titles": test_entity, - "format": "json", - } - params3_image1 = { - "action": "query", - "titles": "Image1", - "prop": 
"imageinfo", - "iiprop": "url|extmetadata", - "format": "json", - } - params4_image2 = { - "action": "query", - "titles": "Image2", - "prop": "imageinfo", - "iiprop": "url|extmetadata", - "format": "json", - } - - calls = [ - unittest.mock.call( - "https://en.wikipedia.org/w/api.php", - params=params1, - ), - unittest.mock.call( - "https://en.wikipedia.org/w/api.php", - params=params2, + content=( + "This is the first paragraph.\n" + "This is the second paragraph." ), - unittest.mock.call( - "https://en.wikipedia.org/w/api.php", - params=params3_image1, - ), - unittest.mock.call( - "https://en.wikipedia.org/w/api.php", - params=params4_image2, - ), - ] - - mock_get.assert_has_calls(calls, any_order=True) - - self.assertEqual(results, expected_result) - + ) + response = wikipedia_search("Test") -if __name__ == "__main__": - unittest.main() + self.assertEqual(expected_response, response) From 915622145ce3b1d673bc8194db7dda09cfd1d7f8 Mon Sep 17 00:00:00 2001 From: DavdGao Date: Tue, 13 Aug 2024 17:15:25 +0800 Subject: [PATCH 15/16] Withdraw to solve conflict --- .../en/source/tutorial/204-service.md | 66 +++++++++---------- .../zh_CN/source/tutorial/204-service.md | 4 +- 2 files changed, 33 insertions(+), 37 deletions(-) diff --git a/docs/sphinx_doc/en/source/tutorial/204-service.md b/docs/sphinx_doc/en/source/tutorial/204-service.md index 5c9456dee..dad6fa3d9 100644 --- a/docs/sphinx_doc/en/source/tutorial/204-service.md +++ b/docs/sphinx_doc/en/source/tutorial/204-service.md @@ -12,40 +12,38 @@ AgentScope and how to use them to enhance the capabilities of your agents. The following table outlines the various Service functions by type. These functions can be called using `agentscope.service.{function_name}`. -| Service Scene | Service Function Name | Description | -|-----------------------------|--------------------------------|----------------------------------------------------------------------------------------------------------------| -| Code | `execute_python_code` | Execute a piece of Python code, optionally inside a Docker container. | -| Retrieval | `retrieve_from_list` | Retrieve a specific item from a list based on given criteria. | -| | `cos_sim` | Compute the cosine similarity between two different embeddings. | -| SQL Query | `query_mysql` | Execute SQL queries on a MySQL database and return results. | -| | `query_sqlite` | Execute SQL queries on a SQLite database and return results. | -| | `query_mongodb` | Perform queries or operations on a MongoDB collection. | -| Text Processing | `summarization` | Summarize a piece of text using a large language model to highlight its main points. | -| Web | `bing_search` | Perform bing search | -| | `google_search` | Perform google search | -| | `arxiv_search` | Perform arXiv search | -| | `download_from_url` | Download file from given URL. | -| | `load_web` | Load and parse the web page of the specified url (currently only supports HTML). | -| | `digest_webpage` | Digest the content of a already loaded web page (currently only supports HTML). -| | `dblp_search_publications` | Search publications in the DBLP database -| | `dblp_search_authors` | Search for author information in the DBLP database | -| | `dblp_search_venues` | Search for venue information in the DBLP database | -| | `wikipedia_search` | Search for the given query in Wikipedia API | -| | `wikipedia_search_categories` | Search categories for the given query in Wikipedia:Category pages. 
| -| File | `create_file` | Create a new file at a specified path, optionally with initial content. | -| | `delete_file` | Delete a file specified by a file path. | -| | `move_file` | Move or rename a file from one path to another. | -| | `create_directory` | Create a new directory at a specified path. | -| | `delete_directory` | Delete a directory and all its contents. | -| | `move_directory` | Move or rename a directory from one path to another. | -| | `read_text_file` | Read and return the content of a text file. | -| | `write_text_file` | Write text content to a file at a specified path. | -| | `read_json_file` | Read and parse the content of a JSON file. | -| | `write_json_file` | Serialize a Python object to JSON and write to a file. | -| Multi Modality | `dashscope_text_to_image` | Convert text to image using Dashscope API. | -| | `dashscope_image_to_text` | Convert image to text using Dashscope API. | -| | `dashscope_text_to_audio` | Convert text to audio using Dashscope API. | -| *More services coming soon* | | More service functions are in development and will be added to AgentScope to further enhance its capabilities. | +| Service Scene | Service Function Name | Description | +|-----------------------------|----------------------------|----------------------------------------------------------------------------------------------------------------| +| Code | `execute_python_code` | Execute a piece of Python code, optionally inside a Docker container. | +| Retrieval | `retrieve_from_list` | Retrieve a specific item from a list based on given criteria. | +| | `cos_sim` | Compute the cosine similarity between two different embeddings. | +| SQL Query | `query_mysql` | Execute SQL queries on a MySQL database and return results. | +| | `query_sqlite` | Execute SQL queries on a SQLite database and return results. | +| | `query_mongodb` | Perform queries or operations on a MongoDB collection. | +| Text Processing | `summarization` | Summarize a piece of text using a large language model to highlight its main points. | +| Web | `bing_search` | Perform bing search | +| | `google_search` | Perform google search | +| | `arxiv_search` | Perform arXiv search | +| | `download_from_url` | Download file from given URL. | +| | `load_web` | Load and parse the web page of the specified url (currently only supports HTML). | +| | `digest_webpage` | Digest the content of a already loaded web page (currently only supports HTML). +| | `dblp_search_publications` | Search publications in the DBLP database +| | `dblp_search_authors` | Search for author information in the DBLP database | +| | `dblp_search_venues` | Search for venue information in the DBLP database | +| File | `create_file` | Create a new file at a specified path, optionally with initial content. | +| | `delete_file` | Delete a file specified by a file path. | +| | `move_file` | Move or rename a file from one path to another. | +| | `create_directory` | Create a new directory at a specified path. | +| | `delete_directory` | Delete a directory and all its contents. | +| | `move_directory` | Move or rename a directory from one path to another. | +| | `read_text_file` | Read and return the content of a text file. | +| | `write_text_file` | Write text content to a file at a specified path. | +| | `read_json_file` | Read and parse the content of a JSON file. | +| | `write_json_file` | Serialize a Python object to JSON and write to a file. | +| Multi Modality | `dashscope_text_to_image` | Convert text to image using Dashscope API. 
| +| | `dashscope_image_to_text` | Convert image to text using Dashscope API. | +| | `dashscope_text_to_audio` | Convert text to audio using Dashscope API. | +| *More services coming soon* | | More service functions are in development and will be added to AgentScope to further enhance its capabilities. | About each service function, you can find detailed information in the [API document](https://modelscope.github.io/agentscope/). diff --git a/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md b/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md index 85e4174fb..788d2bdad 100644 --- a/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md +++ b/docs/sphinx_doc/zh_CN/source/tutorial/204-service.md @@ -27,8 +27,6 @@ | | `dblp_search_publications` | 在dblp数据库里搜索文献。 | | `dblp_search_authors` | 在dblp数据库里搜索作者。 | | | `dblp_search_venues` | 在dblp数据库里搜索期刊,会议及研讨会。 | -| | `wikipedia_search` | 在Wikipedia中进行搜索。 | -| | `wikipedia_search_categories` | 在Wikipedia的Category中搜索相关的category。 | | 文件处理 | `create_file` | 在指定路径创建一个新文件,并可选择添加初始内容。 | | | `delete_file` | 删除由文件路径指定的文件。 | | | `move_file` | 将文件从一个路径移动或重命名到另一个路径。 | @@ -41,7 +39,7 @@ | | `write_json_file` | 将 Python 对象序列化为 JSON 并写入到文件。 | | 多模态 | `dashscope_text_to_image` | 使用 DashScope API 将文本生成图片。 | | | `dashscope_image_to_text` | 使用 DashScope API 根据图片生成文字。 | -| | `dashscope_text_to_audio` | 使用 DashScope API 根据文本生成音频。 | +| | `dashscope_text_to_audio` | 使用 DashScope API 根据文本生成音频。 | | *更多服务即将推出* | | 正在开发更多服务功能,并将添加到 AgentScope 以进一步增强其能力。 | 关于详细的参数、预期输入格式、返回类型,请参阅[API文档](https://modelscope.github.io/agentscope/)。 From bbcfe661853664f50e80e03ca34ee077c1677f3f Mon Sep 17 00:00:00 2001 From: DavdGao Date: Tue, 13 Aug 2024 17:27:25 +0800 Subject: [PATCH 16/16] remove the unnecessary change --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 29158f34b..8966431d4 100644 --- a/setup.py +++ b/setup.py @@ -30,9 +30,9 @@ "docker", "pymongo", "pymysql", - "feedparser", "bs4", "beautifulsoup4", + "feedparser", ] doc_requires = [
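
For readers trying out the two services that survive the withdrawals above, `wikipedia_search` and `wikipedia_search_categories`, the sketch below shows one plausible way to call them through `agentscope.service`. It is a minimal sketch, not part of the patches themselves: it assumes only the signatures and `ServiceResponse` fields the patches define, plus live access to the Wikipedia API.

.. code-block:: python

    # Minimal usage sketch for the services added in this patch series.
    # Assumes only what the patches define: both functions return a
    # ServiceResponse carrying a `status` (ServiceExecStatus) and `content`.
    from agentscope.service import (
        ServiceExecStatus,
        wikipedia_search,
        wikipedia_search_categories,
    )

    # Fetch the plain-text extract of a page. The docstring warns the text
    # may be long, so consider summarizing it before passing it to an LLM.
    page = wikipedia_search("Machine learning")
    if page.status == ServiceExecStatus.SUCCESS:
        print(page.content[:200])
    else:
        print("Search failed:", page.content)

    # List up to 10 members of a Wikipedia:Category page; each member is a
    # dict with "pageid", "ns", and "title" keys.
    members = wikipedia_search_categories("Machine_learning", max_members=10)
    if members.status == ServiceExecStatus.SUCCESS:
        for member in members.content:
            print(member["pageid"], member["title"])

Because `wikipedia_search_categories` follows the API's `cmcontinue` continuation token internally, `max_members` is the only knob a caller needs to bound the result size.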