From a613b1b7f55184dc67d330760c476254288a7ef7 Mon Sep 17 00:00:00 2001
From: Silvano Cerza
Date: Tue, 29 Aug 2023 17:54:30 +0200
Subject: [PATCH] Format crawler.py

---
 haystack/nodes/connector/crawler.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/haystack/nodes/connector/crawler.py b/haystack/nodes/connector/crawler.py
index b6ea38d797..3e66c1ab3b 100644
--- a/haystack/nodes/connector/crawler.py
+++ b/haystack/nodes/connector/crawler.py
@@ -58,10 +58,10 @@ def __init__(
         Init object with basic params for crawling (can be overwritten later).
 
         :param urls: List of http(s) address(es) (can also be supplied later when calling crawl())
-        :param crawler_depth: How many sublinks to follow from the initial list of URLs. Can be any integer >= 0.
-            For example:
-            0: Only initial list of urls.
-            1: Follow links found on the initial URLs (but no further).
+        :param crawler_depth: How many sublinks to follow from the initial list of URLs. Can be any integer >= 0.
+            For example:
+            0: Only initial list of urls.
+            1: Follow links found on the initial URLs (but no further).
             2: Additionally follow links found on the second-level URLs.
         :param filter_urls: Optional list of regular expressions that the crawled URLs must comply with.
             All URLs not matching at least one of the regular expressions will be dropped.
@@ -155,10 +155,10 @@ def crawl(
         If no parameters are provided to this method, the instance attributes that were passed during __init__ will be used.
 
         :param urls: List of http addresses or single http address
-        :param crawler_depth: How many sublinks to follow from the initial list of URLs. Can be any integer >= 0.
-            For example:
-            0: Only initial list of urls.
-            1: Follow links found on the initial URLs (but no further).
+        :param crawler_depth: How many sublinks to follow from the initial list of URLs. Can be any integer >= 0.
+            For example:
+            0: Only initial list of urls.
+            1: Follow links found on the initial URLs (but no further).
             2: Additionally follow links found on the second-level URLs.
         :param filter_urls: Optional list of regular expressions that the crawled URLs must comply with.
             All URLs not matching at least one of the regular expressions will be dropped.
@@ -378,10 +378,10 @@ def run(  # type: ignore
 
         :param output_dir: Path for the directory to store files
         :param urls: List of http addresses or single http address
-        :param crawler_depth: How many sublinks to follow from the initial list of URLs. Can be any integer >= 0.
-            For example:
-            0: Only initial list of urls.
-            1: Follow links found on the initial URLs (but no further).
+        :param crawler_depth: How many sublinks to follow from the initial list of URLs. Can be any integer >= 0.
+            For example:
+            0: Only initial list of urls.
+            1: Follow links found on the initial URLs (but no further).
             2: Additionally follow links found on the second-level URLs.
         :param filter_urls: Optional list of regular expressions that the crawled URLs must comply with.
            All URLs not matching at least one of the regular expressions will be dropped.
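
The docstrings touched by this patch describe the Crawler's depth and URL-filtering behavior. A minimal usage sketch under those docstrings, assuming the Haystack 1.x API visible in this diff (only the parameter names urls, crawler_depth, and filter_urls come from the patch; the example URL, regex, and return handling are illustrative placeholders):

    from haystack.nodes import Crawler

    # Hypothetical example values; only the parameter names are taken from the diff above.
    crawler = Crawler(
        urls=["https://haystack.deepset.ai"],       # initial list of URLs to crawl
        crawler_depth=1,                            # 1: follow links found on the initial URLs, but no further
        filter_urls=["haystack\\.deepset\\.ai"],    # drop URLs matching none of these regexes
    )

    # Per the crawl() docstring, calling it without arguments reuses the
    # attributes passed to __init__. The exact return type varies by
    # Haystack version, so treat this as a sketch rather than a contract.
    results = crawler.crawl()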