diff --git a/lib/ProductOpener/Display.pm b/lib/ProductOpener/Display.pm index 7ae6121d1d477..b6a19d57dc990 100644 --- a/lib/ProductOpener/Display.pm +++ b/lib/ProductOpener/Display.pm @@ -1007,12 +1007,12 @@ sub set_user_agent_request_ref_attributes ($request_ref) { my $is_crawl_bot = 0; my $is_denied_crawl_bot = 0; if ($user_agent_str - =~ /\b(Googlebot|Googlebot-Image|Google-InspectionTool|bingbot|Applebot|Yandex|DuckDuck|DotBot|Seekport|Ahrefs|DataForSeo|Seznam|ZoomBot|Mojeek|QRbot|Qwant|facebookexternalhit|Bytespider|GPTBot|ClaudeBot|SEOkicks|Searchmetrics|MJ12|SurveyBot|SEOdiver|wotbox|Cliqz|Paracrawl|Scrapy|VelenPublicWebCrawler|Semrush|MegaIndex\.ru|Amazon|aiohttp|python-request)/i + =~ /\b(Googlebot|Googlebot-Image|Google-InspectionTool|bingbot|Applebot|Yandex|DuckDuck|DotBot|Seekport|Ahrefs|DataForSeo|Seznam|ZoomBot|Mojeek|QRbot|Qwant|facebookexternalhit|Bytespider|GPTBot|cohere-ai|anthropic-ai|PerplexityBot|ClaudeBot|Claude-Web|SEOkicks|Searchmetrics|MJ12|SurveyBot|SEOdiver|wotbox|Cliqz|Paracrawl|Scrapy|VelenPublicWebCrawler|Semrush|MegaIndex\.ru|Amazon|aiohttp|python-request)/i ) { $is_crawl_bot = 1; if ($user_agent_str - =~ /\b(bingbot|Seekport|Ahrefs|DataForSeo|Seznam|ZoomBot|Mojeek|QRbot|Bytespider|SEOkicks|Searchmetrics|MJ12|SurveyBot|SEOdiver|wotbox|Cliqz|Paracrawl|Scrapy|VelenPublicWebCrawler|Semrush|MegaIndex\.ru|YandexMarket|Amazon|ClaudeBot)/ + =~ /\b(bingbot|Seekport|Ahrefs|DataForSeo|Seznam|ZoomBot|Mojeek|QRbot|Bytespider|SEOkicks|Searchmetrics|MJ12|SurveyBot|SEOdiver|wotbox|Cliqz|Paracrawl|Scrapy|VelenPublicWebCrawler|Semrush|MegaIndex\.ru|YandexMarket|Amazon|GPTBot|PerplexityBot|ClaudeBot|Claude-Web|cohere-ai|anthropic-ai)/i ) { $is_denied_crawl_bot = 1; diff --git a/templates/web/pages/robots/robots.tt.txt b/templates/web/pages/robots/robots.tt.txt index 3337338da8e32..b69bc635a753f 100644 --- a/templates/web/pages/robots/robots.tt.txt +++ b/templates/web/pages/robots/robots.tt.txt @@ -90,4 +90,17 @@ Disallow: / User-agent: AhrefsBot Disallow: / + +User-agent: GPTBot +Disallow: / +User-agent: cohere-ai +Disallow: / +User-agent: anthropic-ai +Disallow: / +User-agent: ClaudeBot +Disallow: / +User-agent: Claude-Web +Disallow: / +User-agent: PerplexityBot +Disallow: / [% END %] \ No newline at end of file diff --git a/tests/integration/expected_test_results/api_v2_product_write/get-product-auth-good-password.json b/tests/integration/expected_test_results/api_v2_product_write/get-product-auth-good-password.json index 905b94375de9c..bbc069a2636fa 100644 --- a/tests/integration/expected_test_results/api_v2_product_write/get-product-auth-good-password.json +++ b/tests/integration/expected_test_results/api_v2_product_write/get-product-auth-good-password.json @@ -111,7 +111,7 @@ "origins_of_ingredients" : { "aggregated_origins" : [ { - "epi_score" : 0, + "epi_score" : "0", "origin" : "en:unknown", "percent" : 100, "transportation_score" : null diff --git a/tests/integration/expected_test_results/api_v3_product_read/get-existing-product-gs1-fnc1.json b/tests/integration/expected_test_results/api_v3_product_read/get-existing-product-gs1-fnc1.json index 80dd2095d858e..dda39bc5518a7 100644 --- a/tests/integration/expected_test_results/api_v3_product_read/get-existing-product-gs1-fnc1.json +++ b/tests/integration/expected_test_results/api_v3_product_read/get-existing-product-gs1-fnc1.json @@ -106,7 +106,7 @@ "origins_of_ingredients" : { "aggregated_origins" : [ { - "epi_score" : "0", + "epi_score" : 0, "origin" : "en:unknown", "percent" : 100, "transportation_score" : null diff --git a/tests/integration/expected_test_results/api_v3_product_read/get-existing-product.json b/tests/integration/expected_test_results/api_v3_product_read/get-existing-product.json index 06f6dda1ef743..1d726f24acc1c 100644 --- a/tests/integration/expected_test_results/api_v3_product_read/get-existing-product.json +++ b/tests/integration/expected_test_results/api_v3_product_read/get-existing-product.json @@ -106,7 +106,7 @@ "origins_of_ingredients" : { "aggregated_origins" : [ { - "epi_score" : "0", + "epi_score" : 0, "origin" : "en:unknown", "percent" : 100, "transportation_score" : null diff --git a/tests/integration/expected_test_results/page_crawler/get-robots-txt-ch-it.text b/tests/integration/expected_test_results/page_crawler/get-robots-txt-ch-it.text index 27353b85292de..747316e7e8391 100644 --- a/tests/integration/expected_test_results/page_crawler/get-robots-txt-ch-it.text +++ b/tests/integration/expected_test_results/page_crawler/get-robots-txt-ch-it.text @@ -208,3 +208,16 @@ Disallow: / User-agent: AhrefsBot Disallow: / + +User-agent: GPTBot +Disallow: / +User-agent: cohere-ai +Disallow: / +User-agent: anthropic-ai +Disallow: / +User-agent: ClaudeBot +Disallow: / +User-agent: Claude-Web +Disallow: / +User-agent: PerplexityBot +Disallow: / diff --git a/tests/integration/expected_test_results/page_crawler/get-robots-txt-fr-pro-platform.text b/tests/integration/expected_test_results/page_crawler/get-robots-txt-fr-pro-platform.text index 09fba6e0c3fcf..bc4c465474b58 100644 --- a/tests/integration/expected_test_results/page_crawler/get-robots-txt-fr-pro-platform.text +++ b/tests/integration/expected_test_results/page_crawler/get-robots-txt-fr-pro-platform.text @@ -297,3 +297,16 @@ Disallow: / User-agent: AhrefsBot Disallow: / + +User-agent: GPTBot +Disallow: / +User-agent: cohere-ai +Disallow: / +User-agent: anthropic-ai +Disallow: / +User-agent: ClaudeBot +Disallow: / +User-agent: Claude-Web +Disallow: / +User-agent: PerplexityBot +Disallow: / diff --git a/tests/integration/expected_test_results/page_crawler/get-robots-txt-fr.text b/tests/integration/expected_test_results/page_crawler/get-robots-txt-fr.text index 09fba6e0c3fcf..bc4c465474b58 100644 --- a/tests/integration/expected_test_results/page_crawler/get-robots-txt-fr.text +++ b/tests/integration/expected_test_results/page_crawler/get-robots-txt-fr.text @@ -297,3 +297,16 @@ Disallow: / User-agent: AhrefsBot Disallow: / + +User-agent: GPTBot +Disallow: / +User-agent: cohere-ai +Disallow: / +User-agent: anthropic-ai +Disallow: / +User-agent: ClaudeBot +Disallow: / +User-agent: Claude-Web +Disallow: / +User-agent: PerplexityBot +Disallow: / diff --git a/tests/integration/expected_test_results/page_crawler/get-robots-txt-world-pro-platform.text b/tests/integration/expected_test_results/page_crawler/get-robots-txt-world-pro-platform.text index 27353b85292de..747316e7e8391 100644 --- a/tests/integration/expected_test_results/page_crawler/get-robots-txt-world-pro-platform.text +++ b/tests/integration/expected_test_results/page_crawler/get-robots-txt-world-pro-platform.text @@ -208,3 +208,16 @@ Disallow: / User-agent: AhrefsBot Disallow: / + +User-agent: GPTBot +Disallow: / +User-agent: cohere-ai +Disallow: / +User-agent: anthropic-ai +Disallow: / +User-agent: ClaudeBot +Disallow: / +User-agent: Claude-Web +Disallow: / +User-agent: PerplexityBot +Disallow: / diff --git a/tests/integration/expected_test_results/page_crawler/get-robots-txt-world.text b/tests/integration/expected_test_results/page_crawler/get-robots-txt-world.text index 27353b85292de..747316e7e8391 100644 --- a/tests/integration/expected_test_results/page_crawler/get-robots-txt-world.text +++ b/tests/integration/expected_test_results/page_crawler/get-robots-txt-world.text @@ -208,3 +208,16 @@ Disallow: / User-agent: AhrefsBot Disallow: / + +User-agent: GPTBot +Disallow: / +User-agent: cohere-ai +Disallow: / +User-agent: anthropic-ai +Disallow: / +User-agent: ClaudeBot +Disallow: / +User-agent: Claude-Web +Disallow: / +User-agent: PerplexityBot +Disallow: /