Skip to content

Commit

Permalink
Throttle "Feed parsing" requests (uplift to 1.50.x) (#17874)
Browse files Browse the repository at this point in the history
Throttle "Feed parsing" requests

When a site has a lot off rss feed sites and the workload is quite heavy,
browser process starves and CPU spikes. In order to avoid that, throttle
parsing requests with task queue.
  • Loading branch information
sangwoo108 authored Apr 4, 2023
1 parent 545ce51 commit 2b562ca
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 1 deletion.
84 changes: 83 additions & 1 deletion components/brave_news/browser/direct_feed_controller.cc
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ using ParseFeedCallback = base::OnceCallback<void(absl::optional<FeedData>)>;
void ParseFeedDataOffMainThread(const GURL& feed_url,
std::string body_content,
ParseFeedCallback callback) {
// TODO(sko) Maybe we should have a thread traits so that app can be shutdown
// while the worker threads are still working.
base::ThreadPool::PostTaskAndReplyWithResult(
FROM_HERE,
base::BindOnce(
Expand All @@ -111,6 +113,20 @@ void ParseFeedDataOffMainThread(const GURL& feed_url,

} // namespace

DirectFeedController::FindFeedRequest::FindFeedRequest(
const GURL& possible_feed_or_site_url,
mojom::BraveNewsController::FindFeedsCallback callback)
: possible_feed_or_site_url(possible_feed_or_site_url),
callback(std::move(callback)) {}

DirectFeedController::FindFeedRequest::FindFeedRequest(
DirectFeedController::FindFeedRequest&&) = default;
DirectFeedController::FindFeedRequest&
DirectFeedController::FindFeedRequest::operator=(
DirectFeedController::FindFeedRequest&&) = default;

DirectFeedController::FindFeedRequest::~FindFeedRequest() = default;

DirectFeedController::DirectFeedController(
PrefService* prefs,
scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory)
Expand Down Expand Up @@ -172,6 +188,31 @@ std::vector<mojom::PublisherPtr> DirectFeedController::ParseDirectFeedsPref() {
void DirectFeedController::FindFeeds(
const GURL& possible_feed_or_site_url,
mojom::BraveNewsController::FindFeedsCallback callback) {
CHECK(possible_feed_or_site_url.is_valid() &&
!possible_feed_or_site_url.is_empty());

if (ongoing_requests_.count(possible_feed_or_site_url)) {
DVLOG(2) << "Accumulated: " << possible_feed_or_site_url.spec();
ongoing_requests_[possible_feed_or_site_url].push_back(
{possible_feed_or_site_url, std::move(callback)});
return;
}

if (ongoing_requests_.size() >= kMaxOngoingRequests) {
DVLOG(2) << "Queued: " << possible_feed_or_site_url.spec();
pending_requests_.push({possible_feed_or_site_url, std::move(callback)});
return;
}

DVLOG(2) << "Kick off: " << possible_feed_or_site_url.spec();
ongoing_requests_[possible_feed_or_site_url].push_back(
{possible_feed_or_site_url, std::move(callback)});
FindFeedsImpl(possible_feed_or_site_url);
}

void DirectFeedController::FindFeedsImpl(
const GURL& possible_feed_or_site_url) {
DVLOG(2) << __FUNCTION__ << " " << possible_feed_or_site_url.spec();
// Download and check headers. If it's an html document, then parse for rss
// items
auto request = std::make_unique<network::ResourceRequest>();
Expand Down Expand Up @@ -296,11 +337,52 @@ void DirectFeedController::FindFeeds(
base::Unretained(direct_feed_controller),
std::move(callback)));
},
base::Unretained(this), iter, std::move(callback),
base::Unretained(this), iter,
base::BindOnce(&DirectFeedController::OnFindFeedsImplResponse,
weak_ptr_factory_.GetWeakPtr(),
possible_feed_or_site_url),
possible_feed_or_site_url),
5 * 1024 * 1024);
}

void DirectFeedController::OnFindFeedsImplResponse(
const GURL& feed_url,
std::vector<mojom::FeedSearchResultItemPtr> results) {
if (ongoing_requests_.count(feed_url)) {
if (ongoing_requests_[feed_url].size() == 1u) {
std::move(ongoing_requests_[feed_url].front().callback)
.Run(std::move(results));
} else {
// We need to do deep-copy
for (auto& request : ongoing_requests_[feed_url]) {
std::vector<mojom::FeedSearchResultItemPtr> clone;
base::ranges::transform(results, std::back_inserter(clone),
&mojom::FeedSearchResultItemPtr::Clone);
std::move(request.callback).Run(std::move(clone));
}
}

ongoing_requests_.erase(feed_url);
}

DVLOG(2) << ongoing_requests_.size();

if (ongoing_requests_.size() >= kMaxOngoingRequests ||
pending_requests_.empty()) {
return;
}

auto request = std::move(pending_requests_.front());
pending_requests_.pop();

auto target_url = request.possible_feed_or_site_url;
auto& requests = ongoing_requests_[target_url];
requests.push_back(std::move(request));
if (requests.size() == 1) {
FindFeedsImpl(target_url);
}
}

void DirectFeedController::VerifyFeedUrl(const GURL& feed_url,
IsValidCallback callback) {
// Download the feed and once it's done, see if there's any content.
Expand Down
30 changes: 30 additions & 0 deletions components/brave_news/browser/direct_feed_controller.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include <list>
#include <memory>
#include <queue>
#include <string>
#include <vector>

Expand Down Expand Up @@ -77,6 +78,20 @@ class DirectFeedController {
mojom::BraveNewsController::FindFeedsCallback callback);

private:
struct FindFeedRequest {
FindFeedRequest(const GURL& possible_feed_or_site_url,
mojom::BraveNewsController::FindFeedsCallback callback);
FindFeedRequest(FindFeedRequest&&);
FindFeedRequest& operator=(FindFeedRequest&&);
~FindFeedRequest();

GURL possible_feed_or_site_url;
mojom::BraveNewsController::FindFeedsCallback callback;
};

// TODO(sko) We might want to adjust this value.
static constexpr size_t kMaxOngoingRequests = 2;

using SimpleURLLoaderList =
std::list<std::unique_ptr<network::SimpleURLLoader>>;

Expand All @@ -97,9 +112,24 @@ class DirectFeedController {
const GURL& feed_url,
const std::unique_ptr<std::string> response_body);

void FindFeedsImpl(const GURL& possible_feed_or_site_url);
void OnFindFeedsImplResponse(
const GURL& feed_url,
std::vector<mojom::FeedSearchResultItemPtr> results);

raw_ptr<PrefService> prefs_;
SimpleURLLoaderList url_loaders_;

// TODO(sko) We should have a way to cancel requests.
// e.g. Navigate to different sites, quit app.
// Witthout that, some heavy RSS feed parsing work will prevent new feeds from
// detection and app from shutdown.
std::queue<FindFeedRequest> pending_requests_;
base::flat_map<GURL, std::vector<FindFeedRequest>> ongoing_requests_;

scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory_;

base::WeakPtrFactory<DirectFeedController> weak_ptr_factory_{this};
};

} // namespace brave_news
Expand Down

0 comments on commit 2b562ca

Please sign in to comment.