-
-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor search code #5197
Refactor search code #5197
Changes from 30 commits
47e3c1a
a84bb15
6f36acd
0bfa998
936465d
0496cf6
ab28491
617f47f
d0f6082
af8a28f
5f197ec
465628e
e3e245c
e909759
49098d7
07dc26b
e9f3f3f
d6d02da
336e674
eb65817
144ed0e
d314f58
03ad482
5dfc17a
fcc9620
701ecdb
f636737
7583f9a
747da90
6c317f1
e2655de
f457ff7
7b2013c
0741a25
85956e0
6ac2f35
e062191
8e4cc2b
8c7bda4
5b9f460
e2e271b
a993f08
d52b968
fc277fa
417ea45
80c58c7
72d867f
5f118ff
e263e69
1a3e146
fab9f42
0a06726
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,7 @@ function attach_elastic_search_query(data) { | |
var total_count = data.count || 0; | ||
|
||
if (hit_list.length) { | ||
console.debug('Read the Docs search got a result. Showing results.') | ||
for (var i = 0; i < hit_list.length; i += 1) { | ||
var doc = hit_list[i]; | ||
var highlight = doc.highlight; | ||
|
@@ -55,12 +56,18 @@ function attach_elastic_search_query(data) { | |
|
||
// Show highlighted texts | ||
if (highlight.content) { | ||
var content_text = xss(highlight.content[0]); | ||
var contents = $('<div class="context">'); | ||
|
||
contents.html(content_text); | ||
contents.find('em').addClass('highlighted'); | ||
list_item.append(contents); | ||
for (index in highlight.content) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, JS quirk here. There is |
||
if (index < 3) { | ||
// Show up to 3 results for search | ||
var content = highlight.content[index]; | ||
var content_text = xss(content); | ||
var contents = $('<div class="context">'); | ||
|
||
contents.html("..." + content_text + "..."); | ||
contents.find('em').addClass('highlighted'); | ||
list_item.append(contents); | ||
} | ||
} | ||
} | ||
|
||
Search.output.append(list_item); | ||
|
@@ -71,10 +78,11 @@ function attach_elastic_search_query(data) { | |
if (!hit_list.length) { | ||
// Fallback to Sphinx's indexes | ||
Search.query_fallback(query); | ||
console.log('Read the Docs search failed. Falling back to Sphinx search.') | ||
} | ||
else { | ||
Search.status.text( | ||
_('Search finished, found %s page(s) matching the search query.').replace('%s', total_count) | ||
_('Search finished, found %s page(s) matching the search query.').replace('%s', hit_list.length) | ||
); | ||
} | ||
}) | ||
|
@@ -97,6 +105,7 @@ function attach_elastic_search_query(data) { | |
}, | ||
complete: function (resp, status_code) { | ||
if (status_code !== 'success' || resp.responseJSON.count === 0) { | ||
console.debug('Read the Docs search failed, skipping loading search content.') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Another debug statement |
||
return search_def.reject(); | ||
} | ||
return search_def.resolve(resp.responseJSON); | ||
|
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,53 @@ | ||
import logging | ||
from pprint import pformat | ||
|
||
from rest_framework import generics | ||
from rest_framework import serializers | ||
from rest_framework.exceptions import ValidationError | ||
from rest_framework.pagination import PageNumberPagination | ||
|
||
from readthedocs.search.documents import PageDocument | ||
from readthedocs.search.filters import SearchFilterBackend | ||
from readthedocs.search.pagination import SearchPagination | ||
from readthedocs.search.serializers import PageSearchSerializer | ||
from readthedocs.search.utils import get_project_list_or_404 | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
|
||
class SearchPagination(PageNumberPagination): | ||
page_size = 25 | ||
page_size_query_param = 'page_size' | ||
max_page_size = 100 | ||
|
||
|
||
class PageSearchSerializer(serializers.Serializer): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we take the serializers to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I found it a lot of overhead to keep moving around between files when they were only used in one place. I find it easier this way to have all the things we need for the API in one place. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I also think we'll need to change our implementation here to support faceting. If we want to support facets, the current approach won't work, since the serializer never sees the facet objects. |
||
project = serializers.CharField() | ||
version = serializers.CharField() | ||
title = serializers.CharField() | ||
path = serializers.CharField() | ||
link = serializers.SerializerMethodField() | ||
highlight = serializers.SerializerMethodField() | ||
|
||
def get_link(self, obj): | ||
projects_url = self.context.get('projects_url') | ||
if projects_url: | ||
docs_url = projects_url[obj.project] | ||
return docs_url + obj.path | ||
|
||
def get_highlight(self, obj): | ||
highlight = getattr(obj.meta, 'highlight', None) | ||
if highlight: | ||
if hasattr(highlight, 'content'): | ||
for num, result in enumerate(highlight.content): | ||
# Change results to turn newlines in highlight into periods | ||
# https://github.com/rtfd/readthedocs.org/issues/5168 | ||
new_text = result.replace('\n', '. ') | ||
highlight.content[num] = new_text | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This for could be rewriten like, I suppose:
I found it clearer. |
||
ret = highlight.to_dict() | ||
log.debug('API Search highlight: %s', pformat(ret)) | ||
return ret | ||
|
||
|
||
class PageSearchAPIView(generics.ListAPIView): | ||
pagination_class = SearchPagination | ||
filter_backends = [SearchFilterBackend] | ||
serializer_class = PageSearchSerializer | ||
|
||
def get_queryset(self): | ||
|
@@ -24,7 +61,15 @@ def get_queryset(self): | |
# Validate all the required params are there | ||
self.validate_query_params() | ||
query = self.request.query_params.get('q', '') | ||
queryset = PageDocument.simple_search(query=query) | ||
kwargs = {} | ||
kwargs['projects_list'] = [p.slug for p in self.get_all_projects()] | ||
kwargs['versions_list'] = self.request.query_params.get('version') | ||
user = '' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's a better pattern to default to the I think you can pass There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree here, unless there is some significance to ES handling the user as an empty string. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nope, was just the old way we were doing it. Fixed it now. |
||
if self.request.user.is_authenticated: | ||
user = self.request.user | ||
queryset = PageDocument.faceted_search( | ||
query=query, user=user, **kwargs | ||
) | ||
return queryset | ||
|
||
def validate_query_params(self): | ||
|
@@ -43,13 +88,15 @@ def get_serializer_context(self): | |
context['projects_url'] = self.get_all_projects_url() | ||
return context | ||
|
||
def get_all_projects_url(self): | ||
version_slug = self.request.query_params.get('version') | ||
def get_all_projects(self): | ||
project_slug = self.request.query_params.get('project') | ||
all_projects = get_project_list_or_404(project_slug=project_slug, user=self.request.user) | ||
projects_url = {} | ||
return all_projects | ||
|
||
def get_all_projects_url(self): | ||
all_projects = self.get_all_projects() | ||
version_slug = self.request.query_params.get('version') | ||
projects_url = {} | ||
for project in all_projects: | ||
projects_url[project.slug] = project.get_docs_url(version_slug=version_slug) | ||
|
||
return projects_url |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,8 @@ | ||
"""Project app config""" | ||
|
||
from django.apps import AppConfig | ||
|
||
|
||
class SearchConfig(AppConfig): | ||
name = 'readthedocs.search' | ||
|
||
def ready(self): | ||
from .signals import index_html_file, remove_html_file | ||
import readthedocs.search.signals # noqa |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should drop debug/log statements like this for prod. You can tell if the Sphinx indexes are used as the search result return will be empty -- or easier, you'll see a flood of requests for Sphinx's index files.
I do think this is helpfu though. I think an addition to our JS could be to log when DEBUG = True, but I took a really quick swing at this and hit issues. We need to pass through our footer most likely. I'd say lets remove these statements and find a method of exposing DEBUG to our JS.