Skip to content

Commit

Permalink
Extract _parse_file_inplace() from parse().
Browse files: browse the repository at this point in the history
  • Loading branch information
lemon24 committed Jan 30, 2022
1 parent 87ceb00 commit b40f456
Showing 1 changed file with 37 additions and 8 deletions.
45 changes: 37 additions & 8 deletions feedparser/api.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,7 +29,7 @@
import datetime
import io
import time
from typing import Dict, List, Union
from typing import Dict, List, Union, IO
import urllib.error
import urllib.parse
import xml.sax
Expand Down Expand Up @@ -206,12 +206,6 @@ def parse(
if not agent:
import feedparser
agent = feedparser.USER_AGENT
if sanitize_html is None:
import feedparser
sanitize_html = bool(feedparser.SANITIZE_HTML)
if resolve_relative_uris is None:
import feedparser
resolve_relative_uris = bool(feedparser.RESOLVE_RELATIVE_URIS)

result = FeedParserDict(
bozo=False,
Expand All @@ -235,6 +229,42 @@ def parse(
# overwrite existing headers using response_headers
result['headers'].update(response_headers or {})

# TODO (lemon24): remove this once _open_resource() returns an open file
file = io.BytesIO(data)

try:
_parse_file_inplace(
file,
result,
resolve_relative_uris=resolve_relative_uris,
sanitize_html=sanitize_html,
)
finally:
if not hasattr(url_file_stream_or_string, 'read'):
# the file does not come from the user, close it
file.close()

return result


def _parse_file_inplace(
file: IO[bytes],
result: dict,
*,
resolve_relative_uris: bool = None,
sanitize_html: bool = None,
) -> None:

# TODO (lemon24): remove this once we start using convert_file_to_utf8()
data = file.read()

# Avoid a cyclic import.
import feedparser
if sanitize_html is None:
sanitize_html = bool(feedparser.SANITIZE_HTML)
if resolve_relative_uris is None:
resolve_relative_uris = bool(feedparser.RESOLVE_RELATIVE_URIS)

data = convert_to_utf8(result['headers'], data, result)
use_json_parser = result['content-type'] == 'application/json'
use_strict_parser = result['encoding'] and True or False
Expand Down Expand Up @@ -300,4 +330,3 @@ def parse(
result['namespaces'] = {}
else:
result['namespaces'] = feed_parser.namespaces_in_use
return result

0 comments on commit b40f456

Please sign in to comment.