Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions extruct/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
"""
JSON-LD extractor
"""

import json
import re
import jstyleson


import lxml.etree

from extruct.utils import parse_html

HTML_OR_JS_COMMENTLINE = re.compile(r'^\s*(//.*|<!--.*-->)')


class JsonLdExtractor(object):
Expand All @@ -34,8 +34,7 @@ def _extract_items(self, node):
data = json.loads(script, strict=False)
except ValueError:
# sometimes JSON-decoding errors are due to leading HTML or JavaScript comments
data = json.loads(
HTML_OR_JS_COMMENTLINE.sub('', script), strict=False)
data = jstyleson.loads(script, strict=False)
if isinstance(data, list):
return data
elif isinstance(data, dict):
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ mf2py>=1.1.0
six>=1.11
w3lib
html-text
jstyleson
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ def get_version():
'mf2py',
'w3lib',
'html-text>=0.5.1',
'six'],
'six',
'jstyleson'
],
extras_require={
'cli': [
'requests',
Expand Down