Nothing Special   »   [go: up one dir, main page]

Skip to content

Commit

Permalink
mf2: support alt text in <img> tags
Browse files: browse the repository at this point in the history
  • Loading branch information
snarfed committed Jul 19, 2018
1 parent 96ae47c commit 05a7818
Show file tree
Hide file tree
Showing 9 changed files with 86 additions and 20 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,9 @@ Changelog
* Improve GraphQL support for comments and users.
* Atom:
* Shorten and ellipsize feed title when necessary ([#144](https://github.com/snarfed/granary/issues/144)).
* microformats2:
* Upgrade mf2py to improve a few things like [implied p-name detection](http://microformats.org/wiki/microformats2-implied-properties) and whitespace handling ([#142](https://github.com/snarfed/granary/issues/142), fixes [#145](https://github.com/snarfed/granary/issues/145), [snarfed/bridgy#756](https://github.com/snarfed/bridgy/issues/756), [snarfed/bridgy#828](https://github.com/snarfed/bridgy/issues/828)).
* Support `alt` attribute in `<img>` tags ([snarfed/bridgy#756](https://github.com/snarfed/bridgy/issues/756)).
### 1.12 - 2018-03-24
* Add Python 3 support! Granary now requires either Python 2.7+ or Python 3.3+.
Expand Down
4 changes: 2 additions & 2 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def get(self):

mf2 = None
if input == 'html':
mf2 = mf2py.parse(doc=body, url=url)
mf2 = mf2py.parse(doc=body, url=url, img_with_alt=True)
elif input in ('mf2-json', 'json-mf2'):
mf2 = body_json
mf2.setdefault('rels', {}) # mf2util expects rels
Expand All @@ -154,7 +154,7 @@ def fetch_mf2_func(url):
if util.domain_or_parent_in(urlparse.urlparse(url).netloc, SILO_DOMAINS):
return {'items': [{'type': ['h-card'], 'properties': {'url': [url]}}]}
_, doc = self._fetch(url)
return mf2py.parse(doc=doc, url=url)
return mf2py.parse(doc=doc, url=url, img_with_alt=True)

try:
actor = microformats2.find_author(mf2, fetch_mf2_func=fetch_mf2_func)
Expand Down
4 changes: 2 additions & 2 deletions granary/atom.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,9 +329,9 @@ def html_to_atom(html, url=None, fetch_author=False, reader=True):
if fetch_author:
assert url, 'fetch_author=True requires url!'

parsed = mf2py.parse(doc=html, url=url)
parsed = mf2py.parse(doc=html, url=url, img_with_alt=True)
actor = microformats2.find_author(
parsed, fetch_mf2_func=lambda url: mf2py.parse(url=url))
parsed, fetch_mf2_func=lambda url: mf2py.parse(url=url, img_with_alt=True))

return activities_to_atom(
microformats2.html_to_activities(html, url, actor),
Expand Down
2 changes: 1 addition & 1 deletion granary/flickr.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ def user_to_actor(self, resp):
if profile_url:
try:
resp = util.urlopen(profile_url)
profile_json = mf2py.parse(doc=resp, url=profile_url)
profile_json = mf2py.parse(doc=resp, url=profile_url, img_with_alt=True)
# personal site is likely the first non-flickr url
urls = profile_json.get('rels', {}).get('me', [])
obj['urls'] = [{'value': u} for u in urls]
Expand Down
55 changes: 42 additions & 13 deletions granary/microformats2.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
dedupe_urls,
get_first,
get_list,
get_url,
get_urls,
uniquify,
)
Expand Down Expand Up @@ -224,8 +225,7 @@ def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
'summary': [summary],
'url': (list(object_urls(obj) or object_urls(primary)) +
obj.get('upstreamDuplicates', [])),
'photo': dedupe_urls(get_urls(attachments, 'image', 'image') +
get_urls(primary, 'image')),
# photo is special cased below, to handle alt
'video': dedupe_urls(get_urls(attachments, 'video', 'stream') +
get_urls(primary, 'stream')),
'audio': get_urls(attachments, 'audio', 'stream'),
Expand All @@ -252,6 +252,16 @@ def object_to_json(obj, trim_nulls=True, entry_class='h-entry',
for a in attachments['note'] + attachments['article']]
}

# photos, including alt text
photo_urls = set()
ret['properties']['photo'] = []
for image in get_list(attachments, 'image') + [primary]:
for url in get_urls(image, 'image'):
if url and url not in photo_urls:
photo_urls.add(url)
name = get_first(image, 'image', {}).get('displayName')
ret['properties']['photo'].append({'value': url, 'alt': name} if name else url)

# hashtags and person tags
if obj_type == 'tag':
ret['properties']['tag-of'] = util.get_urls(obj, 'target')
Expand Down Expand Up @@ -345,7 +355,7 @@ def json_to_object(mf2, actor=None, fetch_mf2=False):
# the author h-card may be on another page. run full authorship algorithm:
# https://indieweb.org/authorship
def fetch(url):
return mf2py.parse(util.requests_get(url).text, url=url)
return mf2py.parse(util.requests_get(url).text, url=url, img_with_alt=True)
author = mf2util.find_author(
{'items': [mf2]}, hentry=mf2, fetch_mf2_func=fetch if fetch_mf2 else None)
if author:
Expand Down Expand Up @@ -385,10 +395,9 @@ def fetch(url):
if re.match(r'^https?://github.com/[^/]+/[^/]+(/issues)?/?$', url):
as_type = 'issue'

def absolute_urls(prop):
return [url for url in get_string_urls(props.get(prop, []))
# filter out relative and invalid URLs (mf2py gives absolute urls)
if urllib.parse.urlparse(url).netloc]
def is_absolute(url):
"""Return the URL's netloc, which is truthy only for absolute URLs; callers use it to filter out relative and invalid URLs (mf2py gives absolute urls)."""
return urllib.parse.urlparse(url).netloc

urls = props.get('url') and get_string_urls(props.get('url'))

Expand Down Expand Up @@ -416,9 +425,8 @@ def absolute_urls(prop):
'content': get_html(prop.get('content')),
'url': urls[0] if urls else None,
'urls': [{'value': u} for u in urls] if urls and len(urls) > 1 else None,
'image': [{'url': url} for url in
dedupe_urls(absolute_urls('photo') + absolute_urls('featured'))],
'stream': [{'url': url} for url in absolute_urls('video')],
# image is special cased below, to handle alt
'stream': [{'url': url} for url in get_string_urls(props.get('video', []))],
'location': json_to_object(prop.get('location')),
'replies': {'items': [json_to_object(c) for c in props.get('comment', [])]},
'tags': [{'objectType': 'hashtag', 'displayName': cat}
Expand All @@ -428,6 +436,20 @@ def absolute_urls(prop):
'attachments': attachments,
}

# images, including alt text
photo_urls = set()
obj['image'] = []
for photo in props.get('photo', []) + props.get('featured', []):
url = photo
alt = None
if isinstance(photo, dict):
photo = photo.get('properties') or photo
url = get_first(photo, 'value') or get_first(photo, 'url')
alt = get_first(photo, 'alt')
if url and url not in photo_urls and is_absolute(url):
photo_urls.add(url)
obj['image'].append({'url': url, 'displayName': alt})

# mf2util uses the indieweb/mf2 location algorithm to collect location properties.
interpreted = mf2util.interpret({'items': [mf2]}, None)
if interpreted:
Expand Down Expand Up @@ -485,7 +507,7 @@ def html_to_activities(html, url=None, actor=None):
Returns:
list of ActivityStreams activity dicts
"""
parsed = mf2py.parse(doc=html, url=url)
parsed = mf2py.parse(doc=html, url=url, img_with_alt=True)
hfeed = mf2util.find_first_entry(parsed, ['h-feed'])
items = hfeed.get('children', []) if hfeed else parsed.get('items', [])

Expand Down Expand Up @@ -925,10 +947,13 @@ def find_author(parsed, **kwargs):
"""
author = mf2util.find_author(parsed, 'http://123', **kwargs)
if author:
photo = author.get('photo')
if isinstance(photo, dict):
photo = photo.get('url') or photo.get('value')
return {
'displayName': author.get('name'),
'url': author.get('url'),
'image': {'url': author.get('photo')},
'image': {'url': photo},
}


Expand Down Expand Up @@ -1028,12 +1053,16 @@ def img(src, alt=''):
"""Returns an <img> string with the given src, class, and alt.
Args:
src: string, url of the image
src: string url or dict with value and (optionally) alt
alt: string, alt attribute value, or None
Returns:
string
"""
if isinstance(src, dict):
assert not alt
alt = src.get('alt', '')
src = src.get('value')
return '<img class="u-photo" src="%s" alt=%s />' % (
src, xml.sax.saxutils.quoteattr(alt or ''))

Expand Down
9 changes: 9 additions & 0 deletions granary/test/testdata/article_with_photo_with_alt.as.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"objectType": "article",
"displayName": "article abc",
"content": "foo bar",
"image": [{
"url": "http://pic/ture.jpg",
"displayName": "my alt text"
}]
}
12 changes: 12 additions & 0 deletions granary/test/testdata/article_with_photo_with_alt.mf2.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<article class="h-entry">
<span class="p-uid"></span>

<span class="p-name">article abc</span>
<div class="e-content">

foo bar
</div>

<img class="u-photo" src="http://pic/ture.jpg" alt="my alt text" />

</article>
14 changes: 14 additions & 0 deletions granary/test/testdata/article_with_photo_with_alt.mf2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"type": ["h-entry"],
"properties": {
"name": ["article abc"],
"content": [{
"value": "foo bar",
"html": "foo bar"
}],
"photo": [{
"value": "http://pic/ture.jpg",
"alt": "my alt text"
}]
}
}
3 changes: 1 addition & 2 deletions granary/test/testdata/note_with_composite_photo.mf2.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
"properties": {
"name": ["The Photo Caption"],
"url": ["https://ben.thatmustbe.me/static/img.jpg"]
},
"value": "https://ben.thatmustbe.me/static/img.jpgThe Photo Caption"
}
}],
"name": [""]
}
Expand Down

0 comments on commit 05a7818

Please sign in to comment.