-
Notifications
You must be signed in to change notification settings - Fork 0
/
views.py
59 lines (51 loc) · 2.17 KB
/
views.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import json
import urllib.parse
import urllib.request
from urllib.parse import urlparse

import bs4
from django.core.exceptions import ValidationError
from django.core.validators import URLValidator
from django.http import Http404
from django.shortcuts import HttpResponse, render, render_to_response
# Validator instance created once at import time; calling it with a value
# raises ValidationError when that value is not an acceptable URL.
urlval = URLValidator()
# Upper bound, in seconds, on how long the remote fetch may block a request.
_FETCH_TIMEOUT_SECONDS = 10


def _pick_image_src(images):
    """Return the preferred ``src`` among *images*, or ``None`` if none is usable.

    Tags without a ``src`` attribute, or with an empty one, are ignored. The
    first remaining source that does not end in ``.gif`` (case-insensitive)
    wins; if every usable source is a GIF, the first of them is returned.
    """
    sources = [src for src in (img.get('src') for img in images) if src]
    for src in sources:
        if not src.lower().endswith('.gif'):
            return src
    return sources[0] if sources else None


def retrieve(request):
    """Fetch the page named by the ``url`` query parameter and describe it as JSON.

    The JSON object always carries ``error`` (the string ``'true'`` or
    ``'false'``). When ``error`` is ``'true'``, ``msg`` explains why. Once the
    page has been fetched, ``title`` holds the text of its ``<title>`` (or
    ``null`` when there is none), and ``imgsrc`` holds the absolute URL of the
    chosen image when a usable one exists.

    Args:
        request: the incoming Django request; only its ``method`` and ``GET``
            attributes are read.

    Returns:
        An ``HttpResponse`` with content type ``application/json``.

    Raises:
        Http404: for any request method other than GET.
    """
    if request.method != 'GET':
        # Handling only GET requests
        raise Http404

    response = dict(error='false')
    siteurl = request.GET.get('url', None)

    try:
        urlval(siteurl)
    except ValidationError:
        response.update({'msg': 'Invalid URL', 'error': 'true'})
        return HttpResponse(json.dumps(response), content_type='application/json')

    # NOTE(security): siteurl is caller-supplied and fetched from the server.
    # Passing URL validation does not stop it from pointing at internal hosts
    # (SSRF); consider an allow-list of schemes/hosts before exposing this view.
    try:
        # The context manager closes the connection; the timeout keeps a slow
        # or unresponsive remote host from tying up the worker indefinitely.
        with urllib.request.urlopen(siteurl, timeout=_FETCH_TIMEOUT_SECONDS) as page:
            markup = page.read()
    except OSError:
        # URLError, HTTPError and socket timeouts are all OSError subclasses.
        response.update({'msg': 'Could not retrieve URL', 'error': 'true'})
        return HttpResponse(json.dumps(response), content_type='application/json')

    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    soup = bs4.BeautifulSoup(markup, 'html.parser')

    # A document may have no <title>; report null rather than failing.
    title_tag = soup.find('title')
    response['title'] = title_tag.string if title_tag is not None else None

    imgsrc = _pick_image_src(soup.find_all('img'))
    if imgsrc is None:
        # No <img> tags at all, or none with a usable src attribute.
        response.update({'msg': 'No images found', 'error': 'true'})
    else:
        # Resolve relative sources against the requested page URL.
        response['imgsrc'] = urllib.parse.urljoin(siteurl, imgsrc)

    return HttpResponse(json.dumps(response), content_type='application/json')
def home(request):
    """Render the ``home.html`` template and return it as the response."""
    # render() replaces render_to_response(), which was deprecated in
    # Django 2.0 and removed in 3.0; it also makes the request available
    # to the template context.
    return render(request, 'home.html')