Skip to content

Commit

Permalink
missing file
Browse files Browse the repository at this point in the history
  • Loading branch information
micjahn committed Jun 14, 2018
1 parent 5993ee7 commit c0be1a8
Showing 1 changed file with 247 additions and 0 deletions.
247 changes: 247 additions & 0 deletions lib/webaccess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
# -*- coding: UTF-8 -*-

__revision__ = '$Id$'

# Copyright (c) 2010
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

# You may use and distribute this software under the terms of the
# GNU General Public License, version 2 or later

import logging
import string
import threading
import time
import urllib2
import cookielib
import gtk
import gutils

log = logging.getLogger("Griffith")


class WebAccess(object):

debug_mode = False
progress = None
parent_window = None

def prepare(self, parent_window=None, title=None, message=None, debug=False):
self.parent_window = parent_window
self.debug_mode = debug
if not self.progress:
self.progress = Progress(parent_window)
self.progress.set_data(_("Fetching data"), _("Wait a moment"), False)

def fetch(self, url=None, encoding=None, title=None, message=None, autodetectencoding=False, postdata=None):
if title or message:
self.progress.set_data(title, message, True)
else:
self.progress.show()
retriever = Retriever(url, self.parent_window, self.progress, postdata = postdata)
retriever.start()
while retriever.isAlive():
self.progress.pulse()
#if self.progress.status:
# waits infinite if retriever thread hangs in urllib
# retriever.join()
while gtk.events_pending():
gtk.main_iteration()
data = ''
if retriever.data:
data = retriever.data
# if autodetecting of the encoding is wanted or we are at debug mode
if self.debug_mode or autodetectencoding:
import chardet
chardetresult = chardet.detect(data)
if chardetresult['encoding']:
detectedencoding = string.lower(chardetresult['encoding'])
if self.debug_mode and encoding != detectedencoding:
log.warn('Detected encoding differs from selected encoding: %s != %s' % (detectedencoding, encoding))
log.warn(chardetresult)
if autodetectencoding:
encoding = detectedencoding
elif autodetectencoding:
log.warn('Can\'t auto-detect encoding, fallback to utf8')
encoding = 'utf8'
try:
# try to decode it strictly
if encoding:
data = data.decode(encoding)
except UnicodeDecodeError, e:
# something is wrong, perhaps a wrong character set
# or some pages are not as strict as they should be
# (like OFDb, mixes utf8 with iso8859-1)
# I want to log the error here so that I can find it
# but the program should not terminate
log.warn('Decoding error: %s' % str(e))
data = data.decode(encoding, 'ignore')
return data

def unprepare(self):
if self.progress:
self.progress.hide()


# global cookie jar is important because the cookies are only stored in memory
_cookiejar = None

class Retriever(threading.Thread):

chunksize = 4096

def __init__(self, URL, parent_window, progress, postdata = None, referer = None):
global _cookiejar
self.URL = URL
self.data = None
self.parent_window = parent_window
self.progress = progress
self.postdata = postdata
self.referer = referer
self._stopevent = threading.Event()
self._sleepperiod = 1.0
if not _cookiejar:
_cookiejar = cookielib.LWPCookieJar(policy = cookielib.DefaultCookiePolicy())
self.cj = _cookiejar
threading.Thread.__init__(self, name = "Retriever")

def run(self):
try:
self.progress.status = False
self.data = self.urlretrieve(self.URL, self.postdata, self.referer)
if self.progress.status:
self.data = None
except IOError:
self.progress.dialog.hide()
gutils.urllib_error(_("Connection error"), self.parent_window)
self.join()

def reportprogress(self, count, blockSize, totalSize):
if totalSize > 0:
try:
downloaded_percentage = min((count * blockSize * 100) / totalSize, 100)
except:
downloaded_percentage = 100
if count != 0:
downloaded_kbyte = int(count * blockSize / 1024.0)
filesize_kbyte = int(totalSize / 1024.0)
return not self.progress.status

def urlretrieve(self, url, data = None, referer = None):
headers = self.get_headers()
if referer:
headers['Referer'] = referer
opener = self.create_opener()
# make the request
the_page = ''
try:
log.debug('Fetching URL: %s' % url)
if data:
log.debug('POST data: %s' % data)
req = urllib2.Request(url, data, headers)
response = opener.open(req)
except:
log.exception('')
log.error('URL: %s' % url)
else:
log.debug('Fetched URL: %s' % response.geturl())
#log.debug('URL info: %s' % response.info())
headers = response.info()
if "content-length" in headers:
size = int(headers["Content-Length"])
else:
size = -1
read = 0
blocknum = 0
while not self.progress.status:
block = response.read(self.chunksize)
if block == '':
break
the_page = the_page + block
read += len(block)
blocknum += 1
if not self.reportprogress(blocknum, self.chunksize, size):
# breakable data fetching
break
if not self.progress.status:
the_page = self.try_decompress(the_page)
return the_page

def get_headers(self):
# use Firefox 3.5 User-Agent string from Windows XP
# additional HTTP headers to work around the html file compression which
# is used by UMTS connections. compression breaks the movie import plugins sometimes
return {
'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7',
'Cache-Control': 'no-cache',
'Pragma': 'no-cache',
'Accept-Encoding': 'gzip'
}

def create_opener(self):
return urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))

def try_decompress(self, data):
# check for gzip compressed pages
if data[0:2] == '\037\213':
return gutils.decompress(data)
return data


class Progress:

def __init__(self, window, title='', message=''):
self.status = False
self.dialog = gtk.Dialog(title, window, gtk.DIALOG_MODAL, ())
self.dialog.set_urgency_hint(False)
self.dialog.set_position(gtk.WIN_POS_CENTER)
self.dialog.stick()
self.label = gtk.Label()
self.label.set_markup(message)
self.dialog.vbox.pack_start(self.label)
self.progress = gtk.ProgressBar()
self.progress.set_pulse_step(0.01)
self.dialog.vbox.pack_start(self.progress, False, False)
self.button = gtk.Button(_("Cancel"), gtk.STOCK_CANCEL)
self.button.connect("clicked", self.callback)
self.dialog.vbox.pack_start(self.button, False, False)

def callback(self, widget):
#self.hide()
self.label.set_markup(_('Cancelling...'))
self.status = True

def pulse(self):
self.progress.pulse()
time.sleep(0.01)

def close(self):
self.dialog.destroy()

def hide(self):
if self.dialog.get_property('visible'):
self.dialog.hide()

def show(self):
if not self.dialog.get_property('visible'):
self.dialog.show_all()

def set_data(self, title, message, showit):
self.dialog.set_title(title)
self.label.set_markup(message)
if showit is True:
self.show()
# vim: fdm=marker

0 comments on commit c0be1a8

Please sign in to comment.