[-]
[+]
|
Changed |
urlwatch.changes
|
|
[-]
[+]
|
Changed |
urlwatch.spec
^
|
|
[-]
[+]
|
Changed |
urlwatch-1.15.tar.bz2/ChangeLog
^
|
@@ -115,3 +115,8 @@
Thomas Dziedzic for reporting this issue and testing the patch)
* urlwatch 1.14 released
+2012-08-30 Thomas Perl <thp.io/about>
+ * Merge changes from Slavko <slavino@slavino.sk> related to UTF-8
+ and html2txt, this has been tested on Debian-based systems
+ * urlwatch 1.15 released
+
|
[-]
[+]
|
Changed |
urlwatch-1.15.tar.bz2/PKG-INFO
^
|
@@ -1,6 +1,6 @@
Metadata-Version: 1.0
Name: urlwatch
-Version: 1.14
+Version: 1.15
Summary: Watch web pages and arbitrary URLs for changes
Home-page: http://thp.io/2008/urlwatch/
Author: Thomas Perl
|
[-]
[+]
|
Changed |
urlwatch-1.15.tar.bz2/lib/urlwatch/html2txt.py
^
|
@@ -31,7 +31,7 @@
import re
-def html2text(data, method='lynx'):
+def html2text(data, method='lynx', utf8=False):
"""
Convert a string consisting of HTML to plain text
for easy difference checking.
@@ -40,7 +40,12 @@
'lynx' (default) - Use "lynx -dump" for conversion
'html2text' - Use "html2text -nobs" for conversion
're' - A simple regex-based HTML tag stripper
-
+
+ If utf8 is True, the data will be handled as utf-8 by Lynx and
+ html2text (if possible). It seems like only the Debian-provided
+ version of html2text has support for the "-utf8" command line
+ flag, so this might not work on non-Debian systems.
+
Dependencies: apt-get install lynx html2text
"""
if isinstance(data, unicode):
@@ -53,8 +58,14 @@
if method == 'lynx':
cmd = ['lynx', '-dump', '-stdin']
+
+ if utf8:
+ cmd.append('-assume_charset=UTF-8')
elif method == 'html2text':
cmd = ['html2text', '-nobs']
+
+ if utf8:
+ cmd.append('-utf8')
else:
return data
|
[-]
[+]
|
Changed |
urlwatch-1.15.tar.bz2/urlwatch
^
|
@@ -37,7 +37,7 @@
__copyright__ = 'Copyright 2008-2011 Thomas Perl'
__license__ = 'BSD'
__homepage__ = 'http://thp.io/2008/urlwatch/'
-__version__ = '1.14'
+__version__ = '1.15'
user_agent = '%s/%s (+http://thp.io/2008/urlwatch/info.html)' % (pkgname, __version__)
|
[-]
[+]
|
Changed |
urlwatch-1.15.tar.bz2/urlwatch.1
^
|
@@ -1,4 +1,4 @@
-.TH URLWATCH "1" "November 2011" "urlwatch 1.14" "User Commands"
+.TH URLWATCH "1" "August 2012" "urlwatch 1.15" "User Commands"
.SH NAME
urlwatch \- Watch web pages and arbitrary URLs for changes
.SH SYNOPSIS
|