Changes of Revision 7
[-] | Changed | urlwatch.changes |
1
2  -------------------------------------------------------------------
3 +Thu Aug 30 16:35:52 UTC 2012 - cs@linux-administrator.com
4 +
5 +- update to release 1.15
6 +
7 +-------------------------------------------------------------------
8  Sat Apr 7 16:57:04 UTC 2012 - cs@linux-administrator.com
9
10  - initial package
11 |
||
[-] | Changed | urlwatch.spec ^ |
8 1
2  Name: urlwatch
3 -Version: 1.14
4 +Version: 1.15
5  Release: 1
6  Url: http://thp.io/2008/urlwatch/
7  License: BSD
8 |
||
[+] | Changed | urlwatch-1.15.tar.bz2/ChangeLog ^ |
@@ -115,3 +115,8 @@
   Thomas Dziedzic for reporting this issue and testing the patch)
 * urlwatch 1.14 released
 
+2012-08-30 Thomas Perl <thp.io/about>
+ * Merge changes from Slavko <slavino@slavino.sk> related to UTF-8
+   and html2txt, this has been tested on Debian-based systems
+ * urlwatch 1.15 released
+
| ||
[+] | Changed | urlwatch-1.15.tar.bz2/PKG-INFO ^ |
@@ -1,6 +1,6 @@
 Metadata-Version: 1.0
 Name: urlwatch
-Version: 1.14
+Version: 1.15
 Summary: Watch web pages and arbitrary URLs for changes
 Home-page: http://thp.io/2008/urlwatch/
 Author: Thomas Perl
| ||
[+] | Changed | urlwatch-1.15.tar.bz2/lib/urlwatch/html2txt.py ^ |
@@ -31,7 +31,7 @@
 import re
 
-def html2text(data, method='lynx'):
+def html2text(data, method='lynx', utf8=False):
     """
     Convert a string consisting of HTML to plain text
     for easy difference checking.
@@ -40,7 +40,12 @@
     'lynx' (default) - Use "lynx -dump" for conversion
     'html2text' - Use "html2text -nobs" for conversion
     're' - A simple regex-based HTML tag stripper
-
+
+    If utf8 is True, the data will be handled as utf-8 by Lynx and
+    html2text (if possible). It seems like only the Debian-provided
+    version of html2text has support for the "-utf8" command line
+    flag, so this might not work on non-Debian systems.
+
     Dependencies: apt-get install lynx html2text
     """
     if isinstance(data, unicode):
@@ -53,8 +58,14 @@
     if method == 'lynx':
         cmd = ['lynx', '-dump', '-stdin']
+
+        if utf8:
+            cmd.append('-assume_charset=UTF-8')
     elif method == 'html2text':
         cmd = ['html2text', '-nobs']
+
+        if utf8:
+            cmd.append('-utf8')
     else:
         return data
| ||
[+] | Changed | urlwatch-1.15.tar.bz2/urlwatch ^ |
@@ -37,7 +37,7 @@
 __copyright__ = 'Copyright 2008-2011 Thomas Perl'
 __license__ = 'BSD'
 __homepage__ = 'http://thp.io/2008/urlwatch/'
-__version__ = '1.14'
+__version__ = '1.15'
 
 user_agent = '%s/%s (+http://thp.io/2008/urlwatch/info.html)' % (pkgname, __version__)
 
| ||
[+] | Changed | urlwatch-1.15.tar.bz2/urlwatch.1 ^ |
@@ -1,4 +1,4 @@
-.TH URLWATCH "1" "November 2011" "urlwatch 1.14" "User Commands"
+.TH URLWATCH "1" "August 2012" "urlwatch 1.15" "User Commands"
 .SH NAME
 urlwatch \- Watch web pages and arbitrary URLs for changes
 .SH SYNOPSIS
|