update Shared

Author: j
Date: 2015-11-04 13:01:55 +01:00
Parent: e7ebbedd38
Commit: 6881f3471a

184 changed files with 13080 additions and 13691 deletions

.gitignore

@@ -4,3 +4,4 @@
 *.pyd
 __pycache__
 pip_cache
+Linux_x86_64/bin


@@ -1,11 +1,10 @@
 #!/usr/bin/python3
-# -*- coding: utf-8 -*-
-import re
+# EASY-INSTALL-ENTRY-SCRIPT: 'chardet==2.3.0','console_scripts','chardetect'
+__requires__ = 'chardet==2.3.0'
 import sys
-from chardet.chardetect import main
+from pkg_resources import load_entry_point
 
 if __name__ == '__main__':
-    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
-    sys.exit(main())
+    sys.exit(
+        load_entry_point('chardet==2.3.0', 'console_scripts', 'chardetect')()
+    )
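
Note: the regenerated wrapper defers to setuptools instead of importing
chardet directly. A minimal sketch of what it resolves to at runtime
(load_entry_point is the real pkg_resources API; the rest is illustrative)::

    from pkg_resources import load_entry_point

    # Look up the 'chardetect' console_scripts entry point declared by
    # chardet 2.3.0; for this distribution it is chardet.chardetect:main.
    main = load_entry_point('chardet==2.3.0', 'console_scripts', 'chardetect')
    raise SystemExit(main())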


@@ -1,23 +1,23 @@
-../PyPDF2/xmp.py
-../PyPDF2/utils.py
 ../PyPDF2/filters.py
-../PyPDF2/__init__.py
-../PyPDF2/_version.py
 ../PyPDF2/generic.py
+../PyPDF2/merger.py
 ../PyPDF2/pagerange.py
 ../PyPDF2/pdf.py
-../PyPDF2/merger.py
-../PyPDF2/__pycache__/xmp.cpython-34.pyc
-../PyPDF2/__pycache__/utils.cpython-34.pyc
+../PyPDF2/utils.py
+../PyPDF2/xmp.py
+../PyPDF2/_version.py
+../PyPDF2/__init__.py
 ../PyPDF2/__pycache__/filters.cpython-34.pyc
-../PyPDF2/__pycache__/__init__.cpython-34.pyc
-../PyPDF2/__pycache__/_version.cpython-34.pyc
 ../PyPDF2/__pycache__/generic.cpython-34.pyc
+../PyPDF2/__pycache__/merger.cpython-34.pyc
 ../PyPDF2/__pycache__/pagerange.cpython-34.pyc
 ../PyPDF2/__pycache__/pdf.cpython-34.pyc
-../PyPDF2/__pycache__/merger.cpython-34.pyc
+../PyPDF2/__pycache__/utils.cpython-34.pyc
+../PyPDF2/__pycache__/xmp.cpython-34.pyc
+../PyPDF2/__pycache__/_version.cpython-34.pyc
+../PyPDF2/__pycache__/__init__.cpython-34.pyc
 ./
-top_level.txt
 dependency_links.txt
 PKG-INFO
 SOURCES.txt
+top_level.txt


@@ -1,42 +0,0 @@
Metadata-Version: 1.1
Name: certifi
Version: 14.05.14
Summary: Python package for providing Mozilla's CA Bundle.
Home-page: http://python-requests.org
Author: Kenneth Reitz
Author-email: me@kennethreitz.com
License: ISC
Description: Certifi: Python SSL Certificates
================================
This installable Python package contains a CA Bundle that you can reference
in your Python code. This is useful for verifying HTTP requests, for example.
This is the same CA Bundle which ships with the Requests codebase, and is
derived from Mozilla Firefox's canonical set.
Usage
-----
To reference the installed CA Bundle, you can use the built-in function::
>>> import certifi
>>> certifi.where()
'/usr/local/lib/python2.7/site-packages/certifi/cacert.pem'
Enjoy!
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Natural Language :: English
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2.5
Classifier: Programming Language :: Python :: 2.6
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3.0
Classifier: Programming Language :: Python :: 3.1
Classifier: Programming Language :: Python :: 3.2
Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: 3.4


@@ -1,13 +0,0 @@
LICENSE
MANIFEST.in
README.rst
setup.cfg
setup.py
certifi/__init__.py
certifi/__main__.py
certifi/cacert.pem
certifi/core.py
certifi.egg-info/PKG-INFO
certifi.egg-info/SOURCES.txt
certifi.egg-info/dependency_links.txt
certifi.egg-info/top_level.txt


@@ -1,12 +0,0 @@
../certifi/__init__.py
../certifi/core.py
../certifi/__main__.py
../certifi/cacert.pem
../certifi/__pycache__/__init__.cpython-34.pyc
../certifi/__pycache__/core.cpython-34.pyc
../certifi/__pycache__/__main__.cpython-34.pyc
./
dependency_links.txt
PKG-INFO
SOURCES.txt
top_level.txt


@@ -0,0 +1,30 @@
Certifi: Python SSL Certificates
================================
`Certifi`_ is a carefully curated collection of Root Certificates for
validating the trustworthiness of SSL certificates while verifying the identity
of TLS hosts. It has been extracted from the `Requests`_ project.
Installation
------------
`certifi` is available on PyPI. Simply install it with `pip`::
$ pip install certifi
Usage
-----
To reference the installed CA Bundle, you can use the built-in function::
>>> import certifi
>>> certifi.where()
'/usr/local/lib/python2.7/site-packages/certifi/cacert.pem'
Enjoy!
.. _`Certifi`: http://certifi.io/en/latest/
.. _`Requests`: http://docs.python-requests.org/en/latest/


@@ -0,0 +1,52 @@
Metadata-Version: 2.0
Name: certifi
Version: 2015.9.6.2
Summary: Python package for providing Mozilla's CA Bundle.
Home-page: http://certifi.io/
Author: Kenneth Reitz
Author-email: me@kennethreitz.com
License: ISC
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Natural Language :: English
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2.5
Classifier: Programming Language :: Python :: 2.6
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3.0
Classifier: Programming Language :: Python :: 3.1
Classifier: Programming Language :: Python :: 3.2
Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: 3.4
Certifi: Python SSL Certificates
================================
`Certifi`_ is a carefully curated collection of Root Certificates for
validating the trustworthiness of SSL certificates while verifying the identity
of TLS hosts. It has been extracted from the `Requests`_ project.
Installation
------------
`certifi` is available on PyPI. Simply install it with `pip`::
$ pip install certifi
Usage
-----
To reference the installed CA Bundle, you can use the built-in function::
>>> import certifi
>>> certifi.where()
'/usr/local/lib/python2.7/site-packages/certifi/cacert.pem'
Enjoy!
.. _`Certifi`: http://certifi.io/en/latest/
.. _`Requests`: http://docs.python-requests.org/en/latest/


@@ -0,0 +1,15 @@
certifi/__init__.py,sha256=T8LOdkem2W_EqteuCirstbPu3iS11BmKnS_nKqQI_kQ,65
certifi/__main__.py,sha256=FiOYt1Fltst7wk9DRa6GCoBr8qBUxlNQu_MKJf04E6s,41
certifi/cacert.pem,sha256=wY10ezo0r5ZPcgfctoi3Q9KRZ79_tpb_MPDGsgWiOwE,320698
certifi/core.py,sha256=DqvIINYNNXsp3Srlk_NRaiizaww8po3l8t8ksz-Xt6Q,716
certifi/old_root.pem,sha256=Sm1SGy9Y3FjEDEy9ie0EX39fcJCv_r6gAPtj9yBrXEY,24014
certifi/weak.pem,sha256=5xzWFRrSP0ZsXiW6emg8UQ_w497lT4qWCv32OO8R1ME,344712
certifi-2015.9.6.2.dist-info/DESCRIPTION.rst,sha256=1HthO7cC8rfi_tZB3iPCnK7Npcd48svSApnFrl8J89Q,716
certifi-2015.9.6.2.dist-info/METADATA,sha256=-IMJn5G46t_YY0VsjSgXQalm6mC4sChB8lsDanFlTME,1532
certifi-2015.9.6.2.dist-info/metadata.json,sha256=LNvgTP4aFSgWMQ-8ySDRnRE7506kiisjTkPqBHna1YE,911
certifi-2015.9.6.2.dist-info/RECORD,,
certifi-2015.9.6.2.dist-info/top_level.txt,sha256=KMu4vUCfsjLrkPbSNdgdekS-pVJzBAJFO__nI8NF6-U,8
certifi-2015.9.6.2.dist-info/WHEEL,sha256=AvR0WeTpDaxT645bl5FQxUK6NPsTls2ttpcGJg3j1Xg,110
certifi/__pycache__/__init__.cpython-34.pyc,,
certifi/__pycache__/__main__.cpython-34.pyc,,
certifi/__pycache__/core.cpython-34.pyc,,


@@ -0,0 +1 @@
{"license": "ISC", "name": "certifi", "metadata_version": "2.0", "generator": "bdist_wheel (0.24.0)", "summary": "Python package for providing Mozilla's CA Bundle.", "version": "2015.9.6.2", "extensions": {"python.details": {"project_urls": {"Home": "http://certifi.io/"}, "document_names": {"description": "DESCRIPTION.rst"}, "contacts": [{"role": "author", "email": "me@kennethreitz.com", "name": "Kenneth Reitz"}]}}, "classifiers": ["Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Natural Language :: English", "Programming Language :: Python", "Programming Language :: Python :: 2.5", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3.0", "Programming Language :: Python :: 3.1", "Programming Language :: Python :: 3.2", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4"]}


@@ -1 +1,3 @@
-from .core import where
+from .core import where, old_where
+
+__version__ = "2015.09.06.2"

File diff suppressed because it is too large.


@@ -7,13 +7,30 @@ certifi.py
 
 This module returns the installation location of cacert.pem.
 """
 import os
+import warnings
+
+
+class DeprecatedBundleWarning(DeprecationWarning):
+    """
+    The weak security bundle is being deprecated. Please bother your service
+    provider to get them to stop using cross-signed roots.
+    """
 
 
 def where():
     f = os.path.split(__file__)[0]
 
     return os.path.join(f, 'cacert.pem')
 
+
+def old_where():
+    warnings.warn(
+        "The weak security bundle is being deprecated.",
+        DeprecatedBundleWarning
+    )
+    f = os.path.split(__file__)[0]
+    return os.path.join(f, 'weak.pem')
+
 if __name__ == '__main__':
     print(where())
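
Note: with this change certifi ships two bundles. A minimal usage sketch,
assuming the certifi 2015.9.6.2 installed by this commit::

    import warnings
    import certifi

    print(certifi.where())          # .../certifi/cacert.pem (strict bundle)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        print(certifi.old_where())  # .../certifi/weak.pem (legacy bundle)
    # old_where() emits DeprecatedBundleWarning, a DeprecationWarning subclass
    assert issubclass(caught[0].category, DeprecationWarning)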


@@ -0,0 +1,387 @@
# Issuer: CN=Entrust.net Secure Server Certification Authority O=Entrust.net OU=www.entrust.net/CPS incorp. by ref. (limits liab.)/(c) 1999 Entrust.net Limited
# Subject: CN=Entrust.net Secure Server Certification Authority O=Entrust.net OU=www.entrust.net/CPS incorp. by ref. (limits liab.)/(c) 1999 Entrust.net Limited
# Label: "Entrust.net Secure Server CA"
# Serial: 927650371
# MD5 Fingerprint: df:f2:80:73:cc:f1:e6:61:73:fc:f5:42:e9:c5:7c:ee
# SHA1 Fingerprint: 99:a6:9b:e6:1a:fe:88:6b:4d:2b:82:00:7c:b8:54:fc:31:7e:15:39
# SHA256 Fingerprint: 62:f2:40:27:8c:56:4c:4d:d8:bf:7d:9d:4f:6f:36:6e:a8:94:d2:2f:5f:34:d9:89:a9:83:ac:ec:2f:ff:ed:50
-----BEGIN CERTIFICATE-----
MIIE2DCCBEGgAwIBAgIEN0rSQzANBgkqhkiG9w0BAQUFADCBwzELMAkGA1UEBhMC
VVMxFDASBgNVBAoTC0VudHJ1c3QubmV0MTswOQYDVQQLEzJ3d3cuZW50cnVzdC5u
ZXQvQ1BTIGluY29ycC4gYnkgcmVmLiAobGltaXRzIGxpYWIuKTElMCMGA1UECxMc
KGMpIDE5OTkgRW50cnVzdC5uZXQgTGltaXRlZDE6MDgGA1UEAxMxRW50cnVzdC5u
ZXQgU2VjdXJlIFNlcnZlciBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05OTA1
MjUxNjA5NDBaFw0xOTA1MjUxNjM5NDBaMIHDMQswCQYDVQQGEwJVUzEUMBIGA1UE
ChMLRW50cnVzdC5uZXQxOzA5BgNVBAsTMnd3dy5lbnRydXN0Lm5ldC9DUFMgaW5j
b3JwLiBieSByZWYuIChsaW1pdHMgbGlhYi4pMSUwIwYDVQQLExwoYykgMTk5OSBF
bnRydXN0Lm5ldCBMaW1pdGVkMTowOAYDVQQDEzFFbnRydXN0Lm5ldCBTZWN1cmUg
U2VydmVyIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGdMA0GCSqGSIb3DQEBAQUA
A4GLADCBhwKBgQDNKIM0VBuJ8w+vN5Ex/68xYMmo6LIQaO2f55M28Qpku0f1BBc/
I0dNxScZgSYMVHINiC3ZH5oSn7yzcdOAGT9HZnuMNSjSuQrfJNqc1lB5gXpa0zf3
wkrYKZImZNHkmGw6AIr1NJtl+O3jEP/9uElY3KDegjlrgbEWGWG5VLbmQwIBA6OC
AdcwggHTMBEGCWCGSAGG+EIBAQQEAwIABzCCARkGA1UdHwSCARAwggEMMIHeoIHb
oIHYpIHVMIHSMQswCQYDVQQGEwJVUzEUMBIGA1UEChMLRW50cnVzdC5uZXQxOzA5
BgNVBAsTMnd3dy5lbnRydXN0Lm5ldC9DUFMgaW5jb3JwLiBieSByZWYuIChsaW1p
dHMgbGlhYi4pMSUwIwYDVQQLExwoYykgMTk5OSBFbnRydXN0Lm5ldCBMaW1pdGVk
MTowOAYDVQQDEzFFbnRydXN0Lm5ldCBTZWN1cmUgU2VydmVyIENlcnRpZmljYXRp
b24gQXV0aG9yaXR5MQ0wCwYDVQQDEwRDUkwxMCmgJ6AlhiNodHRwOi8vd3d3LmVu
dHJ1c3QubmV0L0NSTC9uZXQxLmNybDArBgNVHRAEJDAigA8xOTk5MDUyNTE2MDk0
MFqBDzIwMTkwNTI1MTYwOTQwWjALBgNVHQ8EBAMCAQYwHwYDVR0jBBgwFoAU8Bdi
E1U9s/8KAGv7UISX8+1i0BowHQYDVR0OBBYEFPAXYhNVPbP/CgBr+1CEl/PtYtAa
MAwGA1UdEwQFMAMBAf8wGQYJKoZIhvZ9B0EABAwwChsEVjQuMAMCBJAwDQYJKoZI
hvcNAQEFBQADgYEAkNwwAvpkdMKnCqV8IY00F6j7Rw7/JXyNEwr75Ji174z4xRAN
95K+8cPV1ZVqBLssziY2ZcgxxufuP+NXdYR6Ee9GTxj005i7qIcyunL2POI9n9cd
2cNgQ4xYDiKWL2KjLB+6rQXvqzJ4h6BUcxm1XAX5Uj5tLUUL9wqT6u0G+bI=
-----END CERTIFICATE-----
# Issuer: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 2 Policy Validation Authority
# Subject: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 2 Policy Validation Authority
# Label: "ValiCert Class 2 VA"
# Serial: 1
# MD5 Fingerprint: a9:23:75:9b:ba:49:36:6e:31:c2:db:f2:e7:66:ba:87
# SHA1 Fingerprint: 31:7a:2a:d0:7f:2b:33:5e:f5:a1:c3:4e:4b:57:e8:b7:d8:f1:fc:a6
# SHA256 Fingerprint: 58:d0:17:27:9c:d4:dc:63:ab:dd:b1:96:a6:c9:90:6c:30:c4:e0:87:83:ea:e8:c1:60:99:54:d6:93:55:59:6b
-----BEGIN CERTIFICATE-----
MIIC5zCCAlACAQEwDQYJKoZIhvcNAQEFBQAwgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0
IFZhbGlkYXRpb24gTmV0d29yazEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAz
BgNVBAsTLFZhbGlDZXJ0IENsYXNzIDIgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9y
aXR5MSEwHwYDVQQDExhodHRwOi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG
9w0BCQEWEWluZm9AdmFsaWNlcnQuY29tMB4XDTk5MDYyNjAwMTk1NFoXDTE5MDYy
NjAwMTk1NFowgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0IFZhbGlkYXRpb24gTmV0d29y
azEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAzBgNVBAsTLFZhbGlDZXJ0IENs
YXNzIDIgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9yaXR5MSEwHwYDVQQDExhodHRw
Oi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG9w0BCQEWEWluZm9AdmFsaWNl
cnQuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDOOnHK5avIWZJV16vY
dA757tn2VUdZZUcOBVXc65g2PFxTXdMwzzjsvUGJ7SVCCSRrCl6zfN1SLUzm1NZ9
WlmpZdRJEy0kTRxQb7XBhVQ7/nHk01xC+YDgkRoKWzk2Z/M/VXwbP7RfZHM047QS
v4dk+NoS/zcnwbNDu+97bi5p9wIDAQABMA0GCSqGSIb3DQEBBQUAA4GBADt/UG9v
UJSZSWI4OB9L+KXIPqeCgfYrx+jFzug6EILLGACOTb2oWH+heQC1u+mNr0HZDzTu
IYEZoDJJKPTEjlbVUjP9UNV+mWwD5MlM/Mtsq2azSiGM5bUMMj4QssxsodyamEwC
W/POuZ6lcg5Ktz885hZo+L7tdEy8W9ViH0Pd
-----END CERTIFICATE-----
# Issuer: CN=NetLock Expressz (Class C) Tanusitvanykiado O=NetLock Halozatbiztonsagi Kft. OU=Tanusitvanykiadok
# Subject: CN=NetLock Expressz (Class C) Tanusitvanykiado O=NetLock Halozatbiztonsagi Kft. OU=Tanusitvanykiadok
# Label: "NetLock Express (Class C) Root"
# Serial: 104
# MD5 Fingerprint: 4f:eb:f1:f0:70:c2:80:63:5d:58:9f:da:12:3c:a9:c4
# SHA1 Fingerprint: e3:92:51:2f:0a:cf:f5:05:df:f6:de:06:7f:75:37:e1:65:ea:57:4b
# SHA256 Fingerprint: 0b:5e:ed:4e:84:64:03:cf:55:e0:65:84:84:40:ed:2a:82:75:8b:f5:b9:aa:1f:25:3d:46:13:cf:a0:80:ff:3f
-----BEGIN CERTIFICATE-----
MIIFTzCCBLigAwIBAgIBaDANBgkqhkiG9w0BAQQFADCBmzELMAkGA1UEBhMCSFUx
ETAPBgNVBAcTCEJ1ZGFwZXN0MScwJQYDVQQKEx5OZXRMb2NrIEhhbG96YXRiaXp0
b25zYWdpIEtmdC4xGjAYBgNVBAsTEVRhbnVzaXR2YW55a2lhZG9rMTQwMgYDVQQD
EytOZXRMb2NrIEV4cHJlc3N6IChDbGFzcyBDKSBUYW51c2l0dmFueWtpYWRvMB4X
DTk5MDIyNTE0MDgxMVoXDTE5MDIyMDE0MDgxMVowgZsxCzAJBgNVBAYTAkhVMREw
DwYDVQQHEwhCdWRhcGVzdDEnMCUGA1UEChMeTmV0TG9jayBIYWxvemF0Yml6dG9u
c2FnaSBLZnQuMRowGAYDVQQLExFUYW51c2l0dmFueWtpYWRvazE0MDIGA1UEAxMr
TmV0TG9jayBFeHByZXNzeiAoQ2xhc3MgQykgVGFudXNpdHZhbnlraWFkbzCBnzAN
BgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA6+ywbGGKIyWvYCDj2Z/8kwvbXY2wobNA
OoLO/XXgeDIDhlqGlZHtU/qdQPzm6N3ZW3oDvV3zOwzDUXmbrVWg6dADEK8KuhRC
2VImESLH0iDMgqSaqf64gXadarfSNnU+sYYJ9m5tfk63euyucYT2BDMIJTLrdKwW
RMbkQJMdf60CAwEAAaOCAp8wggKbMBIGA1UdEwEB/wQIMAYBAf8CAQQwDgYDVR0P
AQH/BAQDAgAGMBEGCWCGSAGG+EIBAQQEAwIABzCCAmAGCWCGSAGG+EIBDQSCAlEW
ggJNRklHWUVMRU0hIEV6ZW4gdGFudXNpdHZhbnkgYSBOZXRMb2NrIEtmdC4gQWx0
YWxhbm9zIFN6b2xnYWx0YXRhc2kgRmVsdGV0ZWxlaWJlbiBsZWlydCBlbGphcmFz
b2sgYWxhcGphbiBrZXN6dWx0LiBBIGhpdGVsZXNpdGVzIGZvbHlhbWF0YXQgYSBO
ZXRMb2NrIEtmdC4gdGVybWVrZmVsZWxvc3NlZy1iaXp0b3NpdGFzYSB2ZWRpLiBB
IGRpZ2l0YWxpcyBhbGFpcmFzIGVsZm9nYWRhc2FuYWsgZmVsdGV0ZWxlIGF6IGVs
b2lydCBlbGxlbm9yemVzaSBlbGphcmFzIG1lZ3RldGVsZS4gQXogZWxqYXJhcyBs
ZWlyYXNhIG1lZ3RhbGFsaGF0byBhIE5ldExvY2sgS2Z0LiBJbnRlcm5ldCBob25s
YXBqYW4gYSBodHRwczovL3d3dy5uZXRsb2NrLm5ldC9kb2NzIGNpbWVuIHZhZ3kg
a2VyaGV0byBheiBlbGxlbm9yemVzQG5ldGxvY2submV0IGUtbWFpbCBjaW1lbi4g
SU1QT1JUQU5UISBUaGUgaXNzdWFuY2UgYW5kIHRoZSB1c2Ugb2YgdGhpcyBjZXJ0
aWZpY2F0ZSBpcyBzdWJqZWN0IHRvIHRoZSBOZXRMb2NrIENQUyBhdmFpbGFibGUg
YXQgaHR0cHM6Ly93d3cubmV0bG9jay5uZXQvZG9jcyBvciBieSBlLW1haWwgYXQg
Y3BzQG5ldGxvY2submV0LjANBgkqhkiG9w0BAQQFAAOBgQAQrX/XDDKACtiG8XmY
ta3UzbM2xJZIwVzNmtkFLp++UOv0JhQQLdRmF/iewSf98e3ke0ugbLWrmldwpu2g
pO0u9f38vf5NNwgMvOOWgyL1SRt/Syu0VMGAfJlOHdCM7tCs5ZL6dVb+ZKATj7i4
Fp1hBWeAyNDYpQcCNJgEjTME1A==
-----END CERTIFICATE-----
# Issuer: CN=NetLock Uzleti (Class B) Tanusitvanykiado O=NetLock Halozatbiztonsagi Kft. OU=Tanusitvanykiadok
# Subject: CN=NetLock Uzleti (Class B) Tanusitvanykiado O=NetLock Halozatbiztonsagi Kft. OU=Tanusitvanykiadok
# Label: "NetLock Business (Class B) Root"
# Serial: 105
# MD5 Fingerprint: 39:16:aa:b9:6a:41:e1:14:69:df:9e:6c:3b:72:dc:b6
# SHA1 Fingerprint: 87:9f:4b:ee:05:df:98:58:3b:e3:60:d6:33:e7:0d:3f:fe:98:71:af
# SHA256 Fingerprint: 39:df:7b:68:2b:7b:93:8f:84:71:54:81:cc:de:8d:60:d8:f2:2e:c5:98:87:7d:0a:aa:c1:2b:59:18:2b:03:12
-----BEGIN CERTIFICATE-----
MIIFSzCCBLSgAwIBAgIBaTANBgkqhkiG9w0BAQQFADCBmTELMAkGA1UEBhMCSFUx
ETAPBgNVBAcTCEJ1ZGFwZXN0MScwJQYDVQQKEx5OZXRMb2NrIEhhbG96YXRiaXp0
b25zYWdpIEtmdC4xGjAYBgNVBAsTEVRhbnVzaXR2YW55a2lhZG9rMTIwMAYDVQQD
EylOZXRMb2NrIFV6bGV0aSAoQ2xhc3MgQikgVGFudXNpdHZhbnlraWFkbzAeFw05
OTAyMjUxNDEwMjJaFw0xOTAyMjAxNDEwMjJaMIGZMQswCQYDVQQGEwJIVTERMA8G
A1UEBxMIQnVkYXBlc3QxJzAlBgNVBAoTHk5ldExvY2sgSGFsb3phdGJpenRvbnNh
Z2kgS2Z0LjEaMBgGA1UECxMRVGFudXNpdHZhbnlraWFkb2sxMjAwBgNVBAMTKU5l
dExvY2sgVXpsZXRpIChDbGFzcyBCKSBUYW51c2l0dmFueWtpYWRvMIGfMA0GCSqG
SIb3DQEBAQUAA4GNADCBiQKBgQCx6gTsIKAjwo84YM/HRrPVG/77uZmeBNwcf4xK
gZjupNTKihe5In+DCnVMm8Bp2GQ5o+2So/1bXHQawEfKOml2mrriRBf8TKPV/riX
iK+IA4kfpPIEPsgHC+b5sy96YhQJRhTKZPWLgLViqNhr1nGTLbO/CVRY7QbrqHvc
Q7GhaQIDAQABo4ICnzCCApswEgYDVR0TAQH/BAgwBgEB/wIBBDAOBgNVHQ8BAf8E
BAMCAAYwEQYJYIZIAYb4QgEBBAQDAgAHMIICYAYJYIZIAYb4QgENBIICURaCAk1G
SUdZRUxFTSEgRXplbiB0YW51c2l0dmFueSBhIE5ldExvY2sgS2Z0LiBBbHRhbGFu
b3MgU3pvbGdhbHRhdGFzaSBGZWx0ZXRlbGVpYmVuIGxlaXJ0IGVsamFyYXNvayBh
bGFwamFuIGtlc3p1bHQuIEEgaGl0ZWxlc2l0ZXMgZm9seWFtYXRhdCBhIE5ldExv
Y2sgS2Z0LiB0ZXJtZWtmZWxlbG9zc2VnLWJpenRvc2l0YXNhIHZlZGkuIEEgZGln
aXRhbGlzIGFsYWlyYXMgZWxmb2dhZGFzYW5hayBmZWx0ZXRlbGUgYXogZWxvaXJ0
IGVsbGVub3J6ZXNpIGVsamFyYXMgbWVndGV0ZWxlLiBBeiBlbGphcmFzIGxlaXJh
c2EgbWVndGFsYWxoYXRvIGEgTmV0TG9jayBLZnQuIEludGVybmV0IGhvbmxhcGph
biBhIGh0dHBzOi8vd3d3Lm5ldGxvY2submV0L2RvY3MgY2ltZW4gdmFneSBrZXJo
ZXRvIGF6IGVsbGVub3J6ZXNAbmV0bG9jay5uZXQgZS1tYWlsIGNpbWVuLiBJTVBP
UlRBTlQhIFRoZSBpc3N1YW5jZSBhbmQgdGhlIHVzZSBvZiB0aGlzIGNlcnRpZmlj
YXRlIGlzIHN1YmplY3QgdG8gdGhlIE5ldExvY2sgQ1BTIGF2YWlsYWJsZSBhdCBo
dHRwczovL3d3dy5uZXRsb2NrLm5ldC9kb2NzIG9yIGJ5IGUtbWFpbCBhdCBjcHNA
bmV0bG9jay5uZXQuMA0GCSqGSIb3DQEBBAUAA4GBAATbrowXr/gOkDFOzT4JwG06
sPgzTEdM43WIEJessDgVkcYplswhwG08pXTP2IKlOcNl40JwuyKQ433bNXbhoLXa
n3BukxowOR0w2y7jfLKRstE3Kfq51hdcR0/jHTjrn9V7lagonhVK0dHQKwCXoOKS
NitjrFgBazMpUIaD8QFI
-----END CERTIFICATE-----
# Issuer: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 3 Policy Validation Authority
# Subject: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 3 Policy Validation Authority
# Label: "RSA Root Certificate 1"
# Serial: 1
# MD5 Fingerprint: a2:6f:53:b7:ee:40:db:4a:68:e7:fa:18:d9:10:4b:72
# SHA1 Fingerprint: 69:bd:8c:f4:9c:d3:00:fb:59:2e:17:93:ca:55:6a:f3:ec:aa:35:fb
# SHA256 Fingerprint: bc:23:f9:8a:31:3c:b9:2d:e3:bb:fc:3a:5a:9f:44:61:ac:39:49:4c:4a:e1:5a:9e:9d:f1:31:e9:9b:73:01:9a
-----BEGIN CERTIFICATE-----
MIIC5zCCAlACAQEwDQYJKoZIhvcNAQEFBQAwgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0
IFZhbGlkYXRpb24gTmV0d29yazEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAz
BgNVBAsTLFZhbGlDZXJ0IENsYXNzIDMgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9y
aXR5MSEwHwYDVQQDExhodHRwOi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG
9w0BCQEWEWluZm9AdmFsaWNlcnQuY29tMB4XDTk5MDYyNjAwMjIzM1oXDTE5MDYy
NjAwMjIzM1owgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0IFZhbGlkYXRpb24gTmV0d29y
azEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAzBgNVBAsTLFZhbGlDZXJ0IENs
YXNzIDMgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9yaXR5MSEwHwYDVQQDExhodHRw
Oi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG9w0BCQEWEWluZm9AdmFsaWNl
cnQuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDjmFGWHOjVsQaBalfD
cnWTq8+epvzzFlLWLU2fNUSoLgRNB0mKOCn1dzfnt6td3zZxFJmP3MKS8edgkpfs
2Ejcv8ECIMYkpChMMFp2bbFc893enhBxoYjHW5tBbcqwuI4V7q0zK89HBFx1cQqY
JJgpp0lZpd34t0NiYfPT4tBVPwIDAQABMA0GCSqGSIb3DQEBBQUAA4GBAFa7AliE
Zwgs3x/be0kz9dNnnfS0ChCzycUs4pJqcXgn8nCDQtM+z6lU9PHYkhaM0QTLS6vJ
n0WuPIqpsHEzXcjFV9+vqDWzf4mH6eglkrh/hXqu1rweN1gqZ8mRzyqBPu3GOd/A
PhmcGcwTTYJBtYze4D1gCCAPRX5ron+jjBXu
-----END CERTIFICATE-----
# Issuer: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 1 Policy Validation Authority
# Subject: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 1 Policy Validation Authority
# Label: "ValiCert Class 1 VA"
# Serial: 1
# MD5 Fingerprint: 65:58:ab:15:ad:57:6c:1e:a8:a7:b5:69:ac:bf:ff:eb
# SHA1 Fingerprint: e5:df:74:3c:b6:01:c4:9b:98:43:dc:ab:8c:e8:6a:81:10:9f:e4:8e
# SHA256 Fingerprint: f4:c1:49:55:1a:30:13:a3:5b:c7:bf:fe:17:a7:f3:44:9b:c1:ab:5b:5a:0a:e7:4b:06:c2:3b:90:00:4c:01:04
-----BEGIN CERTIFICATE-----
MIIC5zCCAlACAQEwDQYJKoZIhvcNAQEFBQAwgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0
IFZhbGlkYXRpb24gTmV0d29yazEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAz
BgNVBAsTLFZhbGlDZXJ0IENsYXNzIDEgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9y
aXR5MSEwHwYDVQQDExhodHRwOi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG
9w0BCQEWEWluZm9AdmFsaWNlcnQuY29tMB4XDTk5MDYyNTIyMjM0OFoXDTE5MDYy
NTIyMjM0OFowgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0IFZhbGlkYXRpb24gTmV0d29y
azEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAzBgNVBAsTLFZhbGlDZXJ0IENs
YXNzIDEgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9yaXR5MSEwHwYDVQQDExhodHRw
Oi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG9w0BCQEWEWluZm9AdmFsaWNl
cnQuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDYWYJ6ibiWuqYvaG9Y
LqdUHAZu9OqNSLwxlBfw8068srg1knaw0KWlAdcAAxIiGQj4/xEjm84H9b9pGib+
TunRf50sQB1ZaG6m+FiwnRqP0z/x3BkGgagO4DrdyFNFCQbmD3DD+kCmDuJWBQ8Y
TfwggtFzVXSNdnKgHZ0dwN0/cQIDAQABMA0GCSqGSIb3DQEBBQUAA4GBAFBoPUn0
LBwGlN+VYH+Wexf+T3GtZMjdd9LvWVXoP+iOBSoh8gfStadS/pyxtuJbdxdA6nLW
I8sogTLDAHkY7FkXicnGah5xyf23dKUlRWnFSKsZ4UWKJWsZ7uW7EvV/96aNUcPw
nXS3qT6gpf+2SQMT2iLM7XGCK5nPOrf1LXLI
-----END CERTIFICATE-----
# Issuer: CN=Equifax Secure eBusiness CA-1 O=Equifax Secure Inc.
# Subject: CN=Equifax Secure eBusiness CA-1 O=Equifax Secure Inc.
# Label: "Equifax Secure eBusiness CA 1"
# Serial: 4
# MD5 Fingerprint: 64:9c:ef:2e:44:fc:c6:8f:52:07:d0:51:73:8f:cb:3d
# SHA1 Fingerprint: da:40:18:8b:91:89:a3:ed:ee:ae:da:97:fe:2f:9d:f5:b7:d1:8a:41
# SHA256 Fingerprint: cf:56:ff:46:a4:a1:86:10:9d:d9:65:84:b5:ee:b5:8a:51:0c:42:75:b0:e5:f9:4f:40:bb:ae:86:5e:19:f6:73
-----BEGIN CERTIFICATE-----
MIICgjCCAeugAwIBAgIBBDANBgkqhkiG9w0BAQQFADBTMQswCQYDVQQGEwJVUzEc
MBoGA1UEChMTRXF1aWZheCBTZWN1cmUgSW5jLjEmMCQGA1UEAxMdRXF1aWZheCBT
ZWN1cmUgZUJ1c2luZXNzIENBLTEwHhcNOTkwNjIxMDQwMDAwWhcNMjAwNjIxMDQw
MDAwWjBTMQswCQYDVQQGEwJVUzEcMBoGA1UEChMTRXF1aWZheCBTZWN1cmUgSW5j
LjEmMCQGA1UEAxMdRXF1aWZheCBTZWN1cmUgZUJ1c2luZXNzIENBLTEwgZ8wDQYJ
KoZIhvcNAQEBBQADgY0AMIGJAoGBAM4vGbwXt3fek6lfWg0XTzQaDJj0ItlZ1MRo
RvC0NcWFAyDGr0WlIVFFQesWWDYyb+JQYmT5/VGcqiTZ9J2DKocKIdMSODRsjQBu
WqDZQu4aIZX5UkxVWsUPOE9G+m34LjXWHXzr4vCwdYDIqROsvojvOm6rXyo4YgKw
Env+j6YDAgMBAAGjZjBkMBEGCWCGSAGG+EIBAQQEAwIABzAPBgNVHRMBAf8EBTAD
AQH/MB8GA1UdIwQYMBaAFEp4MlIR21kWNl7fwRQ2QGpHfEyhMB0GA1UdDgQWBBRK
eDJSEdtZFjZe38EUNkBqR3xMoTANBgkqhkiG9w0BAQQFAAOBgQB1W6ibAxHm6VZM
zfmpTMANmvPMZWnmJXbMWbfWVMMdzZmsGd20hdXgPfxiIKeES1hl8eL5lSE/9dR+
WB5Hh1Q+WKG1tfgq73HnvMP2sUlG4tega+VWeponmHxGYhTnyfxuAxJ5gDgdSIKN
/Bf+KpYrtWKmpj29f5JZzVoqgrI3eQ==
-----END CERTIFICATE-----
# Issuer: CN=Equifax Secure Global eBusiness CA-1 O=Equifax Secure Inc.
# Subject: CN=Equifax Secure Global eBusiness CA-1 O=Equifax Secure Inc.
# Label: "Equifax Secure Global eBusiness CA"
# Serial: 1
# MD5 Fingerprint: 8f:5d:77:06:27:c4:98:3c:5b:93:78:e7:d7:7d:9b:cc
# SHA1 Fingerprint: 7e:78:4a:10:1c:82:65:cc:2d:e1:f1:6d:47:b4:40:ca:d9:0a:19:45
# SHA256 Fingerprint: 5f:0b:62:ea:b5:e3:53:ea:65:21:65:16:58:fb:b6:53:59:f4:43:28:0a:4a:fb:d1:04:d7:7d:10:f9:f0:4c:07
-----BEGIN CERTIFICATE-----
MIICkDCCAfmgAwIBAgIBATANBgkqhkiG9w0BAQQFADBaMQswCQYDVQQGEwJVUzEc
MBoGA1UEChMTRXF1aWZheCBTZWN1cmUgSW5jLjEtMCsGA1UEAxMkRXF1aWZheCBT
ZWN1cmUgR2xvYmFsIGVCdXNpbmVzcyBDQS0xMB4XDTk5MDYyMTA0MDAwMFoXDTIw
MDYyMTA0MDAwMFowWjELMAkGA1UEBhMCVVMxHDAaBgNVBAoTE0VxdWlmYXggU2Vj
dXJlIEluYy4xLTArBgNVBAMTJEVxdWlmYXggU2VjdXJlIEdsb2JhbCBlQnVzaW5l
c3MgQ0EtMTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAuucXkAJlsTRVPEnC
UdXfp9E3j9HngXNBUmCbnaEXJnitx7HoJpQytd4zjTov2/KaelpzmKNc6fuKcxtc
58O/gGzNqfTWK8D3+ZmqY6KxRwIP1ORROhI8bIpaVIRw28HFkM9yRcuoWcDNM50/
o5brhTMhHD4ePmBudpxnhcXIw2ECAwEAAaNmMGQwEQYJYIZIAYb4QgEBBAQDAgAH
MA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0jBBgwFoAUvqigdHJQa0S3ySPY+6j/s1dr
aGwwHQYDVR0OBBYEFL6ooHRyUGtEt8kj2Puo/7NXa2hsMA0GCSqGSIb3DQEBBAUA
A4GBADDiAVGqx+pf2rnQZQ8w1j7aDRRJbpGTJxQx78T3LUX47Me/okENI7SS+RkA
Z70Br83gcfxaz2TE4JaY0KNA4gGK7ycH8WUBikQtBmV1UsCGECAhX2xrD2yuCRyv
8qIYNMR1pHMc8Y3c7635s3a0kr/clRAevsvIO1qEYBlWlKlV
-----END CERTIFICATE-----
# Issuer: CN=Thawte Premium Server CA O=Thawte Consulting cc OU=Certification Services Division
# Subject: CN=Thawte Premium Server CA O=Thawte Consulting cc OU=Certification Services Division
# Label: "Thawte Premium Server CA"
# Serial: 1
# MD5 Fingerprint: 06:9f:69:79:16:66:90:02:1b:8c:8c:a2:c3:07:6f:3a
# SHA1 Fingerprint: 62:7f:8d:78:27:65:63:99:d2:7d:7f:90:44:c9:fe:b3:f3:3e:fa:9a
# SHA256 Fingerprint: ab:70:36:36:5c:71:54:aa:29:c2:c2:9f:5d:41:91:16:3b:16:2a:22:25:01:13:57:d5:6d:07:ff:a7:bc:1f:72
-----BEGIN CERTIFICATE-----
MIIDJzCCApCgAwIBAgIBATANBgkqhkiG9w0BAQQFADCBzjELMAkGA1UEBhMCWkEx
FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMR0wGwYD
VQQKExRUaGF3dGUgQ29uc3VsdGluZyBjYzEoMCYGA1UECxMfQ2VydGlmaWNhdGlv
biBTZXJ2aWNlcyBEaXZpc2lvbjEhMB8GA1UEAxMYVGhhd3RlIFByZW1pdW0gU2Vy
dmVyIENBMSgwJgYJKoZIhvcNAQkBFhlwcmVtaXVtLXNlcnZlckB0aGF3dGUuY29t
MB4XDTk2MDgwMTAwMDAwMFoXDTIwMTIzMTIzNTk1OVowgc4xCzAJBgNVBAYTAlpB
MRUwEwYDVQQIEwxXZXN0ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEdMBsG
A1UEChMUVGhhd3RlIENvbnN1bHRpbmcgY2MxKDAmBgNVBAsTH0NlcnRpZmljYXRp
b24gU2VydmljZXMgRGl2aXNpb24xITAfBgNVBAMTGFRoYXd0ZSBQcmVtaXVtIFNl
cnZlciBDQTEoMCYGCSqGSIb3DQEJARYZcHJlbWl1bS1zZXJ2ZXJAdGhhd3RlLmNv
bTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA0jY2aovXwlue2oFBYo847kkE
VdbQ7xwblRZH7xhINTpS9CtqBo87L+pW46+GjZ4X9560ZXUCTe/LCaIhUdib0GfQ
ug2SBhRz1JPLlyoAnFxODLz6FVL88kRu2hFKbgifLy3j+ao6hnO2RlNYyIkFvYMR
uHM/qgeN9EJN50CdHDcCAwEAAaMTMBEwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG
9w0BAQQFAAOBgQAmSCwWwlj66BZ0DKqqX1Q/8tfJeGBeXm43YyJ3Nn6yF8Q0ufUI
hfzJATj/Tb7yFkJD57taRvvBxhEf8UqwKEbJw8RCfbz6q1lu1bdRiBHjpIUZa4JM
pAwSremkrj/xw0llmozFyD4lt5SZu5IycQfwhl7tUCemDaYj+bvLpgcUQg==
-----END CERTIFICATE-----
# Issuer: CN=Thawte Server CA O=Thawte Consulting cc OU=Certification Services Division
# Subject: CN=Thawte Server CA O=Thawte Consulting cc OU=Certification Services Division
# Label: "Thawte Server CA"
# Serial: 1
# MD5 Fingerprint: c5:70:c4:a2:ed:53:78:0c:c8:10:53:81:64:cb:d0:1d
# SHA1 Fingerprint: 23:e5:94:94:51:95:f2:41:48:03:b4:d5:64:d2:a3:a3:f5:d8:8b:8c
# SHA256 Fingerprint: b4:41:0b:73:e2:e6:ea:ca:47:fb:c4:2f:8f:a4:01:8a:f4:38:1d:c5:4c:fa:a8:44:50:46:1e:ed:09:45:4d:e9
-----BEGIN CERTIFICATE-----
MIIDEzCCAnygAwIBAgIBATANBgkqhkiG9w0BAQQFADCBxDELMAkGA1UEBhMCWkEx
FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMR0wGwYD
VQQKExRUaGF3dGUgQ29uc3VsdGluZyBjYzEoMCYGA1UECxMfQ2VydGlmaWNhdGlv
biBTZXJ2aWNlcyBEaXZpc2lvbjEZMBcGA1UEAxMQVGhhd3RlIFNlcnZlciBDQTEm
MCQGCSqGSIb3DQEJARYXc2VydmVyLWNlcnRzQHRoYXd0ZS5jb20wHhcNOTYwODAx
MDAwMDAwWhcNMjAxMjMxMjM1OTU5WjCBxDELMAkGA1UEBhMCWkExFTATBgNVBAgT
DFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMR0wGwYDVQQKExRUaGF3
dGUgQ29uc3VsdGluZyBjYzEoMCYGA1UECxMfQ2VydGlmaWNhdGlvbiBTZXJ2aWNl
cyBEaXZpc2lvbjEZMBcGA1UEAxMQVGhhd3RlIFNlcnZlciBDQTEmMCQGCSqGSIb3
DQEJARYXc2VydmVyLWNlcnRzQHRoYXd0ZS5jb20wgZ8wDQYJKoZIhvcNAQEBBQAD
gY0AMIGJAoGBANOkUG7I/1Zr5s9dtuoMaHVHoqrC2oQl/Kj0R1HahbUgdJSGHg91
yekIYfUGbTBuFRkC6VLAYttNmZ7iagxEOM3+vuNkCXDF/rFrKbYvScg71CcEJRCX
L+eQbcAoQpnXTEPew/UhbVSfXcNY4cDk2VuwuNy0e982OsK1ZiIS1ocNAgMBAAGj
EzARMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEEBQADgYEAB/pMaVz7lcxG
7oWDTSEwjsrZqG9JGubaUeNgcGyEYRGhGshIPllDfU+VPaGLtwtimHp1it2ITk6e
QNuozDJ0uW8NxuOzRAvZim+aKZuZGCg70eNAKJpaPNW15yAbi8qkq43pUdniTCxZ
qdq5snUb9kLy78fyGPmJvKP/iiMucEc=
-----END CERTIFICATE-----
# Issuer: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority
# Subject: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority
# Label: "Verisign Class 3 Public Primary Certification Authority"
# Serial: 149843929435818692848040365716851702463
# MD5 Fingerprint: 10:fc:63:5d:f6:26:3e:0d:f3:25:be:5f:79:cd:67:67
# SHA1 Fingerprint: 74:2c:31:92:e6:07:e4:24:eb:45:49:54:2b:e1:bb:c5:3e:61:74:e2
# SHA256 Fingerprint: e7:68:56:34:ef:ac:f6:9a:ce:93:9a:6b:25:5b:7b:4f:ab:ef:42:93:5b:50:a2:65:ac:b5:cb:60:27:e4:4e:70
-----BEGIN CERTIFICATE-----
MIICPDCCAaUCEHC65B0Q2Sk0tjjKewPMur8wDQYJKoZIhvcNAQECBQAwXzELMAkG
A1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFz
cyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTk2
MDEyOTAwMDAwMFoXDTI4MDgwMTIzNTk1OVowXzELMAkGA1UEBhMCVVMxFzAVBgNV
BAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFzcyAzIFB1YmxpYyBQcmlt
YXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGfMA0GCSqGSIb3DQEBAQUAA4GN
ADCBiQKBgQDJXFme8huKARS0EN8EQNvjV69qRUCPhAwL0TPZ2RHP7gJYHyX3KqhE
BarsAx94f56TuZoAqiN91qyFomNFx3InzPRMxnVx0jnvT0Lwdd8KkMaOIG+YD/is
I19wKTakyYbnsZogy1Olhec9vn2a/iRFM9x2Fe0PonFkTGUugWhFpwIDAQABMA0G
CSqGSIb3DQEBAgUAA4GBALtMEivPLCYATxQT3ab7/AoRhIzzKBxnki98tsX63/Do
lbwdj2wsqFHMc9ikwFPwTtYmwHYBV4GSXiHx0bH/59AhWM1pF+NEHJwZRDmJXNyc
AA9WjQKZ7aKQRUzkuxCkPfAyAw7xzvjoyVGM5mKf5p/AfbdynMk2OmufTqj/ZA1k
-----END CERTIFICATE-----
# Issuer: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority
# Subject: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority
# Label: "Verisign Class 3 Public Primary Certification Authority"
# Serial: 80507572722862485515306429940691309246
# MD5 Fingerprint: ef:5a:f1:33:ef:f1:cd:bb:51:02:ee:12:14:4b:96:c4
# SHA1 Fingerprint: a1:db:63:93:91:6f:17:e4:18:55:09:40:04:15:c7:02:40:b0:ae:6b
# SHA256 Fingerprint: a4:b6:b3:99:6f:c2:f3:06:b3:fd:86:81:bd:63:41:3d:8c:50:09:cc:4f:a3:29:c2:cc:f0:e2:fa:1b:14:03:05
-----BEGIN CERTIFICATE-----
MIICPDCCAaUCEDyRMcsf9tAbDpq40ES/Er4wDQYJKoZIhvcNAQEFBQAwXzELMAkG
A1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFz
cyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTk2
MDEyOTAwMDAwMFoXDTI4MDgwMjIzNTk1OVowXzELMAkGA1UEBhMCVVMxFzAVBgNV
BAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFzcyAzIFB1YmxpYyBQcmlt
YXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGfMA0GCSqGSIb3DQEBAQUAA4GN
ADCBiQKBgQDJXFme8huKARS0EN8EQNvjV69qRUCPhAwL0TPZ2RHP7gJYHyX3KqhE
BarsAx94f56TuZoAqiN91qyFomNFx3InzPRMxnVx0jnvT0Lwdd8KkMaOIG+YD/is
I19wKTakyYbnsZogy1Olhec9vn2a/iRFM9x2Fe0PonFkTGUugWhFpwIDAQABMA0G
CSqGSIb3DQEBBQUAA4GBABByUqkFFBkyCEHwxWsKzH4PIRnN5GfcX6kb5sroc50i
2JhucwNhkcV8sEVAbkSdjbCxlnRhLQ2pRdKkkirWmnWXbj9T/UWZYB2oK0z5XqcJ
2HUw19JlYD1n1khVdWk/kfVIC0dpImmClr7JyDiGSnoscxlIaU5rfGW/D/xwzoiQ
-----END CERTIFICATE-----
# Issuer: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority - G2/(c) 1998 VeriSign, Inc. - For authorized use only/VeriSign Trust Network
# Subject: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority - G2/(c) 1998 VeriSign, Inc. - For authorized use only/VeriSign Trust Network
# Label: "Verisign Class 3 Public Primary Certification Authority - G2"
# Serial: 167285380242319648451154478808036881606
# MD5 Fingerprint: a2:33:9b:4c:74:78:73:d4:6c:e7:c1:f3:8d:cb:5c:e9
# SHA1 Fingerprint: 85:37:1c:a6:e5:50:14:3d:ce:28:03:47:1b:de:3a:09:e8:f8:77:0f
# SHA256 Fingerprint: 83:ce:3c:12:29:68:8a:59:3d:48:5f:81:97:3c:0f:91:95:43:1e:da:37:cc:5e:36:43:0e:79:c7:a8:88:63:8b
-----BEGIN CERTIFICATE-----
MIIDAjCCAmsCEH3Z/gfPqB63EHln+6eJNMYwDQYJKoZIhvcNAQEFBQAwgcExCzAJ
BgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xh
c3MgMyBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcy
MTowOAYDVQQLEzEoYykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3Jp
emVkIHVzZSBvbmx5MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMB4X
DTk4MDUxODAwMDAwMFoXDTI4MDgwMTIzNTk1OVowgcExCzAJBgNVBAYTAlVTMRcw
FQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xhc3MgMyBQdWJsaWMg
UHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcyMTowOAYDVQQLEzEo
YykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5
MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMIGfMA0GCSqGSIb3DQEB
AQUAA4GNADCBiQKBgQDMXtERXVxp0KvTuWpMmR9ZmDCOFoUgRm1HP9SFIIThbbP4
pO0M8RcPO/mn+SXXwc+EY/J8Y8+iR/LGWzOOZEAEaMGAuWQcRXfH2G71lSk8UOg0
13gfqLptQ5GVj0VXXn7F+8qkBOvqlzdUMG+7AUcyM83cV5tkaWH4mx0ciU9cZwID
AQABMA0GCSqGSIb3DQEBBQUAA4GBAFFNzb5cy5gZnBWyATl4Lk0PZ3BwmcYQWpSk
U01UbSuvDV1Ai2TT1+7eVmGSX6bEHRBhNtMsJzzoKQm5EWR0zLVznxxIqbxhAe7i
F6YM40AIOw7n60RzKprxaZLvcRTDOaxxp5EJb+RxBrO6WVcmeQD2+A2iMzAo1KpY
oJ2daZH9
-----END CERTIFICATE-----
# Issuer: CN=GTE CyberTrust Global Root O=GTE Corporation OU=GTE CyberTrust Solutions, Inc.
# Subject: CN=GTE CyberTrust Global Root O=GTE Corporation OU=GTE CyberTrust Solutions, Inc.
# Label: "GTE CyberTrust Global Root"
# Serial: 421
# MD5 Fingerprint: ca:3d:d3:68:f1:03:5c:d0:32:fa:b8:2b:59:e8:5a:db
# SHA1 Fingerprint: 97:81:79:50:d8:1c:96:70:cc:34:d8:09:cf:79:44:31:36:7e:f4:74
# SHA256 Fingerprint: a5:31:25:18:8d:21:10:aa:96:4b:02:c7:b7:c6:da:32:03:17:08:94:e5:fb:71:ff:fb:66:67:d5:e6:81:0a:36
-----BEGIN CERTIFICATE-----
MIICWjCCAcMCAgGlMA0GCSqGSIb3DQEBBAUAMHUxCzAJBgNVBAYTAlVTMRgwFgYD
VQQKEw9HVEUgQ29ycG9yYXRpb24xJzAlBgNVBAsTHkdURSBDeWJlclRydXN0IFNv
bHV0aW9ucywgSW5jLjEjMCEGA1UEAxMaR1RFIEN5YmVyVHJ1c3QgR2xvYmFsIFJv
b3QwHhcNOTgwODEzMDAyOTAwWhcNMTgwODEzMjM1OTAwWjB1MQswCQYDVQQGEwJV
UzEYMBYGA1UEChMPR1RFIENvcnBvcmF0aW9uMScwJQYDVQQLEx5HVEUgQ3liZXJU
cnVzdCBTb2x1dGlvbnMsIEluYy4xIzAhBgNVBAMTGkdURSBDeWJlclRydXN0IEds
b2JhbCBSb290MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCVD6C28FCc6HrH
iM3dFw4usJTQGz0O9pTAipTHBsiQl8i4ZBp6fmw8U+E3KHNgf7KXUwefU/ltWJTS
r41tiGeA5u2ylc9yMcqlHHK6XALnZELn+aks1joNrI1CqiQBOeacPwGFVw1Yh0X4
04Wqk2kmhXBIgD8SFcd5tB8FLztimQIDAQABMA0GCSqGSIb3DQEBBAUAA4GBAG3r
GwnpXtlR22ciYaQqPEh346B8pt5zohQDhT37qw4wxYMWM4ETCJ57NE7fQMh017l9
3PR2VX2bY1QY6fDq81yx2YtCHrnAlU66+tXifPVoYb+O7AWXX1uw16OFNMQkpw0P
lZPvy5TYnh+dXIVtx6quTx8itc2VrbqnzPmrC3p/
-----END CERTIFICATE-----

File diff suppressed because it is too large.


@@ -1,40 +0,0 @@
Chardet: The Universal Character Encoding Detector
--------------------------------------------------
Detects
- ASCII, UTF-8, UTF-16 (2 variants), UTF-32 (4 variants)
- Big5, GB2312, EUC-TW, HZ-GB-2312, ISO-2022-CN (Traditional and Simplified Chinese)
- EUC-JP, SHIFT_JIS, ISO-2022-JP (Japanese)
- EUC-KR, ISO-2022-KR (Korean)
- KOI8-R, MacCyrillic, IBM855, IBM866, ISO-8859-5, windows-1251 (Cyrillic)
- ISO-8859-2, windows-1250 (Hungarian)
- ISO-8859-5, windows-1251 (Bulgarian)
- windows-1252 (English)
- ISO-8859-7, windows-1253 (Greek)
- ISO-8859-8, windows-1255 (Visual and Logical Hebrew)
- TIS-620 (Thai)
Requires Python 2.6 or later
Command-line Tool
-----------------
chardet comes with a command-line script which reports on the encodings of one
or more files::
% chardetect somefile someotherfile
somefile: windows-1252 with confidence 0.5
someotherfile: ascii with confidence 1.0
About
-----
This is a continuation of Mark Pilgrim's excellent chardet. Previously, two
versions needed to be maintained: one that supported python 2.x and one that
supported python 3.x. We've recently merged with `Ian Corduscano <https://github.com/sigmavirus24>`_'s
`charade <https://github.com/sigmavirus24/charade>`_ fork, so now we have one
coherent version that works for Python 2.6+.
:maintainer: Dan Blanchard


@@ -1,58 +0,0 @@
Metadata-Version: 2.0
Name: chardet
Version: 2.2.1
Summary: Universal encoding detector for Python 2 and 3
Home-page: https://github.com/erikrose/chardet
Author: Ian Cordasco
Author-email: graffatcolmingov@gmail.com
License: LGPL
Keywords: encoding,i18n,xml
Platform: UNKNOWN
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Text Processing :: Linguistic
Chardet: The Universal Character Encoding Detector
--------------------------------------------------
Detects
- ASCII, UTF-8, UTF-16 (2 variants), UTF-32 (4 variants)
- Big5, GB2312, EUC-TW, HZ-GB-2312, ISO-2022-CN (Traditional and Simplified Chinese)
- EUC-JP, SHIFT_JIS, ISO-2022-JP (Japanese)
- EUC-KR, ISO-2022-KR (Korean)
- KOI8-R, MacCyrillic, IBM855, IBM866, ISO-8859-5, windows-1251 (Cyrillic)
- ISO-8859-2, windows-1250 (Hungarian)
- ISO-8859-5, windows-1251 (Bulgarian)
- windows-1252 (English)
- ISO-8859-7, windows-1253 (Greek)
- ISO-8859-8, windows-1255 (Visual and Logical Hebrew)
- TIS-620 (Thai)
Requires Python 2.6 or later
Command-line Tool
-----------------
chardet comes with a command-line script which reports on the encodings of one
or more files::
% chardetect somefile someotherfile
somefile: windows-1252 with confidence 0.5
someotherfile: ascii with confidence 1.0
About
-----
This is a continuation of Mark Pilgrim's excellent chardet. Previously, two
versions needed to be maintained: one that supported python 2.x and one that
supported python 3.x. We've recently merged with `Ian Corduscano <https://github.com/sigmavirus24>`_'s
`charade <https://github.com/sigmavirus24/charade>`_ fork, so now we have one
coherent version that works for Python 2.6+.
:maintainer: Dan Blanchard


@@ -1,85 +0,0 @@
chardet/cp949prober.py,sha256=FMvdLyB7fejPXRsTbca7LK1P3RUvvssmjUNyaEfz8zY,1782
chardet/mbcssm.py,sha256=UuiA4Ic8vEc0XpTKDneqZyiH2TwGuFVZxOxWJep3X_4,19608
chardet/langhebrewmodel.py,sha256=4ASl5vzKJPng4H278VHKtRYC03TpQpenlHTcsmZH1rE,11318
chardet/charsetgroupprober.py,sha256=0lKk7VE516fgMw119tNefFqLOxKfIE9WfdkpIT69OKU,3791
chardet/euctwfreq.py,sha256=G_I0BW9i1w0ONeeUwIYqV7_U09buIHdqh-wNHVaql7I,34872
chardet/charsetprober.py,sha256=Z48o2KiOj23FNqYH8FqzhH5m1qdm3rI8DcTm2Yqtklg,1902
chardet/jisfreq.py,sha256=ZcL4R5ekHHbP2KCYGakVMBsiKqZZZAABzhwi-uRkOps,47315
chardet/langcyrillicmodel.py,sha256=fkcd5OvogUp-GrNDWAZPgkYsSRCD2omotAEvqjlmLKE,17725
chardet/__init__.py,sha256=8-39Dg2qEuod5DNN7RMdn2ZYOO9zFU3fFfaE80iDWGc,1295
chardet/utf8prober.py,sha256=7tdNZGrJY7jZUBD483GGMkiP0Tx8Fp-cGvWHoAsilHg,2652
chardet/langthaimodel.py,sha256=-k7djh3dGKngAGnt3WfuoJN7acDcWcmHAPojhaUd7q4,11275
chardet/jpcntx.py,sha256=9fJ9oS0BUarcdZNySwmzVRuT03sYdClSmFwXDj3yVNg,19104
chardet/hebrewprober.py,sha256=8pdoUfsVXf_L4BnJde_BewS6H2yInV5688eu0nFhLHY,13359
chardet/sbcsgroupprober.py,sha256=8hLyH8RAG-aohBo7o_KciWVgRo42ZE_zEtuNG1JMRYI,3291
chardet/universaldetector.py,sha256=GkZdwNyNfbFWC8I1uqnzyhOUF7favWCqCOKqdQlx6gQ,6831
chardet/escprober.py,sha256=q5TcQKeVq31WxrW7Sv8yjpZkjEoaHO8S92EJZ9hodys,3187
chardet/euctwprober.py,sha256=upS2P6GuT5ujOxXYw-RJLcT7A4PTuo27KGUKU4UZpIQ,1676
chardet/mbcsgroupprober.py,sha256=SHRzNPLpDXfMJLA8phCHVU0WgqbgDCNxDQMolGX_7yk,1967
chardet/gb2312freq.py,sha256=M2gFdo_qQ_BslStEchrPW5CrPEZEacC0uyDLw4ok-kY,36011
chardet/sjisprober.py,sha256=1RjpQ2LU2gvoEB_4O839xDQVchWx2fG_C7_vXh52P5I,3734
chardet/eucjpprober.py,sha256=5IpfSEjAb7h3hcGMd6dkU80O900C2N6xku28rdYFKuc,3678
chardet/langgreekmodel.py,sha256=QHMy31CH_ot67UCtmurCEKqKx2WwoaKrw2YCYYBK2Lw,12628
chardet/langbulgarianmodel.py,sha256=ZyPsA796MSVhYdfWhMCgKWckupAKAnKqWcE3Cl3ej6o,12784
chardet/gb2312prober.py,sha256=VWnjoRa83Y6V6oczMaxyUr0uy48iCnC2nzk9zfEIRHc,1681
chardet/chardistribution.py,sha256=cUARQFr1oTLXeJCDQrDRkUP778AvSMzhSCnG8VLCV58,9226
chardet/langhungarianmodel.py,sha256=SXwuUzh49_cBeMXhshRHdrhlkz0T8_pZWV_pdqBKNFk,12536
chardet/chardetect.py,sha256=8g-dRSA97bSE6M25Tqe1roKKtl3XHSMnqi6vTzpHNV0,1141
chardet/constants.py,sha256=-UnY8U7EP7z9fTyd09yq35BEkSFEAUAiv9ohd1DW1s4,1335
chardet/codingstatemachine.py,sha256=E85rYhHVMw9xDEJVgiQhp0OnLGr6i2r8_7QOWMKTH08,2318
chardet/latin1prober.py,sha256=g67gqZ2z89LUOlR7BZEAh4-p5a1yGWss9nWy8FCNm8Q,5241
chardet/sbcharsetprober.py,sha256=Xq0lODqJnDgxglBiQI4BqTFiPbn63-0a5XNA5-hVu7U,4793
chardet/compat.py,sha256=5mm6yrHwef1JEG5OxkPJlSq5lkjLVpEGh3iPgFBkpkM,1157
chardet/euckrprober.py,sha256=Wo7dnZ5Erw_nB4H-m5alMiOxOuJUmGHlwCSaGqExDZA,1675
chardet/big5prober.py,sha256=XX96C--6WKYW36mL-z7pJSAtc169Z8ZImByCP4pEN9A,1684
chardet/euckrfreq.py,sha256=T5saK5mImySG5ygQPtsp6o2uKulouCwYm2ElOyFkJqU,45978
chardet/mbcharsetprober.py,sha256=9rOCjDVsmSMp6e7q2syqak22j7lrbUZhJhMee2gbVL0,3268
chardet/escsm.py,sha256=7iljEKN8lXTh8JFXPUSwlibMno6R6ksq4evLxbkzfro,7839
chardet/big5freq.py,sha256=D8oTdz-GM7Jg8TsaWJDm65vM_OLHC3xub6qUJ3rOgsQ,82594
chardet-2.2.1.data/scripts/chardetect,sha256=snDx6K00XbSe_vd7iEHs65beBQMoyABTYGKS4YyykZA,298
chardet-2.2.1.dist-info/pydist.json,sha256=K3LzbyqsBHBv-HwWY25sFHBPy1kxdszEi_gd2AFD_Kg,926
chardet-2.2.1.dist-info/WHEEL,sha256=SXYYsi-y-rEGIva8sB8iKF6bAFD6YDhmqHX5hI3fc0o,110
chardet-2.2.1.dist-info/RECORD,,
chardet-2.2.1.dist-info/top_level.txt,sha256=AowzBbZy4x8EirABDdJSLJZMkJ_53iIag8xfKR6D7kI,8
chardet-2.2.1.dist-info/DESCRIPTION.rst,sha256=m1CcXHsjUJRXdWB4svHusBa6otO4GdUW6LgirEk4V2k,1344
chardet-2.2.1.dist-info/entry_points.txt,sha256=2T00JXwbiQBZQFSKyCFxud4LEQ3_8TKuOwUsSXT-kUI,56
chardet-2.2.1.dist-info/METADATA,sha256=Pzpbxhm72oav1pTeA7pAjXPWGZ_gmYRm9bwvXM8umaw,2013
/srv/openmedialibrary/platform/Shared/home/.local/bin/chardetect,sha256=zPsthwHzIOlO2Mxw0wdp5F7cfd7xSyEpiv11jcEgaEE,220
chardet/__pycache__/langhebrewmodel.cpython-34.pyc,,
chardet/__pycache__/mbcssm.cpython-34.pyc,,
chardet/__pycache__/euckrprober.cpython-34.pyc,,
chardet/__pycache__/chardetect.cpython-34.pyc,,
chardet/__pycache__/cp949prober.cpython-34.pyc,,
chardet/__pycache__/gb2312freq.cpython-34.pyc,,
chardet/__pycache__/universaldetector.cpython-34.pyc,,
chardet/__pycache__/charsetprober.cpython-34.pyc,,
chardet/__pycache__/compat.cpython-34.pyc,,
chardet/__pycache__/__init__.cpython-34.pyc,,
chardet/__pycache__/escprober.cpython-34.pyc,,
chardet/__pycache__/euctwfreq.cpython-34.pyc,,
chardet/__pycache__/langgreekmodel.cpython-34.pyc,,
chardet/__pycache__/codingstatemachine.cpython-34.pyc,,
chardet/__pycache__/hebrewprober.cpython-34.pyc,,
chardet/__pycache__/escsm.cpython-34.pyc,,
chardet/__pycache__/langcyrillicmodel.cpython-34.pyc,,
chardet/__pycache__/euctwprober.cpython-34.pyc,,
chardet/__pycache__/charsetgroupprober.cpython-34.pyc,,
chardet/__pycache__/constants.cpython-34.pyc,,
chardet/__pycache__/chardistribution.cpython-34.pyc,,
chardet/__pycache__/langthaimodel.cpython-34.pyc,,
chardet/__pycache__/utf8prober.cpython-34.pyc,,
chardet/__pycache__/sbcsgroupprober.cpython-34.pyc,,
chardet/__pycache__/big5prober.cpython-34.pyc,,
chardet/__pycache__/langhungarianmodel.cpython-34.pyc,,
chardet/__pycache__/mbcsgroupprober.cpython-34.pyc,,
chardet/__pycache__/big5freq.cpython-34.pyc,,
chardet/__pycache__/sjisprober.cpython-34.pyc,,
chardet/__pycache__/gb2312prober.cpython-34.pyc,,
chardet/__pycache__/langbulgarianmodel.cpython-34.pyc,,
chardet/__pycache__/sbcharsetprober.cpython-34.pyc,,
chardet/__pycache__/jpcntx.cpython-34.pyc,,
chardet/__pycache__/latin1prober.cpython-34.pyc,,
chardet/__pycache__/mbcharsetprober.cpython-34.pyc,,
chardet/__pycache__/euckrfreq.cpython-34.pyc,,
chardet/__pycache__/eucjpprober.cpython-34.pyc,,
chardet/__pycache__/jisfreq.cpython-34.pyc,,


@@ -1 +0,0 @@
{"license": "LGPL", "exports": {"console_scripts": {"chardetect": "chardet.chardetect:main"}}, "document_names": {"description": "DESCRIPTION.rst"}, "name": "chardet", "metadata_version": "2.0", "contacts": [{"role": "author", "email": "graffatcolmingov@gmail.com", "name": "Ian Cordasco"}], "generator": "bdist_wheel (0.22.0)", "commands": {"wrap_console": {"chardetect": "chardet.chardetect:main"}}, "summary": "Universal encoding detector for Python 2 and 3", "project_urls": {"Home": "https://github.com/erikrose/chardet"}, "version": "2.2.1", "keywords": "encoding,i18n,xml", "classifiers": ["Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)", "Operating System :: OS Independent", "Programming Language :: Python", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing :: Linguistic"]}


@@ -0,0 +1,70 @@
Metadata-Version: 1.1
Name: chardet
Version: 2.3.0
Summary: Universal encoding detector for Python 2 and 3
Home-page: https://github.com/chardet/chardet
Author: Ian Cordasco
Author-email: graffatcolmingov@gmail.com
License: LGPL
Description: Chardet: The Universal Character Encoding Detector
--------------------------------------------------
Detects
- ASCII, UTF-8, UTF-16 (2 variants), UTF-32 (4 variants)
- Big5, GB2312, EUC-TW, HZ-GB-2312, ISO-2022-CN (Traditional and Simplified Chinese)
- EUC-JP, SHIFT_JIS, CP932, ISO-2022-JP (Japanese)
- EUC-KR, ISO-2022-KR (Korean)
- KOI8-R, MacCyrillic, IBM855, IBM866, ISO-8859-5, windows-1251 (Cyrillic)
- ISO-8859-2, windows-1250 (Hungarian)
- ISO-8859-5, windows-1251 (Bulgarian)
- windows-1252 (English)
- ISO-8859-7, windows-1253 (Greek)
- ISO-8859-8, windows-1255 (Visual and Logical Hebrew)
- TIS-620 (Thai)
Requires Python 2.6 or later
Installation
------------
Install from `PyPI <https://pypi.python.org/pypi/chardet>`_::
pip install chardet
Command-line Tool
-----------------
chardet comes with a command-line script which reports on the encodings of one
or more files::
% chardetect somefile someotherfile
somefile: windows-1252 with confidence 0.5
someotherfile: ascii with confidence 1.0
About
-----
This is a continuation of Mark Pilgrim's excellent chardet. Previously, two
versions needed to be maintained: one that supported python 2.x and one that
supported python 3.x. We've recently merged with `Ian Cordasco <https://github.com/sigmavirus24>`_'s
`charade <https://github.com/sigmavirus24/charade>`_ fork, so now we have one
coherent version that works for Python 2.6+.
:maintainer: Dan Blanchard
Keywords: encoding,i18n,xml
Platform: UNKNOWN
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.6
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.2
Classifier: Programming Language :: Python :: 3.3
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Text Processing :: Linguistic


@@ -0,0 +1,48 @@
LICENSE
MANIFEST.in
README.rst
setup.cfg
setup.py
chardet/__init__.py
chardet/big5freq.py
chardet/big5prober.py
chardet/chardetect.py
chardet/chardistribution.py
chardet/charsetgroupprober.py
chardet/charsetprober.py
chardet/codingstatemachine.py
chardet/compat.py
chardet/constants.py
chardet/cp949prober.py
chardet/escprober.py
chardet/escsm.py
chardet/eucjpprober.py
chardet/euckrfreq.py
chardet/euckrprober.py
chardet/euctwfreq.py
chardet/euctwprober.py
chardet/gb2312freq.py
chardet/gb2312prober.py
chardet/hebrewprober.py
chardet/jisfreq.py
chardet/jpcntx.py
chardet/langbulgarianmodel.py
chardet/langcyrillicmodel.py
chardet/langgreekmodel.py
chardet/langhebrewmodel.py
chardet/langhungarianmodel.py
chardet/langthaimodel.py
chardet/latin1prober.py
chardet/mbcharsetprober.py
chardet/mbcsgroupprober.py
chardet/mbcssm.py
chardet/sbcharsetprober.py
chardet/sbcsgroupprober.py
chardet/sjisprober.py
chardet/universaldetector.py
chardet/utf8prober.py
chardet.egg-info/PKG-INFO
chardet.egg-info/SOURCES.txt
chardet.egg-info/dependency_links.txt
chardet.egg-info/entry_points.txt
chardet.egg-info/top_level.txt


@@ -0,0 +1,83 @@
../chardet/__init__.py
../chardet/big5freq.py
../chardet/big5prober.py
../chardet/chardetect.py
../chardet/chardistribution.py
../chardet/charsetgroupprober.py
../chardet/charsetprober.py
../chardet/codingstatemachine.py
../chardet/compat.py
../chardet/constants.py
../chardet/cp949prober.py
../chardet/escprober.py
../chardet/escsm.py
../chardet/eucjpprober.py
../chardet/euckrfreq.py
../chardet/euckrprober.py
../chardet/euctwfreq.py
../chardet/euctwprober.py
../chardet/gb2312freq.py
../chardet/gb2312prober.py
../chardet/hebrewprober.py
../chardet/jisfreq.py
../chardet/jpcntx.py
../chardet/langbulgarianmodel.py
../chardet/langcyrillicmodel.py
../chardet/langgreekmodel.py
../chardet/langhebrewmodel.py
../chardet/langhungarianmodel.py
../chardet/langthaimodel.py
../chardet/latin1prober.py
../chardet/mbcharsetprober.py
../chardet/mbcsgroupprober.py
../chardet/mbcssm.py
../chardet/sbcharsetprober.py
../chardet/sbcsgroupprober.py
../chardet/sjisprober.py
../chardet/universaldetector.py
../chardet/utf8prober.py
../chardet/__pycache__/__init__.cpython-34.pyc
../chardet/__pycache__/big5freq.cpython-34.pyc
../chardet/__pycache__/big5prober.cpython-34.pyc
../chardet/__pycache__/chardetect.cpython-34.pyc
../chardet/__pycache__/chardistribution.cpython-34.pyc
../chardet/__pycache__/charsetgroupprober.cpython-34.pyc
../chardet/__pycache__/charsetprober.cpython-34.pyc
../chardet/__pycache__/codingstatemachine.cpython-34.pyc
../chardet/__pycache__/compat.cpython-34.pyc
../chardet/__pycache__/constants.cpython-34.pyc
../chardet/__pycache__/cp949prober.cpython-34.pyc
../chardet/__pycache__/escprober.cpython-34.pyc
../chardet/__pycache__/escsm.cpython-34.pyc
../chardet/__pycache__/eucjpprober.cpython-34.pyc
../chardet/__pycache__/euckrfreq.cpython-34.pyc
../chardet/__pycache__/euckrprober.cpython-34.pyc
../chardet/__pycache__/euctwfreq.cpython-34.pyc
../chardet/__pycache__/euctwprober.cpython-34.pyc
../chardet/__pycache__/gb2312freq.cpython-34.pyc
../chardet/__pycache__/gb2312prober.cpython-34.pyc
../chardet/__pycache__/hebrewprober.cpython-34.pyc
../chardet/__pycache__/jisfreq.cpython-34.pyc
../chardet/__pycache__/jpcntx.cpython-34.pyc
../chardet/__pycache__/langbulgarianmodel.cpython-34.pyc
../chardet/__pycache__/langcyrillicmodel.cpython-34.pyc
../chardet/__pycache__/langgreekmodel.cpython-34.pyc
../chardet/__pycache__/langhebrewmodel.cpython-34.pyc
../chardet/__pycache__/langhungarianmodel.cpython-34.pyc
../chardet/__pycache__/langthaimodel.cpython-34.pyc
../chardet/__pycache__/latin1prober.cpython-34.pyc
../chardet/__pycache__/mbcharsetprober.cpython-34.pyc
../chardet/__pycache__/mbcsgroupprober.cpython-34.pyc
../chardet/__pycache__/mbcssm.cpython-34.pyc
../chardet/__pycache__/sbcharsetprober.cpython-34.pyc
../chardet/__pycache__/sbcsgroupprober.cpython-34.pyc
../chardet/__pycache__/sjisprober.cpython-34.pyc
../chardet/__pycache__/universaldetector.cpython-34.pyc
../chardet/__pycache__/utf8prober.cpython-34.pyc
./
dependency_links.txt
entry_points.txt
PKG-INFO
SOURCES.txt
top_level.txt
../../../../bin/chardetect


@@ -15,7 +15,7 @@
 # 02110-1301 USA
 ######################### END LICENSE BLOCK #########################
 
-__version__ = "2.2.1"
+__version__ = "2.3.0"
 from sys import version_info


@@ -12,34 +12,68 @@ Example::
 If no paths are provided, it takes its input from stdin.
 
 """
-from io import open
-from sys import argv, stdin
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import argparse
+import sys
+from io import open
+
+from chardet import __version__
 from chardet.universaldetector import UniversalDetector
 
 
-def description_of(file, name='stdin'):
-    """Return a string describing the probable encoding of a file."""
+def description_of(lines, name='stdin'):
+    """
+    Return a string describing the probable encoding of a file or
+    list of strings.
+
+    :param lines: The lines to get the encoding of.
+    :type lines: Iterable of bytes
+    :param name: Name of file or collection of lines
+    :type name: str
+    """
     u = UniversalDetector()
-    for line in file:
+    for line in lines:
         u.feed(line)
     u.close()
     result = u.result
     if result['encoding']:
-        return '%s: %s with confidence %s' % (name,
-                                              result['encoding'],
-                                              result['confidence'])
+        return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
+                                                     result['confidence'])
     else:
-        return '%s: no result' % name
+        return '{0}: no result'.format(name)
 
 
-def main():
-    if len(argv) <= 1:
-        print(description_of(stdin))
-    else:
-        for path in argv[1:]:
-            with open(path, 'rb') as f:
-                print(description_of(f, path))
+def main(argv=None):
+    '''
+    Handles command line arguments and gets things started.
+
+    :param argv: List of arguments, as if specified on the command-line.
+                 If None, ``sys.argv[1:]`` is used instead.
+    :type argv: list of str
+    '''
+    # Get command line arguments
+    parser = argparse.ArgumentParser(
+        description="Takes one or more file paths and reports their detected \
+encodings",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        conflict_handler='resolve')
+    parser.add_argument('input',
+                        help='File whose encoding we would like to determine.',
+                        type=argparse.FileType('rb'), nargs='*',
+                        default=[sys.stdin])
+    parser.add_argument('--version', action='version',
+                        version='%(prog)s {0}'.format(__version__))
+    args = parser.parse_args(argv)
+
+    for f in args.input:
+        if f.isatty():
+            print("You are running chardetect interactively. Press " +
+                  "CTRL-D twice at the start of a blank line to signal the " +
+                  "end of your input. If you want help, run chardetect " +
+                  "--help\n", file=sys.stderr)
+        print(description_of(f, f.name))
 
 
 if __name__ == '__main__':
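
Note: description_of() now takes any iterable of bytes, not just an open
file. A minimal usage sketch (the file name is hypothetical)::

    from chardet.chardetect import description_of

    with open('somefile.xml', 'rb') as f:   # hypothetical input file
        print(description_of(f, name='somefile.xml'))
    # e.g. "somefile.xml: utf-8 with confidence 0.99"

    # an in-memory list of byte strings works too:
    print(description_of([b'hello world'], name='<memory>'))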


@@ -177,6 +177,12 @@ class JapaneseContextAnalysis:
         return -1, 1
 
 class SJISContextAnalysis(JapaneseContextAnalysis):
+    def __init__(self):
+        self.charset_name = "SHIFT_JIS"
+
+    def get_charset_name(self):
+        return self.charset_name
+
     def get_order(self, aBuf):
         if not aBuf:
             return -1, 1
@@ -184,6 +190,8 @@ class SJISContextAnalysis(JapaneseContextAnalysis):
         first_char = wrap_ord(aBuf[0])
         if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)):
             charLen = 2
+            if (first_char == 0x87) or (0xFA <= first_char <= 0xFC):
+                self.charset_name = "CP932"
         else:
             charLen = 1


@@ -129,11 +129,11 @@ class Latin1Prober(CharSetProber):
         if total < 0.01:
             confidence = 0.0
         else:
-            confidence = ((self._mFreqCounter[3] / total)
-                          - (self._mFreqCounter[1] * 20.0 / total))
+            confidence = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0)
+                          / total)
         if confidence < 0.0:
             confidence = 0.0
         # lower the confidence of latin1 so that other more accurate
         # detector can take priority.
-        confidence = confidence * 0.5
+        confidence = confidence * 0.73
         return confidence
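
Note: the rewritten expression computes the same ratio in a single division;
the behavioural change is the damping factor rising from 0.5 to 0.73, so
Latin-1 competes slightly more strongly against other probers. A standalone
sketch of the new scoring (freq_counter indexes the prober's character-pair
frequency classes)::

    def latin1_confidence(freq_counter, total):
        if total < 0.01:
            return 0.0
        confidence = max((freq_counter[3] - freq_counter[1] * 20.0) / total,
                         0.0)
        # still damped so more specific probers win ties, just less than before
        return confidence * 0.73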


@@ -353,7 +353,7 @@ SJIS_cls = (
     2,2,2,2,2,2,2,2,  # 68 - 6f
     2,2,2,2,2,2,2,2,  # 70 - 77
     2,2,2,2,2,2,2,1,  # 78 - 7f
-    3,3,3,3,3,3,3,3,  # 80 - 87
+    3,3,3,3,3,2,2,3,  # 80 - 87
     3,3,3,3,3,3,3,3,  # 88 - 8f
     3,3,3,3,3,3,3,3,  # 90 - 97
     3,3,3,3,3,3,3,3,  # 98 - 9f
@@ -369,9 +369,8 @@ SJIS_cls = (
     2,2,2,2,2,2,2,2,  # d8 - df
     3,3,3,3,3,3,3,3,  # e0 - e7
     3,3,3,3,3,4,4,4,  # e8 - ef
-    4,4,4,4,4,4,4,4,  # f0 - f7
-    4,4,4,4,4,0,0,0  # f8 - ff
-)
+    3,3,3,3,3,3,3,3,  # f0 - f7
+    3,3,3,3,3,0,0,0)  # f8 - ff
 
 SJIS_st = (
@@ -571,5 +570,3 @@ UTF8SMModel = {'classTable': UTF8_cls,
                'stateTable': UTF8_st,
                'charLenTable': UTF8CharLenTable,
                'name': 'UTF-8'}
-
-# flake8: noqa


@@ -47,7 +47,7 @@ class SJISProber(MultiByteCharSetProber):
         self._mContextAnalyzer.reset()
 
     def get_charset_name(self):
-        return "SHIFT_JIS"
+        return self._mContextAnalyzer.get_charset_name()
 
     def feed(self, aBuf):
         aLen = len(aBuf)
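
Note: together with the jpcntx.py change above, the prober's reported name
is now derived from the context analyzer, which upgrades it from SHIFT_JIS
to CP932 once it sees lead bytes that only CP932 defines (0x87, 0xFA-0xFC).
A minimal sketch::

    from chardet.sjisprober import SJISProber

    prober = SJISProber()
    print(prober.get_charset_name())  # "SHIFT_JIS" until CP932-only bytes
    # feeding data that uses the 0x87 or 0xFA-0xFC lead bytes makes the
    # context analyzer flip this to "CP932"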


@@ -71,9 +71,9 @@ class UniversalDetector:
         if not self._mGotData:
             # If the data starts with BOM, we know it is UTF
-            if aBuf[:3] == codecs.BOM:
+            if aBuf[:3] == codecs.BOM_UTF8:
                 # EF BB BF  UTF-8 with BOM
-                self.result = {'encoding': "UTF-8", 'confidence': 1.0}
+                self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0}
             elif aBuf[:4] == codecs.BOM_UTF32_LE:
                 # FF FE 00 00  UTF-32, little-endian BOM
                 self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}


@@ -1,7 +1,7 @@
 ./
 dependency_links.txt
 PKG-INFO
-SOURCES.txt
-zip-safe
-top_level.txt
 requires.txt
+SOURCES.txt
+top_level.txt
+zip-safe


@@ -1,12 +1,12 @@
 Metadata-Version: 1.1
 Name: feedparser
-Version: 5.1.3
+Version: 5.2.1
 Summary: Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds
-Home-page: http://code.google.com/p/feedparser/
+Home-page: https://github.com/kurtmckee/feedparser
 Author: Kurt McKee
 Author-email: contactme@kurtmckee.org
 License: UNKNOWN
-Download-URL: http://code.google.com/p/feedparser/
+Download-URL: https://pypi.python.org/pypi/feedparser
 Description: UNKNOWN
 Keywords: atom,cdf,feed,parser,rdf,rss
 Platform: POSIX
@@ -26,5 +26,6 @@ Classifier: Programming Language :: Python :: 3.0
 Classifier: Programming Language :: Python :: 3.1
 Classifier: Programming Language :: Python :: 3.2
 Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Topic :: Text Processing :: Markup :: XML

View file

@@ -1,7 +1,7 @@
 LICENSE
 MANIFEST.in
 NEWS
-README
+README.rst
 setup.cfg
 setup.py
 docs/add_custom_css.py
@@ -46,7 +46,6 @@ docs/http.rst
 docs/index.rst
 docs/introduction.rst
 docs/license.rst
-docs/microformats.rst
 docs/namespace-handling.rst
 docs/reference-bozo.rst
 docs/reference-bozo_exception.rst
@@ -77,8 +76,6 @@ docs/reference-entry-title.rst
 docs/reference-entry-title_detail.rst
 docs/reference-entry-updated.rst
 docs/reference-entry-updated_parsed.rst
-docs/reference-entry-vcard.rst
-docs/reference-entry-xfn.rst
 docs/reference-entry.rst
 docs/reference-etag.rst
 docs/reference-feed-author.rst
@@ -623,15 +620,14 @@ feedparser/tests/http/http_status_303.xml
 feedparser/tests/http/http_status_304.xml
 feedparser/tests/http/http_status_307.xml
 feedparser/tests/http/http_status_404.xml
-feedparser/tests/http/http_status_9001.xml
 feedparser/tests/http/target.xml
 feedparser/tests/illformed/aaa_illformed.xml
 feedparser/tests/illformed/always_strip_doctype.xml
 feedparser/tests/illformed/http_high_bit_date.xml
-feedparser/tests/illformed/non-ascii-tag.xml
 feedparser/tests/illformed/rdf_channel_empty_textinput.xml
 feedparser/tests/illformed/rss_empty_document.xml
 feedparser/tests/illformed/rss_incomplete_cdata.xml
-feedparser/tests/illformed/rss_item_link_corrupted_ampersands.xml
 feedparser/tests/illformed/undeclared_namespace.xml
 feedparser/tests/illformed/chardet/big5.xml
 feedparser/tests/illformed/chardet/eucjp.xml
@@ -641,131 +637,8 @@ feedparser/tests/illformed/chardet/koi8r.xml
 feedparser/tests/illformed/chardet/shiftjis.xml
 feedparser/tests/illformed/chardet/tis620.xml
 feedparser/tests/illformed/chardet/windows1255.xml
-feedparser/tests/microformats/hcard/2-4-2-vcard.xml
+feedparser/tests/illformed/geo/georss_point_no_coords.xml
-feedparser/tests/microformats/hcard/3-1-1-fn-unicode-char.xml
+feedparser/tests/illformed/geo/georss_polygon_insufficient_coords.xml
-feedparser/tests/microformats/hcard/3-1-1-fn.xml
-feedparser/tests/microformats/hcard/3-1-2-n-2-plural.xml
-feedparser/tests/microformats/hcard/3-1-2-n-2-singular.xml
-feedparser/tests/microformats/hcard/3-1-2-n-plural.xml
-feedparser/tests/microformats/hcard/3-1-2-n-singular.xml
-feedparser/tests/microformats/hcard/3-1-3-nickname-2-plural.xml
-feedparser/tests/microformats/hcard/3-1-3-nickname-2-singular.xml
-feedparser/tests/microformats/hcard/3-1-3-nickname.xml
-feedparser/tests/microformats/hcard/3-1-4-photo-inline.xml
-feedparser/tests/microformats/hcard/3-1-4-photo.xml
-feedparser/tests/microformats/hcard/3-1-5-bday-2.xml
-feedparser/tests/microformats/hcard/3-1-5-bday-3.xml
-feedparser/tests/microformats/hcard/3-1-5-bday.xml
-feedparser/tests/microformats/hcard/3-2-1-adr.xml
-feedparser/tests/microformats/hcard/3-2-2-label.xml
-feedparser/tests/microformats/hcard/3-3-1-tel.xml
-feedparser/tests/microformats/hcard/3-3-2-email-2.xml
-feedparser/tests/microformats/hcard/3-3-2-email-3.xml
-feedparser/tests/microformats/hcard/3-3-2-email.xml
-feedparser/tests/microformats/hcard/3-3-3-mailer.xml
-feedparser/tests/microformats/hcard/3-4-1-tz-2.xml
-feedparser/tests/microformats/hcard/3-4-1-tz.xml
-feedparser/tests/microformats/hcard/3-4-2-geo.xml
-feedparser/tests/microformats/hcard/3-5-1-title.xml
-feedparser/tests/microformats/hcard/3-5-2-role.xml
-feedparser/tests/microformats/hcard/3-5-3-logo-2.xml
-feedparser/tests/microformats/hcard/3-5-3-logo.xml
-feedparser/tests/microformats/hcard/3-5-4-agent-2.xml
-feedparser/tests/microformats/hcard/3-5-4-agent.xml
-feedparser/tests/microformats/hcard/3-5-5-org.xml
-feedparser/tests/microformats/hcard/3-6-1-categories-2-plural.xml
-feedparser/tests/microformats/hcard/3-6-1-categories-2-singular.xml
-feedparser/tests/microformats/hcard/3-6-1-categories.xml
-feedparser/tests/microformats/hcard/3-6-2-note.xml
-feedparser/tests/microformats/hcard/3-6-4-rev-2.xml
-feedparser/tests/microformats/hcard/3-6-4-rev.xml
-feedparser/tests/microformats/hcard/3-6-5-sort-string-2.xml
-feedparser/tests/microformats/hcard/3-6-5-sort-string-3.xml
-feedparser/tests/microformats/hcard/3-6-5-sort-string-4.xml
-feedparser/tests/microformats/hcard/3-6-5-sort-string-5.xml
-feedparser/tests/microformats/hcard/3-6-5-sort-string.xml
-feedparser/tests/microformats/hcard/3-6-6-sound-2.xml
-feedparser/tests/microformats/hcard/3-6-6-sound.xml
-feedparser/tests/microformats/hcard/3-6-7-uid.xml
-feedparser/tests/microformats/hcard/3-6-8-url.xml
-feedparser/tests/microformats/hcard/3-7-1-class-2.xml
-feedparser/tests/microformats/hcard/3-7-1-class-3.xml
-feedparser/tests/microformats/hcard/3-7-1-class.xml
-feedparser/tests/microformats/hcard/3-7-2-key.xml
-feedparser/tests/microformats/hcard/7-authors.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_avi.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_bin.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_bz2.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_deb.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_dmg.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_exe.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_gz.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_hqx.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_img.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_iso.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_jar.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_m4a.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_m4v.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_mp2.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_mp3.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_mp4.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_msi.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_ogg.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_rar.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_rpm.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_sit.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_sitx.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_tar.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_tbz2.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_tgz.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_wma.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_wmv.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_z.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_zip.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_type_application_ogg.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_type_audio.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_type_video.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_invalid.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_no_autodetect.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_no_autodetect_xml.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_title.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_title_from_link_text.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_title_overrides_link_text.xml
-feedparser/tests/microformats/rel_enclosure/rel_enclosure_type.xml
-feedparser/tests/microformats/rel_tag/rel_tag_duplicate.xml
-feedparser/tests/microformats/rel_tag/rel_tag_label.xml
-feedparser/tests/microformats/rel_tag/rel_tag_scheme.xml
-feedparser/tests/microformats/rel_tag/rel_tag_term.xml
-feedparser/tests/microformats/rel_tag/rel_tag_term_trailing_slash.xml
-feedparser/tests/microformats/xfn/xfn_acquaintance.xml
-feedparser/tests/microformats/xfn/xfn_brother.xml
-feedparser/tests/microformats/xfn/xfn_child.xml
-feedparser/tests/microformats/xfn/xfn_co-resident.xml
-feedparser/tests/microformats/xfn/xfn_co-worker.xml
-feedparser/tests/microformats/xfn/xfn_colleague.xml
-feedparser/tests/microformats/xfn/xfn_contact.xml
-feedparser/tests/microformats/xfn/xfn_coresident.xml
-feedparser/tests/microformats/xfn/xfn_coworker.xml
-feedparser/tests/microformats/xfn/xfn_crush.xml
-feedparser/tests/microformats/xfn/xfn_date.xml
-feedparser/tests/microformats/xfn/xfn_friend.xml
-feedparser/tests/microformats/xfn/xfn_href.xml
-feedparser/tests/microformats/xfn/xfn_husband.xml
-feedparser/tests/microformats/xfn/xfn_kin.xml
-feedparser/tests/microformats/xfn/xfn_me.xml
-feedparser/tests/microformats/xfn/xfn_met.xml
-feedparser/tests/microformats/xfn/xfn_multiple.xml
-feedparser/tests/microformats/xfn/xfn_muse.xml
-feedparser/tests/microformats/xfn/xfn_name.xml
-feedparser/tests/microformats/xfn/xfn_neighbor.xml
-feedparser/tests/microformats/xfn/xfn_parent.xml
-feedparser/tests/microformats/xfn/xfn_relative.xml
-feedparser/tests/microformats/xfn/xfn_sibling.xml
-feedparser/tests/microformats/xfn/xfn_sister.xml
-feedparser/tests/microformats/xfn/xfn_spouse.xml
-feedparser/tests/microformats/xfn/xfn_sweetheart.xml
-feedparser/tests/microformats/xfn/xfn_wife.xml
 feedparser/tests/wellformed/amp/amp01.xml
 feedparser/tests/wellformed/amp/amp02.xml
 feedparser/tests/wellformed/amp/amp03.xml
@@ -988,6 +861,7 @@ feedparser/tests/wellformed/atom/feed_title_inline_markup_2.xml
 feedparser/tests/wellformed/atom/feed_title_naked_markup.xml
 feedparser/tests/wellformed/atom/feed_title_text_plain.xml
 feedparser/tests/wellformed/atom/feed_updated_parsed.xml
+feedparser/tests/wellformed/atom/media_group.xml
 feedparser/tests/wellformed/atom/media_player1.xml
 feedparser/tests/wellformed/atom/media_thumbnail.xml
 feedparser/tests/wellformed/atom/relative_uri.xml
@@ -1382,6 +1256,16 @@ feedparser/tests/wellformed/cdf/item_lastmod.xml
 feedparser/tests/wellformed/cdf/item_lastmod_parsed.xml
 feedparser/tests/wellformed/cdf/item_title.xml
 feedparser/tests/wellformed/feedburner/feedburner_browserfriendly.xml
+feedparser/tests/wellformed/geo/georss_box.xml
+feedparser/tests/wellformed/geo/georss_line.xml
+feedparser/tests/wellformed/geo/georss_point.xml
+feedparser/tests/wellformed/geo/georss_polygon.xml
+feedparser/tests/wellformed/geo/gml_linestring.xml
+feedparser/tests/wellformed/geo/gml_linestring_utm.xml
+feedparser/tests/wellformed/geo/gml_point.xml
+feedparser/tests/wellformed/geo/gml_point_3d.xml
+feedparser/tests/wellformed/geo/gml_point_utm.xml
+feedparser/tests/wellformed/geo/gml_polygon.xml
 feedparser/tests/wellformed/http/headers_content_location-relative.xml
 feedparser/tests/wellformed/http/headers_content_location-unsafe.xml
 feedparser/tests/wellformed/http/headers_etag.xml
@@ -1508,8 +1392,6 @@ feedparser/tests/wellformed/lang/item_fullitem_xml_lang.xml
 feedparser/tests/wellformed/lang/item_fullitem_xml_lang_inherit.xml
 feedparser/tests/wellformed/lang/item_xhtml_body_xml_lang.xml
 feedparser/tests/wellformed/lang/item_xhtml_body_xml_lang_inherit.xml
-feedparser/tests/wellformed/mf_hcard/3-5-5-org-unicode.xml
-feedparser/tests/wellformed/mf_rel_tag/rel_tag_term_no_term.xml
 feedparser/tests/wellformed/namespace/atommathml.xml
 feedparser/tests/wellformed/namespace/atomsvg.xml
 feedparser/tests/wellformed/namespace/atomsvgdctitle.xml
@@ -1532,9 +1414,16 @@ feedparser/tests/wellformed/namespace/rss2.0svg5.xml
 feedparser/tests/wellformed/namespace/rss2.0svgtitle.xml
 feedparser/tests/wellformed/namespace/rss2.0withAtomNS.xml
 feedparser/tests/wellformed/namespace/rss2.0xlink.xml
+feedparser/tests/wellformed/namespace/unknown-namespace.xml
 feedparser/tests/wellformed/node_precedence/atom10_arbitrary_element.xml
 feedparser/tests/wellformed/node_precedence/atom10_id.xml
 feedparser/tests/wellformed/node_precedence/atom10_title.xml
+feedparser/tests/wellformed/psc/atomsimplechapter.xml
+feedparser/tests/wellformed/psc/atomsimplechapterexternal.xml
+feedparser/tests/wellformed/psc/ignore_multiple_psc_chapters.xml
+feedparser/tests/wellformed/psc/rss2.0simplechapter.xml
+feedparser/tests/wellformed/psc/rss2.0simplechapter2items.xml
+feedparser/tests/wellformed/psc/version_1.0_keyerror.xml
 feedparser/tests/wellformed/rdf/doctype_contains_entity_decl.xml
 feedparser/tests/wellformed/rdf/rdf_channel_description.xml
 feedparser/tests/wellformed/rdf/rdf_channel_link.xml
@@ -1587,6 +1476,12 @@ feedparser/tests/wellformed/rss/channel_dcterms_issued.xml
 feedparser/tests/wellformed/rss/channel_dcterms_issued_parsed.xml
 feedparser/tests/wellformed/rss/channel_dcterms_modified.xml
 feedparser/tests/wellformed/rss/channel_dcterms_modified_parsed.xml
+feedparser/tests/wellformed/rss/channel_dcterms_validity.xml
+feedparser/tests/wellformed/rss/channel_dcterms_validity_end.xml
+feedparser/tests/wellformed/rss/channel_dcterms_validity_end_parsed.xml
+feedparser/tests/wellformed/rss/channel_dcterms_validity_parsed.xml
+feedparser/tests/wellformed/rss/channel_dcterms_validity_start.xml
+feedparser/tests/wellformed/rss/channel_dcterms_validity_start_parsed.xml
 feedparser/tests/wellformed/rss/channel_description.xml
 feedparser/tests/wellformed/rss/channel_description_escaped_markup.xml
 feedparser/tests/wellformed/rss/channel_description_map_tagline.xml
@@ -1693,6 +1588,7 @@ feedparser/tests/wellformed/rss/item_fullitem_type.xml
 feedparser/tests/wellformed/rss/item_guid.xml
 feedparser/tests/wellformed/rss/item_guid_conflict_link.xml
 feedparser/tests/wellformed/rss/item_guid_guidislink.xml
+feedparser/tests/wellformed/rss/item_guid_isPermaLink_ValueError.xml
 feedparser/tests/wellformed/rss/item_guid_isPermaLink_conflict_link.xml
 feedparser/tests/wellformed/rss/item_guid_isPermaLink_conflict_link_not_guidislink.xml
 feedparser/tests/wellformed/rss/item_guid_isPermaLink_guidislink.xml
@@ -1706,6 +1602,8 @@ feedparser/tests/wellformed/rss/item_guid_not_permalink_not_url.xml
 feedparser/tests/wellformed/rss/item_image_link_bleed.xml
 feedparser/tests/wellformed/rss/item_image_link_conflict.xml
 feedparser/tests/wellformed/rss/item_link.xml
+feedparser/tests/wellformed/rss/item_madeup_tags_element.xml
+feedparser/tests/wellformed/rss/item_multiple_dc_creator.xml
 feedparser/tests/wellformed/rss/item_pubDate.xml
 feedparser/tests/wellformed/rss/item_pubDate_map_updated_parsed.xml
 feedparser/tests/wellformed/rss/item_source.xml
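The new geo/ and psc/ fixtures track feedparser 5.2's GeoRSS/GML and Podlove Simple Chapters support. A hedged sketch of the GeoRSS side (not part of the commit; the "where" key is a recollection of 5.2's mapping and should be treated as an assumption):

import feedparser

rss = """<rss version="2.0" xmlns:georss="http://www.georss.org/georss">
<channel><item>
  <title>quake</title>
  <georss:point>45.256 -71.92</georss:point>
</item></channel></rss>"""

entry = feedparser.parse(rss).entries[0]
print(entry.get("where"))  # assumed: a Point-like dict carrying the coordinates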

File diff suppressed because it is too large

View file

@@ -1,7 +1,7 @@
 Metadata-Version: 1.1
 Name: html5lib
-Version: 0.999
-Summary: HTML parser based on the WHATWG HTML specifcation
+Version: 0.9999999
+Summary: HTML parser based on the WHATWG HTML specification
 Home-page: https://github.com/html5lib/html5lib-python
 Author: James Graham
 Author-email: james@hoppipolla.co.uk
@@ -167,6 +167,61 @@ Description: html5lib
         Change Log
         ----------

+        0.9999999/1.0b8
+        ~~~~~~~~~~~~~~~
+
+        Released on September 10, 2015
+
+        * Fix #195: fix the sanitizer to drop broken URLs (it threw an
+          exception between 0.9999 and 0.999999).
+
+        0.999999/1.0b7
+        ~~~~~~~~~~~~~~
+
+        Released on July 7, 2015
+
+        * Fix #189: fix the sanitizer to allow relative URLs again (as it did
+          prior to 0.9999/1.0b5).
+
+        0.99999/1.0b6
+        ~~~~~~~~~~~~~
+
+        Released on April 30, 2015
+
+        * Fix #188: fix the sanitizer to not throw an exception when sanitizing
+          bogus data URLs.
+
+        0.9999/1.0b5
+        ~~~~~~~~~~~~
+
+        Released on April 29, 2015
+
+        * Fix #153: Sanitizer fails to treat some attributes as URLs. Despite how
+          this sounds, this has no known security implications. No known version
+          of IE (5.5 to current), Firefox (3 to current), Safari (6 to current),
+          Chrome (1 to current), or Opera (12 to current) will run any script
+          provided in these attributes.
+
+        * Pass error message to the ParseError exception in strict parsing mode.
+
+        * Allow data URIs in the sanitizer, with a whitelist of content-types.
+
+        * Add support for Python implementations that don't support lone
+          surrogates (read: Jython). Fixes #2.
+
+        * Remove localization of error messages. This functionality was totally
+          unused (and untested that everything was localizable), so we may as
+          well follow numerous browsers in not supporting translating technical
+          strings.
+
+        * Expose treewalkers.pprint as a public API.
+
+        * Add a documentEncoding property to HTML5Parser, fix #121.
+
         0.999
         ~~~~~
@@ -340,5 +395,6 @@ Classifier: Programming Language :: Python :: 2.7
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.2
 Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Topic :: Text Processing :: Markup :: HTML
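All four new releases touch the sanitizer's URL handling (broken, relative, and data: URLs). In html5lib of this vintage the sanitizer is wired in as a tokenizer class; a minimal sketch, assuming the 0.9999999 API:

import html5lib
from html5lib.sanitizer import HTMLSanitizer

parser = html5lib.HTMLParser(tokenizer=HTMLSanitizer)
# Relative hrefs are allowed again (#189); disallowed tags like <script>
# are escaped into text rather than parsed as markup.
fragment = parser.parseFragment('<a href="/docs">ok</a><script>alert(1)</script>')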

View file

@@ -1,78 +1,78 @@
-../html5lib/utils.py
-../html5lib/ihatexml.py
 ../html5lib/__init__.py
-../html5lib/tokenizer.py
-../html5lib/html5parser.py
-../html5lib/sanitizer.py
-../html5lib/inputstream.py
 ../html5lib/constants.py
-../html5lib/serializer/__init__.py
+../html5lib/html5parser.py
-../html5lib/serializer/htmlserializer.py
+../html5lib/ihatexml.py
-../html5lib/treebuilders/_base.py
+../html5lib/inputstream.py
-../html5lib/treebuilders/__init__.py
+../html5lib/sanitizer.py
-../html5lib/treebuilders/etree_lxml.py
+../html5lib/tokenizer.py
-../html5lib/treebuilders/dom.py
+../html5lib/utils.py
-../html5lib/treebuilders/etree.py
-../html5lib/filters/whitespace.py
-../html5lib/filters/_base.py
 ../html5lib/filters/__init__.py
-../html5lib/filters/sanitizer.py
+../html5lib/filters/_base.py
+../html5lib/filters/alphabeticalattributes.py
+../html5lib/filters/inject_meta_charset.py
 ../html5lib/filters/lint.py
 ../html5lib/filters/optionaltags.py
-../html5lib/filters/inject_meta_charset.py
+../html5lib/filters/sanitizer.py
-../html5lib/filters/alphabeticalattributes.py
+../html5lib/filters/whitespace.py
-../html5lib/treewalkers/pulldom.py
+../html5lib/serializer/__init__.py
-../html5lib/treewalkers/_base.py
+../html5lib/serializer/htmlserializer.py
-../html5lib/treewalkers/genshistream.py
+../html5lib/treeadapters/__init__.py
+../html5lib/treeadapters/sax.py
+../html5lib/treebuilders/__init__.py
+../html5lib/treebuilders/_base.py
+../html5lib/treebuilders/dom.py
+../html5lib/treebuilders/etree.py
+../html5lib/treebuilders/etree_lxml.py
 ../html5lib/treewalkers/__init__.py
+../html5lib/treewalkers/_base.py
 ../html5lib/treewalkers/dom.py
 ../html5lib/treewalkers/etree.py
+../html5lib/treewalkers/genshistream.py
 ../html5lib/treewalkers/lxmletree.py
-../html5lib/trie/datrie.py
+../html5lib/treewalkers/pulldom.py
-../html5lib/trie/_base.py
 ../html5lib/trie/__init__.py
+../html5lib/trie/_base.py
+../html5lib/trie/datrie.py
 ../html5lib/trie/py.py
-../html5lib/treeadapters/sax.py
-../html5lib/treeadapters/__init__.py
-../html5lib/__pycache__/utils.cpython-34.pyc
-../html5lib/__pycache__/ihatexml.cpython-34.pyc
 ../html5lib/__pycache__/__init__.cpython-34.pyc
-../html5lib/__pycache__/tokenizer.cpython-34.pyc
-../html5lib/__pycache__/html5parser.cpython-34.pyc
-../html5lib/__pycache__/sanitizer.cpython-34.pyc
-../html5lib/__pycache__/inputstream.cpython-34.pyc
 ../html5lib/__pycache__/constants.cpython-34.pyc
-../html5lib/serializer/__pycache__/__init__.cpython-34.pyc
+../html5lib/__pycache__/html5parser.cpython-34.pyc
-../html5lib/serializer/__pycache__/htmlserializer.cpython-34.pyc
+../html5lib/__pycache__/ihatexml.cpython-34.pyc
-../html5lib/treebuilders/__pycache__/_base.cpython-34.pyc
+../html5lib/__pycache__/inputstream.cpython-34.pyc
-../html5lib/treebuilders/__pycache__/__init__.cpython-34.pyc
+../html5lib/__pycache__/sanitizer.cpython-34.pyc
-../html5lib/treebuilders/__pycache__/etree_lxml.cpython-34.pyc
+../html5lib/__pycache__/tokenizer.cpython-34.pyc
-../html5lib/treebuilders/__pycache__/dom.cpython-34.pyc
+../html5lib/__pycache__/utils.cpython-34.pyc
-../html5lib/treebuilders/__pycache__/etree.cpython-34.pyc
-../html5lib/filters/__pycache__/whitespace.cpython-34.pyc
-../html5lib/filters/__pycache__/_base.cpython-34.pyc
 ../html5lib/filters/__pycache__/__init__.cpython-34.pyc
-../html5lib/filters/__pycache__/sanitizer.cpython-34.pyc
+../html5lib/filters/__pycache__/_base.cpython-34.pyc
+../html5lib/filters/__pycache__/alphabeticalattributes.cpython-34.pyc
+../html5lib/filters/__pycache__/inject_meta_charset.cpython-34.pyc
 ../html5lib/filters/__pycache__/lint.cpython-34.pyc
 ../html5lib/filters/__pycache__/optionaltags.cpython-34.pyc
-../html5lib/filters/__pycache__/inject_meta_charset.cpython-34.pyc
+../html5lib/filters/__pycache__/sanitizer.cpython-34.pyc
-../html5lib/filters/__pycache__/alphabeticalattributes.cpython-34.pyc
+../html5lib/filters/__pycache__/whitespace.cpython-34.pyc
-../html5lib/treewalkers/__pycache__/pulldom.cpython-34.pyc
+../html5lib/serializer/__pycache__/__init__.cpython-34.pyc
-../html5lib/treewalkers/__pycache__/_base.cpython-34.pyc
+../html5lib/serializer/__pycache__/htmlserializer.cpython-34.pyc
-../html5lib/treewalkers/__pycache__/genshistream.cpython-34.pyc
+../html5lib/treeadapters/__pycache__/__init__.cpython-34.pyc
+../html5lib/treeadapters/__pycache__/sax.cpython-34.pyc
+../html5lib/treebuilders/__pycache__/__init__.cpython-34.pyc
+../html5lib/treebuilders/__pycache__/_base.cpython-34.pyc
+../html5lib/treebuilders/__pycache__/dom.cpython-34.pyc
+../html5lib/treebuilders/__pycache__/etree.cpython-34.pyc
+../html5lib/treebuilders/__pycache__/etree_lxml.cpython-34.pyc
 ../html5lib/treewalkers/__pycache__/__init__.cpython-34.pyc
+../html5lib/treewalkers/__pycache__/_base.cpython-34.pyc
 ../html5lib/treewalkers/__pycache__/dom.cpython-34.pyc
 ../html5lib/treewalkers/__pycache__/etree.cpython-34.pyc
+../html5lib/treewalkers/__pycache__/genshistream.cpython-34.pyc
 ../html5lib/treewalkers/__pycache__/lxmletree.cpython-34.pyc
-../html5lib/trie/__pycache__/datrie.cpython-34.pyc
+../html5lib/treewalkers/__pycache__/pulldom.cpython-34.pyc
-../html5lib/trie/__pycache__/_base.cpython-34.pyc
 ../html5lib/trie/__pycache__/__init__.cpython-34.pyc
+../html5lib/trie/__pycache__/_base.cpython-34.pyc
+../html5lib/trie/__pycache__/datrie.cpython-34.pyc
 ../html5lib/trie/__pycache__/py.cpython-34.pyc
-../html5lib/treeadapters/__pycache__/sax.cpython-34.pyc
-../html5lib/treeadapters/__pycache__/__init__.cpython-34.pyc
 ./
-dependency_links.txt
-PKG-INFO
-SOURCES.txt
 top_level.txt
+PKG-INFO
 requires.txt
+dependency_links.txt
+SOURCES.txt

View file

@@ -20,4 +20,6 @@ from .serializer import serialize
 __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
            "getTreeWalker", "serialize"]

-__version__ = "0.999"
+
+# this has to be at the top level, see how setup.py parses this
+__version__ = "0.9999999"
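The new comment refers to setup.py reading this file as text instead of importing it (an import could fail at install time, before dependencies exist). A hypothetical sketch of that kind of parse; the real setup.py may differ:

import ast

# Pull __version__ out of the source without importing html5lib.
with open("html5lib/__init__.py") as f:
    for line in f:
        if line.startswith("__version__"):
            version = ast.literal_eval(line.split("=", 1)[1].strip())
            break
print(version)  # '0.9999999'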

View file

@@ -1,292 +1,290 @@
 from __future__ import absolute_import, division, unicode_literals

 import string
-import gettext
-_ = gettext.gettext

 EOF = None

 E = {
     "null-character":
-        _("Null character in input stream, replaced with U+FFFD."),
+        "Null character in input stream, replaced with U+FFFD.",
     "invalid-codepoint":
-        _("Invalid codepoint in stream."),
+        "Invalid codepoint in stream.",
     "incorrectly-placed-solidus":
-        _("Solidus (/) incorrectly placed in tag."),
+        "Solidus (/) incorrectly placed in tag.",
     "incorrect-cr-newline-entity":
-        _("Incorrect CR newline entity, replaced with LF."),
+        "Incorrect CR newline entity, replaced with LF.",
     "illegal-windows-1252-entity":
-        _("Entity used with illegal number (windows-1252 reference)."),
+        "Entity used with illegal number (windows-1252 reference).",
     "cant-convert-numeric-entity":
-        _("Numeric entity couldn't be converted to character "
-          "(codepoint U+%(charAsInt)08x)."),
+        "Numeric entity couldn't be converted to character "
+        "(codepoint U+%(charAsInt)08x).",
     "illegal-codepoint-for-numeric-entity":
-        _("Numeric entity represents an illegal codepoint: "
-          "U+%(charAsInt)08x."),
+        "Numeric entity represents an illegal codepoint: "
+        "U+%(charAsInt)08x.",
     "numeric-entity-without-semicolon":
-        _("Numeric entity didn't end with ';'."),
+        "Numeric entity didn't end with ';'.",
     "expected-numeric-entity-but-got-eof":
-        _("Numeric entity expected. Got end of file instead."),
+        "Numeric entity expected. Got end of file instead.",
     "expected-numeric-entity":
-        _("Numeric entity expected but none found."),
+        "Numeric entity expected but none found.",
     "named-entity-without-semicolon":
-        _("Named entity didn't end with ';'."),
+        "Named entity didn't end with ';'.",
     "expected-named-entity":
-        _("Named entity expected. Got none."),
+        "Named entity expected. Got none.",
     "attributes-in-end-tag":
-        _("End tag contains unexpected attributes."),
+        "End tag contains unexpected attributes.",
     'self-closing-flag-on-end-tag':
-        _("End tag contains unexpected self-closing flag."),
+        "End tag contains unexpected self-closing flag.",
     "expected-tag-name-but-got-right-bracket":
-        _("Expected tag name. Got '>' instead."),
+        "Expected tag name. Got '>' instead.",
     "expected-tag-name-but-got-question-mark":
-        _("Expected tag name. Got '?' instead. (HTML doesn't "
-          "support processing instructions.)"),
+        "Expected tag name. Got '?' instead. (HTML doesn't "
+        "support processing instructions.)",
     "expected-tag-name":
-        _("Expected tag name. Got something else instead"),
+        "Expected tag name. Got something else instead",
     "expected-closing-tag-but-got-right-bracket":
-        _("Expected closing tag. Got '>' instead. Ignoring '</>'."),
+        "Expected closing tag. Got '>' instead. Ignoring '</>'.",
     "expected-closing-tag-but-got-eof":
-        _("Expected closing tag. Unexpected end of file."),
+        "Expected closing tag. Unexpected end of file.",
     "expected-closing-tag-but-got-char":
-        _("Expected closing tag. Unexpected character '%(data)s' found."),
+        "Expected closing tag. Unexpected character '%(data)s' found.",
     "eof-in-tag-name":
-        _("Unexpected end of file in the tag name."),
+        "Unexpected end of file in the tag name.",
     "expected-attribute-name-but-got-eof":
-        _("Unexpected end of file. Expected attribute name instead."),
+        "Unexpected end of file. Expected attribute name instead.",
     "eof-in-attribute-name":
-        _("Unexpected end of file in attribute name."),
+        "Unexpected end of file in attribute name.",
     "invalid-character-in-attribute-name":
-        _("Invalid character in attribute name"),
+        "Invalid character in attribute name",
     "duplicate-attribute":
-        _("Dropped duplicate attribute on tag."),
+        "Dropped duplicate attribute on tag.",
     "expected-end-of-tag-name-but-got-eof":
-        _("Unexpected end of file. Expected = or end of tag."),
+        "Unexpected end of file. Expected = or end of tag.",
     "expected-attribute-value-but-got-eof":
-        _("Unexpected end of file. Expected attribute value."),
+        "Unexpected end of file. Expected attribute value.",
     "expected-attribute-value-but-got-right-bracket":
-        _("Expected attribute value. Got '>' instead."),
+        "Expected attribute value. Got '>' instead.",
     'equals-in-unquoted-attribute-value':
-        _("Unexpected = in unquoted attribute"),
+        "Unexpected = in unquoted attribute",
     'unexpected-character-in-unquoted-attribute-value':
-        _("Unexpected character in unquoted attribute"),
+        "Unexpected character in unquoted attribute",
     "invalid-character-after-attribute-name":
-        _("Unexpected character after attribute name."),
+        "Unexpected character after attribute name.",
     "unexpected-character-after-attribute-value":
-        _("Unexpected character after attribute value."),
+        "Unexpected character after attribute value.",
     "eof-in-attribute-value-double-quote":
-        _("Unexpected end of file in attribute value (\")."),
+        "Unexpected end of file in attribute value (\").",
     "eof-in-attribute-value-single-quote":
-        _("Unexpected end of file in attribute value (')."),
+        "Unexpected end of file in attribute value (').",
     "eof-in-attribute-value-no-quotes":
-        _("Unexpected end of file in attribute value."),
+        "Unexpected end of file in attribute value.",
     "unexpected-EOF-after-solidus-in-tag":
-        _("Unexpected end of file in tag. Expected >"),
+        "Unexpected end of file in tag. Expected >",
     "unexpected-character-after-solidus-in-tag":
-        _("Unexpected character after / in tag. Expected >"),
+        "Unexpected character after / in tag. Expected >",
     "expected-dashes-or-doctype":
-        _("Expected '--' or 'DOCTYPE'. Not found."),
+        "Expected '--' or 'DOCTYPE'. Not found.",
     "unexpected-bang-after-double-dash-in-comment":
-        _("Unexpected ! after -- in comment"),
+        "Unexpected ! after -- in comment",
     "unexpected-space-after-double-dash-in-comment":
-        _("Unexpected space after -- in comment"),
+        "Unexpected space after -- in comment",
     "incorrect-comment":
-        _("Incorrect comment."),
+        "Incorrect comment.",
     "eof-in-comment":
-        _("Unexpected end of file in comment."),
+        "Unexpected end of file in comment.",
     "eof-in-comment-end-dash":
-        _("Unexpected end of file in comment (-)"),
+        "Unexpected end of file in comment (-)",
     "unexpected-dash-after-double-dash-in-comment":
-        _("Unexpected '-' after '--' found in comment."),
+        "Unexpected '-' after '--' found in comment.",
     "eof-in-comment-double-dash":
-        _("Unexpected end of file in comment (--)."),
+        "Unexpected end of file in comment (--).",
     "eof-in-comment-end-space-state":
-        _("Unexpected end of file in comment."),
+        "Unexpected end of file in comment.",
     "eof-in-comment-end-bang-state":
-        _("Unexpected end of file in comment."),
+        "Unexpected end of file in comment.",
     "unexpected-char-in-comment":
-        _("Unexpected character in comment found."),
+        "Unexpected character in comment found.",
     "need-space-after-doctype":
-        _("No space after literal string 'DOCTYPE'."),
+        "No space after literal string 'DOCTYPE'.",
     "expected-doctype-name-but-got-right-bracket":
-        _("Unexpected > character. Expected DOCTYPE name."),
+        "Unexpected > character. Expected DOCTYPE name.",
     "expected-doctype-name-but-got-eof":
-        _("Unexpected end of file. Expected DOCTYPE name."),
+        "Unexpected end of file. Expected DOCTYPE name.",
     "eof-in-doctype-name":
-        _("Unexpected end of file in DOCTYPE name."),
+        "Unexpected end of file in DOCTYPE name.",
     "eof-in-doctype":
-        _("Unexpected end of file in DOCTYPE."),
+        "Unexpected end of file in DOCTYPE.",
     "expected-space-or-right-bracket-in-doctype":
-        _("Expected space or '>'. Got '%(data)s'"),
+        "Expected space or '>'. Got '%(data)s'",
     "unexpected-end-of-doctype":
-        _("Unexpected end of DOCTYPE."),
+        "Unexpected end of DOCTYPE.",
     "unexpected-char-in-doctype":
-        _("Unexpected character in DOCTYPE."),
+        "Unexpected character in DOCTYPE.",
     "eof-in-innerhtml":
-        _("XXX innerHTML EOF"),
+        "XXX innerHTML EOF",
     "unexpected-doctype":
-        _("Unexpected DOCTYPE. Ignored."),
+        "Unexpected DOCTYPE. Ignored.",
     "non-html-root":
-        _("html needs to be the first start tag."),
+        "html needs to be the first start tag.",
     "expected-doctype-but-got-eof":
-        _("Unexpected End of file. Expected DOCTYPE."),
+        "Unexpected End of file. Expected DOCTYPE.",
     "unknown-doctype":
-        _("Erroneous DOCTYPE."),
+        "Erroneous DOCTYPE.",
     "expected-doctype-but-got-chars":
-        _("Unexpected non-space characters. Expected DOCTYPE."),
+        "Unexpected non-space characters. Expected DOCTYPE.",
     "expected-doctype-but-got-start-tag":
-        _("Unexpected start tag (%(name)s). Expected DOCTYPE."),
+        "Unexpected start tag (%(name)s). Expected DOCTYPE.",
     "expected-doctype-but-got-end-tag":
-        _("Unexpected end tag (%(name)s). Expected DOCTYPE."),
+        "Unexpected end tag (%(name)s). Expected DOCTYPE.",
     "end-tag-after-implied-root":
-        _("Unexpected end tag (%(name)s) after the (implied) root element."),
+        "Unexpected end tag (%(name)s) after the (implied) root element.",
     "expected-named-closing-tag-but-got-eof":
-        _("Unexpected end of file. Expected end tag (%(name)s)."),
+        "Unexpected end of file. Expected end tag (%(name)s).",
     "two-heads-are-not-better-than-one":
-        _("Unexpected start tag head in existing head. Ignored."),
+        "Unexpected start tag head in existing head. Ignored.",
     "unexpected-end-tag":
-        _("Unexpected end tag (%(name)s). Ignored."),
+        "Unexpected end tag (%(name)s). Ignored.",
     "unexpected-start-tag-out-of-my-head":
-        _("Unexpected start tag (%(name)s) that can be in head. Moved."),
+        "Unexpected start tag (%(name)s) that can be in head. Moved.",
     "unexpected-start-tag":
-        _("Unexpected start tag (%(name)s)."),
+        "Unexpected start tag (%(name)s).",
     "missing-end-tag":
-        _("Missing end tag (%(name)s)."),
+        "Missing end tag (%(name)s).",
     "missing-end-tags":
-        _("Missing end tags (%(name)s)."),
+        "Missing end tags (%(name)s).",
     "unexpected-start-tag-implies-end-tag":
-        _("Unexpected start tag (%(startName)s) "
-          "implies end tag (%(endName)s)."),
+        "Unexpected start tag (%(startName)s) "
+        "implies end tag (%(endName)s).",
     "unexpected-start-tag-treated-as":
-        _("Unexpected start tag (%(originalName)s). Treated as %(newName)s."),
+        "Unexpected start tag (%(originalName)s). Treated as %(newName)s.",
     "deprecated-tag":
-        _("Unexpected start tag %(name)s. Don't use it!"),
+        "Unexpected start tag %(name)s. Don't use it!",
     "unexpected-start-tag-ignored":
-        _("Unexpected start tag %(name)s. Ignored."),
+        "Unexpected start tag %(name)s. Ignored.",
     "expected-one-end-tag-but-got-another":
-        _("Unexpected end tag (%(gotName)s). "
-          "Missing end tag (%(expectedName)s)."),
+        "Unexpected end tag (%(gotName)s). "
+        "Missing end tag (%(expectedName)s).",
     "end-tag-too-early":
-        _("End tag (%(name)s) seen too early. Expected other end tag."),
+        "End tag (%(name)s) seen too early. Expected other end tag.",
     "end-tag-too-early-named":
-        _("Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."),
+        "Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).",
     "end-tag-too-early-ignored":
-        _("End tag (%(name)s) seen too early. Ignored."),
+        "End tag (%(name)s) seen too early. Ignored.",
     "adoption-agency-1.1":
-        _("End tag (%(name)s) violates step 1, "
-          "paragraph 1 of the adoption agency algorithm."),
+        "End tag (%(name)s) violates step 1, "
+        "paragraph 1 of the adoption agency algorithm.",
     "adoption-agency-1.2":
-        _("End tag (%(name)s) violates step 1, "
-          "paragraph 2 of the adoption agency algorithm."),
+        "End tag (%(name)s) violates step 1, "
+        "paragraph 2 of the adoption agency algorithm.",
     "adoption-agency-1.3":
-        _("End tag (%(name)s) violates step 1, "
-          "paragraph 3 of the adoption agency algorithm."),
+        "End tag (%(name)s) violates step 1, "
+        "paragraph 3 of the adoption agency algorithm.",
     "adoption-agency-4.4":
-        _("End tag (%(name)s) violates step 4, "
-          "paragraph 4 of the adoption agency algorithm."),
+        "End tag (%(name)s) violates step 4, "
+        "paragraph 4 of the adoption agency algorithm.",
     "unexpected-end-tag-treated-as":
-        _("Unexpected end tag (%(originalName)s). Treated as %(newName)s."),
+        "Unexpected end tag (%(originalName)s). Treated as %(newName)s.",
     "no-end-tag":
-        _("This element (%(name)s) has no end tag."),
+        "This element (%(name)s) has no end tag.",
     "unexpected-implied-end-tag-in-table":
-        _("Unexpected implied end tag (%(name)s) in the table phase."),
+        "Unexpected implied end tag (%(name)s) in the table phase.",
     "unexpected-implied-end-tag-in-table-body":
-        _("Unexpected implied end tag (%(name)s) in the table body phase."),
+        "Unexpected implied end tag (%(name)s) in the table body phase.",
     "unexpected-char-implies-table-voodoo":
-        _("Unexpected non-space characters in "
-          "table context caused voodoo mode."),
+        "Unexpected non-space characters in "
+        "table context caused voodoo mode.",
     "unexpected-hidden-input-in-table":
-        _("Unexpected input with type hidden in table context."),
+        "Unexpected input with type hidden in table context.",
     "unexpected-form-in-table":
-        _("Unexpected form in table context."),
+        "Unexpected form in table context.",
     "unexpected-start-tag-implies-table-voodoo":
-        _("Unexpected start tag (%(name)s) in "
-          "table context caused voodoo mode."),
+        "Unexpected start tag (%(name)s) in "
+        "table context caused voodoo mode.",
     "unexpected-end-tag-implies-table-voodoo":
-        _("Unexpected end tag (%(name)s) in "
-          "table context caused voodoo mode."),
+        "Unexpected end tag (%(name)s) in "
+        "table context caused voodoo mode.",
     "unexpected-cell-in-table-body":
-        _("Unexpected table cell start tag (%(name)s) "
-          "in the table body phase."),
+        "Unexpected table cell start tag (%(name)s) "
+        "in the table body phase.",
     "unexpected-cell-end-tag":
-        _("Got table cell end tag (%(name)s) "
-          "while required end tags are missing."),
+        "Got table cell end tag (%(name)s) "
+        "while required end tags are missing.",
     "unexpected-end-tag-in-table-body":
-        _("Unexpected end tag (%(name)s) in the table body phase. Ignored."),
+        "Unexpected end tag (%(name)s) in the table body phase. Ignored.",
     "unexpected-implied-end-tag-in-table-row":
-        _("Unexpected implied end tag (%(name)s) in the table row phase."),
+        "Unexpected implied end tag (%(name)s) in the table row phase.",
     "unexpected-end-tag-in-table-row":
-        _("Unexpected end tag (%(name)s) in the table row phase. Ignored."),
+        "Unexpected end tag (%(name)s) in the table row phase. Ignored.",
     "unexpected-select-in-select":
-        _("Unexpected select start tag in the select phase "
-          "treated as select end tag."),
+        "Unexpected select start tag in the select phase "
+        "treated as select end tag.",
     "unexpected-input-in-select":
-        _("Unexpected input start tag in the select phase."),
+        "Unexpected input start tag in the select phase.",
     "unexpected-start-tag-in-select":
-        _("Unexpected start tag token (%(name)s in the select phase. "
-          "Ignored."),
+        "Unexpected start tag token (%(name)s in the select phase. "
+        "Ignored.",
     "unexpected-end-tag-in-select":
-        _("Unexpected end tag (%(name)s) in the select phase. Ignored."),
+        "Unexpected end tag (%(name)s) in the select phase. Ignored.",
     "unexpected-table-element-start-tag-in-select-in-table":
-        _("Unexpected table element start tag (%(name)s) in the select in table phase."),
+        "Unexpected table element start tag (%(name)s) in the select in table phase.",
     "unexpected-table-element-end-tag-in-select-in-table":
-        _("Unexpected table element end tag (%(name)s) in the select in table phase."),
+        "Unexpected table element end tag (%(name)s) in the select in table phase.",
     "unexpected-char-after-body":
-        _("Unexpected non-space characters in the after body phase."),
+        "Unexpected non-space characters in the after body phase.",
     "unexpected-start-tag-after-body":
-        _("Unexpected start tag token (%(name)s)"
-          " in the after body phase."),
+        "Unexpected start tag token (%(name)s)"
+        " in the after body phase.",
     "unexpected-end-tag-after-body":
-        _("Unexpected end tag token (%(name)s)"
-          " in the after body phase."),
+        "Unexpected end tag token (%(name)s)"
+        " in the after body phase.",
     "unexpected-char-in-frameset":
-        _("Unexpected characters in the frameset phase. Characters ignored."),
+        "Unexpected characters in the frameset phase. Characters ignored.",
     "unexpected-start-tag-in-frameset":
-        _("Unexpected start tag token (%(name)s)"
-          " in the frameset phase. Ignored."),
+        "Unexpected start tag token (%(name)s)"
+        " in the frameset phase. Ignored.",
     "unexpected-frameset-in-frameset-innerhtml":
-        _("Unexpected end tag token (frameset) "
-          "in the frameset phase (innerHTML)."),
+        "Unexpected end tag token (frameset) "
+        "in the frameset phase (innerHTML).",
     "unexpected-end-tag-in-frameset":
-        _("Unexpected end tag token (%(name)s)"
-          " in the frameset phase. Ignored."),
+        "Unexpected end tag token (%(name)s)"
+        " in the frameset phase. Ignored.",
     "unexpected-char-after-frameset":
-        _("Unexpected non-space characters in the "
-          "after frameset phase. Ignored."),
+        "Unexpected non-space characters in the "
+        "after frameset phase. Ignored.",
     "unexpected-start-tag-after-frameset":
-        _("Unexpected start tag (%(name)s)"
-          " in the after frameset phase. Ignored."),
+        "Unexpected start tag (%(name)s)"
+        " in the after frameset phase. Ignored.",
     "unexpected-end-tag-after-frameset":
-        _("Unexpected end tag (%(name)s)"
-          " in the after frameset phase. Ignored."),
+        "Unexpected end tag (%(name)s)"
+        " in the after frameset phase. Ignored.",
     "unexpected-end-tag-after-body-innerhtml":
-        _("Unexpected end tag after body(innerHtml)"),
+        "Unexpected end tag after body(innerHtml)",
     "expected-eof-but-got-char":
-        _("Unexpected non-space characters. Expected end of file."),
+        "Unexpected non-space characters. Expected end of file.",
     "expected-eof-but-got-start-tag":
-        _("Unexpected start tag (%(name)s)"
-          ". Expected end of file."),
+        "Unexpected start tag (%(name)s)"
+        ". Expected end of file.",
     "expected-eof-but-got-end-tag":
-        _("Unexpected end tag (%(name)s)"
-          ". Expected end of file."),
+        "Unexpected end tag (%(name)s)"
+        ". Expected end of file.",
     "eof-in-table":
-        _("Unexpected end of file. Expected table content."),
+        "Unexpected end of file. Expected table content.",
     "eof-in-select":
-        _("Unexpected end of file. Expected select content."),
+        "Unexpected end of file. Expected select content.",
     "eof-in-frameset":
-        _("Unexpected end of file. Expected frameset content."),
+        "Unexpected end of file. Expected frameset content.",
     "eof-in-script-in-script":
-        _("Unexpected end of file. Expected script content."),
+        "Unexpected end of file. Expected script content.",
     "eof-in-foreign-lands":
-        _("Unexpected end of file. Expected foreign content"),
+        "Unexpected end of file. Expected foreign content",
     "non-void-element-with-trailing-solidus":
-        _("Trailing solidus not allowed on element %(name)s"),
+        "Trailing solidus not allowed on element %(name)s",
     "unexpected-html-element-in-foreign-content":
-        _("Element %(name)s not allowed in a non-html context"),
+        "Element %(name)s not allowed in a non-html context",
     "unexpected-end-tag-before-html":
-        _("Unexpected end tag (%(name)s) before html."),
+        "Unexpected end tag (%(name)s) before html.",
     "XXX-undefined-error":
-        _("Undefined error (this sucks and should be fixed)"),
+        "Undefined error (this sucks and should be fixed)",
 }
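Dropping gettext changes nothing for callers: the values stay %-style format strings, now looked up and interpolated without a translation layer. For example:

E = {
    "expected-closing-tag-but-got-char":
        "Expected closing tag. Unexpected character '%(data)s' found.",
}
print(E["expected-closing-tag-but-got-char"] % {"data": "&"})
# Expected closing tag. Unexpected character '&' found.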
namespaces = { namespaces = {
@ -298,7 +296,7 @@ namespaces = {
"xmlns": "http://www.w3.org/2000/xmlns/" "xmlns": "http://www.w3.org/2000/xmlns/"
} }
scopingElements = frozenset(( scopingElements = frozenset([
(namespaces["html"], "applet"), (namespaces["html"], "applet"),
(namespaces["html"], "caption"), (namespaces["html"], "caption"),
(namespaces["html"], "html"), (namespaces["html"], "html"),
@ -316,9 +314,9 @@ scopingElements = frozenset((
(namespaces["svg"], "foreignObject"), (namespaces["svg"], "foreignObject"),
(namespaces["svg"], "desc"), (namespaces["svg"], "desc"),
(namespaces["svg"], "title"), (namespaces["svg"], "title"),
)) ])
formattingElements = frozenset(( formattingElements = frozenset([
(namespaces["html"], "a"), (namespaces["html"], "a"),
(namespaces["html"], "b"), (namespaces["html"], "b"),
(namespaces["html"], "big"), (namespaces["html"], "big"),
@ -333,9 +331,9 @@ formattingElements = frozenset((
(namespaces["html"], "strong"), (namespaces["html"], "strong"),
(namespaces["html"], "tt"), (namespaces["html"], "tt"),
(namespaces["html"], "u") (namespaces["html"], "u")
)) ])
specialElements = frozenset(( specialElements = frozenset([
(namespaces["html"], "address"), (namespaces["html"], "address"),
(namespaces["html"], "applet"), (namespaces["html"], "applet"),
(namespaces["html"], "area"), (namespaces["html"], "area"),
@ -416,22 +414,22 @@ specialElements = frozenset((
(namespaces["html"], "wbr"), (namespaces["html"], "wbr"),
(namespaces["html"], "xmp"), (namespaces["html"], "xmp"),
(namespaces["svg"], "foreignObject") (namespaces["svg"], "foreignObject")
)) ])
htmlIntegrationPointElements = frozenset(( htmlIntegrationPointElements = frozenset([
(namespaces["mathml"], "annotaion-xml"), (namespaces["mathml"], "annotaion-xml"),
(namespaces["svg"], "foreignObject"), (namespaces["svg"], "foreignObject"),
(namespaces["svg"], "desc"), (namespaces["svg"], "desc"),
(namespaces["svg"], "title") (namespaces["svg"], "title")
)) ])
mathmlTextIntegrationPointElements = frozenset(( mathmlTextIntegrationPointElements = frozenset([
(namespaces["mathml"], "mi"), (namespaces["mathml"], "mi"),
(namespaces["mathml"], "mo"), (namespaces["mathml"], "mo"),
(namespaces["mathml"], "mn"), (namespaces["mathml"], "mn"),
(namespaces["mathml"], "ms"), (namespaces["mathml"], "ms"),
(namespaces["mathml"], "mtext") (namespaces["mathml"], "mtext")
)) ])
adjustForeignAttributes = { adjustForeignAttributes = {
"xlink:actuate": ("xlink", "actuate", namespaces["xlink"]), "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
@ -451,21 +449,21 @@ adjustForeignAttributes = {
unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in
adjustForeignAttributes.items()]) adjustForeignAttributes.items()])
spaceCharacters = frozenset(( spaceCharacters = frozenset([
"\t", "\t",
"\n", "\n",
"\u000C", "\u000C",
" ", " ",
"\r" "\r"
)) ])
tableInsertModeElements = frozenset(( tableInsertModeElements = frozenset([
"table", "table",
"tbody", "tbody",
"tfoot", "tfoot",
"thead", "thead",
"tr" "tr"
)) ])
asciiLowercase = frozenset(string.ascii_lowercase) asciiLowercase = frozenset(string.ascii_lowercase)
asciiUppercase = frozenset(string.ascii_uppercase) asciiUppercase = frozenset(string.ascii_uppercase)
@ -486,7 +484,7 @@ headingElements = (
"h6" "h6"
) )
voidElements = frozenset(( voidElements = frozenset([
"base", "base",
"command", "command",
"event-source", "event-source",
@ -502,11 +500,11 @@ voidElements = frozenset((
"input", "input",
"source", "source",
"track" "track"
)) ])
cdataElements = frozenset(('title', 'textarea')) cdataElements = frozenset(['title', 'textarea'])
rcdataElements = frozenset(( rcdataElements = frozenset([
'style', 'style',
'script', 'script',
'xmp', 'xmp',
@ -514,27 +512,27 @@ rcdataElements = frozenset((
'noembed', 'noembed',
'noframes', 'noframes',
'noscript' 'noscript'
)) ])
booleanAttributes = { booleanAttributes = {
"": frozenset(("irrelevant",)), "": frozenset(["irrelevant"]),
"style": frozenset(("scoped",)), "style": frozenset(["scoped"]),
"img": frozenset(("ismap",)), "img": frozenset(["ismap"]),
"audio": frozenset(("autoplay", "controls")), "audio": frozenset(["autoplay", "controls"]),
"video": frozenset(("autoplay", "controls")), "video": frozenset(["autoplay", "controls"]),
"script": frozenset(("defer", "async")), "script": frozenset(["defer", "async"]),
"details": frozenset(("open",)), "details": frozenset(["open"]),
"datagrid": frozenset(("multiple", "disabled")), "datagrid": frozenset(["multiple", "disabled"]),
"command": frozenset(("hidden", "disabled", "checked", "default")), "command": frozenset(["hidden", "disabled", "checked", "default"]),
"hr": frozenset(("noshade")), "hr": frozenset(["noshade"]),
"menu": frozenset(("autosubmit",)), "menu": frozenset(["autosubmit"]),
"fieldset": frozenset(("disabled", "readonly")), "fieldset": frozenset(["disabled", "readonly"]),
"option": frozenset(("disabled", "readonly", "selected")), "option": frozenset(["disabled", "readonly", "selected"]),
"optgroup": frozenset(("disabled", "readonly")), "optgroup": frozenset(["disabled", "readonly"]),
"button": frozenset(("disabled", "autofocus")), "button": frozenset(["disabled", "autofocus"]),
"input": frozenset(("disabled", "readonly", "required", "autofocus", "checked", "ismap")), "input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]),
"select": frozenset(("disabled", "readonly", "autofocus", "multiple")), "select": frozenset(["disabled", "readonly", "autofocus", "multiple"]),
"output": frozenset(("disabled", "readonly")), "output": frozenset(["disabled", "readonly"]),
} }
# entitiesWindows1252 has to be _ordered_ and needs to have an index. It # entitiesWindows1252 has to be _ordered_ and needs to have an index. It
@@ -574,7 +572,7 @@ entitiesWindows1252 = (
376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
) )
xmlEntities = frozenset(('lt;', 'gt;', 'amp;', 'apos;', 'quot;')) xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;'])
entities = { entities = {
"AElig": "\xc6", "AElig": "\xc6",
@@ -3088,8 +3086,8 @@ tokenTypes = {
"ParseError": 7 "ParseError": 7
} }
tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"], tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"],
tokenTypes["EmptyTag"])) tokenTypes["EmptyTag"]])
prefixes = dict([(v, k) for k, v in namespaces.items()]) prefixes = dict([(v, k) for k, v in namespaces.items()])
View file
@@ -1,8 +1,5 @@
from __future__ import absolute_import, division, unicode_literals from __future__ import absolute_import, division, unicode_literals
from gettext import gettext
_ = gettext
from . import _base from . import _base
from ..constants import cdataElements, rcdataElements, voidElements from ..constants import cdataElements, rcdataElements, voidElements
@@ -23,24 +20,24 @@ class Filter(_base.Filter):
if type in ("StartTag", "EmptyTag"): if type in ("StartTag", "EmptyTag"):
name = token["name"] name = token["name"]
if contentModelFlag != "PCDATA": if contentModelFlag != "PCDATA":
raise LintError(_("StartTag not in PCDATA content model flag: %(tag)s") % {"tag": name}) raise LintError("StartTag not in PCDATA content model flag: %(tag)s" % {"tag": name})
if not isinstance(name, str): if not isinstance(name, str):
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name}) raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
if not name: if not name:
raise LintError(_("Empty tag name")) raise LintError("Empty tag name")
if type == "StartTag" and name in voidElements: if type == "StartTag" and name in voidElements:
raise LintError(_("Void element reported as StartTag token: %(tag)s") % {"tag": name}) raise LintError("Void element reported as StartTag token: %(tag)s" % {"tag": name})
elif type == "EmptyTag" and name not in voidElements: elif type == "EmptyTag" and name not in voidElements:
raise LintError(_("Non-void element reported as EmptyTag token: %(tag)s") % {"tag": token["name"]}) raise LintError("Non-void element reported as EmptyTag token: %(tag)s" % {"tag": token["name"]})
if type == "StartTag": if type == "StartTag":
open_elements.append(name) open_elements.append(name)
for name, value in token["data"]: for name, value in token["data"]:
if not isinstance(name, str): if not isinstance(name, str):
raise LintError(_("Attribute name is not a string: %(name)r") % {"name": name}) raise LintError("Attribute name is not a string: %(name)r" % {"name": name})
if not name: if not name:
raise LintError(_("Empty attribute name")) raise LintError("Empty attribute name")
if not isinstance(value, str): if not isinstance(value, str):
raise LintError(_("Attribute value is not a string: %(value)r") % {"value": value}) raise LintError("Attribute value is not a string: %(value)r" % {"value": value})
if name in cdataElements: if name in cdataElements:
contentModelFlag = "CDATA" contentModelFlag = "CDATA"
elif name in rcdataElements: elif name in rcdataElements:
@@ -51,43 +48,43 @@ class Filter(_base.Filter):
elif type == "EndTag": elif type == "EndTag":
name = token["name"] name = token["name"]
if not isinstance(name, str): if not isinstance(name, str):
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name}) raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
if not name: if not name:
raise LintError(_("Empty tag name")) raise LintError("Empty tag name")
if name in voidElements: if name in voidElements:
raise LintError(_("Void element reported as EndTag token: %(tag)s") % {"tag": name}) raise LintError("Void element reported as EndTag token: %(tag)s" % {"tag": name})
start_name = open_elements.pop() start_name = open_elements.pop()
if start_name != name: if start_name != name:
raise LintError(_("EndTag (%(end)s) does not match StartTag (%(start)s)") % {"end": name, "start": start_name}) raise LintError("EndTag (%(end)s) does not match StartTag (%(start)s)" % {"end": name, "start": start_name})
contentModelFlag = "PCDATA" contentModelFlag = "PCDATA"
elif type == "Comment": elif type == "Comment":
if contentModelFlag != "PCDATA": if contentModelFlag != "PCDATA":
raise LintError(_("Comment not in PCDATA content model flag")) raise LintError("Comment not in PCDATA content model flag")
elif type in ("Characters", "SpaceCharacters"): elif type in ("Characters", "SpaceCharacters"):
data = token["data"] data = token["data"]
if not isinstance(data, str): if not isinstance(data, str):
raise LintError(_("Attribute name is not a string: %(name)r") % {"name": data}) raise LintError("Attribute name is not a string: %(name)r" % {"name": data})
if not data: if not data:
raise LintError(_("%(type)s token with empty data") % {"type": type}) raise LintError("%(type)s token with empty data" % {"type": type})
if type == "SpaceCharacters": if type == "SpaceCharacters":
data = data.strip(spaceCharacters) data = data.strip(spaceCharacters)
if data: if data:
raise LintError(_("Non-space character(s) found in SpaceCharacters token: %(token)r") % {"token": data}) raise LintError("Non-space character(s) found in SpaceCharacters token: %(token)r" % {"token": data})
elif type == "Doctype": elif type == "Doctype":
name = token["name"] name = token["name"]
if contentModelFlag != "PCDATA": if contentModelFlag != "PCDATA":
raise LintError(_("Doctype not in PCDATA content model flag: %(name)s") % {"name": name}) raise LintError("Doctype not in PCDATA content model flag: %(name)s" % {"name": name})
if not isinstance(name, str): if not isinstance(name, str):
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name}) raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
# XXX: what to do with token["data"] ? # XXX: what to do with token["data"] ?
elif type in ("ParseError", "SerializeError"): elif type in ("ParseError", "SerializeError"):
pass pass
else: else:
raise LintError(_("Unknown token type: %(type)s") % {"type": type}) raise LintError("Unknown token type: %(type)s" % {"type": type})
yield token yield token
View file
@@ -18,6 +18,7 @@ from .constants import cdataElements, rcdataElements
from .constants import tokenTypes, ReparseException, namespaces from .constants import tokenTypes, ReparseException, namespaces
from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements
from .constants import adjustForeignAttributes as adjustForeignAttributesMap from .constants import adjustForeignAttributes as adjustForeignAttributesMap
from .constants import E
def parse(doc, treebuilder="etree", encoding=None, def parse(doc, treebuilder="etree", encoding=None,
@@ -129,6 +130,17 @@ class HTMLParser(object):
self.framesetOK = True self.framesetOK = True
@property
def documentEncoding(self):
"""The name of the character encoding
that was used to decode the input stream,
or :obj:`None` if that is not determined yet.
"""
if not hasattr(self, 'tokenizer'):
return None
return self.tokenizer.stream.charEncoding[0]
def isHTMLIntegrationPoint(self, element): def isHTMLIntegrationPoint(self, element):
if (element.name == "annotation-xml" and if (element.name == "annotation-xml" and
element.namespace == namespaces["mathml"]): element.namespace == namespaces["mathml"]):
@@ -245,7 +257,7 @@ class HTMLParser(object):
# XXX The idea is to make errorcode mandatory. # XXX The idea is to make errorcode mandatory.
self.errors.append((self.tokenizer.stream.position(), errorcode, datavars)) self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
if self.strict: if self.strict:
raise ParseError raise ParseError(E[errorcode] % datavars)
def normalizeToken(self, token): def normalizeToken(self, token):
""" HTML5 specific normalizations to the token stream """ """ HTML5 specific normalizations to the token stream """
@@ -868,7 +880,7 @@ def getPhases(debug):
self.startTagHandler = utils.MethodDispatcher([ self.startTagHandler = utils.MethodDispatcher([
("html", self.startTagHtml), ("html", self.startTagHtml),
(("base", "basefont", "bgsound", "command", "link", "meta", (("base", "basefont", "bgsound", "command", "link", "meta",
"noframes", "script", "style", "title"), "script", "style", "title"),
self.startTagProcessInHead), self.startTagProcessInHead),
("body", self.startTagBody), ("body", self.startTagBody),
("frameset", self.startTagFrameset), ("frameset", self.startTagFrameset),
@@ -1205,8 +1217,7 @@ def getPhases(debug):
attributes["name"] = "isindex" attributes["name"] = "isindex"
self.processStartTag(impliedTagToken("input", "StartTag", self.processStartTag(impliedTagToken("input", "StartTag",
attributes=attributes, attributes=attributes,
selfClosing= selfClosing=token["selfClosing"]))
token["selfClosing"]))
self.processEndTag(impliedTagToken("label")) self.processEndTag(impliedTagToken("label"))
self.processStartTag(impliedTagToken("hr", "StartTag")) self.processStartTag(impliedTagToken("hr", "StartTag"))
self.processEndTag(impliedTagToken("form")) self.processEndTag(impliedTagToken("form"))
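The documentEncoding property added above is only populated once the tokenizer exists; a minimal usage sketch (the input bytes and the expected value are illustrative, not from this commit):

import html5lib

# Feed bytes so the parser has to sniff the encoding itself.
parser = html5lib.HTMLParser()
parser.parse(b'<meta charset="windows-1252"><p>caf\xe9</p>')
print(parser.documentEncoding)  # e.g. 'windows-1252'; None before parsing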
View file
@@ -28,7 +28,18 @@ asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase]) asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
invalid_unicode_re = re.compile("[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uD800-\uDFFF\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]")
invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]"
if utils.supports_lone_surrogates:
# Use one extra step of indirection and create surrogates with
# unichr. Not using this indirection would introduce an illegal
# unicode literal on platforms not supporting such lone
# surrogates.
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate +
eval('"\\uD800-\\uDFFF"'))
else:
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
@@ -164,13 +175,18 @@ class HTMLUnicodeInputStream(object):
""" """
# Craziness if not utils.supports_lone_surrogates:
if len("\U0010FFFF") == 1: # Such platforms will have already checked for such
# surrogate errors, so no need to do this checking.
self.reportCharacterErrors = None
self.replaceCharactersRegexp = None
elif len("\U0010FFFF") == 1:
self.reportCharacterErrors = self.characterErrorsUCS4 self.reportCharacterErrors = self.characterErrorsUCS4
self.replaceCharactersRegexp = re.compile("[\uD800-\uDFFF]") self.replaceCharactersRegexp = re.compile(eval('"[\\uD800-\\uDFFF]"'))
else: else:
self.reportCharacterErrors = self.characterErrorsUCS2 self.reportCharacterErrors = self.characterErrorsUCS2
self.replaceCharactersRegexp = re.compile("([\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])") self.replaceCharactersRegexp = re.compile(
eval('"([\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?<![\\uD800-\\uDBFF])[\\uDC00-\\uDFFF])"'))
# List of where new lines occur # List of where new lines occur
self.newLines = [0] self.newLines = [0]
@@ -265,11 +281,12 @@ class HTMLUnicodeInputStream(object):
self._bufferedCharacter = data[-1] self._bufferedCharacter = data[-1]
data = data[:-1] data = data[:-1]
self.reportCharacterErrors(data) if self.reportCharacterErrors:
self.reportCharacterErrors(data)
# Replace invalid characters # Replace invalid characters
# Note U+0000 is dealt with in the tokenizer # Note U+0000 is dealt with in the tokenizer
data = self.replaceCharactersRegexp.sub("\ufffd", data) data = self.replaceCharactersRegexp.sub("\ufffd", data)
data = data.replace("\r\n", "\n") data = data.replace("\r\n", "\n")
data = data.replace("\r", "\n") data = data.replace("\r", "\n")
View file
@@ -2,11 +2,26 @@ from __future__ import absolute_import, division, unicode_literals
import re import re
from xml.sax.saxutils import escape, unescape from xml.sax.saxutils import escape, unescape
from six.moves import urllib_parse as urlparse
from .tokenizer import HTMLTokenizer from .tokenizer import HTMLTokenizer
from .constants import tokenTypes from .constants import tokenTypes
content_type_rgx = re.compile(r'''
^
# Match a content type <application>/<type>
(?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
# Match any character set and encoding
(?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
|(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
# Assume the rest is data
,.*
$
''',
re.VERBOSE)
class HTMLSanitizerMixin(object): class HTMLSanitizerMixin(object):
""" sanitization of XHTML+MathML+SVG and of inline style attributes.""" """ sanitization of XHTML+MathML+SVG and of inline style attributes."""
@@ -100,8 +115,8 @@ class HTMLSanitizerMixin(object):
'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y', 'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
'y1', 'y2', 'zoomAndPan'] 'y1', 'y2', 'zoomAndPan']
attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster', attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster', 'background', 'datasrc',
'xlink:href', 'xml:base'] 'dynsrc', 'lowsrc', 'ping', 'poster', 'xlink:href', 'xml:base']
svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill', svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end', 'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
@@ -138,7 +153,9 @@ class HTMLSanitizerMixin(object):
acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc', acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc',
'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal', 'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag', 'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
'ssh', 'sftp', 'rtsp', 'afs'] 'ssh', 'sftp', 'rtsp', 'afs', 'data']
acceptable_content_types = ['image/png', 'image/jpeg', 'image/gif', 'image/webp', 'image/bmp', 'text/plain']
# subclasses may define their own versions of these constants # subclasses may define their own versions of these constants
allowed_elements = acceptable_elements + mathml_elements + svg_elements allowed_elements = acceptable_elements + mathml_elements + svg_elements
@@ -147,6 +164,7 @@ class HTMLSanitizerMixin(object):
allowed_css_keywords = acceptable_css_keywords allowed_css_keywords = acceptable_css_keywords
allowed_svg_properties = acceptable_svg_properties allowed_svg_properties = acceptable_svg_properties
allowed_protocols = acceptable_protocols allowed_protocols = acceptable_protocols
allowed_content_types = acceptable_content_types
# Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
# stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style # stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
@@ -189,10 +207,21 @@ class HTMLSanitizerMixin(object):
unescape(attrs[attr])).lower() unescape(attrs[attr])).lower()
# remove replacement characters from unescaped characters # remove replacement characters from unescaped characters
val_unescaped = val_unescaped.replace("\ufffd", "") val_unescaped = val_unescaped.replace("\ufffd", "")
if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and try:
(val_unescaped.split(':')[0] not in uri = urlparse.urlparse(val_unescaped)
self.allowed_protocols)): except ValueError:
uri = None
del attrs[attr] del attrs[attr]
if uri and uri.scheme:
if uri.scheme not in self.allowed_protocols:
del attrs[attr]
if uri.scheme == 'data':
m = content_type_rgx.match(uri.path)
if not m:
del attrs[attr]
elif m.group('content_type') not in self.allowed_content_types:
del attrs[attr]
for attr in self.svg_attr_val_allows_ref: for attr in self.svg_attr_val_allows_ref:
if attr in attrs: if attr in attrs:
attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
@@ -245,7 +274,7 @@ class HTMLSanitizerMixin(object):
elif prop.split('-')[0].lower() in ['background', 'border', 'margin', elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
'padding']: 'padding']:
for keyword in value.split(): for keyword in value.split():
if not keyword in self.acceptable_css_keywords and \ if keyword not in self.acceptable_css_keywords and \
not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
break break
else: else:
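The hunk above swaps the old protocol-prefix regex for a real urlparse pass, with content_type_rgx vetting data: URIs. A rough standalone sketch of that acceptance logic (is_allowed_uri is a hypothetical helper, the lists are trimmed for brevity, and the regex is simplified from the one in the diff):

import re
from six.moves import urllib_parse as urlparse

allowed_protocols = ['http', 'https', 'data']
allowed_content_types = ['image/png', 'image/jpeg', 'image/gif',
                         'image/webp', 'image/bmp', 'text/plain']
content_type_rgx = re.compile(
    r'^(?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)'
    r'(?:;charset=[-a-zA-Z0-9]+)?(?:;base64)?,.*$')

def is_allowed_uri(value):
    try:
        uri = urlparse.urlparse(value)
    except ValueError:
        return False
    if uri.scheme and uri.scheme not in allowed_protocols:
        return False
    if uri.scheme == 'data':
        # For data: URIs the payload lands in uri.path.
        m = content_type_rgx.match(uri.path)
        return bool(m) and m.group('content_type') in allowed_content_types
    return True

print(is_allowed_uri('data:image/png;base64,iVBORw0KGgo='))  # True
print(is_allowed_uri('javascript:alert(1)'))                 # False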
View file
@@ -1,9 +1,6 @@
from __future__ import absolute_import, division, unicode_literals from __future__ import absolute_import, division, unicode_literals
from six import text_type from six import text_type
import gettext
_ = gettext.gettext
try: try:
from functools import reduce from functools import reduce
except ImportError: except ImportError:
@@ -35,7 +32,7 @@ else:
v = utils.surrogatePairToCodepoint(v) v = utils.surrogatePairToCodepoint(v)
else: else:
v = ord(v) v = ord(v)
if not v in encode_entity_map or k.islower(): if v not in encode_entity_map or k.islower():
# prefer &lt; over &LT; and similarly for &amp;, &gt;, etc. # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
encode_entity_map[v] = k encode_entity_map[v] = k
@@ -208,7 +205,7 @@ class HTMLSerializer(object):
if token["systemId"]: if token["systemId"]:
if token["systemId"].find('"') >= 0: if token["systemId"].find('"') >= 0:
if token["systemId"].find("'") >= 0: if token["systemId"].find("'") >= 0:
self.serializeError(_("System identifer contains both single and double quote characters")) self.serializeError("System identifer contains both single and double quote characters")
quote_char = "'" quote_char = "'"
else: else:
quote_char = '"' quote_char = '"'
@@ -220,7 +217,7 @@ class HTMLSerializer(object):
elif type in ("Characters", "SpaceCharacters"): elif type in ("Characters", "SpaceCharacters"):
if type == "SpaceCharacters" or in_cdata: if type == "SpaceCharacters" or in_cdata:
if in_cdata and token["data"].find("</") >= 0: if in_cdata and token["data"].find("</") >= 0:
self.serializeError(_("Unexpected </ in CDATA")) self.serializeError("Unexpected </ in CDATA")
yield self.encode(token["data"]) yield self.encode(token["data"])
else: else:
yield self.encode(escape(token["data"])) yield self.encode(escape(token["data"]))
@@ -231,7 +228,7 @@ class HTMLSerializer(object):
if name in rcdataElements and not self.escape_rcdata: if name in rcdataElements and not self.escape_rcdata:
in_cdata = True in_cdata = True
elif in_cdata: elif in_cdata:
self.serializeError(_("Unexpected child element of a CDATA element")) self.serializeError("Unexpected child element of a CDATA element")
for (attr_namespace, attr_name), attr_value in token["data"].items(): for (attr_namespace, attr_name), attr_value in token["data"].items():
# TODO: Add namespace support here # TODO: Add namespace support here
k = attr_name k = attr_name
@@ -279,20 +276,20 @@ class HTMLSerializer(object):
if name in rcdataElements: if name in rcdataElements:
in_cdata = False in_cdata = False
elif in_cdata: elif in_cdata:
self.serializeError(_("Unexpected child element of a CDATA element")) self.serializeError("Unexpected child element of a CDATA element")
yield self.encodeStrict("</%s>" % name) yield self.encodeStrict("</%s>" % name)
elif type == "Comment": elif type == "Comment":
data = token["data"] data = token["data"]
if data.find("--") >= 0: if data.find("--") >= 0:
self.serializeError(_("Comment contains --")) self.serializeError("Comment contains --")
yield self.encodeStrict("<!--%s-->" % token["data"]) yield self.encodeStrict("<!--%s-->" % token["data"])
elif type == "Entity": elif type == "Entity":
name = token["name"] name = token["name"]
key = name + ";" key = name + ";"
if not key in entities: if key not in entities:
self.serializeError(_("Entity %s not recognized" % name)) self.serializeError("Entity %s not recognized" % name)
if self.resolve_entities and key not in xmlEntities: if self.resolve_entities and key not in xmlEntities:
data = entities[key] data = entities[key]
else: else:
View file
@@ -158,7 +158,7 @@ def getDomBuilder(DomImplementation):
else: else:
# HACK: allow text nodes as children of the document node # HACK: allow text nodes as children of the document node
if hasattr(self.dom, '_child_node_types'): if hasattr(self.dom, '_child_node_types'):
if not Node.TEXT_NODE in self.dom._child_node_types: if Node.TEXT_NODE not in self.dom._child_node_types:
self.dom._child_node_types = list(self.dom._child_node_types) self.dom._child_node_types = list(self.dom._child_node_types)
self.dom._child_node_types.append(Node.TEXT_NODE) self.dom._child_node_types.append(Node.TEXT_NODE)
self.dom.appendChild(self.dom.createTextNode(data)) self.dom.appendChild(self.dom.createTextNode(data))
View file
@@ -10,8 +10,12 @@ returning an iterator generating tokens.
from __future__ import absolute_import, division, unicode_literals from __future__ import absolute_import, division, unicode_literals
__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshistream", "lxmletree",
"pulldom"]
import sys import sys
from .. import constants
from ..utils import default_etree from ..utils import default_etree
treeWalkerCache = {} treeWalkerCache = {}
@@ -55,3 +59,89 @@ def getTreeWalker(treeType, implementation=None, **kwargs):
# XXX: NEVER cache here, caching is done in the etree submodule # XXX: NEVER cache here, caching is done in the etree submodule
return etree.getETreeModule(implementation, **kwargs).TreeWalker return etree.getETreeModule(implementation, **kwargs).TreeWalker
return treeWalkerCache.get(treeType) return treeWalkerCache.get(treeType)
def concatenateCharacterTokens(tokens):
pendingCharacters = []
for token in tokens:
type = token["type"]
if type in ("Characters", "SpaceCharacters"):
pendingCharacters.append(token["data"])
else:
if pendingCharacters:
yield {"type": "Characters", "data": "".join(pendingCharacters)}
pendingCharacters = []
yield token
if pendingCharacters:
yield {"type": "Characters", "data": "".join(pendingCharacters)}
def pprint(walker):
"""Pretty printer for tree walkers"""
output = []
indent = 0
for token in concatenateCharacterTokens(walker):
type = token["type"]
if type in ("StartTag", "EmptyTag"):
# tag name
if token["namespace"] and token["namespace"] != constants.namespaces["html"]:
if token["namespace"] in constants.prefixes:
ns = constants.prefixes[token["namespace"]]
else:
ns = token["namespace"]
name = "%s %s" % (ns, token["name"])
else:
name = token["name"]
output.append("%s<%s>" % (" " * indent, name))
indent += 2
# attributes (sorted for consistent ordering)
attrs = token["data"]
for (namespace, localname), value in sorted(attrs.items()):
if namespace:
if namespace in constants.prefixes:
ns = constants.prefixes[namespace]
else:
ns = namespace
name = "%s %s" % (ns, localname)
else:
name = localname
output.append("%s%s=\"%s\"" % (" " * indent, name, value))
# self-closing
if type == "EmptyTag":
indent -= 2
elif type == "EndTag":
indent -= 2
elif type == "Comment":
output.append("%s<!-- %s -->" % (" " * indent, token["data"]))
elif type == "Doctype":
if token["name"]:
if token["publicId"]:
output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
(" " * indent,
token["name"],
token["publicId"],
token["systemId"] if token["systemId"] else ""))
elif token["systemId"]:
output.append("""%s<!DOCTYPE %s "" "%s">""" %
(" " * indent,
token["name"],
token["systemId"]))
else:
output.append("%s<!DOCTYPE %s>" % (" " * indent,
token["name"]))
else:
output.append("%s<!DOCTYPE >" % (" " * indent,))
elif type == "Characters":
output.append("%s\"%s\"" % (" " * indent, token["data"]))
elif type == "SpaceCharacters":
assert False, "concatenateCharacterTokens should have got rid of all Space tokens"
else:
raise ValueError("Unknown token type, %s" % type)
return "\n".join(output)
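getTreeWalker plus the new pprint make a quick debugging dump for a parsed tree; a small usage sketch (the markup is illustrative):

import html5lib
from html5lib import treewalkers

document = html5lib.parse('<p class="x">hi<br></p>')
TreeWalker = treewalkers.getTreeWalker('etree')
print(treewalkers.pprint(TreeWalker(document)))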
View file
@@ -1,8 +1,8 @@
from __future__ import absolute_import, division, unicode_literals from __future__ import absolute_import, division, unicode_literals
from six import text_type, string_types from six import text_type, string_types
import gettext __all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN",
_ = gettext.gettext "TreeWalker", "NonRecursiveTreeWalker"]
from xml.dom import Node from xml.dom import Node
@@ -58,7 +58,7 @@ class TreeWalker(object):
"namespace": to_text(namespace), "namespace": to_text(namespace),
"data": attrs} "data": attrs}
if hasChildren: if hasChildren:
yield self.error(_("Void element has children")) yield self.error("Void element has children")
def startTag(self, namespace, name, attrs): def startTag(self, namespace, name, attrs):
assert namespace is None or isinstance(namespace, string_types), type(namespace) assert namespace is None or isinstance(namespace, string_types), type(namespace)
@@ -122,7 +122,7 @@ class TreeWalker(object):
return {"type": "Entity", "name": text_type(name)} return {"type": "Entity", "name": text_type(name)}
def unknown(self, nodeType): def unknown(self, nodeType):
return self.error(_("Unknown node type: ") + nodeType) return self.error("Unknown node type: " + nodeType)
class NonRecursiveTreeWalker(TreeWalker): class NonRecursiveTreeWalker(TreeWalker):
View file
@@ -2,9 +2,6 @@ from __future__ import absolute_import, division, unicode_literals
from xml.dom import Node from xml.dom import Node
import gettext
_ = gettext.gettext
from . import _base from . import _base
View file
@@ -7,12 +7,10 @@ except ImportError:
from ordereddict import OrderedDict from ordereddict import OrderedDict
except ImportError: except ImportError:
OrderedDict = dict OrderedDict = dict
import gettext
_ = gettext.gettext
import re import re
from six import text_type from six import string_types
from . import _base from . import _base
from ..utils import moduleFactoryFactory from ..utils import moduleFactoryFactory
@@ -60,7 +58,7 @@ def getETreeBuilder(ElementTreeImplementation):
return _base.COMMENT, node.text return _base.COMMENT, node.text
else: else:
assert type(node.tag) == text_type, type(node.tag) assert isinstance(node.tag, string_types), type(node.tag)
# This is assumed to be an ordinary element # This is assumed to be an ordinary element
match = tag_regexp.match(node.tag) match = tag_regexp.match(node.tag)
if match: if match:
View file
@@ -4,9 +4,6 @@ from six import text_type
from lxml import etree from lxml import etree
from ..treebuilders.etree import tag_regexp from ..treebuilders.etree import tag_regexp
from gettext import gettext
_ = gettext
from . import _base from . import _base
from .. import ihatexml from .. import ihatexml
@@ -130,7 +127,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
def getNodeDetails(self, node): def getNodeDetails(self, node):
if isinstance(node, tuple): # Text node if isinstance(node, tuple): # Text node
node, key = node node, key = node
assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
return _base.TEXT, ensure_str(getattr(node, key)) return _base.TEXT, ensure_str(getattr(node, key))
elif isinstance(node, Root): elif isinstance(node, Root):
@@ -169,7 +166,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
attrs, len(node) > 0 or node.text) attrs, len(node) > 0 or node.text)
def getFirstChild(self, node): def getFirstChild(self, node):
assert not isinstance(node, tuple), _("Text nodes have no children") assert not isinstance(node, tuple), "Text nodes have no children"
assert len(node) or node.text, "Node has no children" assert len(node) or node.text, "Node has no children"
if node.text: if node.text:
@@ -180,7 +177,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
def getNextSibling(self, node): def getNextSibling(self, node):
if isinstance(node, tuple): # Text node if isinstance(node, tuple): # Text node
node, key = node node, key = node
assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
if key == "text": if key == "text":
# XXX: we cannot use a "bool(node) and node[0] or None" construct here # XXX: we cannot use a "bool(node) and node[0] or None" construct here
# because node[0] might evaluate to False if it has no child element # because node[0] might evaluate to False if it has no child element
@@ -196,7 +193,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
def getParentNode(self, node): def getParentNode(self, node):
if isinstance(node, tuple): # Text node if isinstance(node, tuple): # Text node
node, key = node node, key = node
assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
if key == "text": if key == "text":
return node return node
# else: fallback to "normal" processing # else: fallback to "normal" processing
View file
@@ -2,6 +2,8 @@ from __future__ import absolute_import, division, unicode_literals
from types import ModuleType from types import ModuleType
from six import text_type
try: try:
import xml.etree.cElementTree as default_etree import xml.etree.cElementTree as default_etree
except ImportError: except ImportError:
@@ -9,7 +11,26 @@ except ImportError:
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
"surrogatePairToCodepoint", "moduleFactoryFactory"] "surrogatePairToCodepoint", "moduleFactoryFactory",
"supports_lone_surrogates"]
# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
# caught by the below test. In general this would be any platform
# using UTF-16 as its encoding of unicode strings, such as
# Jython. This is because UTF-16 itself is based on the use of such
# surrogates, and there is no mechanism to further escape such
# escapes.
try:
_x = eval('"\\uD800"')
if not isinstance(_x, text_type):
# We need this with u"" because of http://bugs.jython.org/issue2039
_x = eval('u"\\uD800"')
assert isinstance(_x, text_type)
except:
supports_lone_surrogates = False
else:
supports_lone_surrogates = True
class MethodDispatcher(dict): class MethodDispatcher(dict):
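supports_lone_surrogates is the flag the inputstream changes above key off: on UTF-16-based interpreters such as Jython a lone surrogate cannot live in a text string, so surrogate regexes are built through eval to keep the literal out of compiled source. A minimal sketch of the consuming pattern (module path html5lib.utils as vendored here; Python 3 assumed):

import re
from html5lib.utils import supports_lone_surrogates

if supports_lone_surrogates:
    # eval keeps the lone-surrogate literal out of the module source.
    lone_surrogate_re = re.compile(eval('"[\\uD800-\\uDFFF]"'))
    print(repr(lone_surrogate_re.sub('\ufffd', eval('"a\\uD800b"'))))  # 'a\ufffdb'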
View file
@@ -1,6 +1,6 @@
Metadata-Version: 1.1 Metadata-Version: 1.1
Name: ox Name: ox
Version: 2.1.unknown Version: 2.3.x
Summary: python-ox - the web in a dict Summary: python-ox - the web in a dict
Home-page: http://code.0x2620.org/python-ox Home-page: http://code.0x2620.org/python-ox
Author: 0x2620 Author: 0x2620
View file
@@ -42,7 +42,7 @@ ox/django/api/urls.py
ox/django/api/views.py ox/django/api/views.py
ox/torrent/__init__.py ox/torrent/__init__.py
ox/torrent/bencode.py ox/torrent/bencode.py
ox/torrent/btformats.py ox/torrent/bencode3.py
ox/torrent/makemetafile.py ox/torrent/makemetafile.py
ox/web/__init__.py ox/web/__init__.py
ox/web/abebooks.py ox/web/abebooks.py
@@ -74,6 +74,7 @@ ox/web/piratecinema.py
ox/web/rottentomatoes.py ox/web/rottentomatoes.py
ox/web/siteparser.py ox/web/siteparser.py
ox/web/spiegel.py ox/web/spiegel.py
ox/web/startpage.py
ox/web/thepiratebay.py ox/web/thepiratebay.py
ox/web/torrent.py ox/web/torrent.py
ox/web/tv.py ox/web/tv.py
View file
@@ -1,162 +1,164 @@
../ox/image.py ../ox/__init__.py
../ox/location.py ../ox/api.py
../ox/cache.py ../ox/cache.py
../ox/net.py ../ox/file.py
../ox/utils.py ../ox/fixunicode.py
../ox/jsonc.py
../ox/normalize.py
../ox/form.py ../ox/form.py
../ox/format.py ../ox/format.py
../ox/__init__.py
../ox/movie.py
../ox/text.py
../ox/geo.py ../ox/geo.py
../ox/api.py
../ox/fixunicode.py
../ox/oembed.py
../ox/html.py ../ox/html.py
../ox/file.py ../ox/image.py
../ox/srt.py
../ox/js.py
../ox/iso.py ../ox/iso.py
../ox/django/http.py ../ox/js.py
../ox/django/utils.py ../ox/jsonc.py
../ox/django/monitor.py ../ox/location.py
../ox/movie.py
../ox/net.py
../ox/normalize.py
../ox/oembed.py
../ox/srt.py
../ox/text.py
../ox/utils.py
../ox/django/__init__.py ../ox/django/__init__.py
../ox/django/middleware.py
../ox/django/decorators.py ../ox/django/decorators.py
../ox/django/fields.py ../ox/django/fields.py
../ox/django/shortcuts.py ../ox/django/http.py
../ox/django/views.py ../ox/django/middleware.py
../ox/django/monitor.py
../ox/django/query.py ../ox/django/query.py
../ox/django/shortcuts.py
../ox/django/utils.py
../ox/django/views.py
../ox/django/widgets.py ../ox/django/widgets.py
../ox/django/api/__init__.py ../ox/django/api/__init__.py
../ox/django/api/actions.py
../ox/django/api/urls.py ../ox/django/api/urls.py
../ox/django/api/views.py ../ox/django/api/views.py
../ox/django/api/actions.py
../ox/torrent/__init__.py ../ox/torrent/__init__.py
../ox/torrent/makemetafile.py
../ox/torrent/bencode.py ../ox/torrent/bencode.py
../ox/torrent/btformats.py ../ox/torrent/bencode3.py
../ox/web/oxdb.py ../ox/torrent/makemetafile.py
../ox/web/lyricsfly.py
../ox/web/spiegel.py
../ox/web/allmovie.py
../ox/web/twitter.py
../ox/web/siteparser.py
../ox/web/ubu.py
../ox/web/epguides.py
../ox/web/__init__.py ../ox/web/__init__.py
../ox/web/archive.py
../ox/web/freebase.py
../ox/web/vimeo.py
../ox/web/thepiratebay.py
../ox/web/auth.py
../ox/web/duckduckgo.py
../ox/web/flixter.py
../ox/web/rottentomatoes.py
../ox/web/criterion.py
../ox/web/lookupbyisbn.py
../ox/web/wikipedia.py
../ox/web/abebooks.py ../ox/web/abebooks.py
../ox/web/allmovie.py
../ox/web/amazon.py ../ox/web/amazon.py
../ox/web/impawards.py
../ox/web/tv.py
../ox/web/dailymotion.py
../ox/web/movieposterdb.py
../ox/web/filmsdivision.py
../ox/web/arsenalberlin.py
../ox/web/youtube.py
../ox/web/google.py
../ox/web/itunes.py
../ox/web/piratecinema.py
../ox/web/opensubtitles.py
../ox/web/mininova.py
../ox/web/imdb.py
../ox/web/apple.py ../ox/web/apple.py
../ox/web/torrent.py ../ox/web/archive.py
../ox/web/arsenalberlin.py
../ox/web/auth.py
../ox/web/criterion.py
../ox/web/dailymotion.py
../ox/web/duckduckgo.py
../ox/web/epguides.py
../ox/web/filmsdivision.py
../ox/web/flixter.py
../ox/web/freebase.py
../ox/web/google.py
../ox/web/imdb.py
../ox/web/impawards.py
../ox/web/itunes.py
../ox/web/lookupbyisbn.py
../ox/web/lyricsfly.py
../ox/web/metacritic.py ../ox/web/metacritic.py
../ox/__pycache__/image.cpython-34.pyc ../ox/web/mininova.py
../ox/__pycache__/location.cpython-34.pyc ../ox/web/movieposterdb.py
../ox/web/opensubtitles.py
../ox/web/oxdb.py
../ox/web/piratecinema.py
../ox/web/rottentomatoes.py
../ox/web/siteparser.py
../ox/web/spiegel.py
../ox/web/startpage.py
../ox/web/thepiratebay.py
../ox/web/torrent.py
../ox/web/tv.py
../ox/web/twitter.py
../ox/web/ubu.py
../ox/web/vimeo.py
../ox/web/wikipedia.py
../ox/web/youtube.py
../ox/__pycache__/__init__.cpython-34.pyc
../ox/__pycache__/api.cpython-34.pyc
../ox/__pycache__/cache.cpython-34.pyc ../ox/__pycache__/cache.cpython-34.pyc
../ox/__pycache__/net.cpython-34.pyc ../ox/__pycache__/file.cpython-34.pyc
../ox/__pycache__/utils.cpython-34.pyc ../ox/__pycache__/fixunicode.cpython-34.pyc
../ox/__pycache__/jsonc.cpython-34.pyc
../ox/__pycache__/normalize.cpython-34.pyc
../ox/__pycache__/form.cpython-34.pyc ../ox/__pycache__/form.cpython-34.pyc
../ox/__pycache__/format.cpython-34.pyc ../ox/__pycache__/format.cpython-34.pyc
../ox/__pycache__/__init__.cpython-34.pyc
../ox/__pycache__/movie.cpython-34.pyc
../ox/__pycache__/text.cpython-34.pyc
../ox/__pycache__/geo.cpython-34.pyc ../ox/__pycache__/geo.cpython-34.pyc
../ox/__pycache__/api.cpython-34.pyc
../ox/__pycache__/fixunicode.cpython-34.pyc
../ox/__pycache__/oembed.cpython-34.pyc
../ox/__pycache__/html.cpython-34.pyc ../ox/__pycache__/html.cpython-34.pyc
../ox/__pycache__/file.cpython-34.pyc ../ox/__pycache__/image.cpython-34.pyc
../ox/__pycache__/srt.cpython-34.pyc
../ox/__pycache__/js.cpython-34.pyc
../ox/__pycache__/iso.cpython-34.pyc ../ox/__pycache__/iso.cpython-34.pyc
../ox/django/__pycache__/http.cpython-34.pyc ../ox/__pycache__/js.cpython-34.pyc
../ox/django/__pycache__/utils.cpython-34.pyc ../ox/__pycache__/jsonc.cpython-34.pyc
../ox/django/__pycache__/monitor.cpython-34.pyc ../ox/__pycache__/location.cpython-34.pyc
../ox/__pycache__/movie.cpython-34.pyc
../ox/__pycache__/net.cpython-34.pyc
../ox/__pycache__/normalize.cpython-34.pyc
../ox/__pycache__/oembed.cpython-34.pyc
../ox/__pycache__/srt.cpython-34.pyc
../ox/__pycache__/text.cpython-34.pyc
../ox/__pycache__/utils.cpython-34.pyc
../ox/django/__pycache__/__init__.cpython-34.pyc ../ox/django/__pycache__/__init__.cpython-34.pyc
../ox/django/__pycache__/middleware.cpython-34.pyc
../ox/django/__pycache__/decorators.cpython-34.pyc ../ox/django/__pycache__/decorators.cpython-34.pyc
../ox/django/__pycache__/fields.cpython-34.pyc ../ox/django/__pycache__/fields.cpython-34.pyc
../ox/django/__pycache__/shortcuts.cpython-34.pyc ../ox/django/__pycache__/http.cpython-34.pyc
../ox/django/__pycache__/views.cpython-34.pyc ../ox/django/__pycache__/middleware.cpython-34.pyc
../ox/django/__pycache__/monitor.cpython-34.pyc
../ox/django/__pycache__/query.cpython-34.pyc ../ox/django/__pycache__/query.cpython-34.pyc
../ox/django/__pycache__/shortcuts.cpython-34.pyc
../ox/django/__pycache__/utils.cpython-34.pyc
../ox/django/__pycache__/views.cpython-34.pyc
../ox/django/__pycache__/widgets.cpython-34.pyc ../ox/django/__pycache__/widgets.cpython-34.pyc
../ox/django/api/__pycache__/__init__.cpython-34.pyc ../ox/django/api/__pycache__/__init__.cpython-34.pyc
../ox/django/api/__pycache__/actions.cpython-34.pyc
../ox/django/api/__pycache__/urls.cpython-34.pyc ../ox/django/api/__pycache__/urls.cpython-34.pyc
../ox/django/api/__pycache__/views.cpython-34.pyc ../ox/django/api/__pycache__/views.cpython-34.pyc
../ox/django/api/__pycache__/actions.cpython-34.pyc
../ox/torrent/__pycache__/__init__.cpython-34.pyc ../ox/torrent/__pycache__/__init__.cpython-34.pyc
../ox/torrent/__pycache__/makemetafile.cpython-34.pyc
../ox/torrent/__pycache__/bencode.cpython-34.pyc ../ox/torrent/__pycache__/bencode.cpython-34.pyc
../ox/torrent/__pycache__/btformats.cpython-34.pyc ../ox/torrent/__pycache__/bencode3.cpython-34.pyc
../ox/web/__pycache__/oxdb.cpython-34.pyc ../ox/torrent/__pycache__/makemetafile.cpython-34.pyc
../ox/web/__pycache__/lyricsfly.cpython-34.pyc
../ox/web/__pycache__/spiegel.cpython-34.pyc
../ox/web/__pycache__/allmovie.cpython-34.pyc
../ox/web/__pycache__/twitter.cpython-34.pyc
../ox/web/__pycache__/siteparser.cpython-34.pyc
../ox/web/__pycache__/ubu.cpython-34.pyc
../ox/web/__pycache__/epguides.cpython-34.pyc
../ox/web/__pycache__/__init__.cpython-34.pyc ../ox/web/__pycache__/__init__.cpython-34.pyc
../ox/web/__pycache__/archive.cpython-34.pyc
../ox/web/__pycache__/freebase.cpython-34.pyc
../ox/web/__pycache__/vimeo.cpython-34.pyc
../ox/web/__pycache__/thepiratebay.cpython-34.pyc
../ox/web/__pycache__/auth.cpython-34.pyc
../ox/web/__pycache__/duckduckgo.cpython-34.pyc
../ox/web/__pycache__/flixter.cpython-34.pyc
../ox/web/__pycache__/rottentomatoes.cpython-34.pyc
../ox/web/__pycache__/criterion.cpython-34.pyc
../ox/web/__pycache__/lookupbyisbn.cpython-34.pyc
../ox/web/__pycache__/wikipedia.cpython-34.pyc
../ox/web/__pycache__/abebooks.cpython-34.pyc ../ox/web/__pycache__/abebooks.cpython-34.pyc
../ox/web/__pycache__/allmovie.cpython-34.pyc
../ox/web/__pycache__/amazon.cpython-34.pyc ../ox/web/__pycache__/amazon.cpython-34.pyc
../ox/web/__pycache__/impawards.cpython-34.pyc
../ox/web/__pycache__/tv.cpython-34.pyc
../ox/web/__pycache__/dailymotion.cpython-34.pyc
../ox/web/__pycache__/movieposterdb.cpython-34.pyc
../ox/web/__pycache__/filmsdivision.cpython-34.pyc
../ox/web/__pycache__/arsenalberlin.cpython-34.pyc
../ox/web/__pycache__/youtube.cpython-34.pyc
../ox/web/__pycache__/google.cpython-34.pyc
../ox/web/__pycache__/itunes.cpython-34.pyc
../ox/web/__pycache__/piratecinema.cpython-34.pyc
../ox/web/__pycache__/opensubtitles.cpython-34.pyc
../ox/web/__pycache__/mininova.cpython-34.pyc
../ox/web/__pycache__/imdb.cpython-34.pyc
../ox/web/__pycache__/apple.cpython-34.pyc ../ox/web/__pycache__/apple.cpython-34.pyc
../ox/web/__pycache__/torrent.cpython-34.pyc ../ox/web/__pycache__/archive.cpython-34.pyc
../ox/web/__pycache__/arsenalberlin.cpython-34.pyc
../ox/web/__pycache__/auth.cpython-34.pyc
../ox/web/__pycache__/criterion.cpython-34.pyc
../ox/web/__pycache__/dailymotion.cpython-34.pyc
../ox/web/__pycache__/duckduckgo.cpython-34.pyc
../ox/web/__pycache__/epguides.cpython-34.pyc
../ox/web/__pycache__/filmsdivision.cpython-34.pyc
../ox/web/__pycache__/flixter.cpython-34.pyc
../ox/web/__pycache__/freebase.cpython-34.pyc
../ox/web/__pycache__/google.cpython-34.pyc
../ox/web/__pycache__/imdb.cpython-34.pyc
../ox/web/__pycache__/impawards.cpython-34.pyc
../ox/web/__pycache__/itunes.cpython-34.pyc
../ox/web/__pycache__/lookupbyisbn.cpython-34.pyc
../ox/web/__pycache__/lyricsfly.cpython-34.pyc
../ox/web/__pycache__/metacritic.cpython-34.pyc ../ox/web/__pycache__/metacritic.cpython-34.pyc
../ox/web/__pycache__/mininova.cpython-34.pyc
../ox/web/__pycache__/movieposterdb.cpython-34.pyc
../ox/web/__pycache__/opensubtitles.cpython-34.pyc
../ox/web/__pycache__/oxdb.cpython-34.pyc
../ox/web/__pycache__/piratecinema.cpython-34.pyc
../ox/web/__pycache__/rottentomatoes.cpython-34.pyc
../ox/web/__pycache__/siteparser.cpython-34.pyc
../ox/web/__pycache__/spiegel.cpython-34.pyc
../ox/web/__pycache__/startpage.cpython-34.pyc
../ox/web/__pycache__/thepiratebay.cpython-34.pyc
../ox/web/__pycache__/torrent.cpython-34.pyc
../ox/web/__pycache__/tv.cpython-34.pyc
../ox/web/__pycache__/twitter.cpython-34.pyc
../ox/web/__pycache__/ubu.cpython-34.pyc
../ox/web/__pycache__/vimeo.cpython-34.pyc
../ox/web/__pycache__/wikipedia.cpython-34.pyc
../ox/web/__pycache__/youtube.cpython-34.pyc
./ ./
dependency_links.txt
PKG-INFO PKG-INFO
SOURCES.txt
top_level.txt
requires.txt requires.txt
dependency_links.txt
top_level.txt
SOURCES.txt
View file
@@ -5,7 +5,7 @@ try:
from . import __version from . import __version
__version__ = __version.VERSION __version__ = __version.VERSION
except: except:
__version__ = '2.1.x' __version__ = '2.3.x'
from . import cache from . import cache
from . import js from . import js
View file
@@ -1 +0,0 @@
VERSION="2.1.670"
View file
@@ -52,6 +52,8 @@ class API(object):
def _add_action(self, action): def _add_action(self, action):
def method(self, *args, **kw): def method(self, *args, **kw):
if args and kw:
raise ValueError('pass either a dictionary or kwargs, not both')
if not kw: if not kw:
if args: if args:
kw = args[0] kw = args[0]
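The new guard makes the generated API methods accept either one positional dict or keyword arguments, never both; illustrative calls (the endpoint URL and the find action are hypothetical):

import ox.api

api = ox.api.API('https://example.org/api/')
api.find({'query': {'conditions': []}})   # positional dict: ok
api.find(query={'conditions': []})        # kwargs: ok
api.find({'query': {}}, keys=['id'])      # both: raises ValueError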
View file
@@ -62,6 +62,8 @@ def get_headers(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
store.set(url, data, -1, url_headers) store.set(url, data, -1, url_headers)
return url_headers return url_headers
def get_json(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
return json.loads(read_url(url, data, headers, timeout).decode('utf-8'))
class InvalidResult(Exception): class InvalidResult(Exception):
"""Base class for exceptions in this module.""" """Base class for exceptions in this module."""
def __init__(self, result, headers): def __init__(self, result, headers):
@@ -113,10 +116,12 @@ def read_url(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, val
result = result.decode(encoding) result = result.decode(encoding)
return result return result
get_url=read_url
def save_url(url, filename, overwrite=False): def save_url(url, filename, overwrite=False):
if not os.path.exists(filename) or overwrite: if not os.path.exists(filename) or overwrite:
dirname = os.path.dirname(filename) dirname = os.path.dirname(filename)
if not os.path.exists(dirname): if dirname and not os.path.exists(dirname):
os.makedirs(dirname) os.makedirs(dirname)
data = read_url(url) data = read_url(url)
with open(filename, 'wb') as f: with open(filename, 'wb') as f:
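get_json above is read_url plus JSON decoding, and save_url now also accepts a bare filename with no directory component; a short usage sketch (URLs illustrative):

import ox.cache

info = ox.cache.get_json('https://example.org/status.json')    # decoded object
ox.cache.save_url('https://example.org/logo.png', 'logo.png')  # empty dirname is fine now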
View file
@@ -10,7 +10,7 @@ from ..shortcuts import render_to_json_response, json_response
from ...utils import json from ...utils import json
def autodiscover(): def autodiscover():
#register api actions from all installed apps # Register api actions from all installed apps
from django.utils.importlib import import_module from django.utils.importlib import import_module
from django.utils.module_loading import module_has_submodule from django.utils.module_loading import module_has_submodule
for app in settings.INSTALLED_APPS: for app in settings.INSTALLED_APPS:
@@ -53,31 +53,24 @@ class ApiActions(dict):
versions = {} versions = {}
def __init__(self): def __init__(self):
def api(request): def api(request, data):
''' '''
returns list of all known api actions Returns a list of all api actions
param data { takes {
docs: bool code: boolean, // if true, return source code (optional)
} docs: boolean // if true, return doc strings (optional)
if docs is true, action properties contain docstrings }
return { returns {
status: {'code': int, 'text': string}, actions: {
data: { name: {
actions: { cache: boolean, // if false, don't cache results
'api': { code: string, // source code
cache: true, doc: string // doc strings
doc: 'recursion' },
}, ... // more actions
'hello': {
cache: true,
..
}
...
}
}
} }
}
''' '''
data = json.loads(request.POST.get('data', '{}'))
docs = data.get('docs', False) docs = data.get('docs', False)
code = data.get('code', False) code = data.get('code', False)
version = getattr(request, 'version', None) version = getattr(request, 'version', None)
@@ -134,9 +127,9 @@ class ApiActions(dict):
actions = ApiActions() actions = ApiActions()
def error(request): def error(request, data):
''' '''
this action is used to test api error codes, it should return a 503 error This action is used to test API error codes. It should return a 503 error.
''' '''
success = error_is_success success = error_is_success
return render_to_json_response({}) return render_to_json_response({})
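Under the new signature every action receives the decoded data dict instead of digging through request.POST itself; a minimal action sketch (the hello action is invented, and actions.register is assumed from the rest of this module):

from ox.django.api import actions
from ox.django.shortcuts import render_to_json_response, json_response

def hello(request, data):
    '''
    takes {
        name: string // optional
    }
    returns {
        greeting: string
    }
    '''
    name = data.get('name', 'world')
    return render_to_json_response(json_response({'greeting': 'hello %s' % name}))
actions.register(hello)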
View file
@@ -2,6 +2,8 @@
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
from __future__ import division, with_statement from __future__ import division, with_statement
import json
from django.shortcuts import render_to_response from django.shortcuts import render_to_response
from django.template import RequestContext from django.template import RequestContext
from django.conf import settings from django.conf import settings
@@ -16,7 +18,9 @@ def api(request):
'text': 'use POST'}}) 'text': 'use POST'}})
response['Access-Control-Allow-Origin'] = '*' response['Access-Control-Allow-Origin'] = '*'
return response return response
if not 'action' in request.POST: if request.META['REQUEST_METHOD'] != "POST" or (
not 'action' in request.POST and request.META.get('CONTENT_TYPE') != 'application/json'
):
methods = actions.keys() methods = actions.keys()
api = [] api = []
for f in sorted(methods): for f in sorted(methods):
@@ -28,14 +32,20 @@ def api(request):
'sitename': settings.SITENAME 'sitename': settings.SITENAME
}) })
return render_to_response('api.html', context) return render_to_response('api.html', context)
action = request.POST['action'] if request.META.get('CONTENT_TYPE') == 'application/json':
r = json.loads(request.body)
action = r['action']
data = r.get('data', {})
else:
action = request.POST['action']
data = json.loads(request.POST.get('data', '{}'))
version = getattr(request, 'version', None) version = getattr(request, 'version', None)
if version: if version:
f = actions.versions.get(version, {}).get(action, actions.get(action)) f = actions.versions.get(version, {}).get(action, actions.get(action))
else: else:
f = actions.get(action) f = actions.get(action)
if f: if f:
response = f(request) response = f(request, data)
else: else:
response = render_to_json_response(json_response(status=400, response = render_to_json_response(json_response(status=400,
text='Unknown action %s' % action)) text='Unknown action %s' % action))
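The dispatcher now takes either the classic form post (an action field plus a JSON-encoded data field) or a raw application/json body; both client styles sketched with requests (endpoint URL hypothetical):

import json
import requests

url = 'https://example.org/api/'
# Classic form-encoded style.
r1 = requests.post(url, data={'action': 'api', 'data': json.dumps({'docs': True})})
# JSON-body style enabled by this change.
r2 = requests.post(url, json={'action': 'api', 'data': {'docs': True}})
print(r1.json()['status'], r2.json()['status'])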
View file
@@ -5,6 +5,7 @@ import datetime
from django.db import models from django.db import models
from django.utils import datetime_safe from django.utils import datetime_safe
from six import string_types
from ox.utils import json from ox.utils import json
@@ -66,7 +67,7 @@ class DictField(models.TextField):
"""Convert our JSON object to a string before we save""" """Convert our JSON object to a string before we save"""
if value == None: if value == None:
return value return value
if isinstance(value, basestring): if isinstance(value, string_types):
value = eval(value) value = eval(value)
assert isinstance(value, dict) assert isinstance(value, dict)
value = json.dumps(value, default=to_json) value = json.dumps(value, default=to_json)
@@ -92,7 +93,7 @@ class TupleField(models.TextField):
def get_db_prep_save(self, value, connection): def get_db_prep_save(self, value, connection):
"""Convert our JSON object to a string before we save""" """Convert our JSON object to a string before we save"""
if isinstance(value, basestring): if isinstance(value, string_types):
value = eval(value) value = eval(value)
if isinstance(value, list): if isinstance(value, list):
value = tuple(value) value = tuple(value)
View file
@@ -3,7 +3,7 @@
import os import os
import mimetypes import mimetypes
from datetime import datetime, timedelta from datetime import datetime, timedelta
from urllib import quote from six.moves.urllib.parse import quote
from django.http import HttpResponse, Http404 from django.http import HttpResponse, Http404
from django.conf import settings from django.conf import settings
@@ -26,14 +26,14 @@ def HttpFileResponse(path, content_type=None, filename=None):
url = getattr(settings, PREFIX+'_URL', '') url = getattr(settings, PREFIX+'_URL', '')
if root and path.startswith(root): if root and path.startswith(root):
path = url + path[len(root)+1:] path = url + path[len(root)+1:]
if isinstance(path, unicode): if not isinstance(path, bytes):
path = path.encode('utf-8') path = path.encode('utf-8')
response['X-Accel-Redirect'] = path response['X-Accel-Redirect'] = path
if content_type: if content_type:
response['Content-Type'] = content_type response['Content-Type'] = content_type
elif getattr(settings, 'XSENDFILE', False): elif getattr(settings, 'XSENDFILE', False):
response = HttpResponse() response = HttpResponse()
if isinstance(path, unicode): if not isinstance(path, bytes):
path = path.encode('utf-8') path = path.encode('utf-8')
response['X-Sendfile'] = path response['X-Sendfile'] = path
if content_type: if content_type:
@@ -42,7 +42,7 @@ else:
else: else:
response = HttpResponse(open(path), content_type=content_type) response = HttpResponse(open(path), content_type=content_type)
if filename: if filename:
if isinstance(filename, unicode): if not isinstance(filename, bytes):
filename = filename.encode('utf-8') filename = filename.encode('utf-8')
response['Content-Disposition'] = "attachment; filename*=UTF=8''%s" % quote(filename) response['Content-Disposition'] = "attachment; filename*=UTF=8''%s" % quote(filename)
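All three branches above reduce to "make sure the path is bytes before it reaches a header", the Python 3 safe replacement for the old isinstance(path, unicode) test; the pattern in isolation:

def ensure_bytes(value):
    # On Python 3 every str is unicode, so test for bytes instead.
    if not isinstance(value, bytes):
        value = value.encode('utf-8')
    return value

print(ensure_bytes('caf\xe9'))       # b'caf\xc3\xa9'
print(ensure_bytes(b'caf\xc3\xa9'))  # returned unchanged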
View file
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
from shortcuts import HttpErrorJson, render_to_json_response from .shortcuts import HttpErrorJson, render_to_json_response
class ExceptionMiddleware(object): class ExceptionMiddleware(object):
def process_exception(self, request, exception): def process_exception(self, request, exception):
View file
@@ -1,26 +1,27 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
import os import os
import sys import sys
import time
import signal import signal
import threading import threading
import atexit import atexit
import Queue from six.moves.queue import Queue
_interval = 1.0 _interval = 1.0
_times = {} _times = {}
_files = [] _files = []
_running = False _running = False
_queue = Queue.Queue() _queue = Queue()
_lock = threading.Lock() _lock = threading.Lock()
def _restart(path): def _restart(path):
_queue.put(True) _queue.put(True)
prefix = 'monitor (pid=%d):' % os.getpid() prefix = 'monitor (pid=%d):' % os.getpid()
print >> sys.stderr, '%s Change detected to \'%s\'.' % (prefix, path) print('%s Change detected to \'%s\'.' % (prefix, path), file=sys.stderr)
print >> sys.stderr, '%s Triggering process restart.' % prefix print('%s Triggering process restart.' % prefix, file=sys.stderr)
os.kill(os.getpid(), signal.SIGINT) os.kill(os.getpid(), signal.SIGINT)
def _modified(path): def _modified(path):
@@ -59,7 +60,7 @@ def _monitor():
while 1: while 1:
# Check modification times on all files in sys.modules. # Check modification times on all files in sys.modules.
for module in sys.modules.values(): for module in list(sys.modules.values()):
if not hasattr(module, '__file__'): if not hasattr(module, '__file__'):
continue continue
path = getattr(module, '__file__') path = getattr(module, '__file__')
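The monitor boils down to polling modification times over sys.modules and sending the process a SIGINT when one changes; stripped to its essence (a sketch, not the module's full logic):

import os
import signal
import sys
import time

def watch(interval=1.0):
    mtimes = {}
    while True:
        for module in list(sys.modules.values()):
            path = getattr(module, '__file__', None)
            if not path:
                continue
            try:
                mtime = os.stat(path).st_mtime
            except OSError:
                continue
            if path in mtimes and mtime != mtimes[path]:
                os.kill(os.getpid(), signal.SIGINT)  # trigger restart
            mtimes[path] = mtime
        time.sleep(interval)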
View file
@@ -1,8 +1,8 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
import cookielib from six import StringIO, PY2
import urllib2 from six.moves import urllib
from StringIO import StringIO from six.moves import http_cookiejar as cookielib
from celery.utils import get_full_cls_name from celery.utils import get_full_cls_name
from celery.backends import default_backend from celery.backends import default_backend
@@ -49,15 +49,15 @@ def api_proxy(request):
cj = SessionCookieJar() cj = SessionCookieJar()
if 'cj' in request.session: if 'cj' in request.session:
cj.load(request.session['cj']) cj.load(request.session['cj'])
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
opener.addheaders = [ opener.addheaders = [
('User-Agent', request.META.get('HTTP_USER_AGENT')) ('User-Agent', request.META.get('HTTP_USER_AGENT'))
] ]
form = ox.MultiPartForm() form = ox.MultiPartForm()
for key in request.POST: for key in request.POST:
form.add_field(key, request.POST[key]) form.add_field(key, request.POST[key])
r = urllib2.Request(url) r = urllib.request.Request(url)
body = str(form) body = form.body()
r.add_header('Content-type', form.get_content_type()) r.add_header('Content-type', form.get_content_type())
r.add_header('Content-length', len(body)) r.add_header('Content-length', len(body))
r.add_data(body) r.add_data(body)
View file
@@ -9,6 +9,7 @@ import shutil
import struct import struct
import subprocess import subprocess
import sqlite3 import sqlite3
from distutils.spawn import find_executable
from .utils import json from .utils import json
@ -47,7 +48,7 @@ def _get_file_cache():
path = path[3:] path = path[3:]
return os.path.join(path, 'files.sqlite') return os.path.join(path, 'files.sqlite')
def cache(filename, type='oshash'): def cache(filename, type='oshash', update=False):
conn = sqlite3.connect(_get_file_cache(), timeout=10) conn = sqlite3.connect(_get_file_cache(), timeout=10)
conn.row_factory = sqlite3.Row conn.row_factory = sqlite3.Row
@ -67,11 +68,12 @@ def cache(filename, type='oshash'):
info = '' info = ''
for row in c: for row in c:
if stat.st_size == row['size'] and int(stat.st_mtime) == int(row['mtime']): if stat.st_size == row['size'] and int(stat.st_mtime) == int(row['mtime']):
value = row[type] if not update:
if value: value = row[type]
if type == 'info': if value:
value = json.loads(value) if type == 'info':
return value value = json.loads(value)
return value
h = row['oshash'] h = row['oshash']
sha1 = row['sha1'] sha1 = row['sha1']
info = row['info'] info = row['info']
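
The new update flag lets callers force a refresh: when size and mtime still match a cached row, update=True skips the early return, so the requested value is recomputed and re-stored instead of served from the cache. Hypothetical usage (path made up):

info = cache('/media/movie.avi', 'info')                # cached probe result, if any
info = cache('/media/movie.avi', 'info', update=True)   # recompute and re-store
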
@ -154,6 +156,8 @@ def avinfo(filename, cached=True):
if cached: if cached:
return cache(filename, 'info') return cache(filename, 'info')
if os.path.getsize(filename): if os.path.getsize(filename):
if find_executable('ffprobe'):
return ffprobe(filename)
ffmpeg2theora = cmd('ffmpeg2theora') ffmpeg2theora = cmd('ffmpeg2theora')
p = subprocess.Popen([ffmpeg2theora], stdout=subprocess.PIPE, stderr=subprocess.PIPE) p = subprocess.Popen([ffmpeg2theora], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
info, error = p.communicate() info, error = p.communicate()
@ -219,62 +223,71 @@ def ffprobe(filename):
return value return value
info = {} info = {}
for key in ('duration', 'size', 'bit_rate'): if not 'format' in ffinfo:
info[{ info['error'] = 'badfile'
'bit_rate': 'bitrate' else:
}.get(key, key)] = fix_value(key, ffinfo['format'][key]) for key in ('duration', 'size', 'bit_rate'):
info['audio'] = [] if key in ffinfo['format']:
info['video'] = [] info[{
info['metadata'] = ffinfo['format'].get('tags', {}) 'bit_rate': 'bitrate'
for s in ffinfo['streams']: }.get(key, key)] = fix_value(key, ffinfo['format'][key])
tags = s.pop('tags', {}) info['audio'] = []
language = None info['video'] = []
for t in tags: info['metadata'] = ffinfo['format'].get('tags', {})
if t == 'language': for s in ffinfo['streams']:
language = tags[t] tags = s.pop('tags', {})
else: language = None
info['metadata'][t] = tags[t] for t in tags:
if s.get('codec_type') in ('audio', 'video'): if t == 'language':
stream = {} language = tags[t]
if language and language != 'und': else:
stream['language'] = language info['metadata'][t] = tags[t]
keys = [ if s.get('codec_type') in ('audio', 'video'):
'codec_name', stream = {}
'width', if language and language != 'und':
'height', stream['language'] = language
'bit_rate', keys = [
'index', 'codec_name',
'display_aspect_ratio', 'width',
'sample_rate', 'height',
'channels', 'bit_rate',
] 'index',
if s['codec_type'] == 'video': 'display_aspect_ratio',
keys += [ 'sample_rate',
'sample_aspect_ratio', 'channels',
'r_frame_rate',
'pix_fmt',
] ]
if s['codec_type'] == 'video':
keys += [
'sample_aspect_ratio',
'r_frame_rate',
'pix_fmt',
]
for key in keys: for key in keys:
if key in s: if key in s:
stream[{ stream[{
'codec_name': 'codec', 'codec_name': 'codec',
'bit_rate': 'bitrate', 'bit_rate': 'bitrate',
'index': 'id', 'index': 'id',
'r_frame_rate': 'framerate', 'r_frame_rate': 'framerate',
'sample_rate': 'samplerate', 'sample_rate': 'samplerate',
'pix_fmt': 'pixel_format', 'pix_fmt': 'pixel_format',
}.get(key, key)] = fix_value(key, s[key]) 'sample_aspect_ratio': 'pixel_aspect_ratio',
info[s['codec_type']].append(stream) }.get(key, key)] = fix_value(key, s[key])
else: info[s['codec_type']].append(stream)
pass else:
#print s pass
for v in info['video']: #print s
if not 'display_aspect_ratio' in v and 'width' in v: for v in info['video']:
v['display_aspect_ratio'] = '%d:%d' % (v['width'], v['height']) k = 'display_aspect_ratio'
v['pixel_aspect_ratio'] = '1:1' if not k in v and 'width' in v \
or (k in v and v[k] == '0:1'):
v[k] = '%d:%d' % (v['width'], v['height'])
v['pixel_aspect_ratio'] = '1:1'
info['oshash'] = oshash(filename) info['oshash'] = oshash(filename)
info['path'] = os.path.basename(filename) info['path'] = filename
if not 'size' in info:
info['size'] = os.path.getsize(filename)
return info return info
def makedirs(path): def makedirs(path):
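
avinfo now prefers ffprobe whenever find_executable locates it on PATH, falling back to ffmpeg2theora otherwise. The ffprobe() wrapper it delegates to boils down to a JSON query along these lines (a sketch, not necessarily the module's exact command line):

import json
import subprocess

def probe(filename):
    # Ask ffprobe for container- and stream-level metadata as JSON.
    p = subprocess.Popen([
        'ffprobe', '-loglevel', 'error', '-print_format', 'json',
        '-show_format', '-show_streams', filename,
    ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    return json.loads(stdout.decode('utf-8'))
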

View file

@ -6,7 +6,7 @@ from __future__ import print_function
import unicodedata import unicodedata
from six import unichr from six import unichr, PY3
__all__ = ['fix_bad_unicode'] __all__ = ['fix_bad_unicode']
@ -75,7 +75,7 @@ def fix_bad_unicode(text):
>>> fix_bad_unicode('This text was never Unicode at all\x85') >>> fix_bad_unicode('This text was never Unicode at all\x85')
'This text was never Unicode at all…' 'This text was never Unicode at all…'
""" """
if not isinstance(text, str): if isinstance(text, bytes):
raise TypeError("This isn't even decoded into Unicode yet. " raise TypeError("This isn't even decoded into Unicode yet. "
"Decode it first.") "Decode it first.")
if len(text) == 0: if len(text) == 0:
@ -151,7 +151,10 @@ def text_badness(text):
- Improbable single-byte characters, such as ƒ or ¬ - Improbable single-byte characters, such as ƒ or ¬
- Letters in somewhat rare scripts - Letters in somewhat rare scripts
''' '''
assert isinstance(text, str) if PY3:
assert isinstance(text, str)
else:
assert isinstance(text, unicode)
errors = 0 errors = 0
very_weird_things = 0 very_weird_things = 0
weird_things = 0 weird_things = 0

View file

@ -68,7 +68,7 @@ class MultiPartForm(object):
return body return body
def body(self): def body(self):
"""Return a string representing the form data, including attached files.""" """Return a byte string representing the form data, including attached files."""
# Build a list of lists, each containing "lines" of the # Build a list of lists, each containing "lines" of the
# request. Each part is separated by a boundary string. # request. Each part is separated by a boundary string.
# Once the list is built, return a string where each # Once the list is built, return a string where each

View file

@ -30,6 +30,8 @@ def toAZ(num):
az = digits[r] + az az = digits[r] + az
return az return az
encode_base26=toAZ
def fromAZ(num): def fromAZ(num):
""" """
Converts a bijective base 26 string to an integer Converts a bijective base 26 string to an integer
@ -71,6 +73,8 @@ def to26(q):
converted.insert(0, l) converted.insert(0, l)
return "".join(converted) or 'A' return "".join(converted) or 'A'
decode_base26=fromAZ
def from26(q): def from26(q):
""" """
Converts a base 26 string to an integer Converts a base 26 string to an integer
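
Assuming the usual bijective base-26 semantics of toAZ/fromAZ (A=1 through Z=26, no zero digit), the two new aliases invert each other:

encode_base26(4461)    # 'FOO', i.e. 6*26**2 + 15*26 + 15
decode_base26('FOO')   # 4461
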
@ -402,6 +406,37 @@ def format_duration(ms, verbosity=0, years=True, hours=True, milliseconds=True):
duration = ' '.join(durations) duration = ' '.join(durations)
return duration return duration
def format_timecode(seconds):
'''
>>> format_timecode(3599.999)
'00:59:59.999'
'''
seconds = float(seconds)
d = int(seconds / 86400)
h = int(seconds % 86400 / 3600)
m = int(seconds % 3600 / 60)
s = float(seconds % 60)
duration = "%s%02d:%02d:%06.3f" % ('%d:' % d if d else '', h, m, s)
return duration
def parse_timecode(string):
'''
Takes a formatted timecode, returns seconds
>>> parse_timecode('1:02:03:04.05')
93784.05
>>> parse_timecode('3')
3.0
>>> parse_timecode('2:')
120.0
>>> parse_timecode('1::')
3600.0
'''
timecode = 0
for i, v in enumerate(list(reversed(string.split(':')))[:4]):
timecode += float(v) * (86400 if i == 3 else pow(60, i)) if v else 0
return timecode
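
The two helpers are inverses for well-formed input; the values below follow directly from the definitions above:

format_timecode(93784.05)         # '1:02:03:04.050' (day field only when nonzero)
parse_timecode('1:02:03:04.050')  # 93784.05
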
def ms2runtime(ms, shortenLong=False): def ms2runtime(ms, shortenLong=False):
# deprecated - use format_duration # deprecated - use format_duration
''' '''

View file

@ -259,6 +259,10 @@ def sanitize_html(html, tags=None, global_attributes=[]):
{'name': 'li'}, {'name': 'li'},
{'name': 'ol'}, {'name': 'ol'},
{'name': 'ul'}, {'name': 'ul'},
# definition lists
{'name': 'dl'},
{'name': 'dt'},
{'name': 'dd'},
# tables # tables
{'name': 'table'}, {'name': 'table'},
{'name': 'tbody'}, {'name': 'tbody'},
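
With dl, dt and dd on the default whitelist, definition-list markup should now pass through sanitize_html unchanged (expected behaviour under the defaults shown here):

sanitize_html('<dl><dt>term</dt><dd>definition</dd></dl>')
# -> '<dl><dt>term</dt><dd>definition</dd></dl>'
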

View file

@ -25,7 +25,13 @@ def drawText(image, position, text, font_file, font_size, color):
draw = ImageDraw.Draw(image) draw = ImageDraw.Draw(image)
font = ImageFont.truetype(font_file, font_size, encoding='unic') font = ImageFont.truetype(font_file, font_size, encoding='unic')
draw.text(position, text, fill=color, font=font) draw.text(position, text, fill=color, font=font)
return draw.textsize(text, font=font) size = draw.textsize(text, font=font)
version = getattr(Image, 'PILLOW_VERSION', None)
if version and version > '2.1.0' and version < '2.6.1':
offset = font.getoffset(text)
else:
offset = (0, 0)
return (size[0] + offset[0], size[1] + offset[1])
def getHSL(rgb): def getHSL(rgb):
rgb = [x / 255 for x in rgb] rgb = [x / 255 for x in rgb]
@ -141,7 +147,13 @@ def getRGB(hsl):
def getTextSize(image, text, font_file, font_size): def getTextSize(image, text, font_file, font_size):
draw = ImageDraw.Draw(image) draw = ImageDraw.Draw(image)
font = ImageFont.truetype(font_file, font_size, encoding='unic') font = ImageFont.truetype(font_file, font_size, encoding='unic')
return draw.textsize(text, font=font) size = draw.textsize(text, font=font)
version = getattr(Image, 'PILLOW_VERSION', None)
if version and version > '2.1.0' and version < '2.6.1':
offset = font.getoffset(text)
else:
offset = (0, 0)
return (size[0] + offset[0], size[1] + offset[1])
def wrapText(text, max_width, max_lines, font_file, font_size): def wrapText(text, max_width, max_lines, font_file, font_size):
# wraps text to max_width and max_lines # wraps text to max_width and max_lines
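
Both drawText and getTextSize now add font.getoffset(text) to the textsize result, but only for Pillow releases between 2.1.0 and 2.6.1, where ImageDraw.textsize left out the font's internal offset (note the guard compares version strings lexically, which is adequate for that 2.x range). The shared correction, condensed into one helper (the name is ours, not the module's):

from PIL import ImageDraw, ImageFont

def corrected_text_size(image, text, font_file, font_size):
    # textsize plus the font's internal offset, as in the two functions above
    draw = ImageDraw.Draw(image)
    font = ImageFont.truetype(font_file, font_size, encoding='unic')
    size = draw.textsize(text, font=font)
    offset = font.getoffset(text)
    return (size[0] + offset[0], size[1] + offset[1])
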

View file

@ -29,7 +29,7 @@ def format_path(data, directory_key='director'):
director = data['directorSort'] or ['Unknown Director'] director = data['directorSort'] or ['Unknown Director']
title = data['seriesTitle' if data['isEpisode'] else 'title'] or 'Untitled' title = data['seriesTitle' if data['isEpisode'] else 'title'] or 'Untitled'
year = data['seriesYear' if data['isEpisode'] else 'year'] or None year = data['seriesYear' if data['isEpisode'] else 'year'] or None
parts = map(format_underscores, filter(lambda x: x != None, [ parts = list(map(format_underscores, filter(lambda x: x != None, [
u'; '.join(director[:10]), u'; '.join(director[:10]),
u'%s%s' % (title, u' (%s)' % year if year else ''), u'%s%s' % (title, u' (%s)' % year if year else ''),
u'%s%s%s%s%s%s' % ( u'%s%s%s%s%s%s' % (
@ -40,7 +40,7 @@ def format_path(data, directory_key='director'):
u'.%s' % data['language'] if data['language'] else '', u'.%s' % data['language'] if data['language'] else '',
u'.%s' % data['extension'] if data['extension'] else '' u'.%s' % data['extension'] if data['extension'] else ''
) )
])) ])))
if data.get('subdirectory'): if data.get('subdirectory'):
parts.insert(-1, data['subdirectory']) parts.insert(-1, data['subdirectory'])
return unicodedata.normalize('NFD', u'/'.join(parts)) return unicodedata.normalize('NFD', u'/'.join(parts))
@ -188,8 +188,6 @@ def parse_path(path, directory_key='director'):
# TODO: '.com.avi' # TODO: '.com.avi'
''' '''
def parse_title(string):
return title, year
def parse_type(string): def parse_type(string):
for type in EXTENSIONS: for type in EXTENSIONS:
if string in EXTENSIONS[type]: if string in EXTENSIONS[type]:
@ -210,7 +208,7 @@ def parse_path(path, directory_key='director'):
string = re.sub('(?<=\w)_ ', ': ', string) string = re.sub('(?<=\w)_ ', ': ', string)
return string return string
data = {} data = {}
parts = map(lambda x: parse_underscores(x.strip()), path.split('/')) parts = list(map(lambda x: parse_underscores(x.strip()), unicodedata.normalize('NFD', path).split('/')))
# subdirectory # subdirectory
if len(parts) > 4: if len(parts) > 4:
data['subdirectory'] = '/'.join(parts[3:-1]) data['subdirectory'] = '/'.join(parts[3:-1])
@ -226,14 +224,14 @@ def parse_path(path, directory_key='director'):
# directorSort, director # directorSort, director
data['directorSort'] = data['director'] = [] data['directorSort'] = data['director'] = []
if director: if director:
data['directorSort'] = filter( data['directorSort'] = list(filter(
lambda x: x != 'Unknown Director', lambda x: x != 'Unknown Director',
director.split('; ') director.split('; ')
) ))
data['director'] = map( data['director'] = list(map(
lambda x: ' '.join(reversed(x.split(', '))), lambda x: ' '.join(reversed(x.split(', '))),
data['directorSort'] data['directorSort']
) ))
# title, year # title, year
data['title'] = data['year'] = None data['title'] = data['year'] = None
if title: if title:
@ -327,7 +325,7 @@ def parse_movie_path(path):
""" """
episodeTitle = episodeYear = seriesTitle = None episodeTitle = episodeYear = seriesTitle = None
episodeDirector = [] episodeDirector = []
parts = path.split('/') parts = unicodedata.normalize('NFD', path).split('/')
#title/year #title/year
if len(parts) == 4: if len(parts) == 4:

View file

@ -2,20 +2,21 @@
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
# GPL 2008 # GPL 2008
from __future__ import with_statement, print_function from __future__ import with_statement, print_function
import os
import gzip import gzip
import json
import os
import re import re
from six import BytesIO, PY3
import struct import struct
from six.moves import urllib
from six import BytesIO, PY3
from six.moves import urllib
from chardet.universaldetector import UniversalDetector from chardet.universaldetector import UniversalDetector
DEBUG = False DEBUG = False
# Default headers for HTTP requests. # Default headers for HTTP requests.
DEFAULT_HEADERS = { DEFAULT_HEADERS = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:28.0) Gecko/20100101 Firefox/28.0', 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-us,en;q=0.5', 'Accept-Language': 'en-us,en;q=0.5',
@ -47,9 +48,16 @@ def get_headers(url, data=None, headers=DEFAULT_HEADERS):
headers = e.headers headers = e.headers
return dict(headers) return dict(headers)
def get_json(url, data=None, headers=DEFAULT_HEADERS):
return json.loads(read_url(url, data, headers).decode('utf-8'))
def open_url(url, data=None, headers=DEFAULT_HEADERS): def open_url(url, data=None, headers=DEFAULT_HEADERS):
if isinstance(url, bytes): if PY3:
url = url.decode('utf-8') if isinstance(url, bytes):
url = url.decode('utf-8')
else:
if not isinstance(url, bytes):
url = url.encode('utf-8')
url = url.replace(' ', '%20') url = url.replace(' ', '%20')
if data and PY3 and not isinstance(data, bytes): if data and PY3 and not isinstance(data, bytes):
data = data.encode('utf-8') data = data.encode('utf-8')
@ -100,10 +108,12 @@ def detect_encoding(data):
detector.close() detector.close()
return detector.result['encoding'] return detector.result['encoding']
get_url=read_url
def save_url(url, filename, overwrite=False): def save_url(url, filename, overwrite=False):
if not os.path.exists(filename) or overwrite: if not os.path.exists(filename) or overwrite:
dirname = os.path.dirname(filename) dirname = os.path.dirname(filename)
if not os.path.exists(dirname): if dirname and not os.path.exists(dirname):
os.makedirs(dirname) os.makedirs(dirname)
data = read_url(url) data = read_url(url)
with open(filename, 'wb') as f: with open(filename, 'wb') as f:
@ -135,8 +145,9 @@ def oshash(url):
if filesize > 65536: if filesize > 65536:
tail = get_range(url, filesize-65536, filesize) tail = get_range(url, filesize-65536, filesize)
if filesize < 65536: if filesize < 65536:
for offset in range(0, filesize, bytesize): f = BytesIO(head)
buffer = head[offset:offset+bytesize] for x in range(int(filesize/bytesize)):
buffer = f.read(bytesize)
(l_value,)= struct.unpack(longlongformat, buffer) (l_value,)= struct.unpack(longlongformat, buffer)
hash += l_value hash += l_value
hash = hash & 0xFFFFFFFFFFFFFFFF #cut off 64bit overflow hash = hash & 0xFFFFFFFFFFFFFFFF #cut off 64bit overflow
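
The loop above is the tail end of the OpenSubtitles-style hash computed over HTTP range requests: file size plus the 64-bit little-endian integer sums of the first and last 64 KiB, truncated to 64 bits. For a local file the whole algorithm is roughly (a sketch; assumes at least 64 KiB of data):

import os
import struct

def oshash_file(path):
    longlongformat = '<q'  # 64-bit little-endian
    bytesize = struct.calcsize(longlongformat)
    filesize = os.path.getsize(path)
    if filesize < 65536:
        raise ValueError('oshash needs at least 64 KiB of data')
    hash = filesize
    with open(path, 'rb') as f:
        for offset in (0, filesize - 65536):
            f.seek(offset)
            for _ in range(65536 // bytesize):
                (l_value,) = struct.unpack(longlongformat, f.read(bytesize))
                hash = (hash + l_value) & 0xFFFFFFFFFFFFFFFF
    return '%016x' % hash
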

View file

@ -87,6 +87,7 @@ UA_REGEXPS = {
'(Camino)\/(\d+)', '(Camino)\/(\d+)',
'(Chimera)\/(\d+)', '(Chimera)\/(\d+)',
'(chromeframe)\/(\d+)', '(chromeframe)\/(\d+)',
'(Edge)\/(\d+)',
'(Epiphany)\/(\d+)', # before Chrome, Chromium and Safari '(Epiphany)\/(\d+)', # before Chrome, Chromium and Safari
'(Chromium)\/(\d+)', # before Chrome '(Chromium)\/(\d+)', # before Chrome
'(Chrome)\/(\d+)', '(Chrome)\/(\d+)',
@ -178,6 +179,7 @@ UA_VERSIONS = {
'10.8': '10.8 (Mountain Lion)', '10.8': '10.8 (Mountain Lion)',
'10.9': '10.9 (Mavericks)', '10.9': '10.9 (Mavericks)',
'10.10': '10.10 (Yosemite)', '10.10': '10.10 (Yosemite)',
'10.11': '10.11 (El Capitan)',
'40': 'Series 40', '40': 'Series 40',
'60': 'Series 60', '60': 'Series 60',
'NT 3.1': 'NT 3.1 (3.1)', 'NT 3.1': 'NT 3.1 (3.1)',
@ -192,6 +194,7 @@ UA_VERSIONS = {
'NT 6.1': 'NT 6.1 (7)', 'NT 6.1': 'NT 6.1 (7)',
'NT 6.2': 'NT 6.2 (8)', 'NT 6.2': 'NT 6.2 (8)',
'NT 6.3': 'NT 6.3 (8.1)', 'NT 6.3': 'NT 6.3 (8.1)',
'NT 6.4': 'NT 6.4 (10)',
'16': 'NT 3.1 (3.1)', '16': 'NT 3.1 (3.1)',
'3.1': 'NT 3.1 (3.1)', '3.1': 'NT 3.1 (3.1)',
'95': 'NT 4.0 (95)', '95': 'NT 4.0 (95)',
@ -254,6 +257,8 @@ def get_sort_name(name):
last_names = [] last_names = []
if re.search('^[0-9]+$', first_names[-1]): if re.search('^[0-9]+$', first_names[-1]):
add_name() add_name()
if re.search('[(\[].+?[)\]]$', first_names[-1]):
add_name()
if find_name(SUFFIXES): if find_name(SUFFIXES):
add_name() add_name()
add_name() add_name()

View file

@ -5,15 +5,19 @@
from threading import Event from threading import Event
from hashlib import sha1 from hashlib import sha1
import os import os
from six import PY2
from .bencode import bencode, bdecode if PY2:
from .bencode import bencode, bdecode
else:
from .bencode3 import bencode, bdecode
__all__ = ['create_torrent', 'get_info_hash', 'get_torrent_info', 'get_files', 'get_torrent_size'] __all__ = ['create_torrent', 'get_info_hash', 'get_torrent_info', 'get_files', 'get_torrent_size']
def create_torrent(file, url, params = {}, flag = Event(), def create_torrent(file, url, params = {}, flag = Event(),
progress = lambda x: None, progress_percent = 1): progress = lambda x: None, progress_percent = 1):
"Creates a torrent for a given file, using url as tracker url" "Creates a torrent for a given file, using url as tracker url"
from makemetafile import make_meta_file from .makemetafile import make_meta_file
return make_meta_file(file, url, params, flag, progress, progress_percent) return make_meta_file(file, url, params, flag, progress, progress_percent)
def get_info_hash(torrentFile): def get_info_hash(torrentFile):

View file

@ -0,0 +1,151 @@
##
#
# bencode.py python3 compatible bencode / bdecode
#
##
def _decode_int(data):
"""
decode integer from bytearray
return int, remaining data
"""
data = data[1:]
end = data.index(b'e')
return int(data[:end],10), data[end+1:]
def _decode_str(data):
"""
decode string from bytearray
return string, remaining data
"""
start = data.index(b':')
l = int(data[:start].decode(),10)
if l < 0:
raise Exception('invalid string size: %d' % l)
start += 1
ret = bytes(data[start:start+l])
data = data[start+l:]
return ret, data
def _decode_list(data):
"""
decode list from bytearray
return list, remaining data
"""
ls = []
data = data[1:]
while data[0] != ord(b'e'):
elem, data = _decode(data)
ls.append(elem)
return ls, data[1:]
def _decode_dict(data):
"""
decode dict from bytearray
return dict, remaining data
"""
d = {}
data = data[1:]
while data[0] != ord(b'e'):
k, data = _decode_str(data)
v, data = _decode(data)
d[k.decode()] = v
return d, data[1:]
def _decode(data):
"""
decode a bytearray
return deserialized object, remaining data
"""
ch = chr(data[0])
if ch == 'l':
return _decode_list(data)
elif ch == 'i':
return _decode_int(data)
elif ch == 'd':
return _decode_dict(data)
elif ch.isdigit():
return _decode_str(data)
else:
raise Exception('could not deserialize data: %s'%data)
def bdecode(data):
"""
decode a bytearray
return deserialized object
"""
obj , data = _decode(data)
if len(data) > 0:
raise Exception('failed to deserialize, extra data: %s'%data)
return obj
def _encode_str(s,buff):
"""
encode string to a buffer
"""
s = bytearray(s)
l = len(s)
buff.append(bytearray(str(l)+':','utf-8'))
buff.append(s)
def _encode_int(i,buff):
"""
encode integer to a buffer
"""
buff.append(b'i')
buff.append(bytearray(str(i),'ascii'))
buff.append(b'e')
def _encode_list(l,buff):
"""
encode list of elements to a buffer
"""
buff.append(b'l')
for i in l:
_encode(i,buff)
buff.append(b'e')
def _encode_dict(d,buff):
"""
encode dict
"""
buff.append(b'd')
l = list(d.keys())
l.sort()
for k in l:
_encode(str(k),buff)
_encode(d[k],buff)
buff.append(b'e')
def _encode(obj,buff):
"""
encode element obj to a buffer buff
"""
if isinstance(obj,str):
_encode_str(bytearray(obj,'utf-8'),buff)
elif isinstance(obj,bytes):
_encode_str(bytearray(obj),buff)
elif isinstance(obj,bytearray):
_encode_str(obj,buff)
elif isinstance(obj, int) and not isinstance(obj, bool):
_encode_int(obj,buff)
elif isinstance(obj,list):
_encode_list(obj,buff)
elif hasattr(obj,'keys') and hasattr(obj,'values'):
_encode_dict(obj,buff)
elif str(obj) in ['True','False']:
_encode_int(int(obj and '1' or '0'),buff)
else:
raise Exception('non serializable object: %s'%obj)
def bencode(obj):
"""
bencode element, return bytearray
"""
buff = []
_encode(obj,buff)
ret = bytearray()
for ba in buff:
ret += ba
return bytes(ret)
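
Given the rules above, bencode and bdecode round-trip plain Python structures, with one asymmetry worth noting: strings are encoded as UTF-8 but come back as bytes (only dict keys are decoded to str):

data = bencode({'announce': 'http://tracker/', 'info': {'length': 3, 'name': 'abc'}})
# b'd8:announce15:http://tracker/4:infod6:lengthi3e4:name3:abcee'
bdecode(data)
# {'announce': b'http://tracker/', 'info': {'length': 3, 'name': b'abc'}}
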

View file

@ -1,100 +0,0 @@
# Written by Bram Cohen
# see LICENSE.txt for license information
from types import StringType, LongType, IntType, ListType, DictType
from re import compile
reg = compile(r'^[^/\\.~][^/\\]*$')
ints = (LongType, IntType)
def check_info(info):
if type(info) != DictType:
raise ValueError, 'bad metainfo - not a dictionary'
pieces = info.get('pieces')
if type(pieces) != StringType or len(pieces) % 20 != 0:
raise ValueError, 'bad metainfo - bad pieces key'
piecelength = info.get('piece length')
if type(piecelength) not in ints or piecelength <= 0:
raise ValueError, 'bad metainfo - illegal piece length'
name = info.get('name')
if type(name) != StringType:
raise ValueError, 'bad metainfo - bad name'
if not reg.match(name):
raise ValueError, 'name %s disallowed for security reasons' % name
if info.has_key('files') == info.has_key('length'):
raise ValueError, 'single/multiple file mix'
if info.has_key('length'):
length = info.get('length')
if type(length) not in ints or length < 0:
raise ValueError, 'bad metainfo - bad length'
else:
files = info.get('files')
if type(files) != ListType:
raise ValueError
for f in files:
if type(f) != DictType:
raise ValueError, 'bad metainfo - bad file value'
length = f.get('length')
if type(length) not in ints or length < 0:
raise ValueError, 'bad metainfo - bad length'
path = f.get('path')
if type(path) != ListType or path == []:
raise ValueError, 'bad metainfo - bad path'
for p in path:
if type(p) != StringType:
raise ValueError, 'bad metainfo - bad path dir'
if not reg.match(p):
raise ValueError, 'path %s disallowed for security reasons' % p
for i in xrange(len(files)):
for j in xrange(i):
if files[i]['path'] == files[j]['path']:
raise ValueError, 'bad metainfo - duplicate path'
def check_message(message):
if type(message) != DictType:
raise ValueError
check_info(message.get('info'))
if type(message.get('announce')) != StringType:
raise ValueError
def check_peers(message):
if type(message) != DictType:
raise ValueError
if message.has_key('failure reason'):
if type(message['failure reason']) != StringType:
raise ValueError
return
peers = message.get('peers')
if type(peers) == ListType:
for p in peers:
if type(p) != DictType:
raise ValueError
if type(p.get('ip')) != StringType:
raise ValueError
port = p.get('port')
if type(port) not in ints or p <= 0:
raise ValueError
if p.has_key('peer id'):
id = p['peer id']
if type(id) != StringType or len(id) != 20:
raise ValueError
elif type(peers) != StringType or len(peers) % 6 != 0:
raise ValueError
interval = message.get('interval', 1)
if type(interval) not in ints or interval <= 0:
raise ValueError
minint = message.get('min interval', 1)
if type(minint) not in ints or minint <= 0:
raise ValueError
if type(message.get('tracker id', '')) != StringType:
raise ValueError
npeers = message.get('num peers', 0)
if type(npeers) not in ints or npeers < 0:
raise ValueError
dpeers = message.get('done peers', 0)
if type(dpeers) not in ints or dpeers < 0:
raise ValueError
last = message.get('last', 0)
if type(last) not in ints or last < 0:
raise ValueError

View file

@ -6,9 +6,13 @@ from os.path import getsize, split, join, abspath, isdir
from os import listdir from os import listdir
from hashlib import sha1 as sha from hashlib import sha1 as sha
from copy import copy from copy import copy
from string import strip import re
from bencode import bencode
from btformats import check_info from six import PY2
if PY2:
from .bencode import bencode
else:
from .bencode3 import bencode
from threading import Event from threading import Event
from time import time from time import time
from traceback import print_exc from traceback import print_exc
@ -57,14 +61,63 @@ def print_announcelist_details():
print ('') print ('')
print (' httpseeds = optional list of http-seed URLs, in the format:') print (' httpseeds = optional list of http-seed URLs, in the format:')
print (' url[|url...]') print (' url[|url...]')
reg = re.compile(r'^[^/\\.~][^/\\]*$')
def is_number(value):
return isinstance(value, int) or isinstance(value,float)
def check_info(info):
if not isinstance(info, dict):
raise ValueError('bad metainfo - not a dictionary')
pieces = info.get('pieces')
if not isinstance(pieces, bytes) or len(pieces) % 20 != 0:
raise ValueError('bad metainfo - bad pieces key')
piecelength = info.get('piece length')
if not is_number(piecelength) or piecelength <= 0:
raise ValueError('bad metainfo - illegal piece length')
name = info.get('name')
if not isinstance(name, bytes):
raise ValueError('bad metainfo - bad name')
if not reg.match(name.decode('utf-8')):
raise ValueError('name %s disallowed for security reasons' % name)
if ('files' in info) == ('length' in info):
raise ValueError('single/multiple file mix')
if 'length' in info:
length = info.get('length')
if not is_number(length) or length < 0:
raise ValueError('bad metainfo - bad length')
else:
files = info.get('files')
if not isinstance(files, list):
raise ValueError
for f in files:
if not isinstance(f, dict):
raise ValueError('bad metainfo - bad file value')
length = f.get('length')
if not is_number(length) or length < 0:
raise ValueError('bad metainfo - bad length')
path = f.get('path')
if not isinstance(path, list) or path == []:
raise ValueError('bad metainfo - bad path')
for p in path:
if not isinstance(p, bytes):
raise ValueError('bad metainfo - bad path dir')
if not reg.match(p.decode('utf-8')):
raise ValueError('path %s disallowed for security reasons' % p)
for i in range(len(files)):
for j in range(i):
if files[i]['path'] == files[j]['path']:
raise ValueError('bad metainfo - duplicate path')
def make_meta_file(file, url, params = {}, flag = Event(), def make_meta_file(file, url, params = {}, flag = Event(),
progress = lambda x: None, progress_percent = 1): progress = lambda x: None, progress_percent = 1):
if params.has_key('piece_size_pow2'): if 'piece_size_pow2' in params:
piece_len_exp = params['piece_size_pow2'] piece_len_exp = params['piece_size_pow2']
else: else:
piece_len_exp = default_piece_len_exp piece_len_exp = default_piece_len_exp
if params.has_key('target') and params['target'] != '': if 'target' in params and params['target'] != '':
f = params['target'] f = params['target']
else: else:
a, b = split(file) a, b = split(file)
@ -75,7 +128,7 @@ def make_meta_file(file, url, params = {}, flag = Event(),
if piece_len_exp == 0: # automatic if piece_len_exp == 0: # automatic
size = calcsize(file) size = calcsize(file)
if size > 8L*1024*1024*1024: # > 8 gig = if size > 8*1024*1024*1024: # > 8 gig =
piece_len_exp = 21 # 2 meg pieces piece_len_exp = 21 # 2 meg pieces
elif size > 2*1024*1024*1024: # > 2 gig = elif size > 2*1024*1024*1024: # > 2 gig =
piece_len_exp = 20 # 1 meg pieces piece_len_exp = 20 # 1 meg pieces
@ -92,7 +145,7 @@ def make_meta_file(file, url, params = {}, flag = Event(),
piece_length = 2 ** piece_len_exp piece_length = 2 ** piece_len_exp
encoding = None encoding = None
if params.has_key('filesystem_encoding'): if 'filesystem_encoding' in params:
encoding = params['filesystem_encoding'] encoding = params['filesystem_encoding']
if not encoding: if not encoding:
encoding = ENCODING encoding = ENCODING
@ -103,29 +156,29 @@ def make_meta_file(file, url, params = {}, flag = Event(),
if flag.isSet(): if flag.isSet():
return return
check_info(info) check_info(info)
h = open(f, 'wb') h = open(f.encode(encoding), 'wb')
data = {'info': info, 'announce': strip(url), 'creation date': long(time())} data = {'info': info, 'announce': url.strip(), 'creation date': int(time())}
if params.has_key('comment') and params['comment']: if 'comment' in params and params['comment']:
data['comment'] = params['comment'] data['comment'] = params['comment']
if params.has_key('real_announce_list'): # shortcut for progs calling in from outside if 'real_announce_list' in params: # shortcut for progs calling in from outside
data['announce-list'] = params['real_announce_list'] data['announce-list'] = params['real_announce_list']
elif params.has_key('announce_list') and params['announce_list']: elif 'announce_list' in params and params['announce_list']:
l = [] l = []
for tier in params['announce_list'].split('|'): for tier in params['announce_list'].split('|'):
l.append(tier.split(',')) l.append(tier.split(','))
data['announce-list'] = l data['announce-list'] = l
if params.has_key('real_httpseeds'): # shortcut for progs calling in from outside if 'real_httpseeds' in params: # shortcut for progs calling in from outside
data['httpseeds'] = params['real_httpseeds'] data['httpseeds'] = params['real_httpseeds']
elif params.has_key('httpseeds') and params['httpseeds']: elif 'httpseeds' in params and params['httpseeds']:
data['httpseeds'] = params['httpseeds'].split('|') data['httpseeds'] = params['httpseeds'].split('|')
if params.has_key('url-list') and params['url-list']: if 'url-list' in params and params['url-list']:
data['url-list'] = params['url-list'].split('|') data['url-list'] = params['url-list'].split('|')
if params.has_key('playtime') and params['playtime']: if 'playtime' in params and params['playtime']:
data['info']['playtime'] = params['playtime'] data['info']['playtime'] = params['playtime']
h.write(bencode(data)) h.write(bencode(data))
@ -134,7 +187,7 @@ def make_meta_file(file, url, params = {}, flag = Event(),
def calcsize(file): def calcsize(file):
if not isdir(file): if not isdir(file):
return getsize(file) return getsize(file)
total = 0L total = 0
for s in subfiles(abspath(file)): for s in subfiles(abspath(file)):
total += getsize(s[1]) total += getsize(s[1])
return total return total
@ -151,8 +204,8 @@ def uniconvertl(l, e):
def uniconvert(s, e): def uniconvert(s, e):
try: try:
if s.__class__.__name__ != 'unicode': if isinstance(s, bytes):
s = unicode(s,e) s = s.decode(e)
except UnicodeError: except UnicodeError:
raise UnicodeError('bad filename: '+s) raise UnicodeError('bad filename: '+s)
return s.encode('utf-8') return s.encode('utf-8')
@ -164,15 +217,15 @@ def makeinfo(file, piece_length, encoding, flag, progress, progress_percent=1):
subs.sort() subs.sort()
pieces = [] pieces = []
sh = sha() sh = sha()
done = 0L done = 0
fs = [] fs = []
totalsize = 0.0 totalsize = 0.0
totalhashed = 0L totalhashed = 0
for p, f in subs: for p, f in subs:
totalsize += getsize(f) totalsize += getsize(f)
for p, f in subs: for p, f in subs:
pos = 0L pos = 0
size = getsize(f) size = getsize(f)
fs.append({'length': size, 'path': uniconvertl(p, encoding)}) fs.append({'length': size, 'path': uniconvertl(p, encoding)})
h = open(f, 'rb') h = open(f, 'rb')
@ -196,13 +249,13 @@ def makeinfo(file, piece_length, encoding, flag, progress, progress_percent=1):
h.close() h.close()
if done > 0: if done > 0:
pieces.append(sh.digest()) pieces.append(sh.digest())
return {'pieces': ''.join(pieces), return {'pieces': b''.join(pieces),
'piece length': piece_length, 'files': fs, 'piece length': piece_length, 'files': fs,
'name': uniconvert(split(file)[1], encoding) } 'name': uniconvert(split(file)[1], encoding) }
else: else:
size = getsize(file) size = getsize(file)
pieces = [] pieces = []
p = 0L p = 0
h = open(file, 'rb') h = open(file, 'rb')
while p < size: while p < size:
x = h.read(min(piece_length, size - p)) x = h.read(min(piece_length, size - p))
@ -217,7 +270,7 @@ def makeinfo(file, piece_length, encoding, flag, progress, progress_percent=1):
else: else:
progress(min(piece_length, size - p)) progress(min(piece_length, size - p))
h.close() h.close()
return {'pieces': ''.join(pieces), return {'pieces': b''.join(pieces),
'piece length': piece_length, 'length': size, 'piece length': piece_length, 'length': size,
'name': uniconvert(split(file)[1], encoding) } 'name': uniconvert(split(file)[1], encoding) }
@ -240,7 +293,7 @@ def completedir(dir, url, params = {}, flag = Event(),
files = listdir(dir) files = listdir(dir)
files.sort() files.sort()
ext = '.torrent' ext = '.torrent'
if params.has_key('target'): if 'target' in params:
target = params['target'] target = params['target']
else: else:
target = '' target = ''
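
The check_info rewrite above replaces the removed types-module comparisons with isinstance checks that behave the same on Python 2 and 3; pieces, name and path entries are now expected as bytes. A minimal single-file info dict that passes (values invented for illustration):

info = {
    'pieces': b'\x00' * 20,   # one 20-byte SHA-1 digest per piece
    'piece length': 2 ** 18,
    'name': b'example.avi',
    'length': 123456,
}
check_info(info)  # returns None; raises ValueError on bad metainfo
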

View file

@ -7,7 +7,7 @@ from six.moves.urllib.parse import quote
from ox import find_re, strip_tags, decode_html from ox import find_re, strip_tags, decode_html
from ox.cache import read_url from ox.cache import read_url
import lxml import lxml.html
def findISBN(title, author): def findISBN(title, author):

View file

@ -15,9 +15,14 @@ def get_data(id):
details = cache.read_url('%s?output=json' % url) details = cache.read_url('%s?output=json' % url)
details = json.loads(details) details = json.loads(details)
for key in ('title', 'description', 'runtime'): for key in ('title', 'description', 'runtime'):
data[key] = details['metadata'][key] if key in details['metadata']:
if isinstance(data[key], list): data[key] = details['metadata'][key]
data[key] = data[key][0] if isinstance(data[key], list):
data[key] = data[key][0]
if isinstance(data[key], basestring):
data[key] = data[key].strip()
if data[key][0] == '[' and data[key][-1] == ']':
data[key] = data[key][1:-1]
data['url'] = url data['url'] = url
data['image'] = 'http://archive.org/download/%s/format=thumbnail' % id data['image'] = 'http://archive.org/download/%s/format=thumbnail' % id
data['ogg'] = 'http://archive.org/download/%s/format=Ogg+video' % id data['ogg'] = 'http://archive.org/download/%s/format=Ogg+video' % id

View file

@ -5,7 +5,7 @@ import re
import ox.cache import ox.cache
from ox.cache import read_url from ox.cache import read_url
from ox.html import strip_tags from ox.html import strip_tags, decode_html
from ox.text import find_re from ox.text import find_re
import imdb import imdb
@ -36,14 +36,15 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
html = ox.cache.read_url(data["url"], timeout=timeout) html = ox.cache.read_url(data["url"], timeout=timeout)
data["number"] = find_re(html, "<li>Spine #(\d+)") data["number"] = find_re(html, "<li>Spine #(\d+)")
data["title"] = find_re(html, "<h1 class=\"movietitle\">(.*?)</h1>") data["title"] = decode_html(find_re(html, "<h1 class=\"movietitle\">(.*?)</h1>"))
data["title"] = data["title"].split(u' \u2014 The Television Version')[0] data["title"] = data["title"].split(u' \u2014 The Television Version')[0].strip()
data["director"] = strip_tags(find_re(html, "<h2 class=\"director\">(.*?)</h2>")) data["director"] = strip_tags(find_re(html, "<h2 class=\"director\">(.*?)</h2>"))
results = find_re(html, '<div class="left_column">(.*?)</div>') results = find_re(html, '<div class="left_column">(.*?)</div>')
results = re.compile("<li>(.*?)</li>").findall(results) results = re.compile("<li>(.*?)</li>").findall(results)
data["country"] = results[0] data["country"] = results[0]
data["year"] = results[1] data["year"] = results[1]
data["synopsis"] = strip_tags(find_re(html, "<div class=\"content_block last\">.*?<p>(.*?)</p>")) data["synopsis"] = decode_html(strip_tags(find_re(html,
"<div class=\"content_block last\">.*?<p>(.*?)</p>")))
result = find_re(html, "<div class=\"purchase\">(.*?)</div>") result = find_re(html, "<div class=\"purchase\">(.*?)</div>")
if 'Blu-Ray' in result or 'Essential Art House DVD' in result: if 'Blu-Ray' in result or 'Essential Art House DVD' in result:

View file

@ -6,7 +6,7 @@ import re
import time import time
import unicodedata import unicodedata
from six.moves import urllib from six.moves.urllib.parse import urlencode
from six import string_types from six import string_types
from .. import find_re, strip_tags, decode_html from .. import find_re, strip_tags, decode_html
@ -37,7 +37,7 @@ class Imdb(SiteParser):
'alternativeTitles': { 'alternativeTitles': {
'page': 'releaseinfo', 'page': 'releaseinfo',
're': [ 're': [
'name="akas".*?<table.*?>(.*?)</table>', '<table[^>]*?id="akas"[^>]*?>(.*?)</table>',
"td>(.*?)</td>.*?<td>(.*?)</td>" "td>(.*?)</td>.*?<td>(.*?)</td>"
], ],
'type': 'list' 'type': 'list'
@ -74,7 +74,7 @@ class Imdb(SiteParser):
'type': 'list' 'type': 'list'
}, },
'connections': { 'connections': {
'page': 'trivia?tab=mc', 'page': 'movieconnections',
're': '<h4 class="li_group">(.*?)</h4>(.*?)(<\/div>\n <a|<script)', 're': '<h4 class="li_group">(.*?)</h4>(.*?)(<\/div>\n <a|<script)',
'type': 'list' 'type': 'list'
}, },
@ -476,9 +476,8 @@ class Imdb(SiteParser):
alt[title].append(c) alt[title].append(c)
self['alternativeTitles'] = [] self['alternativeTitles'] = []
for t in sorted(alt, key=lambda a: sorted(alt[a])): for t in sorted(alt, key=lambda a: sorted(alt[a])):
if alt[t]: countries = sorted([normalize_country_name(c) or c for c in alt[t]])
countries = sorted([normalize_country_name(c) or c for c in alt[t]]) self['alternativeTitles'].append((t, countries))
self['alternativeTitles'].append((t, countries))
if not self['alternativeTitles']: if not self['alternativeTitles']:
del self['alternativeTitles'] del self['alternativeTitles']
@ -521,7 +520,7 @@ class Imdb(SiteParser):
if len(description) == 2 and description[-1].strip() != '-': if len(description) == 2 and description[-1].strip() != '-':
r['description'] = description[-1].strip() r['description'] = description[-1].strip()
return r return r
cc[rel] = list(map(get_conn, re.compile('<a href="/title/tt(\d{7})/">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data))) cc[rel] = list(map(get_conn, re.compile('<a href="/title/tt(\d{7})/?">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data)))
self['connections'] = cc self['connections'] = cc
@ -665,7 +664,7 @@ def get_movie_by_title(title, timeout=-1):
params['q'] = unicodedata.normalize('NFKC', params['q']).encode('latin-1') params['q'] = unicodedata.normalize('NFKC', params['q']).encode('latin-1')
except: except:
params['q'] = params['q'].encode('utf-8') params['q'] = params['q'].encode('utf-8')
params = urllib.urlencode(params) params = urlencode(params)
url = "http://akas.imdb.com/find?" + params url = "http://akas.imdb.com/find?" + params
data = read_url(url, timeout=timeout, unicode=True) data = read_url(url, timeout=timeout, unicode=True)
#if search results in redirect, get id of current page #if search results in redirect, get id of current page
@ -741,7 +740,7 @@ def get_movie_id(title, director='', year='', timeout=-1):
params['q'] = unicodedata.normalize('NFKC', params['q']).encode('latin-1') params['q'] = unicodedata.normalize('NFKC', params['q']).encode('latin-1')
except: except:
params['q'] = params['q'].encode('utf-8') params['q'] = params['q'].encode('utf-8')
params = urllib.urlencode(params) params = urlencode(params)
url = "http://akas.imdb.com/find?" + params url = "http://akas.imdb.com/find?" + params
#print url #print url

View file

@ -2,7 +2,7 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import print_function from __future__ import print_function
import re import re
from six.moves import urllib from six.moves.urllib.parse import urlencode
from ox.cache import read_url from ox.cache import read_url
from ox.html import decode_html, strip_tags from ox.html import decode_html, strip_tags
@ -29,7 +29,7 @@ def compose_url(request, parameters):
if request == 'advancedSearch': if request == 'advancedSearch':
url = 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?' url = 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?'
if parameters['media'] == 'music': if parameters['media'] == 'music':
url += urllib.urlencode({ url += urlencode({
'albumTerm': parameters['title'], 'albumTerm': parameters['title'],
'allArtistNames': parameters['artist'], 'allArtistNames': parameters['artist'],
'composerTerm': '', 'composerTerm': '',
@ -42,7 +42,7 @@ def compose_url(request, parameters):
'songTerm': '' 'songTerm': ''
}) })
elif parameters['media'] == 'movie': elif parameters['media'] == 'movie':
url += urllib.urlencode({ url += urlencode({
'actorTerm': '', 'actorTerm': '',
'closedCaption': 0, 'closedCaption': 0,
'descriptionTerm': '', 'descriptionTerm': '',

View file

@ -7,12 +7,6 @@ from ox import find_re, strip_tags
def get_url(id=None, imdb=None): def get_url(id=None, imdb=None):
#this would also wor but does not cache:
'''
from urllib2 import urlopen
u = urlopen(url)
return u.url
'''
if imdb: if imdb:
url = "http://www.rottentomatoes.com/alias?type=imdbid&s=%s" % imdb url = "http://www.rottentomatoes.com/alias?type=imdbid&s=%s" % imdb
data = read_url(url) data = read_url(url)

View file

@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from six.moves import urllib
import lxml.html
import ox
DEFAULT_MAX_RESULTS = 10
DEFAULT_TIMEOUT = 24*60*60
def read_url(url, data=None, headers=ox.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT):
return ox.cache.read_url(url, data, headers, timeout, unicode=True)
def quote_plus(s):
if not isinstance(s, bytes):
s = s.encode('utf-8')
return urllib.parse.quote_plus(s)
def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
"""
Return max_results tuples with title, url, description
>>> find("The Matrix site:imdb.com", 1)[0][0]
u'The Matrix (1999) - IMDb'
>>> find("The Matrix site:imdb.com", 1)[0][1]
u'http://www.imdb.com/title/tt0133093/'
"""
results = []
url = 'https://eu1.startpage.com/do/search?nosteeraway=1&abp=1&language=english&cmd=process_search&query=%s&x=0&y=0&cat=web&engine0=v1all' % quote_plus(query)
data = read_url(url, timeout=timeout)
doc = lxml.html.document_fromstring(data)
for r in doc.xpath("//div[contains(@class, 'result')]"):
t = r.find('h3')
if t is not None:
title = t.text_content().strip()
url = t.find('a').attrib['href']
description = r.find_class('desc')[0].text_content()
results.append((title, url, description))
if len(results) >= max_results:
break
return results

View file

@ -25,7 +25,7 @@ def find_movies(query=None, imdb=None, max_results=10):
if imdb: if imdb:
query = "tt" + normalize_imdbid(imdb) query = "tt" + normalize_imdbid(imdb)
results = [] results = []
next = ["http://thepiratebay.org/search/%s/0/3/200" % quote(query), ] next = ["https://thepiratebay.se/search/%s/0/3/200" % quote(query), ]
page_count = 1 page_count = 1
while next and page_count < 4: while next and page_count < 4:
page_count += 1 page_count += 1
@ -33,12 +33,12 @@ def find_movies(query=None, imdb=None, max_results=10):
if not url.startswith('http'): if not url.startswith('http'):
if not url.startswith('/'): if not url.startswith('/'):
url = "/" + url url = "/" + url
url = "http://thepiratebay.org" + url url = "https://thepiratebay.se" + url
data = read_url(url, timeout=cache_timeout, unicode=True) data = read_url(url, timeout=cache_timeout, unicode=True)
regexp = '''<tr.*?<td class="vertTh"><a href="/browse/(.*?)".*?<td><a href="(/torrent/.*?)" class="detLink".*?>(.*?)</a>.*?</tr>''' regexp = '''<tr.*?<td class="vertTh"><a href="/browse/(.*?)".*?<td><a href="(/torrent/.*?)" class="detLink".*?>(.*?)</a>.*?</tr>'''
for row in re.compile(regexp, re.DOTALL).findall(data): for row in re.compile(regexp, re.DOTALL).findall(data):
torrentType = row[0] torrentType = row[0]
torrentLink = "http://thepiratebay.org" + row[1] torrentLink = "https://thepiratebay.se" + row[1]
torrentTitle = decode_html(row[2]) torrentTitle = decode_html(row[2])
# 201 = Movies , 202 = Movie DVDR, 205 TV Shows # 201 = Movies , 202 = Movie DVDR, 205 TV Shows
if torrentType in ['201']: if torrentType in ['201']:
@ -61,7 +61,7 @@ def get_id(piratebayId):
def exists(piratebayId): def exists(piratebayId):
piratebayId = get_id(piratebayId) piratebayId = get_id(piratebayId)
return ox.net.exists("http://thepiratebay.org/torrent/%s" % piratebayId) return ox.net.exists("https://thepiratebay.se/torrent/%s" % piratebayId)
def get_data(piratebayId): def get_data(piratebayId):
_key_map = { _key_map = {
@ -75,7 +75,7 @@ def get_data(piratebayId):
torrent = dict() torrent = dict()
torrent[u'id'] = piratebayId torrent[u'id'] = piratebayId
torrent[u'domain'] = 'thepiratebay.org' torrent[u'domain'] = 'thepiratebay.org'
torrent[u'comment_link'] = 'http://thepiratebay.org/torrent/%s' % piratebayId torrent[u'comment_link'] = 'https://thepiratebay.se/torrent/%s' % piratebayId
data = read_url(torrent['comment_link'], unicode=True) data = read_url(torrent['comment_link'], unicode=True)
torrent[u'title'] = find_re(data, '<title>(.*?) \(download torrent\) - TPB</title>') torrent[u'title'] = find_re(data, '<title>(.*?) \(download torrent\) - TPB</title>')

View file

@ -3,12 +3,14 @@
from __future__ import print_function from __future__ import print_function
import re import re
from ox import find_re, strip_tags, decode_html import lxml.html
from ox import strip_tags, decode_html
from ox.cache import read_url from ox.cache import read_url
def get_id(url): def get_id(url):
return url.replace('http://www.ubu.com/', '').split('.html')[0] return url.replace('http://www.ubu.com/', '').split('.html')[0].replace('/./', '/')
def get_url(id): def get_url(id):
return 'http://www.ubu.com/%s.html' % id return 'http://www.ubu.com/%s.html' % id
@ -22,51 +24,92 @@ def get_data(url):
'url': url, 'url': url,
'type': re.compile('ubu.com/(.*?)/').findall(url)[0] 'type': re.compile('ubu.com/(.*?)/').findall(url)[0]
} }
for videourl, title in re.compile('<a href="(http://ubumexico.centro.org.mx/.*?)">(.*?)</a>').findall(data): if m['type'] == 'sound':
if videourl.endswith('.srt'): m['tracks'] = [{
m['srt'] = videourl 'title': strip_tags(decode_html(t[1])).strip(),
elif not 'video' in m: 'url': t[0]
m['video'] = videourl } for t in re.compile('"(http.*?.mp3)"[^>]*>(.+)</a', re.IGNORECASE).findall(data)]
m['video'] = m['video'].replace('/video/ ', '/video/').replace(' ', '%20')
if m['video'] == 'http://ubumexico.centro.org.mx/video/':
del m['video']
m['title'] = strip_tags(decode_html(title)).strip()
if not 'url' in m:
print(url, 'missing')
if 'title' in m:
m['title'] = re.sub('(.*?) \(\d{4}\)$', '\\1', m['title'])
match = re.compile("flashvars','file=(.*?.flv)'").findall(data)
if match:
m['flv'] = match[0]
m['flv'] = m['flv'].replace('/video/ ', '/video/').replace(' ', '%20')
y = re.compile('\((\d{4})\)').findall(data)
if y:
m['year'] = int(y[0])
d = re.compile('Director: (.+)').findall(data)
if d:
m['director'] = strip_tags(decode_html(d[0])).strip()
a = re.compile('<a href="(.*?)">Back to (.*?)</a>', re.DOTALL).findall(data)
if a:
m['artist'] = strip_tags(decode_html(a[0][1])).strip()
else: else:
a = re.compile('<a href="(.*?)">(.*?) in UbuWeb Film').findall(data) for videourl, title in re.compile('href="(http://ubumexico.centro.org.mx/.*?)">(.*?)</a>').findall(data):
if videourl.endswith('.srt'):
m['srt'] = videourl
elif not 'video' in m:
m['video'] = videourl
m['video'] = m['video'].replace('/video/ ', '/video/').replace(' ', '%20')
if m['video'] == 'http://ubumexico.centro.org.mx/video/':
del m['video']
if not 'title' in m:
m['title'] = strip_tags(decode_html(title)).strip()
if not 'url' in m:
print(url, 'missing')
if 'title' in m:
m['title'] = re.sub('(.*?) \(\d{4}\)$', '\\1', m['title'])
if not 'title' in m:
match = re.compile('<span id="ubuwork">(.*?)</span>').findall(data)
if match:
m['title'] = strip_tags(decode_html(match[0])).strip()
if not 'title' in m:
match = re.compile("<title>.*?&amp;(.*?)</title>", re.DOTALL).findall(data)
if match:
m['title'] = re.sub('\s+', ' ', match[0]).strip()
if ' - ' in m['title']:
m['title'] = m['title'].split(' - ', 1)[-1]
if 'title' in m:
m['title'] = strip_tags(decode_html(m['title']).strip())
match = re.compile("flashvars','file=(.*?.flv)'").findall(data)
if match:
m['flv'] = match[0]
m['flv'] = m['flv'].replace('/video/ ', '/video/').replace(' ', '%20')
match = re.compile('''src=(.*?) type="video/mp4"''').findall(data)
if match:
m['mp4'] = match[0].strip('"').strip("'").replace(' ', '%20')
if not m['mp4'].startswith('http'):
m['mp4'] = 'http://ubumexico.centro.org.mx/video/' + m['mp4']
elif 'video' in m and (m['video'].endswith('.mp4') or m['video'].endswith('.m4v')):
m['mp4'] = m['video']
doc = lxml.html.document_fromstring(read_url(url))
desc = doc.xpath("//div[contains(@id, 'ubudesc')]")
if len(desc):
txt = []
for part in desc[0].text_content().split('\n\n'):
if part == 'RESOURCES:':
break
if part.strip():
txt.append(part)
if txt:
if len(txt) > 1 and txt[0].strip() == m.get('title'):
txt = txt[1:]
m['description'] = '\n\n'.join(txt).split('RESOURCES')[0].split('RELATED')[0].strip()
y = re.compile('\((\d{4})\)').findall(data)
if y:
m['year'] = int(y[0])
d = re.compile('Director: (.+)').findall(data)
if d:
m['director'] = strip_tags(decode_html(d[0])).strip()
a = re.compile('<a href="(.*?)">Back to (.*?)</a>', re.DOTALL).findall(data)
if a: if a:
m['artist'] = strip_tags(decode_html(a[0][1])).strip() m['artist'] = strip_tags(decode_html(a[0][1])).strip()
else: else:
a = re.compile('<b>(.*?)\(b\..*?\d{4}\)').findall(data) a = re.compile('<a href="(.*?)">(.*?) in UbuWeb Film').findall(data)
if a: if a:
m['artist'] = strip_tags(decode_html(a[0])).strip() m['artist'] = strip_tags(decode_html(a[0][1])).strip()
elif m['id'] == 'film/lawder_color': else:
m['artist'] = 'Standish Lawder' a = re.compile('<b>(.*?)\(b\..*?\d{4}\)').findall(data)
if 'artist' in m: if a:
m['artist'] = m['artist'].replace('in UbuWeb Film', '') m['artist'] = strip_tags(decode_html(a[0])).strip()
m['artist'] = m['artist'].replace('on UbuWeb Film', '').strip() elif m['id'] == 'film/lawder_color':
if m['id'] == 'film/coulibeuf': m['artist'] = 'Standish Lawder'
m['title'] = 'Balkan Baroque'
m['year'] = 1999 if 'artist' in m:
m['artist'] = m['artist'].replace('in UbuWeb Film', '')
m['artist'] = m['artist'].replace('on UbuWeb Film', '').strip()
if m['id'] == 'film/coulibeuf':
m['title'] = 'Balkan Baroque'
m['year'] = 1999
return m return m
def get_films(): def get_films():
@ -98,3 +141,12 @@ def get_ids():
ids.append(u) ids.append(u)
ids = [get_id(url) for url in list(set(ids))] ids = [get_id(url) for url in list(set(ids))]
return ids return ids
def get_sound_ids():
data = read_url('http://www.ubu.com/sound/')
ids = []
for url, author in re.compile('<a href="(\./.*?)">(.*?)</a>').findall(data):
url = 'http://www.ubu.com/sound' + url[1:]
ids.append(url)
ids = [get_id(url) for url in sorted(set(ids))]
return ids

Some files were not shown because too many files have changed in this diff