update Shared

j 2015-11-04 13:01:55 +01:00
parent e7ebbedd38
commit 6881f3471a
184 changed files with 13080 additions and 13691 deletions

.gitignore vendored

@ -4,3 +4,4 @@
*.pyd
__pycache__
pip_cache
Linux_x86_64/bin


@ -1,11 +1,10 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import re
# EASY-INSTALL-ENTRY-SCRIPT: 'chardet==2.3.0','console_scripts','chardetect'
__requires__ = 'chardet==2.3.0'
import sys
from chardet.chardetect import main
from pkg_resources import load_entry_point
if __name__ == '__main__':
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
    sys.exit(
        load_entry_point('chardet==2.3.0', 'console_scripts', 'chardetect')()
    )
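The rewritten launcher no longer imports ``chardet.chardetect`` directly; it asks setuptools to resolve the ``console_scripts`` entry point at run time. A minimal sketch of what that resolution does, assuming chardet 2.3.0 is the installed distribution (the entry point target ``chardet.chardetect:main`` is visible in the pydist.json later in this diff)::

    from pkg_resources import get_distribution, load_entry_point

    # Resolve the entry point by hand; equivalent to load_entry_point(...)
    dist = get_distribution('chardet==2.3.0')
    ep = dist.get_entry_info('console_scripts', 'chardetect')
    print(ep.module_name, ep.attrs)  # chardet.chardetect ('main',)

    # Both of these yield the same chardet.chardetect:main callable
    func = ep.load()
    func = load_entry_point('chardet==2.3.0', 'console_scripts', 'chardetect')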


@ -1,23 +1,23 @@
../PyPDF2/xmp.py
../PyPDF2/utils.py
../PyPDF2/filters.py
../PyPDF2/__init__.py
../PyPDF2/_version.py
../PyPDF2/generic.py
../PyPDF2/merger.py
../PyPDF2/pagerange.py
../PyPDF2/pdf.py
../PyPDF2/merger.py
../PyPDF2/__pycache__/xmp.cpython-34.pyc
../PyPDF2/__pycache__/utils.cpython-34.pyc
../PyPDF2/utils.py
../PyPDF2/xmp.py
../PyPDF2/_version.py
../PyPDF2/__init__.py
../PyPDF2/__pycache__/filters.cpython-34.pyc
../PyPDF2/__pycache__/__init__.cpython-34.pyc
../PyPDF2/__pycache__/_version.cpython-34.pyc
../PyPDF2/__pycache__/generic.cpython-34.pyc
../PyPDF2/__pycache__/merger.cpython-34.pyc
../PyPDF2/__pycache__/pagerange.cpython-34.pyc
../PyPDF2/__pycache__/pdf.cpython-34.pyc
../PyPDF2/__pycache__/merger.cpython-34.pyc
../PyPDF2/__pycache__/utils.cpython-34.pyc
../PyPDF2/__pycache__/xmp.cpython-34.pyc
../PyPDF2/__pycache__/_version.cpython-34.pyc
../PyPDF2/__pycache__/__init__.cpython-34.pyc
./
top_level.txt
dependency_links.txt
PKG-INFO
SOURCES.txt
top_level.txt


@ -1,42 +0,0 @@
Metadata-Version: 1.1
Name: certifi
Version: 14.05.14
Summary: Python package for providing Mozilla's CA Bundle.
Home-page: http://python-requests.org
Author: Kenneth Reitz
Author-email: me@kennethreitz.com
License: ISC
Description: Certifi: Python SSL Certificates
================================
This installable Python package contains a CA Bundle that you can reference
in your Python code. This is useful for verifying HTTP requests, for example.
This is the same CA Bundle which ships with the Requests codebase, and is
derived from Mozilla Firefox's canonical set.
Usage
-----
To reference the installed CA Bundle, you can use the built-in function::
>>> import certifi
>>> certifi.where()
'/usr/local/lib/python2.7/site-packages/certifi/cacert.pem'
Enjoy!
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Natural Language :: English
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2.5
Classifier: Programming Language :: Python :: 2.6
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3.0
Classifier: Programming Language :: Python :: 3.1
Classifier: Programming Language :: Python :: 3.2
Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: 3.4


@ -1,13 +0,0 @@
LICENSE
MANIFEST.in
README.rst
setup.cfg
setup.py
certifi/__init__.py
certifi/__main__.py
certifi/cacert.pem
certifi/core.py
certifi.egg-info/PKG-INFO
certifi.egg-info/SOURCES.txt
certifi.egg-info/dependency_links.txt
certifi.egg-info/top_level.txt


@ -1,12 +0,0 @@
../certifi/__init__.py
../certifi/core.py
../certifi/__main__.py
../certifi/cacert.pem
../certifi/__pycache__/__init__.cpython-34.pyc
../certifi/__pycache__/core.cpython-34.pyc
../certifi/__pycache__/__main__.cpython-34.pyc
./
dependency_links.txt
PKG-INFO
SOURCES.txt
top_level.txt


@ -0,0 +1,30 @@
Certifi: Python SSL Certificates
================================
`Certifi`_ is a carefully curated collection of Root Certificates for
validating the trustworthiness of SSL certificates while verifying the identity
of TLS hosts. It has been extracted from the `Requests`_ project.
Installation
------------
`certifi` is available on PyPI. Simply install it with `pip`::
$ pip install certifi
Usage
-----
To reference the installed CA Bundle, you can use the built-in function::
>>> import certifi
>>> certifi.where()
'/usr/local/lib/python2.7/site-packages/certifi/cacert.pem'
Enjoy!
.. _`Certifi`: http://certifi.io/en/latest/
.. _`Requests`: http://docs.python-requests.org/en/latest/


@ -0,0 +1,52 @@
Metadata-Version: 2.0
Name: certifi
Version: 2015.9.6.2
Summary: Python package for providing Mozilla's CA Bundle.
Home-page: http://certifi.io/
Author: Kenneth Reitz
Author-email: me@kennethreitz.com
License: ISC
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Natural Language :: English
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2.5
Classifier: Programming Language :: Python :: 2.6
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3.0
Classifier: Programming Language :: Python :: 3.1
Classifier: Programming Language :: Python :: 3.2
Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: 3.4
Certifi: Python SSL Certificates
================================
`Certifi`_ is a carefully curated collection of Root Certificates for
validating the trustworthiness of SSL certificates while verifying the identity
of TLS hosts. It has been extracted from the `Requests`_ project.
Installation
------------
`certifi` is available on PyPI. Simply install it with `pip`::
$ pip install certifi
Usage
-----
To reference the installed CA Bundle, you can use the built-in function::
>>> import certifi
>>> certifi.where()
'/usr/local/lib/python2.7/site-packages/certifi/cacert.pem'
Enjoy!
.. _`Certifi`: http://certifi.io/en/latest/
.. _`Requests`: http://docs.python-requests.org/en/latest/


@ -0,0 +1,15 @@
certifi/__init__.py,sha256=T8LOdkem2W_EqteuCirstbPu3iS11BmKnS_nKqQI_kQ,65
certifi/__main__.py,sha256=FiOYt1Fltst7wk9DRa6GCoBr8qBUxlNQu_MKJf04E6s,41
certifi/cacert.pem,sha256=wY10ezo0r5ZPcgfctoi3Q9KRZ79_tpb_MPDGsgWiOwE,320698
certifi/core.py,sha256=DqvIINYNNXsp3Srlk_NRaiizaww8po3l8t8ksz-Xt6Q,716
certifi/old_root.pem,sha256=Sm1SGy9Y3FjEDEy9ie0EX39fcJCv_r6gAPtj9yBrXEY,24014
certifi/weak.pem,sha256=5xzWFRrSP0ZsXiW6emg8UQ_w497lT4qWCv32OO8R1ME,344712
certifi-2015.9.6.2.dist-info/DESCRIPTION.rst,sha256=1HthO7cC8rfi_tZB3iPCnK7Npcd48svSApnFrl8J89Q,716
certifi-2015.9.6.2.dist-info/METADATA,sha256=-IMJn5G46t_YY0VsjSgXQalm6mC4sChB8lsDanFlTME,1532
certifi-2015.9.6.2.dist-info/metadata.json,sha256=LNvgTP4aFSgWMQ-8ySDRnRE7506kiisjTkPqBHna1YE,911
certifi-2015.9.6.2.dist-info/RECORD,,
certifi-2015.9.6.2.dist-info/top_level.txt,sha256=KMu4vUCfsjLrkPbSNdgdekS-pVJzBAJFO__nI8NF6-U,8
certifi-2015.9.6.2.dist-info/WHEEL,sha256=AvR0WeTpDaxT645bl5FQxUK6NPsTls2ttpcGJg3j1Xg,110
certifi/__pycache__/__init__.cpython-34.pyc,,
certifi/__pycache__/__main__.cpython-34.pyc,,
certifi/__pycache__/core.cpython-34.pyc,,
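Each row of this RECORD follows the wheel convention: path, then the file's SHA-256 digest in unpadded urlsafe base64 prefixed with ``sha256=``, then its size in bytes; the ``.pyc`` rows leave hash and size empty because compiled files vary by interpreter. A sketch of how such a digest is computed (assuming the paths above, it should reproduce e.g. the ``certifi/core.py`` row)::

    import base64
    import hashlib

    def record_digest(path):
        """Return the RECORD-style hash field for a file."""
        with open(path, 'rb') as f:
            digest = hashlib.sha256(f.read()).digest()
        b64 = base64.urlsafe_b64encode(digest).rstrip(b'=')
        return 'sha256=' + b64.decode('ascii')

    # record_digest('certifi/core.py')
    # -> 'sha256=DqvIINYNNXsp3Srlk_NRaiizaww8po3l8t8ksz-Xt6Q'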


@ -0,0 +1 @@
{"license": "ISC", "name": "certifi", "metadata_version": "2.0", "generator": "bdist_wheel (0.24.0)", "summary": "Python package for providing Mozilla's CA Bundle.", "version": "2015.9.6.2", "extensions": {"python.details": {"project_urls": {"Home": "http://certifi.io/"}, "document_names": {"description": "DESCRIPTION.rst"}, "contacts": [{"role": "author", "email": "me@kennethreitz.com", "name": "Kenneth Reitz"}]}}, "classifiers": ["Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Natural Language :: English", "Programming Language :: Python", "Programming Language :: Python :: 2.5", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3.0", "Programming Language :: Python :: 3.1", "Programming Language :: Python :: 3.2", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4"]}


@ -1 +1,3 @@
from .core import where
from .core import where, old_where
__version__ = "2015.09.06.2"

File diff suppressed because it is too large.


@ -7,13 +7,30 @@ certifi.py
This module returns the installation location of cacert.pem.
"""
import os
import warnings
class DeprecatedBundleWarning(DeprecationWarning):
    """
    The weak security bundle is being deprecated. Please bother your service
    provider to get them to stop using cross-signed roots.
    """
def where():
    f = os.path.split(__file__)[0]
    return os.path.join(f, 'cacert.pem')
def old_where():
    warnings.warn(
        "The weak security bundle is being deprecated.",
        DeprecatedBundleWarning
    )
    f = os.path.split(__file__)[0]
    return os.path.join(f, 'weak.pem')
if __name__ == '__main__':
    print(where())
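With this change certifi exposes two bundles: ``where()`` keeps returning the path to ``cacert.pem``, while the new ``old_where()`` returns ``weak.pem`` (the bundle that still carries legacy cross-signed roots) and emits a ``DeprecatedBundleWarning``, a ``DeprecationWarning`` subclass. A usage sketch::

    import warnings

    import certifi

    print(certifi.where())             # .../certifi/cacert.pem

    # DeprecationWarnings are silenced by default, so record them explicitly
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        legacy = certifi.old_where()   # .../certifi/weak.pem

    print(legacy)
    print(caught[0].category.__name__)  # DeprecatedBundleWarning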


@ -0,0 +1,387 @@
# Issuer: CN=Entrust.net Secure Server Certification Authority O=Entrust.net OU=www.entrust.net/CPS incorp. by ref. (limits liab.)/(c) 1999 Entrust.net Limited
# Subject: CN=Entrust.net Secure Server Certification Authority O=Entrust.net OU=www.entrust.net/CPS incorp. by ref. (limits liab.)/(c) 1999 Entrust.net Limited
# Label: "Entrust.net Secure Server CA"
# Serial: 927650371
# MD5 Fingerprint: df:f2:80:73:cc:f1:e6:61:73:fc:f5:42:e9:c5:7c:ee
# SHA1 Fingerprint: 99:a6:9b:e6:1a:fe:88:6b:4d:2b:82:00:7c:b8:54:fc:31:7e:15:39
# SHA256 Fingerprint: 62:f2:40:27:8c:56:4c:4d:d8:bf:7d:9d:4f:6f:36:6e:a8:94:d2:2f:5f:34:d9:89:a9:83:ac:ec:2f:ff:ed:50
-----BEGIN CERTIFICATE-----
MIIE2DCCBEGgAwIBAgIEN0rSQzANBgkqhkiG9w0BAQUFADCBwzELMAkGA1UEBhMC
VVMxFDASBgNVBAoTC0VudHJ1c3QubmV0MTswOQYDVQQLEzJ3d3cuZW50cnVzdC5u
ZXQvQ1BTIGluY29ycC4gYnkgcmVmLiAobGltaXRzIGxpYWIuKTElMCMGA1UECxMc
KGMpIDE5OTkgRW50cnVzdC5uZXQgTGltaXRlZDE6MDgGA1UEAxMxRW50cnVzdC5u
ZXQgU2VjdXJlIFNlcnZlciBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05OTA1
MjUxNjA5NDBaFw0xOTA1MjUxNjM5NDBaMIHDMQswCQYDVQQGEwJVUzEUMBIGA1UE
ChMLRW50cnVzdC5uZXQxOzA5BgNVBAsTMnd3dy5lbnRydXN0Lm5ldC9DUFMgaW5j
b3JwLiBieSByZWYuIChsaW1pdHMgbGlhYi4pMSUwIwYDVQQLExwoYykgMTk5OSBF
bnRydXN0Lm5ldCBMaW1pdGVkMTowOAYDVQQDEzFFbnRydXN0Lm5ldCBTZWN1cmUg
U2VydmVyIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGdMA0GCSqGSIb3DQEBAQUA
A4GLADCBhwKBgQDNKIM0VBuJ8w+vN5Ex/68xYMmo6LIQaO2f55M28Qpku0f1BBc/
I0dNxScZgSYMVHINiC3ZH5oSn7yzcdOAGT9HZnuMNSjSuQrfJNqc1lB5gXpa0zf3
wkrYKZImZNHkmGw6AIr1NJtl+O3jEP/9uElY3KDegjlrgbEWGWG5VLbmQwIBA6OC
AdcwggHTMBEGCWCGSAGG+EIBAQQEAwIABzCCARkGA1UdHwSCARAwggEMMIHeoIHb
oIHYpIHVMIHSMQswCQYDVQQGEwJVUzEUMBIGA1UEChMLRW50cnVzdC5uZXQxOzA5
BgNVBAsTMnd3dy5lbnRydXN0Lm5ldC9DUFMgaW5jb3JwLiBieSByZWYuIChsaW1p
dHMgbGlhYi4pMSUwIwYDVQQLExwoYykgMTk5OSBFbnRydXN0Lm5ldCBMaW1pdGVk
MTowOAYDVQQDEzFFbnRydXN0Lm5ldCBTZWN1cmUgU2VydmVyIENlcnRpZmljYXRp
b24gQXV0aG9yaXR5MQ0wCwYDVQQDEwRDUkwxMCmgJ6AlhiNodHRwOi8vd3d3LmVu
dHJ1c3QubmV0L0NSTC9uZXQxLmNybDArBgNVHRAEJDAigA8xOTk5MDUyNTE2MDk0
MFqBDzIwMTkwNTI1MTYwOTQwWjALBgNVHQ8EBAMCAQYwHwYDVR0jBBgwFoAU8Bdi
E1U9s/8KAGv7UISX8+1i0BowHQYDVR0OBBYEFPAXYhNVPbP/CgBr+1CEl/PtYtAa
MAwGA1UdEwQFMAMBAf8wGQYJKoZIhvZ9B0EABAwwChsEVjQuMAMCBJAwDQYJKoZI
hvcNAQEFBQADgYEAkNwwAvpkdMKnCqV8IY00F6j7Rw7/JXyNEwr75Ji174z4xRAN
95K+8cPV1ZVqBLssziY2ZcgxxufuP+NXdYR6Ee9GTxj005i7qIcyunL2POI9n9cd
2cNgQ4xYDiKWL2KjLB+6rQXvqzJ4h6BUcxm1XAX5Uj5tLUUL9wqT6u0G+bI=
-----END CERTIFICATE-----
# Issuer: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 2 Policy Validation Authority
# Subject: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 2 Policy Validation Authority
# Label: "ValiCert Class 2 VA"
# Serial: 1
# MD5 Fingerprint: a9:23:75:9b:ba:49:36:6e:31:c2:db:f2:e7:66:ba:87
# SHA1 Fingerprint: 31:7a:2a:d0:7f:2b:33:5e:f5:a1:c3:4e:4b:57:e8:b7:d8:f1:fc:a6
# SHA256 Fingerprint: 58:d0:17:27:9c:d4:dc:63:ab:dd:b1:96:a6:c9:90:6c:30:c4:e0:87:83:ea:e8:c1:60:99:54:d6:93:55:59:6b
-----BEGIN CERTIFICATE-----
MIIC5zCCAlACAQEwDQYJKoZIhvcNAQEFBQAwgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0
IFZhbGlkYXRpb24gTmV0d29yazEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAz
BgNVBAsTLFZhbGlDZXJ0IENsYXNzIDIgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9y
aXR5MSEwHwYDVQQDExhodHRwOi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG
9w0BCQEWEWluZm9AdmFsaWNlcnQuY29tMB4XDTk5MDYyNjAwMTk1NFoXDTE5MDYy
NjAwMTk1NFowgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0IFZhbGlkYXRpb24gTmV0d29y
azEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAzBgNVBAsTLFZhbGlDZXJ0IENs
YXNzIDIgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9yaXR5MSEwHwYDVQQDExhodHRw
Oi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG9w0BCQEWEWluZm9AdmFsaWNl
cnQuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDOOnHK5avIWZJV16vY
dA757tn2VUdZZUcOBVXc65g2PFxTXdMwzzjsvUGJ7SVCCSRrCl6zfN1SLUzm1NZ9
WlmpZdRJEy0kTRxQb7XBhVQ7/nHk01xC+YDgkRoKWzk2Z/M/VXwbP7RfZHM047QS
v4dk+NoS/zcnwbNDu+97bi5p9wIDAQABMA0GCSqGSIb3DQEBBQUAA4GBADt/UG9v
UJSZSWI4OB9L+KXIPqeCgfYrx+jFzug6EILLGACOTb2oWH+heQC1u+mNr0HZDzTu
IYEZoDJJKPTEjlbVUjP9UNV+mWwD5MlM/Mtsq2azSiGM5bUMMj4QssxsodyamEwC
W/POuZ6lcg5Ktz885hZo+L7tdEy8W9ViH0Pd
-----END CERTIFICATE-----
# Issuer: CN=NetLock Expressz (Class C) Tanusitvanykiado O=NetLock Halozatbiztonsagi Kft. OU=Tanusitvanykiadok
# Subject: CN=NetLock Expressz (Class C) Tanusitvanykiado O=NetLock Halozatbiztonsagi Kft. OU=Tanusitvanykiadok
# Label: "NetLock Express (Class C) Root"
# Serial: 104
# MD5 Fingerprint: 4f:eb:f1:f0:70:c2:80:63:5d:58:9f:da:12:3c:a9:c4
# SHA1 Fingerprint: e3:92:51:2f:0a:cf:f5:05:df:f6:de:06:7f:75:37:e1:65:ea:57:4b
# SHA256 Fingerprint: 0b:5e:ed:4e:84:64:03:cf:55:e0:65:84:84:40:ed:2a:82:75:8b:f5:b9:aa:1f:25:3d:46:13:cf:a0:80:ff:3f
-----BEGIN CERTIFICATE-----
MIIFTzCCBLigAwIBAgIBaDANBgkqhkiG9w0BAQQFADCBmzELMAkGA1UEBhMCSFUx
ETAPBgNVBAcTCEJ1ZGFwZXN0MScwJQYDVQQKEx5OZXRMb2NrIEhhbG96YXRiaXp0
b25zYWdpIEtmdC4xGjAYBgNVBAsTEVRhbnVzaXR2YW55a2lhZG9rMTQwMgYDVQQD
EytOZXRMb2NrIEV4cHJlc3N6IChDbGFzcyBDKSBUYW51c2l0dmFueWtpYWRvMB4X
DTk5MDIyNTE0MDgxMVoXDTE5MDIyMDE0MDgxMVowgZsxCzAJBgNVBAYTAkhVMREw
DwYDVQQHEwhCdWRhcGVzdDEnMCUGA1UEChMeTmV0TG9jayBIYWxvemF0Yml6dG9u
c2FnaSBLZnQuMRowGAYDVQQLExFUYW51c2l0dmFueWtpYWRvazE0MDIGA1UEAxMr
TmV0TG9jayBFeHByZXNzeiAoQ2xhc3MgQykgVGFudXNpdHZhbnlraWFkbzCBnzAN
BgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA6+ywbGGKIyWvYCDj2Z/8kwvbXY2wobNA
OoLO/XXgeDIDhlqGlZHtU/qdQPzm6N3ZW3oDvV3zOwzDUXmbrVWg6dADEK8KuhRC
2VImESLH0iDMgqSaqf64gXadarfSNnU+sYYJ9m5tfk63euyucYT2BDMIJTLrdKwW
RMbkQJMdf60CAwEAAaOCAp8wggKbMBIGA1UdEwEB/wQIMAYBAf8CAQQwDgYDVR0P
AQH/BAQDAgAGMBEGCWCGSAGG+EIBAQQEAwIABzCCAmAGCWCGSAGG+EIBDQSCAlEW
ggJNRklHWUVMRU0hIEV6ZW4gdGFudXNpdHZhbnkgYSBOZXRMb2NrIEtmdC4gQWx0
YWxhbm9zIFN6b2xnYWx0YXRhc2kgRmVsdGV0ZWxlaWJlbiBsZWlydCBlbGphcmFz
b2sgYWxhcGphbiBrZXN6dWx0LiBBIGhpdGVsZXNpdGVzIGZvbHlhbWF0YXQgYSBO
ZXRMb2NrIEtmdC4gdGVybWVrZmVsZWxvc3NlZy1iaXp0b3NpdGFzYSB2ZWRpLiBB
IGRpZ2l0YWxpcyBhbGFpcmFzIGVsZm9nYWRhc2FuYWsgZmVsdGV0ZWxlIGF6IGVs
b2lydCBlbGxlbm9yemVzaSBlbGphcmFzIG1lZ3RldGVsZS4gQXogZWxqYXJhcyBs
ZWlyYXNhIG1lZ3RhbGFsaGF0byBhIE5ldExvY2sgS2Z0LiBJbnRlcm5ldCBob25s
YXBqYW4gYSBodHRwczovL3d3dy5uZXRsb2NrLm5ldC9kb2NzIGNpbWVuIHZhZ3kg
a2VyaGV0byBheiBlbGxlbm9yemVzQG5ldGxvY2submV0IGUtbWFpbCBjaW1lbi4g
SU1QT1JUQU5UISBUaGUgaXNzdWFuY2UgYW5kIHRoZSB1c2Ugb2YgdGhpcyBjZXJ0
aWZpY2F0ZSBpcyBzdWJqZWN0IHRvIHRoZSBOZXRMb2NrIENQUyBhdmFpbGFibGUg
YXQgaHR0cHM6Ly93d3cubmV0bG9jay5uZXQvZG9jcyBvciBieSBlLW1haWwgYXQg
Y3BzQG5ldGxvY2submV0LjANBgkqhkiG9w0BAQQFAAOBgQAQrX/XDDKACtiG8XmY
ta3UzbM2xJZIwVzNmtkFLp++UOv0JhQQLdRmF/iewSf98e3ke0ugbLWrmldwpu2g
pO0u9f38vf5NNwgMvOOWgyL1SRt/Syu0VMGAfJlOHdCM7tCs5ZL6dVb+ZKATj7i4
Fp1hBWeAyNDYpQcCNJgEjTME1A==
-----END CERTIFICATE-----
# Issuer: CN=NetLock Uzleti (Class B) Tanusitvanykiado O=NetLock Halozatbiztonsagi Kft. OU=Tanusitvanykiadok
# Subject: CN=NetLock Uzleti (Class B) Tanusitvanykiado O=NetLock Halozatbiztonsagi Kft. OU=Tanusitvanykiadok
# Label: "NetLock Business (Class B) Root"
# Serial: 105
# MD5 Fingerprint: 39:16:aa:b9:6a:41:e1:14:69:df:9e:6c:3b:72:dc:b6
# SHA1 Fingerprint: 87:9f:4b:ee:05:df:98:58:3b:e3:60:d6:33:e7:0d:3f:fe:98:71:af
# SHA256 Fingerprint: 39:df:7b:68:2b:7b:93:8f:84:71:54:81:cc:de:8d:60:d8:f2:2e:c5:98:87:7d:0a:aa:c1:2b:59:18:2b:03:12
-----BEGIN CERTIFICATE-----
MIIFSzCCBLSgAwIBAgIBaTANBgkqhkiG9w0BAQQFADCBmTELMAkGA1UEBhMCSFUx
ETAPBgNVBAcTCEJ1ZGFwZXN0MScwJQYDVQQKEx5OZXRMb2NrIEhhbG96YXRiaXp0
b25zYWdpIEtmdC4xGjAYBgNVBAsTEVRhbnVzaXR2YW55a2lhZG9rMTIwMAYDVQQD
EylOZXRMb2NrIFV6bGV0aSAoQ2xhc3MgQikgVGFudXNpdHZhbnlraWFkbzAeFw05
OTAyMjUxNDEwMjJaFw0xOTAyMjAxNDEwMjJaMIGZMQswCQYDVQQGEwJIVTERMA8G
A1UEBxMIQnVkYXBlc3QxJzAlBgNVBAoTHk5ldExvY2sgSGFsb3phdGJpenRvbnNh
Z2kgS2Z0LjEaMBgGA1UECxMRVGFudXNpdHZhbnlraWFkb2sxMjAwBgNVBAMTKU5l
dExvY2sgVXpsZXRpIChDbGFzcyBCKSBUYW51c2l0dmFueWtpYWRvMIGfMA0GCSqG
SIb3DQEBAQUAA4GNADCBiQKBgQCx6gTsIKAjwo84YM/HRrPVG/77uZmeBNwcf4xK
gZjupNTKihe5In+DCnVMm8Bp2GQ5o+2So/1bXHQawEfKOml2mrriRBf8TKPV/riX
iK+IA4kfpPIEPsgHC+b5sy96YhQJRhTKZPWLgLViqNhr1nGTLbO/CVRY7QbrqHvc
Q7GhaQIDAQABo4ICnzCCApswEgYDVR0TAQH/BAgwBgEB/wIBBDAOBgNVHQ8BAf8E
BAMCAAYwEQYJYIZIAYb4QgEBBAQDAgAHMIICYAYJYIZIAYb4QgENBIICURaCAk1G
SUdZRUxFTSEgRXplbiB0YW51c2l0dmFueSBhIE5ldExvY2sgS2Z0LiBBbHRhbGFu
b3MgU3pvbGdhbHRhdGFzaSBGZWx0ZXRlbGVpYmVuIGxlaXJ0IGVsamFyYXNvayBh
bGFwamFuIGtlc3p1bHQuIEEgaGl0ZWxlc2l0ZXMgZm9seWFtYXRhdCBhIE5ldExv
Y2sgS2Z0LiB0ZXJtZWtmZWxlbG9zc2VnLWJpenRvc2l0YXNhIHZlZGkuIEEgZGln
aXRhbGlzIGFsYWlyYXMgZWxmb2dhZGFzYW5hayBmZWx0ZXRlbGUgYXogZWxvaXJ0
IGVsbGVub3J6ZXNpIGVsamFyYXMgbWVndGV0ZWxlLiBBeiBlbGphcmFzIGxlaXJh
c2EgbWVndGFsYWxoYXRvIGEgTmV0TG9jayBLZnQuIEludGVybmV0IGhvbmxhcGph
biBhIGh0dHBzOi8vd3d3Lm5ldGxvY2submV0L2RvY3MgY2ltZW4gdmFneSBrZXJo
ZXRvIGF6IGVsbGVub3J6ZXNAbmV0bG9jay5uZXQgZS1tYWlsIGNpbWVuLiBJTVBP
UlRBTlQhIFRoZSBpc3N1YW5jZSBhbmQgdGhlIHVzZSBvZiB0aGlzIGNlcnRpZmlj
YXRlIGlzIHN1YmplY3QgdG8gdGhlIE5ldExvY2sgQ1BTIGF2YWlsYWJsZSBhdCBo
dHRwczovL3d3dy5uZXRsb2NrLm5ldC9kb2NzIG9yIGJ5IGUtbWFpbCBhdCBjcHNA
bmV0bG9jay5uZXQuMA0GCSqGSIb3DQEBBAUAA4GBAATbrowXr/gOkDFOzT4JwG06
sPgzTEdM43WIEJessDgVkcYplswhwG08pXTP2IKlOcNl40JwuyKQ433bNXbhoLXa
n3BukxowOR0w2y7jfLKRstE3Kfq51hdcR0/jHTjrn9V7lagonhVK0dHQKwCXoOKS
NitjrFgBazMpUIaD8QFI
-----END CERTIFICATE-----
# Issuer: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 3 Policy Validation Authority
# Subject: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 3 Policy Validation Authority
# Label: "RSA Root Certificate 1"
# Serial: 1
# MD5 Fingerprint: a2:6f:53:b7:ee:40:db:4a:68:e7:fa:18:d9:10:4b:72
# SHA1 Fingerprint: 69:bd:8c:f4:9c:d3:00:fb:59:2e:17:93:ca:55:6a:f3:ec:aa:35:fb
# SHA256 Fingerprint: bc:23:f9:8a:31:3c:b9:2d:e3:bb:fc:3a:5a:9f:44:61:ac:39:49:4c:4a:e1:5a:9e:9d:f1:31:e9:9b:73:01:9a
-----BEGIN CERTIFICATE-----
MIIC5zCCAlACAQEwDQYJKoZIhvcNAQEFBQAwgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0
IFZhbGlkYXRpb24gTmV0d29yazEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAz
BgNVBAsTLFZhbGlDZXJ0IENsYXNzIDMgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9y
aXR5MSEwHwYDVQQDExhodHRwOi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG
9w0BCQEWEWluZm9AdmFsaWNlcnQuY29tMB4XDTk5MDYyNjAwMjIzM1oXDTE5MDYy
NjAwMjIzM1owgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0IFZhbGlkYXRpb24gTmV0d29y
azEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAzBgNVBAsTLFZhbGlDZXJ0IENs
YXNzIDMgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9yaXR5MSEwHwYDVQQDExhodHRw
Oi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG9w0BCQEWEWluZm9AdmFsaWNl
cnQuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDjmFGWHOjVsQaBalfD
cnWTq8+epvzzFlLWLU2fNUSoLgRNB0mKOCn1dzfnt6td3zZxFJmP3MKS8edgkpfs
2Ejcv8ECIMYkpChMMFp2bbFc893enhBxoYjHW5tBbcqwuI4V7q0zK89HBFx1cQqY
JJgpp0lZpd34t0NiYfPT4tBVPwIDAQABMA0GCSqGSIb3DQEBBQUAA4GBAFa7AliE
Zwgs3x/be0kz9dNnnfS0ChCzycUs4pJqcXgn8nCDQtM+z6lU9PHYkhaM0QTLS6vJ
n0WuPIqpsHEzXcjFV9+vqDWzf4mH6eglkrh/hXqu1rweN1gqZ8mRzyqBPu3GOd/A
PhmcGcwTTYJBtYze4D1gCCAPRX5ron+jjBXu
-----END CERTIFICATE-----
# Issuer: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 1 Policy Validation Authority
# Subject: CN=http://www.valicert.com/ O=ValiCert, Inc. OU=ValiCert Class 1 Policy Validation Authority
# Label: "ValiCert Class 1 VA"
# Serial: 1
# MD5 Fingerprint: 65:58:ab:15:ad:57:6c:1e:a8:a7:b5:69:ac:bf:ff:eb
# SHA1 Fingerprint: e5:df:74:3c:b6:01:c4:9b:98:43:dc:ab:8c:e8:6a:81:10:9f:e4:8e
# SHA256 Fingerprint: f4:c1:49:55:1a:30:13:a3:5b:c7:bf:fe:17:a7:f3:44:9b:c1:ab:5b:5a:0a:e7:4b:06:c2:3b:90:00:4c:01:04
-----BEGIN CERTIFICATE-----
MIIC5zCCAlACAQEwDQYJKoZIhvcNAQEFBQAwgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0
IFZhbGlkYXRpb24gTmV0d29yazEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAz
BgNVBAsTLFZhbGlDZXJ0IENsYXNzIDEgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9y
aXR5MSEwHwYDVQQDExhodHRwOi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG
9w0BCQEWEWluZm9AdmFsaWNlcnQuY29tMB4XDTk5MDYyNTIyMjM0OFoXDTE5MDYy
NTIyMjM0OFowgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0IFZhbGlkYXRpb24gTmV0d29y
azEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAzBgNVBAsTLFZhbGlDZXJ0IENs
YXNzIDEgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9yaXR5MSEwHwYDVQQDExhodHRw
Oi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG9w0BCQEWEWluZm9AdmFsaWNl
cnQuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDYWYJ6ibiWuqYvaG9Y
LqdUHAZu9OqNSLwxlBfw8068srg1knaw0KWlAdcAAxIiGQj4/xEjm84H9b9pGib+
TunRf50sQB1ZaG6m+FiwnRqP0z/x3BkGgagO4DrdyFNFCQbmD3DD+kCmDuJWBQ8Y
TfwggtFzVXSNdnKgHZ0dwN0/cQIDAQABMA0GCSqGSIb3DQEBBQUAA4GBAFBoPUn0
LBwGlN+VYH+Wexf+T3GtZMjdd9LvWVXoP+iOBSoh8gfStadS/pyxtuJbdxdA6nLW
I8sogTLDAHkY7FkXicnGah5xyf23dKUlRWnFSKsZ4UWKJWsZ7uW7EvV/96aNUcPw
nXS3qT6gpf+2SQMT2iLM7XGCK5nPOrf1LXLI
-----END CERTIFICATE-----
# Issuer: CN=Equifax Secure eBusiness CA-1 O=Equifax Secure Inc.
# Subject: CN=Equifax Secure eBusiness CA-1 O=Equifax Secure Inc.
# Label: "Equifax Secure eBusiness CA 1"
# Serial: 4
# MD5 Fingerprint: 64:9c:ef:2e:44:fc:c6:8f:52:07:d0:51:73:8f:cb:3d
# SHA1 Fingerprint: da:40:18:8b:91:89:a3:ed:ee:ae:da:97:fe:2f:9d:f5:b7:d1:8a:41
# SHA256 Fingerprint: cf:56:ff:46:a4:a1:86:10:9d:d9:65:84:b5:ee:b5:8a:51:0c:42:75:b0:e5:f9:4f:40:bb:ae:86:5e:19:f6:73
-----BEGIN CERTIFICATE-----
MIICgjCCAeugAwIBAgIBBDANBgkqhkiG9w0BAQQFADBTMQswCQYDVQQGEwJVUzEc
MBoGA1UEChMTRXF1aWZheCBTZWN1cmUgSW5jLjEmMCQGA1UEAxMdRXF1aWZheCBT
ZWN1cmUgZUJ1c2luZXNzIENBLTEwHhcNOTkwNjIxMDQwMDAwWhcNMjAwNjIxMDQw
MDAwWjBTMQswCQYDVQQGEwJVUzEcMBoGA1UEChMTRXF1aWZheCBTZWN1cmUgSW5j
LjEmMCQGA1UEAxMdRXF1aWZheCBTZWN1cmUgZUJ1c2luZXNzIENBLTEwgZ8wDQYJ
KoZIhvcNAQEBBQADgY0AMIGJAoGBAM4vGbwXt3fek6lfWg0XTzQaDJj0ItlZ1MRo
RvC0NcWFAyDGr0WlIVFFQesWWDYyb+JQYmT5/VGcqiTZ9J2DKocKIdMSODRsjQBu
WqDZQu4aIZX5UkxVWsUPOE9G+m34LjXWHXzr4vCwdYDIqROsvojvOm6rXyo4YgKw
Env+j6YDAgMBAAGjZjBkMBEGCWCGSAGG+EIBAQQEAwIABzAPBgNVHRMBAf8EBTAD
AQH/MB8GA1UdIwQYMBaAFEp4MlIR21kWNl7fwRQ2QGpHfEyhMB0GA1UdDgQWBBRK
eDJSEdtZFjZe38EUNkBqR3xMoTANBgkqhkiG9w0BAQQFAAOBgQB1W6ibAxHm6VZM
zfmpTMANmvPMZWnmJXbMWbfWVMMdzZmsGd20hdXgPfxiIKeES1hl8eL5lSE/9dR+
WB5Hh1Q+WKG1tfgq73HnvMP2sUlG4tega+VWeponmHxGYhTnyfxuAxJ5gDgdSIKN
/Bf+KpYrtWKmpj29f5JZzVoqgrI3eQ==
-----END CERTIFICATE-----
# Issuer: CN=Equifax Secure Global eBusiness CA-1 O=Equifax Secure Inc.
# Subject: CN=Equifax Secure Global eBusiness CA-1 O=Equifax Secure Inc.
# Label: "Equifax Secure Global eBusiness CA"
# Serial: 1
# MD5 Fingerprint: 8f:5d:77:06:27:c4:98:3c:5b:93:78:e7:d7:7d:9b:cc
# SHA1 Fingerprint: 7e:78:4a:10:1c:82:65:cc:2d:e1:f1:6d:47:b4:40:ca:d9:0a:19:45
# SHA256 Fingerprint: 5f:0b:62:ea:b5:e3:53:ea:65:21:65:16:58:fb:b6:53:59:f4:43:28:0a:4a:fb:d1:04:d7:7d:10:f9:f0:4c:07
-----BEGIN CERTIFICATE-----
MIICkDCCAfmgAwIBAgIBATANBgkqhkiG9w0BAQQFADBaMQswCQYDVQQGEwJVUzEc
MBoGA1UEChMTRXF1aWZheCBTZWN1cmUgSW5jLjEtMCsGA1UEAxMkRXF1aWZheCBT
ZWN1cmUgR2xvYmFsIGVCdXNpbmVzcyBDQS0xMB4XDTk5MDYyMTA0MDAwMFoXDTIw
MDYyMTA0MDAwMFowWjELMAkGA1UEBhMCVVMxHDAaBgNVBAoTE0VxdWlmYXggU2Vj
dXJlIEluYy4xLTArBgNVBAMTJEVxdWlmYXggU2VjdXJlIEdsb2JhbCBlQnVzaW5l
c3MgQ0EtMTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAuucXkAJlsTRVPEnC
UdXfp9E3j9HngXNBUmCbnaEXJnitx7HoJpQytd4zjTov2/KaelpzmKNc6fuKcxtc
58O/gGzNqfTWK8D3+ZmqY6KxRwIP1ORROhI8bIpaVIRw28HFkM9yRcuoWcDNM50/
o5brhTMhHD4ePmBudpxnhcXIw2ECAwEAAaNmMGQwEQYJYIZIAYb4QgEBBAQDAgAH
MA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0jBBgwFoAUvqigdHJQa0S3ySPY+6j/s1dr
aGwwHQYDVR0OBBYEFL6ooHRyUGtEt8kj2Puo/7NXa2hsMA0GCSqGSIb3DQEBBAUA
A4GBADDiAVGqx+pf2rnQZQ8w1j7aDRRJbpGTJxQx78T3LUX47Me/okENI7SS+RkA
Z70Br83gcfxaz2TE4JaY0KNA4gGK7ycH8WUBikQtBmV1UsCGECAhX2xrD2yuCRyv
8qIYNMR1pHMc8Y3c7635s3a0kr/clRAevsvIO1qEYBlWlKlV
-----END CERTIFICATE-----
# Issuer: CN=Thawte Premium Server CA O=Thawte Consulting cc OU=Certification Services Division
# Subject: CN=Thawte Premium Server CA O=Thawte Consulting cc OU=Certification Services Division
# Label: "Thawte Premium Server CA"
# Serial: 1
# MD5 Fingerprint: 06:9f:69:79:16:66:90:02:1b:8c:8c:a2:c3:07:6f:3a
# SHA1 Fingerprint: 62:7f:8d:78:27:65:63:99:d2:7d:7f:90:44:c9:fe:b3:f3:3e:fa:9a
# SHA256 Fingerprint: ab:70:36:36:5c:71:54:aa:29:c2:c2:9f:5d:41:91:16:3b:16:2a:22:25:01:13:57:d5:6d:07:ff:a7:bc:1f:72
-----BEGIN CERTIFICATE-----
MIIDJzCCApCgAwIBAgIBATANBgkqhkiG9w0BAQQFADCBzjELMAkGA1UEBhMCWkEx
FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMR0wGwYD
VQQKExRUaGF3dGUgQ29uc3VsdGluZyBjYzEoMCYGA1UECxMfQ2VydGlmaWNhdGlv
biBTZXJ2aWNlcyBEaXZpc2lvbjEhMB8GA1UEAxMYVGhhd3RlIFByZW1pdW0gU2Vy
dmVyIENBMSgwJgYJKoZIhvcNAQkBFhlwcmVtaXVtLXNlcnZlckB0aGF3dGUuY29t
MB4XDTk2MDgwMTAwMDAwMFoXDTIwMTIzMTIzNTk1OVowgc4xCzAJBgNVBAYTAlpB
MRUwEwYDVQQIEwxXZXN0ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEdMBsG
A1UEChMUVGhhd3RlIENvbnN1bHRpbmcgY2MxKDAmBgNVBAsTH0NlcnRpZmljYXRp
b24gU2VydmljZXMgRGl2aXNpb24xITAfBgNVBAMTGFRoYXd0ZSBQcmVtaXVtIFNl
cnZlciBDQTEoMCYGCSqGSIb3DQEJARYZcHJlbWl1bS1zZXJ2ZXJAdGhhd3RlLmNv
bTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA0jY2aovXwlue2oFBYo847kkE
VdbQ7xwblRZH7xhINTpS9CtqBo87L+pW46+GjZ4X9560ZXUCTe/LCaIhUdib0GfQ
ug2SBhRz1JPLlyoAnFxODLz6FVL88kRu2hFKbgifLy3j+ao6hnO2RlNYyIkFvYMR
uHM/qgeN9EJN50CdHDcCAwEAAaMTMBEwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG
9w0BAQQFAAOBgQAmSCwWwlj66BZ0DKqqX1Q/8tfJeGBeXm43YyJ3Nn6yF8Q0ufUI
hfzJATj/Tb7yFkJD57taRvvBxhEf8UqwKEbJw8RCfbz6q1lu1bdRiBHjpIUZa4JM
pAwSremkrj/xw0llmozFyD4lt5SZu5IycQfwhl7tUCemDaYj+bvLpgcUQg==
-----END CERTIFICATE-----
# Issuer: CN=Thawte Server CA O=Thawte Consulting cc OU=Certification Services Division
# Subject: CN=Thawte Server CA O=Thawte Consulting cc OU=Certification Services Division
# Label: "Thawte Server CA"
# Serial: 1
# MD5 Fingerprint: c5:70:c4:a2:ed:53:78:0c:c8:10:53:81:64:cb:d0:1d
# SHA1 Fingerprint: 23:e5:94:94:51:95:f2:41:48:03:b4:d5:64:d2:a3:a3:f5:d8:8b:8c
# SHA256 Fingerprint: b4:41:0b:73:e2:e6:ea:ca:47:fb:c4:2f:8f:a4:01:8a:f4:38:1d:c5:4c:fa:a8:44:50:46:1e:ed:09:45:4d:e9
-----BEGIN CERTIFICATE-----
MIIDEzCCAnygAwIBAgIBATANBgkqhkiG9w0BAQQFADCBxDELMAkGA1UEBhMCWkEx
FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMR0wGwYD
VQQKExRUaGF3dGUgQ29uc3VsdGluZyBjYzEoMCYGA1UECxMfQ2VydGlmaWNhdGlv
biBTZXJ2aWNlcyBEaXZpc2lvbjEZMBcGA1UEAxMQVGhhd3RlIFNlcnZlciBDQTEm
MCQGCSqGSIb3DQEJARYXc2VydmVyLWNlcnRzQHRoYXd0ZS5jb20wHhcNOTYwODAx
MDAwMDAwWhcNMjAxMjMxMjM1OTU5WjCBxDELMAkGA1UEBhMCWkExFTATBgNVBAgT
DFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMR0wGwYDVQQKExRUaGF3
dGUgQ29uc3VsdGluZyBjYzEoMCYGA1UECxMfQ2VydGlmaWNhdGlvbiBTZXJ2aWNl
cyBEaXZpc2lvbjEZMBcGA1UEAxMQVGhhd3RlIFNlcnZlciBDQTEmMCQGCSqGSIb3
DQEJARYXc2VydmVyLWNlcnRzQHRoYXd0ZS5jb20wgZ8wDQYJKoZIhvcNAQEBBQAD
gY0AMIGJAoGBANOkUG7I/1Zr5s9dtuoMaHVHoqrC2oQl/Kj0R1HahbUgdJSGHg91
yekIYfUGbTBuFRkC6VLAYttNmZ7iagxEOM3+vuNkCXDF/rFrKbYvScg71CcEJRCX
L+eQbcAoQpnXTEPew/UhbVSfXcNY4cDk2VuwuNy0e982OsK1ZiIS1ocNAgMBAAGj
EzARMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEEBQADgYEAB/pMaVz7lcxG
7oWDTSEwjsrZqG9JGubaUeNgcGyEYRGhGshIPllDfU+VPaGLtwtimHp1it2ITk6e
QNuozDJ0uW8NxuOzRAvZim+aKZuZGCg70eNAKJpaPNW15yAbi8qkq43pUdniTCxZ
qdq5snUb9kLy78fyGPmJvKP/iiMucEc=
-----END CERTIFICATE-----
# Issuer: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority
# Subject: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority
# Label: "Verisign Class 3 Public Primary Certification Authority"
# Serial: 149843929435818692848040365716851702463
# MD5 Fingerprint: 10:fc:63:5d:f6:26:3e:0d:f3:25:be:5f:79:cd:67:67
# SHA1 Fingerprint: 74:2c:31:92:e6:07:e4:24:eb:45:49:54:2b:e1:bb:c5:3e:61:74:e2
# SHA256 Fingerprint: e7:68:56:34:ef:ac:f6:9a:ce:93:9a:6b:25:5b:7b:4f:ab:ef:42:93:5b:50:a2:65:ac:b5:cb:60:27:e4:4e:70
-----BEGIN CERTIFICATE-----
MIICPDCCAaUCEHC65B0Q2Sk0tjjKewPMur8wDQYJKoZIhvcNAQECBQAwXzELMAkG
A1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFz
cyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTk2
MDEyOTAwMDAwMFoXDTI4MDgwMTIzNTk1OVowXzELMAkGA1UEBhMCVVMxFzAVBgNV
BAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFzcyAzIFB1YmxpYyBQcmlt
YXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGfMA0GCSqGSIb3DQEBAQUAA4GN
ADCBiQKBgQDJXFme8huKARS0EN8EQNvjV69qRUCPhAwL0TPZ2RHP7gJYHyX3KqhE
BarsAx94f56TuZoAqiN91qyFomNFx3InzPRMxnVx0jnvT0Lwdd8KkMaOIG+YD/is
I19wKTakyYbnsZogy1Olhec9vn2a/iRFM9x2Fe0PonFkTGUugWhFpwIDAQABMA0G
CSqGSIb3DQEBAgUAA4GBALtMEivPLCYATxQT3ab7/AoRhIzzKBxnki98tsX63/Do
lbwdj2wsqFHMc9ikwFPwTtYmwHYBV4GSXiHx0bH/59AhWM1pF+NEHJwZRDmJXNyc
AA9WjQKZ7aKQRUzkuxCkPfAyAw7xzvjoyVGM5mKf5p/AfbdynMk2OmufTqj/ZA1k
-----END CERTIFICATE-----
# Issuer: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority
# Subject: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority
# Label: "Verisign Class 3 Public Primary Certification Authority"
# Serial: 80507572722862485515306429940691309246
# MD5 Fingerprint: ef:5a:f1:33:ef:f1:cd:bb:51:02:ee:12:14:4b:96:c4
# SHA1 Fingerprint: a1:db:63:93:91:6f:17:e4:18:55:09:40:04:15:c7:02:40:b0:ae:6b
# SHA256 Fingerprint: a4:b6:b3:99:6f:c2:f3:06:b3:fd:86:81:bd:63:41:3d:8c:50:09:cc:4f:a3:29:c2:cc:f0:e2:fa:1b:14:03:05
-----BEGIN CERTIFICATE-----
MIICPDCCAaUCEDyRMcsf9tAbDpq40ES/Er4wDQYJKoZIhvcNAQEFBQAwXzELMAkG
A1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFz
cyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTk2
MDEyOTAwMDAwMFoXDTI4MDgwMjIzNTk1OVowXzELMAkGA1UEBhMCVVMxFzAVBgNV
BAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFzcyAzIFB1YmxpYyBQcmlt
YXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGfMA0GCSqGSIb3DQEBAQUAA4GN
ADCBiQKBgQDJXFme8huKARS0EN8EQNvjV69qRUCPhAwL0TPZ2RHP7gJYHyX3KqhE
BarsAx94f56TuZoAqiN91qyFomNFx3InzPRMxnVx0jnvT0Lwdd8KkMaOIG+YD/is
I19wKTakyYbnsZogy1Olhec9vn2a/iRFM9x2Fe0PonFkTGUugWhFpwIDAQABMA0G
CSqGSIb3DQEBBQUAA4GBABByUqkFFBkyCEHwxWsKzH4PIRnN5GfcX6kb5sroc50i
2JhucwNhkcV8sEVAbkSdjbCxlnRhLQ2pRdKkkirWmnWXbj9T/UWZYB2oK0z5XqcJ
2HUw19JlYD1n1khVdWk/kfVIC0dpImmClr7JyDiGSnoscxlIaU5rfGW/D/xwzoiQ
-----END CERTIFICATE-----
# Issuer: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority - G2/(c) 1998 VeriSign, Inc. - For authorized use only/VeriSign Trust Network
# Subject: O=VeriSign, Inc. OU=Class 3 Public Primary Certification Authority - G2/(c) 1998 VeriSign, Inc. - For authorized use only/VeriSign Trust Network
# Label: "Verisign Class 3 Public Primary Certification Authority - G2"
# Serial: 167285380242319648451154478808036881606
# MD5 Fingerprint: a2:33:9b:4c:74:78:73:d4:6c:e7:c1:f3:8d:cb:5c:e9
# SHA1 Fingerprint: 85:37:1c:a6:e5:50:14:3d:ce:28:03:47:1b:de:3a:09:e8:f8:77:0f
# SHA256 Fingerprint: 83:ce:3c:12:29:68:8a:59:3d:48:5f:81:97:3c:0f:91:95:43:1e:da:37:cc:5e:36:43:0e:79:c7:a8:88:63:8b
-----BEGIN CERTIFICATE-----
MIIDAjCCAmsCEH3Z/gfPqB63EHln+6eJNMYwDQYJKoZIhvcNAQEFBQAwgcExCzAJ
BgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xh
c3MgMyBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcy
MTowOAYDVQQLEzEoYykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3Jp
emVkIHVzZSBvbmx5MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMB4X
DTk4MDUxODAwMDAwMFoXDTI4MDgwMTIzNTk1OVowgcExCzAJBgNVBAYTAlVTMRcw
FQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xhc3MgMyBQdWJsaWMg
UHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcyMTowOAYDVQQLEzEo
YykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5
MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMIGfMA0GCSqGSIb3DQEB
AQUAA4GNADCBiQKBgQDMXtERXVxp0KvTuWpMmR9ZmDCOFoUgRm1HP9SFIIThbbP4
pO0M8RcPO/mn+SXXwc+EY/J8Y8+iR/LGWzOOZEAEaMGAuWQcRXfH2G71lSk8UOg0
13gfqLptQ5GVj0VXXn7F+8qkBOvqlzdUMG+7AUcyM83cV5tkaWH4mx0ciU9cZwID
AQABMA0GCSqGSIb3DQEBBQUAA4GBAFFNzb5cy5gZnBWyATl4Lk0PZ3BwmcYQWpSk
U01UbSuvDV1Ai2TT1+7eVmGSX6bEHRBhNtMsJzzoKQm5EWR0zLVznxxIqbxhAe7i
F6YM40AIOw7n60RzKprxaZLvcRTDOaxxp5EJb+RxBrO6WVcmeQD2+A2iMzAo1KpY
oJ2daZH9
-----END CERTIFICATE-----
# Issuer: CN=GTE CyberTrust Global Root O=GTE Corporation OU=GTE CyberTrust Solutions, Inc.
# Subject: CN=GTE CyberTrust Global Root O=GTE Corporation OU=GTE CyberTrust Solutions, Inc.
# Label: "GTE CyberTrust Global Root"
# Serial: 421
# MD5 Fingerprint: ca:3d:d3:68:f1:03:5c:d0:32:fa:b8:2b:59:e8:5a:db
# SHA1 Fingerprint: 97:81:79:50:d8:1c:96:70:cc:34:d8:09:cf:79:44:31:36:7e:f4:74
# SHA256 Fingerprint: a5:31:25:18:8d:21:10:aa:96:4b:02:c7:b7:c6:da:32:03:17:08:94:e5:fb:71:ff:fb:66:67:d5:e6:81:0a:36
-----BEGIN CERTIFICATE-----
MIICWjCCAcMCAgGlMA0GCSqGSIb3DQEBBAUAMHUxCzAJBgNVBAYTAlVTMRgwFgYD
VQQKEw9HVEUgQ29ycG9yYXRpb24xJzAlBgNVBAsTHkdURSBDeWJlclRydXN0IFNv
bHV0aW9ucywgSW5jLjEjMCEGA1UEAxMaR1RFIEN5YmVyVHJ1c3QgR2xvYmFsIFJv
b3QwHhcNOTgwODEzMDAyOTAwWhcNMTgwODEzMjM1OTAwWjB1MQswCQYDVQQGEwJV
UzEYMBYGA1UEChMPR1RFIENvcnBvcmF0aW9uMScwJQYDVQQLEx5HVEUgQ3liZXJU
cnVzdCBTb2x1dGlvbnMsIEluYy4xIzAhBgNVBAMTGkdURSBDeWJlclRydXN0IEds
b2JhbCBSb290MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCVD6C28FCc6HrH
iM3dFw4usJTQGz0O9pTAipTHBsiQl8i4ZBp6fmw8U+E3KHNgf7KXUwefU/ltWJTS
r41tiGeA5u2ylc9yMcqlHHK6XALnZELn+aks1joNrI1CqiQBOeacPwGFVw1Yh0X4
04Wqk2kmhXBIgD8SFcd5tB8FLztimQIDAQABMA0GCSqGSIb3DQEBBAUAA4GBAG3r
GwnpXtlR22ciYaQqPEh346B8pt5zohQDhT37qw4wxYMWM4ETCJ57NE7fQMh017l9
3PR2VX2bY1QY6fDq81yx2YtCHrnAlU66+tXifPVoYb+O7AWXX1uw16OFNMQkpw0P
lZPvy5TYnh+dXIVtx6quTx8itc2VrbqnzPmrC3p/
-----END CERTIFICATE-----

File diff suppressed because it is too large.


@ -1,40 +0,0 @@
Chardet: The Universal Character Encoding Detector
--------------------------------------------------
Detects
- ASCII, UTF-8, UTF-16 (2 variants), UTF-32 (4 variants)
- Big5, GB2312, EUC-TW, HZ-GB-2312, ISO-2022-CN (Traditional and Simplified Chinese)
- EUC-JP, SHIFT_JIS, ISO-2022-JP (Japanese)
- EUC-KR, ISO-2022-KR (Korean)
- KOI8-R, MacCyrillic, IBM855, IBM866, ISO-8859-5, windows-1251 (Cyrillic)
- ISO-8859-2, windows-1250 (Hungarian)
- ISO-8859-5, windows-1251 (Bulgarian)
- windows-1252 (English)
- ISO-8859-7, windows-1253 (Greek)
- ISO-8859-8, windows-1255 (Visual and Logical Hebrew)
- TIS-620 (Thai)
Requires Python 2.6 or later
Command-line Tool
-----------------
chardet comes with a command-line script which reports on the encodings of one
or more files::
% chardetect somefile someotherfile
somefile: windows-1252 with confidence 0.5
someotherfile: ascii with confidence 1.0
About
-----
This is a continuation of Mark Pilgrim's excellent chardet. Previously, two
versions needed to be maintained: one that supported python 2.x and one that
supported python 3.x. We've recently merged with `Ian Cordasco <https://github.com/sigmavirus24>`_'s
`charade <https://github.com/sigmavirus24/charade>`_ fork, so now we have one
coherent version that works for Python 2.6+.
:maintainer: Dan Blanchard


@ -1,58 +0,0 @@
Metadata-Version: 2.0
Name: chardet
Version: 2.2.1
Summary: Universal encoding detector for Python 2 and 3
Home-page: https://github.com/erikrose/chardet
Author: Ian Cordasco
Author-email: graffatcolmingov@gmail.com
License: LGPL
Keywords: encoding,i18n,xml
Platform: UNKNOWN
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Text Processing :: Linguistic
Chardet: The Universal Character Encoding Detector
--------------------------------------------------
Detects
- ASCII, UTF-8, UTF-16 (2 variants), UTF-32 (4 variants)
- Big5, GB2312, EUC-TW, HZ-GB-2312, ISO-2022-CN (Traditional and Simplified Chinese)
- EUC-JP, SHIFT_JIS, ISO-2022-JP (Japanese)
- EUC-KR, ISO-2022-KR (Korean)
- KOI8-R, MacCyrillic, IBM855, IBM866, ISO-8859-5, windows-1251 (Cyrillic)
- ISO-8859-2, windows-1250 (Hungarian)
- ISO-8859-5, windows-1251 (Bulgarian)
- windows-1252 (English)
- ISO-8859-7, windows-1253 (Greek)
- ISO-8859-8, windows-1255 (Visual and Logical Hebrew)
- TIS-620 (Thai)
Requires Python 2.6 or later
Command-line Tool
-----------------
chardet comes with a command-line script which reports on the encodings of one
or more files::
% chardetect somefile someotherfile
somefile: windows-1252 with confidence 0.5
someotherfile: ascii with confidence 1.0
About
-----
This is a continuation of Mark Pilgrim's excellent chardet. Previously, two
versions needed to be maintained: one that supported python 2.x and one that
supported python 3.x. We've recently merged with `Ian Cordasco <https://github.com/sigmavirus24>`_'s
`charade <https://github.com/sigmavirus24/charade>`_ fork, so now we have one
coherent version that works for Python 2.6+.
:maintainer: Dan Blanchard


@ -1,85 +0,0 @@
chardet/cp949prober.py,sha256=FMvdLyB7fejPXRsTbca7LK1P3RUvvssmjUNyaEfz8zY,1782
chardet/mbcssm.py,sha256=UuiA4Ic8vEc0XpTKDneqZyiH2TwGuFVZxOxWJep3X_4,19608
chardet/langhebrewmodel.py,sha256=4ASl5vzKJPng4H278VHKtRYC03TpQpenlHTcsmZH1rE,11318
chardet/charsetgroupprober.py,sha256=0lKk7VE516fgMw119tNefFqLOxKfIE9WfdkpIT69OKU,3791
chardet/euctwfreq.py,sha256=G_I0BW9i1w0ONeeUwIYqV7_U09buIHdqh-wNHVaql7I,34872
chardet/charsetprober.py,sha256=Z48o2KiOj23FNqYH8FqzhH5m1qdm3rI8DcTm2Yqtklg,1902
chardet/jisfreq.py,sha256=ZcL4R5ekHHbP2KCYGakVMBsiKqZZZAABzhwi-uRkOps,47315
chardet/langcyrillicmodel.py,sha256=fkcd5OvogUp-GrNDWAZPgkYsSRCD2omotAEvqjlmLKE,17725
chardet/__init__.py,sha256=8-39Dg2qEuod5DNN7RMdn2ZYOO9zFU3fFfaE80iDWGc,1295
chardet/utf8prober.py,sha256=7tdNZGrJY7jZUBD483GGMkiP0Tx8Fp-cGvWHoAsilHg,2652
chardet/langthaimodel.py,sha256=-k7djh3dGKngAGnt3WfuoJN7acDcWcmHAPojhaUd7q4,11275
chardet/jpcntx.py,sha256=9fJ9oS0BUarcdZNySwmzVRuT03sYdClSmFwXDj3yVNg,19104
chardet/hebrewprober.py,sha256=8pdoUfsVXf_L4BnJde_BewS6H2yInV5688eu0nFhLHY,13359
chardet/sbcsgroupprober.py,sha256=8hLyH8RAG-aohBo7o_KciWVgRo42ZE_zEtuNG1JMRYI,3291
chardet/universaldetector.py,sha256=GkZdwNyNfbFWC8I1uqnzyhOUF7favWCqCOKqdQlx6gQ,6831
chardet/escprober.py,sha256=q5TcQKeVq31WxrW7Sv8yjpZkjEoaHO8S92EJZ9hodys,3187
chardet/euctwprober.py,sha256=upS2P6GuT5ujOxXYw-RJLcT7A4PTuo27KGUKU4UZpIQ,1676
chardet/mbcsgroupprober.py,sha256=SHRzNPLpDXfMJLA8phCHVU0WgqbgDCNxDQMolGX_7yk,1967
chardet/gb2312freq.py,sha256=M2gFdo_qQ_BslStEchrPW5CrPEZEacC0uyDLw4ok-kY,36011
chardet/sjisprober.py,sha256=1RjpQ2LU2gvoEB_4O839xDQVchWx2fG_C7_vXh52P5I,3734
chardet/eucjpprober.py,sha256=5IpfSEjAb7h3hcGMd6dkU80O900C2N6xku28rdYFKuc,3678
chardet/langgreekmodel.py,sha256=QHMy31CH_ot67UCtmurCEKqKx2WwoaKrw2YCYYBK2Lw,12628
chardet/langbulgarianmodel.py,sha256=ZyPsA796MSVhYdfWhMCgKWckupAKAnKqWcE3Cl3ej6o,12784
chardet/gb2312prober.py,sha256=VWnjoRa83Y6V6oczMaxyUr0uy48iCnC2nzk9zfEIRHc,1681
chardet/chardistribution.py,sha256=cUARQFr1oTLXeJCDQrDRkUP778AvSMzhSCnG8VLCV58,9226
chardet/langhungarianmodel.py,sha256=SXwuUzh49_cBeMXhshRHdrhlkz0T8_pZWV_pdqBKNFk,12536
chardet/chardetect.py,sha256=8g-dRSA97bSE6M25Tqe1roKKtl3XHSMnqi6vTzpHNV0,1141
chardet/constants.py,sha256=-UnY8U7EP7z9fTyd09yq35BEkSFEAUAiv9ohd1DW1s4,1335
chardet/codingstatemachine.py,sha256=E85rYhHVMw9xDEJVgiQhp0OnLGr6i2r8_7QOWMKTH08,2318
chardet/latin1prober.py,sha256=g67gqZ2z89LUOlR7BZEAh4-p5a1yGWss9nWy8FCNm8Q,5241
chardet/sbcharsetprober.py,sha256=Xq0lODqJnDgxglBiQI4BqTFiPbn63-0a5XNA5-hVu7U,4793
chardet/compat.py,sha256=5mm6yrHwef1JEG5OxkPJlSq5lkjLVpEGh3iPgFBkpkM,1157
chardet/euckrprober.py,sha256=Wo7dnZ5Erw_nB4H-m5alMiOxOuJUmGHlwCSaGqExDZA,1675
chardet/big5prober.py,sha256=XX96C--6WKYW36mL-z7pJSAtc169Z8ZImByCP4pEN9A,1684
chardet/euckrfreq.py,sha256=T5saK5mImySG5ygQPtsp6o2uKulouCwYm2ElOyFkJqU,45978
chardet/mbcharsetprober.py,sha256=9rOCjDVsmSMp6e7q2syqak22j7lrbUZhJhMee2gbVL0,3268
chardet/escsm.py,sha256=7iljEKN8lXTh8JFXPUSwlibMno6R6ksq4evLxbkzfro,7839
chardet/big5freq.py,sha256=D8oTdz-GM7Jg8TsaWJDm65vM_OLHC3xub6qUJ3rOgsQ,82594
chardet-2.2.1.data/scripts/chardetect,sha256=snDx6K00XbSe_vd7iEHs65beBQMoyABTYGKS4YyykZA,298
chardet-2.2.1.dist-info/pydist.json,sha256=K3LzbyqsBHBv-HwWY25sFHBPy1kxdszEi_gd2AFD_Kg,926
chardet-2.2.1.dist-info/WHEEL,sha256=SXYYsi-y-rEGIva8sB8iKF6bAFD6YDhmqHX5hI3fc0o,110
chardet-2.2.1.dist-info/RECORD,,
chardet-2.2.1.dist-info/top_level.txt,sha256=AowzBbZy4x8EirABDdJSLJZMkJ_53iIag8xfKR6D7kI,8
chardet-2.2.1.dist-info/DESCRIPTION.rst,sha256=m1CcXHsjUJRXdWB4svHusBa6otO4GdUW6LgirEk4V2k,1344
chardet-2.2.1.dist-info/entry_points.txt,sha256=2T00JXwbiQBZQFSKyCFxud4LEQ3_8TKuOwUsSXT-kUI,56
chardet-2.2.1.dist-info/METADATA,sha256=Pzpbxhm72oav1pTeA7pAjXPWGZ_gmYRm9bwvXM8umaw,2013
/srv/openmedialibrary/platform/Shared/home/.local/bin/chardetect,sha256=zPsthwHzIOlO2Mxw0wdp5F7cfd7xSyEpiv11jcEgaEE,220
chardet/__pycache__/langhebrewmodel.cpython-34.pyc,,
chardet/__pycache__/mbcssm.cpython-34.pyc,,
chardet/__pycache__/euckrprober.cpython-34.pyc,,
chardet/__pycache__/chardetect.cpython-34.pyc,,
chardet/__pycache__/cp949prober.cpython-34.pyc,,
chardet/__pycache__/gb2312freq.cpython-34.pyc,,
chardet/__pycache__/universaldetector.cpython-34.pyc,,
chardet/__pycache__/charsetprober.cpython-34.pyc,,
chardet/__pycache__/compat.cpython-34.pyc,,
chardet/__pycache__/__init__.cpython-34.pyc,,
chardet/__pycache__/escprober.cpython-34.pyc,,
chardet/__pycache__/euctwfreq.cpython-34.pyc,,
chardet/__pycache__/langgreekmodel.cpython-34.pyc,,
chardet/__pycache__/codingstatemachine.cpython-34.pyc,,
chardet/__pycache__/hebrewprober.cpython-34.pyc,,
chardet/__pycache__/escsm.cpython-34.pyc,,
chardet/__pycache__/langcyrillicmodel.cpython-34.pyc,,
chardet/__pycache__/euctwprober.cpython-34.pyc,,
chardet/__pycache__/charsetgroupprober.cpython-34.pyc,,
chardet/__pycache__/constants.cpython-34.pyc,,
chardet/__pycache__/chardistribution.cpython-34.pyc,,
chardet/__pycache__/langthaimodel.cpython-34.pyc,,
chardet/__pycache__/utf8prober.cpython-34.pyc,,
chardet/__pycache__/sbcsgroupprober.cpython-34.pyc,,
chardet/__pycache__/big5prober.cpython-34.pyc,,
chardet/__pycache__/langhungarianmodel.cpython-34.pyc,,
chardet/__pycache__/mbcsgroupprober.cpython-34.pyc,,
chardet/__pycache__/big5freq.cpython-34.pyc,,
chardet/__pycache__/sjisprober.cpython-34.pyc,,
chardet/__pycache__/gb2312prober.cpython-34.pyc,,
chardet/__pycache__/langbulgarianmodel.cpython-34.pyc,,
chardet/__pycache__/sbcharsetprober.cpython-34.pyc,,
chardet/__pycache__/jpcntx.cpython-34.pyc,,
chardet/__pycache__/latin1prober.cpython-34.pyc,,
chardet/__pycache__/mbcharsetprober.cpython-34.pyc,,
chardet/__pycache__/euckrfreq.cpython-34.pyc,,
chardet/__pycache__/eucjpprober.cpython-34.pyc,,
chardet/__pycache__/jisfreq.cpython-34.pyc,,


@ -1 +0,0 @@
{"license": "LGPL", "exports": {"console_scripts": {"chardetect": "chardet.chardetect:main"}}, "document_names": {"description": "DESCRIPTION.rst"}, "name": "chardet", "metadata_version": "2.0", "contacts": [{"role": "author", "email": "graffatcolmingov@gmail.com", "name": "Ian Cordasco"}], "generator": "bdist_wheel (0.22.0)", "commands": {"wrap_console": {"chardetect": "chardet.chardetect:main"}}, "summary": "Universal encoding detector for Python 2 and 3", "project_urls": {"Home": "https://github.com/erikrose/chardet"}, "version": "2.2.1", "keywords": "encoding,i18n,xml", "classifiers": ["Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)", "Operating System :: OS Independent", "Programming Language :: Python", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing :: Linguistic"]}


@ -0,0 +1,70 @@
Metadata-Version: 1.1
Name: chardet
Version: 2.3.0
Summary: Universal encoding detector for Python 2 and 3
Home-page: https://github.com/chardet/chardet
Author: Ian Cordasco
Author-email: graffatcolmingov@gmail.com
License: LGPL
Description: Chardet: The Universal Character Encoding Detector
--------------------------------------------------
Detects
- ASCII, UTF-8, UTF-16 (2 variants), UTF-32 (4 variants)
- Big5, GB2312, EUC-TW, HZ-GB-2312, ISO-2022-CN (Traditional and Simplified Chinese)
- EUC-JP, SHIFT_JIS, CP932, ISO-2022-JP (Japanese)
- EUC-KR, ISO-2022-KR (Korean)
- KOI8-R, MacCyrillic, IBM855, IBM866, ISO-8859-5, windows-1251 (Cyrillic)
- ISO-8859-2, windows-1250 (Hungarian)
- ISO-8859-5, windows-1251 (Bulgarian)
- windows-1252 (English)
- ISO-8859-7, windows-1253 (Greek)
- ISO-8859-8, windows-1255 (Visual and Logical Hebrew)
- TIS-620 (Thai)
Requires Python 2.6 or later
Installation
------------
Install from `PyPI <https://pypi.python.org/pypi/chardet>`_::
pip install chardet
Command-line Tool
-----------------
chardet comes with a command-line script which reports on the encodings of one
or more files::
% chardetect somefile someotherfile
somefile: windows-1252 with confidence 0.5
someotherfile: ascii with confidence 1.0
About
-----
This is a continuation of Mark Pilgrim's excellent chardet. Previously, two
versions needed to be maintained: one that supported python 2.x and one that
supported python 3.x. We've recently merged with `Ian Cordasco <https://github.com/sigmavirus24>`_'s
`charade <https://github.com/sigmavirus24/charade>`_ fork, so now we have one
coherent version that works for Python 2.6+.
:maintainer: Dan Blanchard
Keywords: encoding,i18n,xml
Platform: UNKNOWN
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.6
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.2
Classifier: Programming Language :: Python :: 3.3
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Text Processing :: Linguistic


@ -0,0 +1,48 @@
LICENSE
MANIFEST.in
README.rst
setup.cfg
setup.py
chardet/__init__.py
chardet/big5freq.py
chardet/big5prober.py
chardet/chardetect.py
chardet/chardistribution.py
chardet/charsetgroupprober.py
chardet/charsetprober.py
chardet/codingstatemachine.py
chardet/compat.py
chardet/constants.py
chardet/cp949prober.py
chardet/escprober.py
chardet/escsm.py
chardet/eucjpprober.py
chardet/euckrfreq.py
chardet/euckrprober.py
chardet/euctwfreq.py
chardet/euctwprober.py
chardet/gb2312freq.py
chardet/gb2312prober.py
chardet/hebrewprober.py
chardet/jisfreq.py
chardet/jpcntx.py
chardet/langbulgarianmodel.py
chardet/langcyrillicmodel.py
chardet/langgreekmodel.py
chardet/langhebrewmodel.py
chardet/langhungarianmodel.py
chardet/langthaimodel.py
chardet/latin1prober.py
chardet/mbcharsetprober.py
chardet/mbcsgroupprober.py
chardet/mbcssm.py
chardet/sbcharsetprober.py
chardet/sbcsgroupprober.py
chardet/sjisprober.py
chardet/universaldetector.py
chardet/utf8prober.py
chardet.egg-info/PKG-INFO
chardet.egg-info/SOURCES.txt
chardet.egg-info/dependency_links.txt
chardet.egg-info/entry_points.txt
chardet.egg-info/top_level.txt


@ -0,0 +1,83 @@
../chardet/__init__.py
../chardet/big5freq.py
../chardet/big5prober.py
../chardet/chardetect.py
../chardet/chardistribution.py
../chardet/charsetgroupprober.py
../chardet/charsetprober.py
../chardet/codingstatemachine.py
../chardet/compat.py
../chardet/constants.py
../chardet/cp949prober.py
../chardet/escprober.py
../chardet/escsm.py
../chardet/eucjpprober.py
../chardet/euckrfreq.py
../chardet/euckrprober.py
../chardet/euctwfreq.py
../chardet/euctwprober.py
../chardet/gb2312freq.py
../chardet/gb2312prober.py
../chardet/hebrewprober.py
../chardet/jisfreq.py
../chardet/jpcntx.py
../chardet/langbulgarianmodel.py
../chardet/langcyrillicmodel.py
../chardet/langgreekmodel.py
../chardet/langhebrewmodel.py
../chardet/langhungarianmodel.py
../chardet/langthaimodel.py
../chardet/latin1prober.py
../chardet/mbcharsetprober.py
../chardet/mbcsgroupprober.py
../chardet/mbcssm.py
../chardet/sbcharsetprober.py
../chardet/sbcsgroupprober.py
../chardet/sjisprober.py
../chardet/universaldetector.py
../chardet/utf8prober.py
../chardet/__pycache__/__init__.cpython-34.pyc
../chardet/__pycache__/big5freq.cpython-34.pyc
../chardet/__pycache__/big5prober.cpython-34.pyc
../chardet/__pycache__/chardetect.cpython-34.pyc
../chardet/__pycache__/chardistribution.cpython-34.pyc
../chardet/__pycache__/charsetgroupprober.cpython-34.pyc
../chardet/__pycache__/charsetprober.cpython-34.pyc
../chardet/__pycache__/codingstatemachine.cpython-34.pyc
../chardet/__pycache__/compat.cpython-34.pyc
../chardet/__pycache__/constants.cpython-34.pyc
../chardet/__pycache__/cp949prober.cpython-34.pyc
../chardet/__pycache__/escprober.cpython-34.pyc
../chardet/__pycache__/escsm.cpython-34.pyc
../chardet/__pycache__/eucjpprober.cpython-34.pyc
../chardet/__pycache__/euckrfreq.cpython-34.pyc
../chardet/__pycache__/euckrprober.cpython-34.pyc
../chardet/__pycache__/euctwfreq.cpython-34.pyc
../chardet/__pycache__/euctwprober.cpython-34.pyc
../chardet/__pycache__/gb2312freq.cpython-34.pyc
../chardet/__pycache__/gb2312prober.cpython-34.pyc
../chardet/__pycache__/hebrewprober.cpython-34.pyc
../chardet/__pycache__/jisfreq.cpython-34.pyc
../chardet/__pycache__/jpcntx.cpython-34.pyc
../chardet/__pycache__/langbulgarianmodel.cpython-34.pyc
../chardet/__pycache__/langcyrillicmodel.cpython-34.pyc
../chardet/__pycache__/langgreekmodel.cpython-34.pyc
../chardet/__pycache__/langhebrewmodel.cpython-34.pyc
../chardet/__pycache__/langhungarianmodel.cpython-34.pyc
../chardet/__pycache__/langthaimodel.cpython-34.pyc
../chardet/__pycache__/latin1prober.cpython-34.pyc
../chardet/__pycache__/mbcharsetprober.cpython-34.pyc
../chardet/__pycache__/mbcsgroupprober.cpython-34.pyc
../chardet/__pycache__/mbcssm.cpython-34.pyc
../chardet/__pycache__/sbcharsetprober.cpython-34.pyc
../chardet/__pycache__/sbcsgroupprober.cpython-34.pyc
../chardet/__pycache__/sjisprober.cpython-34.pyc
../chardet/__pycache__/universaldetector.cpython-34.pyc
../chardet/__pycache__/utf8prober.cpython-34.pyc
./
dependency_links.txt
entry_points.txt
PKG-INFO
SOURCES.txt
top_level.txt
../../../../bin/chardetect


@ -15,7 +15,7 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
__version__ = "2.2.1"
__version__ = "2.3.0"
from sys import version_info


@ -12,34 +12,68 @@ Example::
If no paths are provided, it takes its input from stdin.
"""
from io import open
from sys import argv, stdin
from __future__ import absolute_import, print_function, unicode_literals
import argparse
import sys
from io import open
from chardet import __version__
from chardet.universaldetector import UniversalDetector
def description_of(file, name='stdin'):
    """Return a string describing the probable encoding of a file."""
def description_of(lines, name='stdin'):
    """
    Return a string describing the probable encoding of a file or
    list of strings.
    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    """
    u = UniversalDetector()
    for line in file:
    for line in lines:
        u.feed(line)
    u.close()
    result = u.result
    if result['encoding']:
        return '%s: %s with confidence %s' % (name,
                                              result['encoding'],
                                              result['confidence'])
        return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
                                                     result['confidence'])
    else:
        return '%s: no result' % name
        return '{0}: no result'.format(name)
def main():
    if len(argv) <= 1:
        print(description_of(stdin))
    else:
        for path in argv[1:]:
            with open(path, 'rb') as f:
                print(description_of(f, path))
def main(argv=None):
    '''
    Handles command line arguments and gets things started.
    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    '''
    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their detected \
encodings",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument('input',
                        help='File whose encoding we would like to determine.',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[sys.stdin])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)
    for f in args.input:
        if f.isatty():
            print("You are running chardetect interactively. Press " +
                  "CTRL-D twice at the start of a blank line to signal the " +
                  "end of your input. If you want help, run chardetect " +
                  "--help\n", file=sys.stderr)
        print(description_of(f, f.name))
if __name__ == '__main__':
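The rewritten ``main()`` accepts an optional argv list, so the tool can be exercised programmatically as well as from the shell. A usage sketch (``somefile.txt`` is a hypothetical path)::

    from chardet.chardetect import main

    # Equivalent to running `chardetect somefile.txt` from the shell;
    # prints e.g. "somefile.txt: utf-8 with confidence 0.99"
    main(['somefile.txt'])

    # With argv=None it falls back to sys.argv[1:]; with no paths at all it
    # reads stdin, warning first on stderr if stdin is an interactive TTY.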


@ -177,6 +177,12 @@ class JapaneseContextAnalysis:
        return -1, 1
class SJISContextAnalysis(JapaneseContextAnalysis):
    def __init__(self):
        self.charset_name = "SHIFT_JIS"
    def get_charset_name(self):
        return self.charset_name
    def get_order(self, aBuf):
        if not aBuf:
            return -1, 1
@ -184,6 +190,8 @@ class SJISContextAnalysis(JapaneseContextAnalysis):
        first_char = wrap_ord(aBuf[0])
        if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)):
            charLen = 2
            if (first_char == 0x87) or (0xFA <= first_char <= 0xFC):
                self.charset_name = "CP932"
        else:
            charLen = 1
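Taken together with the class-table tweak further down, the context analyser now starts out reporting SHIFT_JIS and upgrades itself to CP932 the first time it sees a lead byte from the Microsoft extension ranges (0x87, 0xFA-0xFC). An illustrative restatement of that test, not chardet API::

    def sjis_charset_for(lead_byte, current='SHIFT_JIS'):
        """Upgrade SHIFT_JIS to CP932 on NEC/IBM extension lead bytes."""
        if (0x81 <= lead_byte <= 0x9F) or (0xE0 <= lead_byte <= 0xFC):
            # Two-byte lead; the extensions live at 0x87 and 0xFA-0xFC
            if lead_byte == 0x87 or 0xFA <= lead_byte <= 0xFC:
                return 'CP932'
        return current

    assert sjis_charset_for(0x88) == 'SHIFT_JIS'
    assert sjis_charset_for(0xFA) == 'CP932'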


@ -129,11 +129,11 @@ class Latin1Prober(CharSetProber):
        if total < 0.01:
            confidence = 0.0
        else:
            confidence = ((self._mFreqCounter[3] / total)
                          - (self._mFreqCounter[1] * 20.0 / total))
            confidence = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0)
                          / total)
        if confidence < 0.0:
            confidence = 0.0
        # lower the confidence of latin1 so that other more accurate
        # detector can take priority.
        confidence = confidence * 0.5
        confidence = confidence * 0.73
        return confidence
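The two confidence expressions are algebraically identical (``(a/t) - (b*20/t)`` equals ``(a - b*20)/t``), so the behavioural change in this hunk is the final damping factor, raised from 0.5 to 0.73. A worked check with hypothetical counters::

    freq_unlikely = 2    # self._mFreqCounter[1]: improbable byte pairs
    freq_likely = 80     # self._mFreqCounter[3]: probable byte pairs
    total = 100.0

    raw = (freq_likely - freq_unlikely * 20.0) / total   # 0.4 either way
    old_confidence = raw * 0.5    # 0.20
    new_confidence = raw * 0.73   # 0.292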


@ -353,7 +353,7 @@ SJIS_cls = (
2,2,2,2,2,2,2,2, # 68 - 6f
2,2,2,2,2,2,2,2, # 70 - 77
2,2,2,2,2,2,2,1, # 78 - 7f
3,3,3,3,3,3,3,3, # 80 - 87
3,3,3,3,3,2,2,3, # 80 - 87
3,3,3,3,3,3,3,3, # 88 - 8f
3,3,3,3,3,3,3,3, # 90 - 97
3,3,3,3,3,3,3,3, # 98 - 9f
@ -369,9 +369,8 @@ SJIS_cls = (
2,2,2,2,2,2,2,2, # d8 - df
3,3,3,3,3,3,3,3, # e0 - e7
3,3,3,3,3,4,4,4, # e8 - ef
4,4,4,4,4,4,4,4, # f0 - f7
4,4,4,4,4,0,0,0 # f8 - ff
)
3,3,3,3,3,3,3,3, # f0 - f7
3,3,3,3,3,0,0,0) # f8 - ff
SJIS_st = (
@ -571,5 +570,3 @@ UTF8SMModel = {'classTable': UTF8_cls,
'stateTable': UTF8_st,
'charLenTable': UTF8CharLenTable,
'name': 'UTF-8'}
# flake8: noqa


@ -47,7 +47,7 @@ class SJISProber(MultiByteCharSetProber):
        self._mContextAnalyzer.reset()
    def get_charset_name(self):
        return "SHIFT_JIS"
        return self._mContextAnalyzer.get_charset_name()
    def feed(self, aBuf):
        aLen = len(aBuf)


@ -71,9 +71,9 @@ class UniversalDetector:
        if not self._mGotData:
            # If the data starts with BOM, we know it is UTF
            if aBuf[:3] == codecs.BOM:
            if aBuf[:3] == codecs.BOM_UTF8:
                # EF BB BF UTF-8 with BOM
                self.result = {'encoding': "UTF-8", 'confidence': 1.0}
                self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0}
            elif aBuf[:4] == codecs.BOM_UTF32_LE:
                # FF FE 00 00 UTF-32, little-endian BOM
                self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}
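The old test compared three bytes against ``codecs.BOM``, which is an alias for the two-byte UTF-16 native-order BOM, so the UTF-8 branch could never match; comparing against ``codecs.BOM_UTF8`` fixes that, and reporting ``UTF-8-SIG`` lets a decoder strip the BOM automatically. A quick check::

    import codecs

    import chardet

    codecs.BOM == codecs.BOM_UTF8    # False: codecs.BOM is the UTF-16 BOM

    data = codecs.BOM_UTF8 + 'hello'.encode('utf-8')
    chardet.detect(data)             # {'encoding': 'UTF-8-SIG', 'confidence': 1.0}
    data.decode('utf-8-sig')         # 'hello', BOM stripped by the codec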


@ -1,7 +1,7 @@
./
dependency_links.txt
PKG-INFO
SOURCES.txt
zip-safe
top_level.txt
requires.txt
SOURCES.txt
top_level.txt
zip-safe


@ -1,12 +1,12 @@
Metadata-Version: 1.1
Name: feedparser
Version: 5.1.3
Version: 5.2.1
Summary: Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds
Home-page: http://code.google.com/p/feedparser/
Home-page: https://github.com/kurtmckee/feedparser
Author: Kurt McKee
Author-email: contactme@kurtmckee.org
License: UNKNOWN
Download-URL: http://code.google.com/p/feedparser/
Download-URL: https://pypi.python.org/pypi/feedparser
Description: UNKNOWN
Keywords: atom,cdf,feed,parser,rdf,rss
Platform: POSIX
@ -26,5 +26,6 @@ Classifier: Programming Language :: Python :: 3.0
Classifier: Programming Language :: Python :: 3.1
Classifier: Programming Language :: Python :: 3.2
Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: 3.4
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Text Processing :: Markup :: XML


@ -1,7 +1,7 @@
LICENSE
MANIFEST.in
NEWS
README
README.rst
setup.cfg
setup.py
docs/add_custom_css.py
@ -46,7 +46,6 @@ docs/http.rst
docs/index.rst
docs/introduction.rst
docs/license.rst
docs/microformats.rst
docs/namespace-handling.rst
docs/reference-bozo.rst
docs/reference-bozo_exception.rst
@ -77,8 +76,6 @@ docs/reference-entry-title.rst
docs/reference-entry-title_detail.rst
docs/reference-entry-updated.rst
docs/reference-entry-updated_parsed.rst
docs/reference-entry-vcard.rst
docs/reference-entry-xfn.rst
docs/reference-entry.rst
docs/reference-etag.rst
docs/reference-feed-author.rst
@ -623,15 +620,14 @@ feedparser/tests/http/http_status_303.xml
feedparser/tests/http/http_status_304.xml
feedparser/tests/http/http_status_307.xml
feedparser/tests/http/http_status_404.xml
feedparser/tests/http/http_status_9001.xml
feedparser/tests/http/target.xml
feedparser/tests/illformed/aaa_illformed.xml
feedparser/tests/illformed/always_strip_doctype.xml
feedparser/tests/illformed/http_high_bit_date.xml
feedparser/tests/illformed/non-ascii-tag.xml
feedparser/tests/illformed/rdf_channel_empty_textinput.xml
feedparser/tests/illformed/rss_empty_document.xml
feedparser/tests/illformed/rss_incomplete_cdata.xml
feedparser/tests/illformed/rss_item_link_corrupted_ampersands.xml
feedparser/tests/illformed/undeclared_namespace.xml
feedparser/tests/illformed/chardet/big5.xml
feedparser/tests/illformed/chardet/eucjp.xml
@@ -641,131 +637,8 @@ feedparser/tests/illformed/chardet/koi8r.xml
feedparser/tests/illformed/chardet/shiftjis.xml
feedparser/tests/illformed/chardet/tis620.xml
feedparser/tests/illformed/chardet/windows1255.xml
feedparser/tests/microformats/hcard/2-4-2-vcard.xml
feedparser/tests/microformats/hcard/3-1-1-fn-unicode-char.xml
feedparser/tests/microformats/hcard/3-1-1-fn.xml
feedparser/tests/microformats/hcard/3-1-2-n-2-plural.xml
feedparser/tests/microformats/hcard/3-1-2-n-2-singular.xml
feedparser/tests/microformats/hcard/3-1-2-n-plural.xml
feedparser/tests/microformats/hcard/3-1-2-n-singular.xml
feedparser/tests/microformats/hcard/3-1-3-nickname-2-plural.xml
feedparser/tests/microformats/hcard/3-1-3-nickname-2-singular.xml
feedparser/tests/microformats/hcard/3-1-3-nickname.xml
feedparser/tests/microformats/hcard/3-1-4-photo-inline.xml
feedparser/tests/microformats/hcard/3-1-4-photo.xml
feedparser/tests/microformats/hcard/3-1-5-bday-2.xml
feedparser/tests/microformats/hcard/3-1-5-bday-3.xml
feedparser/tests/microformats/hcard/3-1-5-bday.xml
feedparser/tests/microformats/hcard/3-2-1-adr.xml
feedparser/tests/microformats/hcard/3-2-2-label.xml
feedparser/tests/microformats/hcard/3-3-1-tel.xml
feedparser/tests/microformats/hcard/3-3-2-email-2.xml
feedparser/tests/microformats/hcard/3-3-2-email-3.xml
feedparser/tests/microformats/hcard/3-3-2-email.xml
feedparser/tests/microformats/hcard/3-3-3-mailer.xml
feedparser/tests/microformats/hcard/3-4-1-tz-2.xml
feedparser/tests/microformats/hcard/3-4-1-tz.xml
feedparser/tests/microformats/hcard/3-4-2-geo.xml
feedparser/tests/microformats/hcard/3-5-1-title.xml
feedparser/tests/microformats/hcard/3-5-2-role.xml
feedparser/tests/microformats/hcard/3-5-3-logo-2.xml
feedparser/tests/microformats/hcard/3-5-3-logo.xml
feedparser/tests/microformats/hcard/3-5-4-agent-2.xml
feedparser/tests/microformats/hcard/3-5-4-agent.xml
feedparser/tests/microformats/hcard/3-5-5-org.xml
feedparser/tests/microformats/hcard/3-6-1-categories-2-plural.xml
feedparser/tests/microformats/hcard/3-6-1-categories-2-singular.xml
feedparser/tests/microformats/hcard/3-6-1-categories.xml
feedparser/tests/microformats/hcard/3-6-2-note.xml
feedparser/tests/microformats/hcard/3-6-4-rev-2.xml
feedparser/tests/microformats/hcard/3-6-4-rev.xml
feedparser/tests/microformats/hcard/3-6-5-sort-string-2.xml
feedparser/tests/microformats/hcard/3-6-5-sort-string-3.xml
feedparser/tests/microformats/hcard/3-6-5-sort-string-4.xml
feedparser/tests/microformats/hcard/3-6-5-sort-string-5.xml
feedparser/tests/microformats/hcard/3-6-5-sort-string.xml
feedparser/tests/microformats/hcard/3-6-6-sound-2.xml
feedparser/tests/microformats/hcard/3-6-6-sound.xml
feedparser/tests/microformats/hcard/3-6-7-uid.xml
feedparser/tests/microformats/hcard/3-6-8-url.xml
feedparser/tests/microformats/hcard/3-7-1-class-2.xml
feedparser/tests/microformats/hcard/3-7-1-class-3.xml
feedparser/tests/microformats/hcard/3-7-1-class.xml
feedparser/tests/microformats/hcard/3-7-2-key.xml
feedparser/tests/microformats/hcard/7-authors.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_avi.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_bin.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_bz2.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_deb.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_dmg.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_exe.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_gz.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_hqx.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_img.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_iso.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_jar.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_m4a.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_m4v.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_mp2.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_mp3.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_mp4.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_msi.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_ogg.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_rar.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_rpm.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_sit.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_sitx.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_tar.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_tbz2.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_tgz.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_wma.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_wmv.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_z.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_ext_zip.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_type_application_ogg.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_type_audio.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_autodetect_by_type_video.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_href_invalid.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_no_autodetect.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_no_autodetect_xml.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_title.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_title_from_link_text.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_title_overrides_link_text.xml
feedparser/tests/microformats/rel_enclosure/rel_enclosure_type.xml
feedparser/tests/microformats/rel_tag/rel_tag_duplicate.xml
feedparser/tests/microformats/rel_tag/rel_tag_label.xml
feedparser/tests/microformats/rel_tag/rel_tag_scheme.xml
feedparser/tests/microformats/rel_tag/rel_tag_term.xml
feedparser/tests/microformats/rel_tag/rel_tag_term_trailing_slash.xml
feedparser/tests/microformats/xfn/xfn_acquaintance.xml
feedparser/tests/microformats/xfn/xfn_brother.xml
feedparser/tests/microformats/xfn/xfn_child.xml
feedparser/tests/microformats/xfn/xfn_co-resident.xml
feedparser/tests/microformats/xfn/xfn_co-worker.xml
feedparser/tests/microformats/xfn/xfn_colleague.xml
feedparser/tests/microformats/xfn/xfn_contact.xml
feedparser/tests/microformats/xfn/xfn_coresident.xml
feedparser/tests/microformats/xfn/xfn_coworker.xml
feedparser/tests/microformats/xfn/xfn_crush.xml
feedparser/tests/microformats/xfn/xfn_date.xml
feedparser/tests/microformats/xfn/xfn_friend.xml
feedparser/tests/microformats/xfn/xfn_href.xml
feedparser/tests/microformats/xfn/xfn_husband.xml
feedparser/tests/microformats/xfn/xfn_kin.xml
feedparser/tests/microformats/xfn/xfn_me.xml
feedparser/tests/microformats/xfn/xfn_met.xml
feedparser/tests/microformats/xfn/xfn_multiple.xml
feedparser/tests/microformats/xfn/xfn_muse.xml
feedparser/tests/microformats/xfn/xfn_name.xml
feedparser/tests/microformats/xfn/xfn_neighbor.xml
feedparser/tests/microformats/xfn/xfn_parent.xml
feedparser/tests/microformats/xfn/xfn_relative.xml
feedparser/tests/microformats/xfn/xfn_sibling.xml
feedparser/tests/microformats/xfn/xfn_sister.xml
feedparser/tests/microformats/xfn/xfn_spouse.xml
feedparser/tests/microformats/xfn/xfn_sweetheart.xml
feedparser/tests/microformats/xfn/xfn_wife.xml
feedparser/tests/illformed/geo/georss_point_no_coords.xml
feedparser/tests/illformed/geo/georss_polygon_insufficient_coords.xml
feedparser/tests/wellformed/amp/amp01.xml
feedparser/tests/wellformed/amp/amp02.xml
feedparser/tests/wellformed/amp/amp03.xml
@@ -988,6 +861,7 @@ feedparser/tests/wellformed/atom/feed_title_inline_markup_2.xml
feedparser/tests/wellformed/atom/feed_title_naked_markup.xml
feedparser/tests/wellformed/atom/feed_title_text_plain.xml
feedparser/tests/wellformed/atom/feed_updated_parsed.xml
feedparser/tests/wellformed/atom/media_group.xml
feedparser/tests/wellformed/atom/media_player1.xml
feedparser/tests/wellformed/atom/media_thumbnail.xml
feedparser/tests/wellformed/atom/relative_uri.xml
@@ -1382,6 +1256,16 @@ feedparser/tests/wellformed/cdf/item_lastmod.xml
feedparser/tests/wellformed/cdf/item_lastmod_parsed.xml
feedparser/tests/wellformed/cdf/item_title.xml
feedparser/tests/wellformed/feedburner/feedburner_browserfriendly.xml
feedparser/tests/wellformed/geo/georss_box.xml
feedparser/tests/wellformed/geo/georss_line.xml
feedparser/tests/wellformed/geo/georss_point.xml
feedparser/tests/wellformed/geo/georss_polygon.xml
feedparser/tests/wellformed/geo/gml_linestring.xml
feedparser/tests/wellformed/geo/gml_linestring_utm.xml
feedparser/tests/wellformed/geo/gml_point.xml
feedparser/tests/wellformed/geo/gml_point_3d.xml
feedparser/tests/wellformed/geo/gml_point_utm.xml
feedparser/tests/wellformed/geo/gml_polygon.xml
feedparser/tests/wellformed/http/headers_content_location-relative.xml
feedparser/tests/wellformed/http/headers_content_location-unsafe.xml
feedparser/tests/wellformed/http/headers_etag.xml
@@ -1508,8 +1392,6 @@ feedparser/tests/wellformed/lang/item_fullitem_xml_lang.xml
feedparser/tests/wellformed/lang/item_fullitem_xml_lang_inherit.xml
feedparser/tests/wellformed/lang/item_xhtml_body_xml_lang.xml
feedparser/tests/wellformed/lang/item_xhtml_body_xml_lang_inherit.xml
feedparser/tests/wellformed/mf_hcard/3-5-5-org-unicode.xml
feedparser/tests/wellformed/mf_rel_tag/rel_tag_term_no_term.xml
feedparser/tests/wellformed/namespace/atommathml.xml
feedparser/tests/wellformed/namespace/atomsvg.xml
feedparser/tests/wellformed/namespace/atomsvgdctitle.xml
@@ -1532,9 +1414,16 @@ feedparser/tests/wellformed/namespace/rss2.0svg5.xml
feedparser/tests/wellformed/namespace/rss2.0svgtitle.xml
feedparser/tests/wellformed/namespace/rss2.0withAtomNS.xml
feedparser/tests/wellformed/namespace/rss2.0xlink.xml
feedparser/tests/wellformed/namespace/unknown-namespace.xml
feedparser/tests/wellformed/node_precedence/atom10_arbitrary_element.xml
feedparser/tests/wellformed/node_precedence/atom10_id.xml
feedparser/tests/wellformed/node_precedence/atom10_title.xml
feedparser/tests/wellformed/psc/atomsimplechapter.xml
feedparser/tests/wellformed/psc/atomsimplechapterexternal.xml
feedparser/tests/wellformed/psc/ignore_multiple_psc_chapters.xml
feedparser/tests/wellformed/psc/rss2.0simplechapter.xml
feedparser/tests/wellformed/psc/rss2.0simplechapter2items.xml
feedparser/tests/wellformed/psc/version_1.0_keyerror.xml
feedparser/tests/wellformed/rdf/doctype_contains_entity_decl.xml
feedparser/tests/wellformed/rdf/rdf_channel_description.xml
feedparser/tests/wellformed/rdf/rdf_channel_link.xml
@@ -1587,6 +1476,12 @@ feedparser/tests/wellformed/rss/channel_dcterms_issued.xml
feedparser/tests/wellformed/rss/channel_dcterms_issued_parsed.xml
feedparser/tests/wellformed/rss/channel_dcterms_modified.xml
feedparser/tests/wellformed/rss/channel_dcterms_modified_parsed.xml
feedparser/tests/wellformed/rss/channel_dcterms_validity.xml
feedparser/tests/wellformed/rss/channel_dcterms_validity_end.xml
feedparser/tests/wellformed/rss/channel_dcterms_validity_end_parsed.xml
feedparser/tests/wellformed/rss/channel_dcterms_validity_parsed.xml
feedparser/tests/wellformed/rss/channel_dcterms_validity_start.xml
feedparser/tests/wellformed/rss/channel_dcterms_validity_start_parsed.xml
feedparser/tests/wellformed/rss/channel_description.xml
feedparser/tests/wellformed/rss/channel_description_escaped_markup.xml
feedparser/tests/wellformed/rss/channel_description_map_tagline.xml
@@ -1693,6 +1588,7 @@ feedparser/tests/wellformed/rss/item_fullitem_type.xml
feedparser/tests/wellformed/rss/item_guid.xml
feedparser/tests/wellformed/rss/item_guid_conflict_link.xml
feedparser/tests/wellformed/rss/item_guid_guidislink.xml
feedparser/tests/wellformed/rss/item_guid_isPermaLink_ValueError.xml
feedparser/tests/wellformed/rss/item_guid_isPermaLink_conflict_link.xml
feedparser/tests/wellformed/rss/item_guid_isPermaLink_conflict_link_not_guidislink.xml
feedparser/tests/wellformed/rss/item_guid_isPermaLink_guidislink.xml
@@ -1706,6 +1602,8 @@ feedparser/tests/wellformed/rss/item_guid_not_permalink_not_url.xml
feedparser/tests/wellformed/rss/item_image_link_bleed.xml
feedparser/tests/wellformed/rss/item_image_link_conflict.xml
feedparser/tests/wellformed/rss/item_link.xml
feedparser/tests/wellformed/rss/item_madeup_tags_element.xml
feedparser/tests/wellformed/rss/item_multiple_dc_creator.xml
feedparser/tests/wellformed/rss/item_pubDate.xml
feedparser/tests/wellformed/rss/item_pubDate_map_updated_parsed.xml
feedparser/tests/wellformed/rss/item_source.xml

File diff suppressed because it is too large

View file

@@ -1,7 +1,7 @@
Metadata-Version: 1.1
Name: html5lib
Version: 0.999
Summary: HTML parser based on the WHATWG HTML specifcation
Version: 0.9999999
Summary: HTML parser based on the WHATWG HTML specification
Home-page: https://github.com/html5lib/html5lib-python
Author: James Graham
Author-email: james@hoppipolla.co.uk
@@ -167,6 +167,61 @@ Description: html5lib
Change Log
----------
0.9999999/1.0b8
~~~~~~~~~~~~~~~
Released on September 10, 2015
* Fix #195: fix the sanitizer to drop broken URLs (it threw an
exception between 0.9999 and 0.999999).
0.999999/1.0b7
~~~~~~~~~~~~~~
Released on July 7, 2015
* Fix #189: fix the sanitizer to allow relative URLs again (as it did
prior to 0.9999/1.0b5).
0.99999/1.0b6
~~~~~~~~~~~~~
Released on April 30, 2015
* Fix #188: fix the sanitizer to not throw an exception when sanitizing
bogus data URLs.
0.9999/1.0b5
~~~~~~~~~~~~
Released on April 29, 2015
* Fix #153: Sanitizer fails to treat some attributes as URLs. Despite how
this sounds, this has no known security implications. No known version
of IE (5.5 to current), Firefox (3 to current), Safari (6 to current),
Chrome (1 to current), or Opera (12 to current) will run any script
provided in these attributes.
* Pass error message to the ParseError exception in strict parsing mode.
* Allow data URIs in the sanitizer, with a whitelist of content-types.
* Add support for Python implementations that don't support lone
surrogates (read: Jython). Fixes #2.
* Remove localization of error messages. This functionality was totally
unused (and untested that everything was localizable), so we may as
well follow numerous browsers in not supporting translating technical
strings.
* Expose treewalkers.pprint as a public API.
* Add a documentEncoding property to HTML5Parser, fix #121.
0.999
~~~~~
@@ -340,5 +395,6 @@ Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.2
Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: 3.4
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Text Processing :: Markup :: HTML
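The changelog entries above (1.0b5 through 1.0b8) all revolve around the sanitizer's URL handling: relative URLs are accepted again, malformed URLs are dropped instead of raising, and data: URIs are admitted only with a whitelisted content type. A hedged usage sketch of the tokenizer-based sanitizer those releases patch, assuming html5lib 0.9999999's API:

    import html5lib
    from html5lib import sanitizer

    parser = html5lib.HTMLParser(tokenizer=sanitizer.HTMLSanitizer)
    fragment = parser.parseFragment(
        '<a href="/relative">kept (#189)</a>'
        '<a href="http://[">href dropped, no exception (#195)</a>'
        '<img src="data:image/png;base64,iVBORw0KGgo=">')  # whitelisted type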

View file

@@ -1,78 +1,78 @@
../html5lib/utils.py
../html5lib/ihatexml.py
../html5lib/__init__.py
../html5lib/tokenizer.py
../html5lib/html5parser.py
../html5lib/sanitizer.py
../html5lib/inputstream.py
../html5lib/constants.py
../html5lib/serializer/__init__.py
../html5lib/serializer/htmlserializer.py
../html5lib/treebuilders/_base.py
../html5lib/treebuilders/__init__.py
../html5lib/treebuilders/etree_lxml.py
../html5lib/treebuilders/dom.py
../html5lib/treebuilders/etree.py
../html5lib/filters/whitespace.py
../html5lib/filters/_base.py
../html5lib/html5parser.py
../html5lib/ihatexml.py
../html5lib/inputstream.py
../html5lib/sanitizer.py
../html5lib/tokenizer.py
../html5lib/utils.py
../html5lib/filters/__init__.py
../html5lib/filters/sanitizer.py
../html5lib/filters/_base.py
../html5lib/filters/alphabeticalattributes.py
../html5lib/filters/inject_meta_charset.py
../html5lib/filters/lint.py
../html5lib/filters/optionaltags.py
../html5lib/filters/inject_meta_charset.py
../html5lib/filters/alphabeticalattributes.py
../html5lib/treewalkers/pulldom.py
../html5lib/treewalkers/_base.py
../html5lib/treewalkers/genshistream.py
../html5lib/filters/sanitizer.py
../html5lib/filters/whitespace.py
../html5lib/serializer/__init__.py
../html5lib/serializer/htmlserializer.py
../html5lib/treeadapters/__init__.py
../html5lib/treeadapters/sax.py
../html5lib/treebuilders/__init__.py
../html5lib/treebuilders/_base.py
../html5lib/treebuilders/dom.py
../html5lib/treebuilders/etree.py
../html5lib/treebuilders/etree_lxml.py
../html5lib/treewalkers/__init__.py
../html5lib/treewalkers/_base.py
../html5lib/treewalkers/dom.py
../html5lib/treewalkers/etree.py
../html5lib/treewalkers/genshistream.py
../html5lib/treewalkers/lxmletree.py
../html5lib/trie/datrie.py
../html5lib/trie/_base.py
../html5lib/treewalkers/pulldom.py
../html5lib/trie/__init__.py
../html5lib/trie/_base.py
../html5lib/trie/datrie.py
../html5lib/trie/py.py
../html5lib/treeadapters/sax.py
../html5lib/treeadapters/__init__.py
../html5lib/__pycache__/utils.cpython-34.pyc
../html5lib/__pycache__/ihatexml.cpython-34.pyc
../html5lib/__pycache__/__init__.cpython-34.pyc
../html5lib/__pycache__/tokenizer.cpython-34.pyc
../html5lib/__pycache__/html5parser.cpython-34.pyc
../html5lib/__pycache__/sanitizer.cpython-34.pyc
../html5lib/__pycache__/inputstream.cpython-34.pyc
../html5lib/__pycache__/constants.cpython-34.pyc
../html5lib/serializer/__pycache__/__init__.cpython-34.pyc
../html5lib/serializer/__pycache__/htmlserializer.cpython-34.pyc
../html5lib/treebuilders/__pycache__/_base.cpython-34.pyc
../html5lib/treebuilders/__pycache__/__init__.cpython-34.pyc
../html5lib/treebuilders/__pycache__/etree_lxml.cpython-34.pyc
../html5lib/treebuilders/__pycache__/dom.cpython-34.pyc
../html5lib/treebuilders/__pycache__/etree.cpython-34.pyc
../html5lib/filters/__pycache__/whitespace.cpython-34.pyc
../html5lib/filters/__pycache__/_base.cpython-34.pyc
../html5lib/__pycache__/html5parser.cpython-34.pyc
../html5lib/__pycache__/ihatexml.cpython-34.pyc
../html5lib/__pycache__/inputstream.cpython-34.pyc
../html5lib/__pycache__/sanitizer.cpython-34.pyc
../html5lib/__pycache__/tokenizer.cpython-34.pyc
../html5lib/__pycache__/utils.cpython-34.pyc
../html5lib/filters/__pycache__/__init__.cpython-34.pyc
../html5lib/filters/__pycache__/sanitizer.cpython-34.pyc
../html5lib/filters/__pycache__/_base.cpython-34.pyc
../html5lib/filters/__pycache__/alphabeticalattributes.cpython-34.pyc
../html5lib/filters/__pycache__/inject_meta_charset.cpython-34.pyc
../html5lib/filters/__pycache__/lint.cpython-34.pyc
../html5lib/filters/__pycache__/optionaltags.cpython-34.pyc
../html5lib/filters/__pycache__/inject_meta_charset.cpython-34.pyc
../html5lib/filters/__pycache__/alphabeticalattributes.cpython-34.pyc
../html5lib/treewalkers/__pycache__/pulldom.cpython-34.pyc
../html5lib/treewalkers/__pycache__/_base.cpython-34.pyc
../html5lib/treewalkers/__pycache__/genshistream.cpython-34.pyc
../html5lib/filters/__pycache__/sanitizer.cpython-34.pyc
../html5lib/filters/__pycache__/whitespace.cpython-34.pyc
../html5lib/serializer/__pycache__/__init__.cpython-34.pyc
../html5lib/serializer/__pycache__/htmlserializer.cpython-34.pyc
../html5lib/treeadapters/__pycache__/__init__.cpython-34.pyc
../html5lib/treeadapters/__pycache__/sax.cpython-34.pyc
../html5lib/treebuilders/__pycache__/__init__.cpython-34.pyc
../html5lib/treebuilders/__pycache__/_base.cpython-34.pyc
../html5lib/treebuilders/__pycache__/dom.cpython-34.pyc
../html5lib/treebuilders/__pycache__/etree.cpython-34.pyc
../html5lib/treebuilders/__pycache__/etree_lxml.cpython-34.pyc
../html5lib/treewalkers/__pycache__/__init__.cpython-34.pyc
../html5lib/treewalkers/__pycache__/_base.cpython-34.pyc
../html5lib/treewalkers/__pycache__/dom.cpython-34.pyc
../html5lib/treewalkers/__pycache__/etree.cpython-34.pyc
../html5lib/treewalkers/__pycache__/genshistream.cpython-34.pyc
../html5lib/treewalkers/__pycache__/lxmletree.cpython-34.pyc
../html5lib/trie/__pycache__/datrie.cpython-34.pyc
../html5lib/trie/__pycache__/_base.cpython-34.pyc
../html5lib/treewalkers/__pycache__/pulldom.cpython-34.pyc
../html5lib/trie/__pycache__/__init__.cpython-34.pyc
../html5lib/trie/__pycache__/_base.cpython-34.pyc
../html5lib/trie/__pycache__/datrie.cpython-34.pyc
../html5lib/trie/__pycache__/py.cpython-34.pyc
../html5lib/treeadapters/__pycache__/sax.cpython-34.pyc
../html5lib/treeadapters/__pycache__/__init__.cpython-34.pyc
./
dependency_links.txt
PKG-INFO
SOURCES.txt
top_level.txt
PKG-INFO
requires.txt
dependency_links.txt
SOURCES.txt

View file

@@ -20,4 +20,6 @@ from .serializer import serialize
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
"getTreeWalker", "serialize"]
__version__ = "0.999"
# this has to be at the top level, see how setup.py parses this
__version__ = "0.9999999"

View file

@@ -1,292 +1,290 @@
from __future__ import absolute_import, division, unicode_literals
import string
import gettext
_ = gettext.gettext
EOF = None
E = {
"null-character":
_("Null character in input stream, replaced with U+FFFD."),
"Null character in input stream, replaced with U+FFFD.",
"invalid-codepoint":
_("Invalid codepoint in stream."),
"Invalid codepoint in stream.",
"incorrectly-placed-solidus":
_("Solidus (/) incorrectly placed in tag."),
"Solidus (/) incorrectly placed in tag.",
"incorrect-cr-newline-entity":
_("Incorrect CR newline entity, replaced with LF."),
"Incorrect CR newline entity, replaced with LF.",
"illegal-windows-1252-entity":
_("Entity used with illegal number (windows-1252 reference)."),
"Entity used with illegal number (windows-1252 reference).",
"cant-convert-numeric-entity":
_("Numeric entity couldn't be converted to character "
"(codepoint U+%(charAsInt)08x)."),
"Numeric entity couldn't be converted to character "
"(codepoint U+%(charAsInt)08x).",
"illegal-codepoint-for-numeric-entity":
_("Numeric entity represents an illegal codepoint: "
"U+%(charAsInt)08x."),
"Numeric entity represents an illegal codepoint: "
"U+%(charAsInt)08x.",
"numeric-entity-without-semicolon":
_("Numeric entity didn't end with ';'."),
"Numeric entity didn't end with ';'.",
"expected-numeric-entity-but-got-eof":
_("Numeric entity expected. Got end of file instead."),
"Numeric entity expected. Got end of file instead.",
"expected-numeric-entity":
_("Numeric entity expected but none found."),
"Numeric entity expected but none found.",
"named-entity-without-semicolon":
_("Named entity didn't end with ';'."),
"Named entity didn't end with ';'.",
"expected-named-entity":
_("Named entity expected. Got none."),
"Named entity expected. Got none.",
"attributes-in-end-tag":
_("End tag contains unexpected attributes."),
"End tag contains unexpected attributes.",
'self-closing-flag-on-end-tag':
_("End tag contains unexpected self-closing flag."),
"End tag contains unexpected self-closing flag.",
"expected-tag-name-but-got-right-bracket":
_("Expected tag name. Got '>' instead."),
"Expected tag name. Got '>' instead.",
"expected-tag-name-but-got-question-mark":
_("Expected tag name. Got '?' instead. (HTML doesn't "
"support processing instructions.)"),
"Expected tag name. Got '?' instead. (HTML doesn't "
"support processing instructions.)",
"expected-tag-name":
_("Expected tag name. Got something else instead"),
"Expected tag name. Got something else instead",
"expected-closing-tag-but-got-right-bracket":
_("Expected closing tag. Got '>' instead. Ignoring '</>'."),
"Expected closing tag. Got '>' instead. Ignoring '</>'.",
"expected-closing-tag-but-got-eof":
_("Expected closing tag. Unexpected end of file."),
"Expected closing tag. Unexpected end of file.",
"expected-closing-tag-but-got-char":
_("Expected closing tag. Unexpected character '%(data)s' found."),
"Expected closing tag. Unexpected character '%(data)s' found.",
"eof-in-tag-name":
_("Unexpected end of file in the tag name."),
"Unexpected end of file in the tag name.",
"expected-attribute-name-but-got-eof":
_("Unexpected end of file. Expected attribute name instead."),
"Unexpected end of file. Expected attribute name instead.",
"eof-in-attribute-name":
_("Unexpected end of file in attribute name."),
"Unexpected end of file in attribute name.",
"invalid-character-in-attribute-name":
_("Invalid character in attribute name"),
"Invalid character in attribute name",
"duplicate-attribute":
_("Dropped duplicate attribute on tag."),
"Dropped duplicate attribute on tag.",
"expected-end-of-tag-name-but-got-eof":
_("Unexpected end of file. Expected = or end of tag."),
"Unexpected end of file. Expected = or end of tag.",
"expected-attribute-value-but-got-eof":
_("Unexpected end of file. Expected attribute value."),
"Unexpected end of file. Expected attribute value.",
"expected-attribute-value-but-got-right-bracket":
_("Expected attribute value. Got '>' instead."),
"Expected attribute value. Got '>' instead.",
'equals-in-unquoted-attribute-value':
_("Unexpected = in unquoted attribute"),
"Unexpected = in unquoted attribute",
'unexpected-character-in-unquoted-attribute-value':
_("Unexpected character in unquoted attribute"),
"Unexpected character in unquoted attribute",
"invalid-character-after-attribute-name":
_("Unexpected character after attribute name."),
"Unexpected character after attribute name.",
"unexpected-character-after-attribute-value":
_("Unexpected character after attribute value."),
"Unexpected character after attribute value.",
"eof-in-attribute-value-double-quote":
_("Unexpected end of file in attribute value (\")."),
"Unexpected end of file in attribute value (\").",
"eof-in-attribute-value-single-quote":
_("Unexpected end of file in attribute value (')."),
"Unexpected end of file in attribute value (').",
"eof-in-attribute-value-no-quotes":
_("Unexpected end of file in attribute value."),
"Unexpected end of file in attribute value.",
"unexpected-EOF-after-solidus-in-tag":
_("Unexpected end of file in tag. Expected >"),
"Unexpected end of file in tag. Expected >",
"unexpected-character-after-solidus-in-tag":
_("Unexpected character after / in tag. Expected >"),
"Unexpected character after / in tag. Expected >",
"expected-dashes-or-doctype":
_("Expected '--' or 'DOCTYPE'. Not found."),
"Expected '--' or 'DOCTYPE'. Not found.",
"unexpected-bang-after-double-dash-in-comment":
_("Unexpected ! after -- in comment"),
"Unexpected ! after -- in comment",
"unexpected-space-after-double-dash-in-comment":
_("Unexpected space after -- in comment"),
"Unexpected space after -- in comment",
"incorrect-comment":
_("Incorrect comment."),
"Incorrect comment.",
"eof-in-comment":
_("Unexpected end of file in comment."),
"Unexpected end of file in comment.",
"eof-in-comment-end-dash":
_("Unexpected end of file in comment (-)"),
"Unexpected end of file in comment (-)",
"unexpected-dash-after-double-dash-in-comment":
_("Unexpected '-' after '--' found in comment."),
"Unexpected '-' after '--' found in comment.",
"eof-in-comment-double-dash":
_("Unexpected end of file in comment (--)."),
"Unexpected end of file in comment (--).",
"eof-in-comment-end-space-state":
_("Unexpected end of file in comment."),
"Unexpected end of file in comment.",
"eof-in-comment-end-bang-state":
_("Unexpected end of file in comment."),
"Unexpected end of file in comment.",
"unexpected-char-in-comment":
_("Unexpected character in comment found."),
"Unexpected character in comment found.",
"need-space-after-doctype":
_("No space after literal string 'DOCTYPE'."),
"No space after literal string 'DOCTYPE'.",
"expected-doctype-name-but-got-right-bracket":
_("Unexpected > character. Expected DOCTYPE name."),
"Unexpected > character. Expected DOCTYPE name.",
"expected-doctype-name-but-got-eof":
_("Unexpected end of file. Expected DOCTYPE name."),
"Unexpected end of file. Expected DOCTYPE name.",
"eof-in-doctype-name":
_("Unexpected end of file in DOCTYPE name."),
"Unexpected end of file in DOCTYPE name.",
"eof-in-doctype":
_("Unexpected end of file in DOCTYPE."),
"Unexpected end of file in DOCTYPE.",
"expected-space-or-right-bracket-in-doctype":
_("Expected space or '>'. Got '%(data)s'"),
"Expected space or '>'. Got '%(data)s'",
"unexpected-end-of-doctype":
_("Unexpected end of DOCTYPE."),
"Unexpected end of DOCTYPE.",
"unexpected-char-in-doctype":
_("Unexpected character in DOCTYPE."),
"Unexpected character in DOCTYPE.",
"eof-in-innerhtml":
_("XXX innerHTML EOF"),
"XXX innerHTML EOF",
"unexpected-doctype":
_("Unexpected DOCTYPE. Ignored."),
"Unexpected DOCTYPE. Ignored.",
"non-html-root":
_("html needs to be the first start tag."),
"html needs to be the first start tag.",
"expected-doctype-but-got-eof":
_("Unexpected End of file. Expected DOCTYPE."),
"Unexpected End of file. Expected DOCTYPE.",
"unknown-doctype":
_("Erroneous DOCTYPE."),
"Erroneous DOCTYPE.",
"expected-doctype-but-got-chars":
_("Unexpected non-space characters. Expected DOCTYPE."),
"Unexpected non-space characters. Expected DOCTYPE.",
"expected-doctype-but-got-start-tag":
_("Unexpected start tag (%(name)s). Expected DOCTYPE."),
"Unexpected start tag (%(name)s). Expected DOCTYPE.",
"expected-doctype-but-got-end-tag":
_("Unexpected end tag (%(name)s). Expected DOCTYPE."),
"Unexpected end tag (%(name)s). Expected DOCTYPE.",
"end-tag-after-implied-root":
_("Unexpected end tag (%(name)s) after the (implied) root element."),
"Unexpected end tag (%(name)s) after the (implied) root element.",
"expected-named-closing-tag-but-got-eof":
_("Unexpected end of file. Expected end tag (%(name)s)."),
"Unexpected end of file. Expected end tag (%(name)s).",
"two-heads-are-not-better-than-one":
_("Unexpected start tag head in existing head. Ignored."),
"Unexpected start tag head in existing head. Ignored.",
"unexpected-end-tag":
_("Unexpected end tag (%(name)s). Ignored."),
"Unexpected end tag (%(name)s). Ignored.",
"unexpected-start-tag-out-of-my-head":
_("Unexpected start tag (%(name)s) that can be in head. Moved."),
"Unexpected start tag (%(name)s) that can be in head. Moved.",
"unexpected-start-tag":
_("Unexpected start tag (%(name)s)."),
"Unexpected start tag (%(name)s).",
"missing-end-tag":
_("Missing end tag (%(name)s)."),
"Missing end tag (%(name)s).",
"missing-end-tags":
_("Missing end tags (%(name)s)."),
"Missing end tags (%(name)s).",
"unexpected-start-tag-implies-end-tag":
_("Unexpected start tag (%(startName)s) "
"implies end tag (%(endName)s)."),
"Unexpected start tag (%(startName)s) "
"implies end tag (%(endName)s).",
"unexpected-start-tag-treated-as":
_("Unexpected start tag (%(originalName)s). Treated as %(newName)s."),
"Unexpected start tag (%(originalName)s). Treated as %(newName)s.",
"deprecated-tag":
_("Unexpected start tag %(name)s. Don't use it!"),
"Unexpected start tag %(name)s. Don't use it!",
"unexpected-start-tag-ignored":
_("Unexpected start tag %(name)s. Ignored."),
"Unexpected start tag %(name)s. Ignored.",
"expected-one-end-tag-but-got-another":
_("Unexpected end tag (%(gotName)s). "
"Missing end tag (%(expectedName)s)."),
"Unexpected end tag (%(gotName)s). "
"Missing end tag (%(expectedName)s).",
"end-tag-too-early":
_("End tag (%(name)s) seen too early. Expected other end tag."),
"End tag (%(name)s) seen too early. Expected other end tag.",
"end-tag-too-early-named":
_("Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."),
"Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).",
"end-tag-too-early-ignored":
_("End tag (%(name)s) seen too early. Ignored."),
"End tag (%(name)s) seen too early. Ignored.",
"adoption-agency-1.1":
_("End tag (%(name)s) violates step 1, "
"paragraph 1 of the adoption agency algorithm."),
"End tag (%(name)s) violates step 1, "
"paragraph 1 of the adoption agency algorithm.",
"adoption-agency-1.2":
_("End tag (%(name)s) violates step 1, "
"paragraph 2 of the adoption agency algorithm."),
"End tag (%(name)s) violates step 1, "
"paragraph 2 of the adoption agency algorithm.",
"adoption-agency-1.3":
_("End tag (%(name)s) violates step 1, "
"paragraph 3 of the adoption agency algorithm."),
"End tag (%(name)s) violates step 1, "
"paragraph 3 of the adoption agency algorithm.",
"adoption-agency-4.4":
_("End tag (%(name)s) violates step 4, "
"paragraph 4 of the adoption agency algorithm."),
"End tag (%(name)s) violates step 4, "
"paragraph 4 of the adoption agency algorithm.",
"unexpected-end-tag-treated-as":
_("Unexpected end tag (%(originalName)s). Treated as %(newName)s."),
"Unexpected end tag (%(originalName)s). Treated as %(newName)s.",
"no-end-tag":
_("This element (%(name)s) has no end tag."),
"This element (%(name)s) has no end tag.",
"unexpected-implied-end-tag-in-table":
_("Unexpected implied end tag (%(name)s) in the table phase."),
"Unexpected implied end tag (%(name)s) in the table phase.",
"unexpected-implied-end-tag-in-table-body":
_("Unexpected implied end tag (%(name)s) in the table body phase."),
"Unexpected implied end tag (%(name)s) in the table body phase.",
"unexpected-char-implies-table-voodoo":
_("Unexpected non-space characters in "
"table context caused voodoo mode."),
"Unexpected non-space characters in "
"table context caused voodoo mode.",
"unexpected-hidden-input-in-table":
_("Unexpected input with type hidden in table context."),
"Unexpected input with type hidden in table context.",
"unexpected-form-in-table":
_("Unexpected form in table context."),
"Unexpected form in table context.",
"unexpected-start-tag-implies-table-voodoo":
_("Unexpected start tag (%(name)s) in "
"table context caused voodoo mode."),
"Unexpected start tag (%(name)s) in "
"table context caused voodoo mode.",
"unexpected-end-tag-implies-table-voodoo":
_("Unexpected end tag (%(name)s) in "
"table context caused voodoo mode."),
"Unexpected end tag (%(name)s) in "
"table context caused voodoo mode.",
"unexpected-cell-in-table-body":
_("Unexpected table cell start tag (%(name)s) "
"in the table body phase."),
"Unexpected table cell start tag (%(name)s) "
"in the table body phase.",
"unexpected-cell-end-tag":
_("Got table cell end tag (%(name)s) "
"while required end tags are missing."),
"Got table cell end tag (%(name)s) "
"while required end tags are missing.",
"unexpected-end-tag-in-table-body":
_("Unexpected end tag (%(name)s) in the table body phase. Ignored."),
"Unexpected end tag (%(name)s) in the table body phase. Ignored.",
"unexpected-implied-end-tag-in-table-row":
_("Unexpected implied end tag (%(name)s) in the table row phase."),
"Unexpected implied end tag (%(name)s) in the table row phase.",
"unexpected-end-tag-in-table-row":
_("Unexpected end tag (%(name)s) in the table row phase. Ignored."),
"Unexpected end tag (%(name)s) in the table row phase. Ignored.",
"unexpected-select-in-select":
_("Unexpected select start tag in the select phase "
"treated as select end tag."),
"Unexpected select start tag in the select phase "
"treated as select end tag.",
"unexpected-input-in-select":
_("Unexpected input start tag in the select phase."),
"Unexpected input start tag in the select phase.",
"unexpected-start-tag-in-select":
_("Unexpected start tag token (%(name)s in the select phase. "
"Ignored."),
"Unexpected start tag token (%(name)s in the select phase. "
"Ignored.",
"unexpected-end-tag-in-select":
_("Unexpected end tag (%(name)s) in the select phase. Ignored."),
"Unexpected end tag (%(name)s) in the select phase. Ignored.",
"unexpected-table-element-start-tag-in-select-in-table":
_("Unexpected table element start tag (%(name)s) in the select in table phase."),
"Unexpected table element start tag (%(name)s) in the select in table phase.",
"unexpected-table-element-end-tag-in-select-in-table":
_("Unexpected table element end tag (%(name)s) in the select in table phase."),
"Unexpected table element end tag (%(name)s) in the select in table phase.",
"unexpected-char-after-body":
_("Unexpected non-space characters in the after body phase."),
"Unexpected non-space characters in the after body phase.",
"unexpected-start-tag-after-body":
_("Unexpected start tag token (%(name)s)"
" in the after body phase."),
"Unexpected start tag token (%(name)s)"
" in the after body phase.",
"unexpected-end-tag-after-body":
_("Unexpected end tag token (%(name)s)"
" in the after body phase."),
"Unexpected end tag token (%(name)s)"
" in the after body phase.",
"unexpected-char-in-frameset":
_("Unexpected characters in the frameset phase. Characters ignored."),
"Unexpected characters in the frameset phase. Characters ignored.",
"unexpected-start-tag-in-frameset":
_("Unexpected start tag token (%(name)s)"
" in the frameset phase. Ignored."),
"Unexpected start tag token (%(name)s)"
" in the frameset phase. Ignored.",
"unexpected-frameset-in-frameset-innerhtml":
_("Unexpected end tag token (frameset) "
"in the frameset phase (innerHTML)."),
"Unexpected end tag token (frameset) "
"in the frameset phase (innerHTML).",
"unexpected-end-tag-in-frameset":
_("Unexpected end tag token (%(name)s)"
" in the frameset phase. Ignored."),
"Unexpected end tag token (%(name)s)"
" in the frameset phase. Ignored.",
"unexpected-char-after-frameset":
_("Unexpected non-space characters in the "
"after frameset phase. Ignored."),
"Unexpected non-space characters in the "
"after frameset phase. Ignored.",
"unexpected-start-tag-after-frameset":
_("Unexpected start tag (%(name)s)"
" in the after frameset phase. Ignored."),
"Unexpected start tag (%(name)s)"
" in the after frameset phase. Ignored.",
"unexpected-end-tag-after-frameset":
_("Unexpected end tag (%(name)s)"
" in the after frameset phase. Ignored."),
"Unexpected end tag (%(name)s)"
" in the after frameset phase. Ignored.",
"unexpected-end-tag-after-body-innerhtml":
_("Unexpected end tag after body(innerHtml)"),
"Unexpected end tag after body(innerHtml)",
"expected-eof-but-got-char":
_("Unexpected non-space characters. Expected end of file."),
"Unexpected non-space characters. Expected end of file.",
"expected-eof-but-got-start-tag":
_("Unexpected start tag (%(name)s)"
". Expected end of file."),
"Unexpected start tag (%(name)s)"
". Expected end of file.",
"expected-eof-but-got-end-tag":
_("Unexpected end tag (%(name)s)"
". Expected end of file."),
"Unexpected end tag (%(name)s)"
". Expected end of file.",
"eof-in-table":
_("Unexpected end of file. Expected table content."),
"Unexpected end of file. Expected table content.",
"eof-in-select":
_("Unexpected end of file. Expected select content."),
"Unexpected end of file. Expected select content.",
"eof-in-frameset":
_("Unexpected end of file. Expected frameset content."),
"Unexpected end of file. Expected frameset content.",
"eof-in-script-in-script":
_("Unexpected end of file. Expected script content."),
"Unexpected end of file. Expected script content.",
"eof-in-foreign-lands":
_("Unexpected end of file. Expected foreign content"),
"Unexpected end of file. Expected foreign content",
"non-void-element-with-trailing-solidus":
_("Trailing solidus not allowed on element %(name)s"),
"Trailing solidus not allowed on element %(name)s",
"unexpected-html-element-in-foreign-content":
_("Element %(name)s not allowed in a non-html context"),
"Element %(name)s not allowed in a non-html context",
"unexpected-end-tag-before-html":
_("Unexpected end tag (%(name)s) before html."),
"Unexpected end tag (%(name)s) before html.",
"XXX-undefined-error":
_("Undefined error (this sucks and should be fixed)"),
"Undefined error (this sucks and should be fixed)",
}
namespaces = {
@@ -298,7 +296,7 @@ namespaces = {
"xmlns": "http://www.w3.org/2000/xmlns/"
}
scopingElements = frozenset((
scopingElements = frozenset([
(namespaces["html"], "applet"),
(namespaces["html"], "caption"),
(namespaces["html"], "html"),
@@ -316,9 +314,9 @@ scopingElements = frozenset((
(namespaces["svg"], "foreignObject"),
(namespaces["svg"], "desc"),
(namespaces["svg"], "title"),
))
])
formattingElements = frozenset((
formattingElements = frozenset([
(namespaces["html"], "a"),
(namespaces["html"], "b"),
(namespaces["html"], "big"),
@@ -333,9 +331,9 @@ formattingElements = frozenset((
(namespaces["html"], "strong"),
(namespaces["html"], "tt"),
(namespaces["html"], "u")
))
])
specialElements = frozenset((
specialElements = frozenset([
(namespaces["html"], "address"),
(namespaces["html"], "applet"),
(namespaces["html"], "area"),
@@ -416,22 +414,22 @@ specialElements = frozenset((
(namespaces["html"], "wbr"),
(namespaces["html"], "xmp"),
(namespaces["svg"], "foreignObject")
))
])
htmlIntegrationPointElements = frozenset((
htmlIntegrationPointElements = frozenset([
(namespaces["mathml"], "annotaion-xml"),
(namespaces["svg"], "foreignObject"),
(namespaces["svg"], "desc"),
(namespaces["svg"], "title")
))
])
mathmlTextIntegrationPointElements = frozenset((
mathmlTextIntegrationPointElements = frozenset([
(namespaces["mathml"], "mi"),
(namespaces["mathml"], "mo"),
(namespaces["mathml"], "mn"),
(namespaces["mathml"], "ms"),
(namespaces["mathml"], "mtext")
))
])
adjustForeignAttributes = {
"xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
@@ -451,21 +449,21 @@ adjustForeignAttributes = {
unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in
adjustForeignAttributes.items()])
spaceCharacters = frozenset((
spaceCharacters = frozenset([
"\t",
"\n",
"\u000C",
" ",
"\r"
))
])
tableInsertModeElements = frozenset((
tableInsertModeElements = frozenset([
"table",
"tbody",
"tfoot",
"thead",
"tr"
))
])
asciiLowercase = frozenset(string.ascii_lowercase)
asciiUppercase = frozenset(string.ascii_uppercase)
@@ -486,7 +484,7 @@ headingElements = (
"h6"
)
voidElements = frozenset((
voidElements = frozenset([
"base",
"command",
"event-source",
@@ -502,11 +500,11 @@ voidElements = frozenset((
"input",
"source",
"track"
))
])
cdataElements = frozenset(('title', 'textarea'))
cdataElements = frozenset(['title', 'textarea'])
rcdataElements = frozenset((
rcdataElements = frozenset([
'style',
'script',
'xmp',
@@ -514,27 +512,27 @@ rcdataElements = frozenset((
'noembed',
'noframes',
'noscript'
))
])
booleanAttributes = {
"": frozenset(("irrelevant",)),
"style": frozenset(("scoped",)),
"img": frozenset(("ismap",)),
"audio": frozenset(("autoplay", "controls")),
"video": frozenset(("autoplay", "controls")),
"script": frozenset(("defer", "async")),
"details": frozenset(("open",)),
"datagrid": frozenset(("multiple", "disabled")),
"command": frozenset(("hidden", "disabled", "checked", "default")),
"hr": frozenset(("noshade")),
"menu": frozenset(("autosubmit",)),
"fieldset": frozenset(("disabled", "readonly")),
"option": frozenset(("disabled", "readonly", "selected")),
"optgroup": frozenset(("disabled", "readonly")),
"button": frozenset(("disabled", "autofocus")),
"input": frozenset(("disabled", "readonly", "required", "autofocus", "checked", "ismap")),
"select": frozenset(("disabled", "readonly", "autofocus", "multiple")),
"output": frozenset(("disabled", "readonly")),
"": frozenset(["irrelevant"]),
"style": frozenset(["scoped"]),
"img": frozenset(["ismap"]),
"audio": frozenset(["autoplay", "controls"]),
"video": frozenset(["autoplay", "controls"]),
"script": frozenset(["defer", "async"]),
"details": frozenset(["open"]),
"datagrid": frozenset(["multiple", "disabled"]),
"command": frozenset(["hidden", "disabled", "checked", "default"]),
"hr": frozenset(["noshade"]),
"menu": frozenset(["autosubmit"]),
"fieldset": frozenset(["disabled", "readonly"]),
"option": frozenset(["disabled", "readonly", "selected"]),
"optgroup": frozenset(["disabled", "readonly"]),
"button": frozenset(["disabled", "autofocus"]),
"input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]),
"select": frozenset(["disabled", "readonly", "autofocus", "multiple"]),
"output": frozenset(["disabled", "readonly"]),
}
# entitiesWindows1252 has to be _ordered_ and needs to have an index. It
@@ -574,7 +572,7 @@ entitiesWindows1252 = (
376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
)
xmlEntities = frozenset(('lt;', 'gt;', 'amp;', 'apos;', 'quot;'))
xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;'])
entities = {
"AElig": "\xc6",
@@ -3088,8 +3086,8 @@ tokenTypes = {
"ParseError": 7
}
tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"],
tokenTypes["EmptyTag"]))
tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"],
tokenTypes["EmptyTag"]])
prefixes = dict([(v, k) for k, v in namespaces.items()])

View file

@@ -1,8 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
from gettext import gettext
_ = gettext
from . import _base
from ..constants import cdataElements, rcdataElements, voidElements
@@ -23,24 +20,24 @@ class Filter(_base.Filter):
if type in ("StartTag", "EmptyTag"):
name = token["name"]
if contentModelFlag != "PCDATA":
raise LintError(_("StartTag not in PCDATA content model flag: %(tag)s") % {"tag": name})
raise LintError("StartTag not in PCDATA content model flag: %(tag)s" % {"tag": name})
if not isinstance(name, str):
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
if not name:
raise LintError(_("Empty tag name"))
raise LintError("Empty tag name")
if type == "StartTag" and name in voidElements:
raise LintError(_("Void element reported as StartTag token: %(tag)s") % {"tag": name})
raise LintError("Void element reported as StartTag token: %(tag)s" % {"tag": name})
elif type == "EmptyTag" and name not in voidElements:
raise LintError(_("Non-void element reported as EmptyTag token: %(tag)s") % {"tag": token["name"]})
raise LintError("Non-void element reported as EmptyTag token: %(tag)s" % {"tag": token["name"]})
if type == "StartTag":
open_elements.append(name)
for name, value in token["data"]:
if not isinstance(name, str):
raise LintError(_("Attribute name is not a string: %(name)r") % {"name": name})
raise LintError("Attribute name is not a string: %(name)r" % {"name": name})
if not name:
raise LintError(_("Empty attribute name"))
raise LintError("Empty attribute name")
if not isinstance(value, str):
raise LintError(_("Attribute value is not a string: %(value)r") % {"value": value})
raise LintError("Attribute value is not a string: %(value)r" % {"value": value})
if name in cdataElements:
contentModelFlag = "CDATA"
elif name in rcdataElements:
@@ -51,43 +48,43 @@ class Filter(_base.Filter):
elif type == "EndTag":
name = token["name"]
if not isinstance(name, str):
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
if not name:
raise LintError(_("Empty tag name"))
raise LintError("Empty tag name")
if name in voidElements:
raise LintError(_("Void element reported as EndTag token: %(tag)s") % {"tag": name})
raise LintError("Void element reported as EndTag token: %(tag)s" % {"tag": name})
start_name = open_elements.pop()
if start_name != name:
raise LintError(_("EndTag (%(end)s) does not match StartTag (%(start)s)") % {"end": name, "start": start_name})
raise LintError("EndTag (%(end)s) does not match StartTag (%(start)s)" % {"end": name, "start": start_name})
contentModelFlag = "PCDATA"
elif type == "Comment":
if contentModelFlag != "PCDATA":
raise LintError(_("Comment not in PCDATA content model flag"))
raise LintError("Comment not in PCDATA content model flag")
elif type in ("Characters", "SpaceCharacters"):
data = token["data"]
if not isinstance(data, str):
raise LintError(_("Attribute name is not a string: %(name)r") % {"name": data})
raise LintError("Attribute name is not a string: %(name)r" % {"name": data})
if not data:
raise LintError(_("%(type)s token with empty data") % {"type": type})
raise LintError("%(type)s token with empty data" % {"type": type})
if type == "SpaceCharacters":
data = data.strip(spaceCharacters)
if data:
raise LintError(_("Non-space character(s) found in SpaceCharacters token: %(token)r") % {"token": data})
raise LintError("Non-space character(s) found in SpaceCharacters token: %(token)r" % {"token": data})
elif type == "Doctype":
name = token["name"]
if contentModelFlag != "PCDATA":
raise LintError(_("Doctype not in PCDATA content model flag: %(name)s") % {"name": name})
raise LintError("Doctype not in PCDATA content model flag: %(name)s" % {"name": name})
if not isinstance(name, str):
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
# XXX: what to do with token["data"] ?
elif type in ("ParseError", "SerializeError"):
pass
else:
raise LintError(_("Unknown token type: %(type)s") % {"type": type})
raise LintError("Unknown token type: %(type)s" % {"type": type})
yield token

View file

@@ -18,6 +18,7 @@ from .constants import cdataElements, rcdataElements
from .constants import tokenTypes, ReparseException, namespaces
from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements
from .constants import adjustForeignAttributes as adjustForeignAttributesMap
from .constants import E
def parse(doc, treebuilder="etree", encoding=None,
@@ -129,6 +130,17 @@ class HTMLParser(object):
self.framesetOK = True
@property
def documentEncoding(self):
"""The name of the character encoding
that was used to decode the input stream,
or :obj:`None` if that is not determined yet.
"""
if not hasattr(self, 'tokenizer'):
return None
return self.tokenizer.stream.charEncoding[0]
def isHTMLIntegrationPoint(self, element):
if (element.name == "annotation-xml" and
element.namespace == namespaces["mathml"]):
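The new documentEncoding property surfaces the encoding the input stream settled on, and returns None until a tokenizer (and hence a stream) exists. A small sketch, assuming the byte-input path detects the meta charset:

    import html5lib

    parser = html5lib.HTMLParser()
    print(parser.documentEncoding)  # None: nothing parsed yet
    parser.parse(b'<meta charset="windows-1252"><p>hi</p>')
    print(parser.documentEncoding)  # e.g. 'windows-1252'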
@@ -245,7 +257,7 @@ class HTMLParser(object):
# XXX The idea is to make errorcode mandatory.
self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
if self.strict:
raise ParseError
raise ParseError(E[errorcode] % datavars)
def normalizeToken(self, token):
""" HTML5 specific normalizations to the token stream """
@@ -868,7 +880,7 @@ def getPhases(debug):
self.startTagHandler = utils.MethodDispatcher([
("html", self.startTagHtml),
(("base", "basefont", "bgsound", "command", "link", "meta",
"noframes", "script", "style", "title"),
"script", "style", "title"),
self.startTagProcessInHead),
("body", self.startTagBody),
("frameset", self.startTagFrameset),
@@ -1205,8 +1217,7 @@ def getPhases(debug):
attributes["name"] = "isindex"
self.processStartTag(impliedTagToken("input", "StartTag",
attributes=attributes,
selfClosing=
token["selfClosing"]))
selfClosing=token["selfClosing"]))
self.processEndTag(impliedTagToken("label"))
self.processStartTag(impliedTagToken("hr", "StartTag"))
self.processEndTag(impliedTagToken("form"))

View file

@@ -28,7 +28,18 @@ asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
invalid_unicode_re = re.compile("[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uD800-\uDFFF\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]")
invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]"
if utils.supports_lone_surrogates:
# Use one extra step of indirection and create surrogates with
# unichr. Not using this indirection would introduce an illegal
# unicode literal on platforms not supporting such lone
# surrogates.
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate +
eval('"\\uD800-\\uDFFF"'))
else:
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
@@ -164,13 +175,18 @@ class HTMLUnicodeInputStream(object):
"""
# Craziness
if len("\U0010FFFF") == 1:
if not utils.supports_lone_surrogates:
# Such platforms will have already checked for such
# surrogate errors, so no need to do this checking.
self.reportCharacterErrors = None
self.replaceCharactersRegexp = None
elif len("\U0010FFFF") == 1:
self.reportCharacterErrors = self.characterErrorsUCS4
self.replaceCharactersRegexp = re.compile("[\uD800-\uDFFF]")
self.replaceCharactersRegexp = re.compile(eval('"[\\uD800-\\uDFFF]"'))
else:
self.reportCharacterErrors = self.characterErrorsUCS2
self.replaceCharactersRegexp = re.compile("([\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])")
self.replaceCharactersRegexp = re.compile(
eval('"([\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?<![\\uD800-\\uDBFF])[\\uDC00-\\uDFFF])"'))
# List of where new lines occur
self.newLines = [0]
@@ -265,11 +281,12 @@ class HTMLUnicodeInputStream(object):
self._bufferedCharacter = data[-1]
data = data[:-1]
self.reportCharacterErrors(data)
if self.reportCharacterErrors:
self.reportCharacterErrors(data)
# Replace invalid characters
# Note U+0000 is dealt with in the tokenizer
data = self.replaceCharactersRegexp.sub("\ufffd", data)
# Replace invalid characters
# Note U+0000 is dealt with in the tokenizer
data = self.replaceCharactersRegexp.sub("\ufffd", data)
data = data.replace("\r\n", "\n")
data = data.replace("\r", "\n")

View file

@@ -2,11 +2,26 @@ from __future__ import absolute_import, division, unicode_literals
import re
from xml.sax.saxutils import escape, unescape
from six.moves import urllib_parse as urlparse
from .tokenizer import HTMLTokenizer
from .constants import tokenTypes
content_type_rgx = re.compile(r'''
^
# Match a content type <application>/<type>
(?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
# Match any character set and encoding
(?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
|(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
# Assume the rest is data
,.*
$
''',
re.VERBOSE)
class HTMLSanitizerMixin(object):
""" sanitization of XHTML+MathML+SVG and of inline style attributes."""
@@ -100,8 +115,8 @@ class HTMLSanitizerMixin(object):
'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
'y1', 'y2', 'zoomAndPan']
attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster',
'xlink:href', 'xml:base']
attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster', 'background', 'datasrc',
'dynsrc', 'lowsrc', 'ping', 'poster', 'xlink:href', 'xml:base']
svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
@@ -138,7 +153,9 @@ class HTMLSanitizerMixin(object):
acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc',
'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
'ssh', 'sftp', 'rtsp', 'afs']
'ssh', 'sftp', 'rtsp', 'afs', 'data']
acceptable_content_types = ['image/png', 'image/jpeg', 'image/gif', 'image/webp', 'image/bmp', 'text/plain']
# subclasses may define their own versions of these constants
allowed_elements = acceptable_elements + mathml_elements + svg_elements
@@ -147,6 +164,7 @@ class HTMLSanitizerMixin(object):
allowed_css_keywords = acceptable_css_keywords
allowed_svg_properties = acceptable_svg_properties
allowed_protocols = acceptable_protocols
allowed_content_types = acceptable_content_types
# Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
# stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
@@ -189,10 +207,21 @@ class HTMLSanitizerMixin(object):
unescape(attrs[attr])).lower()
# remove replacement characters from unescaped characters
val_unescaped = val_unescaped.replace("\ufffd", "")
if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and
(val_unescaped.split(':')[0] not in
self.allowed_protocols)):
try:
uri = urlparse.urlparse(val_unescaped)
except ValueError:
uri = None
del attrs[attr]
if uri and uri.scheme:
if uri.scheme not in self.allowed_protocols:
del attrs[attr]
if uri.scheme == 'data':
m = content_type_rgx.match(uri.path)
if not m:
del attrs[attr]
elif m.group('content_type') not in self.allowed_content_types:
del attrs[attr]
for attr in self.svg_attr_val_allows_ref:
if attr in attrs:
attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
@ -245,7 +274,7 @@ class HTMLSanitizerMixin(object):
elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
'padding']:
for keyword in value.split():
if not keyword in self.acceptable_css_keywords and \
if keyword not in self.acceptable_css_keywords and \
not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
break
else:
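
A note on the data-URI handling above: urlparse puts everything after "data:" into .path, and content_type_rgx then both parses it and gates it. A minimal sketch of the two steps (Python 3 imports shown; the html5lib.sanitizer module path is an assumption):

    from urllib.parse import urlparse
    from html5lib.sanitizer import content_type_rgx

    uri = urlparse('data:image/png;base64,iVBORw0KGgo=')
    m = content_type_rgx.match(uri.path)
    assert m and m.group('content_type') == 'image/png'      # in allowed_content_types
    # text/html still matches the regexp; it is dropped one step later,
    # because it is not in allowed_content_types
    m = content_type_rgx.match('text/html,<script>alert(1)</script>')
    assert m and m.group('content_type') == 'text/html'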

View file

@ -1,9 +1,6 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
import gettext
_ = gettext.gettext
try:
from functools import reduce
except ImportError:
@ -35,7 +32,7 @@ else:
v = utils.surrogatePairToCodepoint(v)
else:
v = ord(v)
if not v in encode_entity_map or k.islower():
if v not in encode_entity_map or k.islower():
# prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
encode_entity_map[v] = k
@ -208,7 +205,7 @@ class HTMLSerializer(object):
if token["systemId"]:
if token["systemId"].find('"') >= 0:
if token["systemId"].find("'") >= 0:
self.serializeError(_("System identifer contains both single and double quote characters"))
self.serializeError("System identifer contains both single and double quote characters")
quote_char = "'"
else:
quote_char = '"'
@ -220,7 +217,7 @@ class HTMLSerializer(object):
elif type in ("Characters", "SpaceCharacters"):
if type == "SpaceCharacters" or in_cdata:
if in_cdata and token["data"].find("</") >= 0:
self.serializeError(_("Unexpected </ in CDATA"))
self.serializeError("Unexpected </ in CDATA")
yield self.encode(token["data"])
else:
yield self.encode(escape(token["data"]))
@ -231,7 +228,7 @@ class HTMLSerializer(object):
if name in rcdataElements and not self.escape_rcdata:
in_cdata = True
elif in_cdata:
self.serializeError(_("Unexpected child element of a CDATA element"))
self.serializeError("Unexpected child element of a CDATA element")
for (attr_namespace, attr_name), attr_value in token["data"].items():
# TODO: Add namespace support here
k = attr_name
@ -279,20 +276,20 @@ class HTMLSerializer(object):
if name in rcdataElements:
in_cdata = False
elif in_cdata:
self.serializeError(_("Unexpected child element of a CDATA element"))
self.serializeError("Unexpected child element of a CDATA element")
yield self.encodeStrict("</%s>" % name)
elif type == "Comment":
data = token["data"]
if data.find("--") >= 0:
self.serializeError(_("Comment contains --"))
self.serializeError("Comment contains --")
yield self.encodeStrict("<!--%s-->" % token["data"])
elif type == "Entity":
name = token["name"]
key = name + ";"
if not key in entities:
self.serializeError(_("Entity %s not recognized" % name))
if key not in entities:
self.serializeError("Entity %s not recognized" % name)
if self.resolve_entities and key not in xmlEntities:
data = entities[key]
else:

View file

@ -158,7 +158,7 @@ def getDomBuilder(DomImplementation):
else:
# HACK: allow text nodes as children of the document node
if hasattr(self.dom, '_child_node_types'):
if not Node.TEXT_NODE in self.dom._child_node_types:
if Node.TEXT_NODE not in self.dom._child_node_types:
self.dom._child_node_types = list(self.dom._child_node_types)
self.dom._child_node_types.append(Node.TEXT_NODE)
self.dom.appendChild(self.dom.createTextNode(data))

View file

@ -10,8 +10,12 @@ returning an iterator generating tokens.
from __future__ import absolute_import, division, unicode_literals
__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshistream", "lxmletree",
"pulldom"]
import sys
from .. import constants
from ..utils import default_etree
treeWalkerCache = {}
@ -55,3 +59,89 @@ def getTreeWalker(treeType, implementation=None, **kwargs):
# XXX: NEVER cache here, caching is done in the etree submodule
return etree.getETreeModule(implementation, **kwargs).TreeWalker
return treeWalkerCache.get(treeType)
def concatenateCharacterTokens(tokens):
pendingCharacters = []
for token in tokens:
type = token["type"]
if type in ("Characters", "SpaceCharacters"):
pendingCharacters.append(token["data"])
else:
if pendingCharacters:
yield {"type": "Characters", "data": "".join(pendingCharacters)}
pendingCharacters = []
yield token
if pendingCharacters:
yield {"type": "Characters", "data": "".join(pendingCharacters)}
def pprint(walker):
"""Pretty printer for tree walkers"""
output = []
indent = 0
for token in concatenateCharacterTokens(walker):
type = token["type"]
if type in ("StartTag", "EmptyTag"):
# tag name
if token["namespace"] and token["namespace"] != constants.namespaces["html"]:
if token["namespace"] in constants.prefixes:
ns = constants.prefixes[token["namespace"]]
else:
ns = token["namespace"]
name = "%s %s" % (ns, token["name"])
else:
name = token["name"]
output.append("%s<%s>" % (" " * indent, name))
indent += 2
# attributes (sorted for consistent ordering)
attrs = token["data"]
for (namespace, localname), value in sorted(attrs.items()):
if namespace:
if namespace in constants.prefixes:
ns = constants.prefixes[namespace]
else:
ns = namespace
name = "%s %s" % (ns, localname)
else:
name = localname
output.append("%s%s=\"%s\"" % (" " * indent, name, value))
# self-closing
if type == "EmptyTag":
indent -= 2
elif type == "EndTag":
indent -= 2
elif type == "Comment":
output.append("%s<!-- %s -->" % (" " * indent, token["data"]))
elif type == "Doctype":
if token["name"]:
if token["publicId"]:
output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
(" " * indent,
token["name"],
token["publicId"],
token["systemId"] if token["systemId"] else ""))
elif token["systemId"]:
output.append("""%s<!DOCTYPE %s "" "%s">""" %
(" " * indent,
token["name"],
token["systemId"]))
else:
output.append("%s<!DOCTYPE %s>" % (" " * indent,
token["name"]))
else:
output.append("%s<!DOCTYPE >" % (" " * indent,))
elif type == "Characters":
output.append("%s\"%s\"" % (" " * indent, token["data"]))
elif type == "SpaceCharacters":
assert False, "concatenateCharacterTokens should have got rid of all Space tokens"
else:
raise ValueError("Unknown token type, %s" % type)
return "\n".join(output)

View file

@ -1,8 +1,8 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type, string_types
import gettext
_ = gettext.gettext
__all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN",
"TreeWalker", "NonRecursiveTreeWalker"]
from xml.dom import Node
@ -58,7 +58,7 @@ class TreeWalker(object):
"namespace": to_text(namespace),
"data": attrs}
if hasChildren:
yield self.error(_("Void element has children"))
yield self.error("Void element has children")
def startTag(self, namespace, name, attrs):
assert namespace is None or isinstance(namespace, string_types), type(namespace)
@ -122,7 +122,7 @@ class TreeWalker(object):
return {"type": "Entity", "name": text_type(name)}
def unknown(self, nodeType):
return self.error(_("Unknown node type: ") + nodeType)
return self.error("Unknown node type: " + nodeType)
class NonRecursiveTreeWalker(TreeWalker):

View file

@ -2,9 +2,6 @@ from __future__ import absolute_import, division, unicode_literals
from xml.dom import Node
import gettext
_ = gettext.gettext
from . import _base

View file

@ -7,12 +7,10 @@ except ImportError:
from ordereddict import OrderedDict
except ImportError:
OrderedDict = dict
import gettext
_ = gettext.gettext
import re
from six import text_type
from six import string_types
from . import _base
from ..utils import moduleFactoryFactory
@ -60,7 +58,7 @@ def getETreeBuilder(ElementTreeImplementation):
return _base.COMMENT, node.text
else:
assert type(node.tag) == text_type, type(node.tag)
assert isinstance(node.tag, string_types), type(node.tag)
# This is assumed to be an ordinary element
match = tag_regexp.match(node.tag)
if match:

View file

@ -4,9 +4,6 @@ from six import text_type
from lxml import etree
from ..treebuilders.etree import tag_regexp
from gettext import gettext
_ = gettext
from . import _base
from .. import ihatexml
@ -130,7 +127,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
def getNodeDetails(self, node):
if isinstance(node, tuple): # Text node
node, key = node
assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
return _base.TEXT, ensure_str(getattr(node, key))
elif isinstance(node, Root):
@ -169,7 +166,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
attrs, len(node) > 0 or node.text)
def getFirstChild(self, node):
assert not isinstance(node, tuple), _("Text nodes have no children")
assert not isinstance(node, tuple), "Text nodes have no children"
assert len(node) or node.text, "Node has no children"
if node.text:
@ -180,7 +177,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
def getNextSibling(self, node):
if isinstance(node, tuple): # Text node
node, key = node
assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
if key == "text":
# XXX: we cannot use a "bool(node) and node[0] or None" construct here
# because node[0] might evaluate to False if it has no child element
@ -196,7 +193,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
def getParentNode(self, node):
if isinstance(node, tuple): # Text node
node, key = node
assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
if key == "text":
return node
# else: fallback to "normal" processing

View file

@ -2,6 +2,8 @@ from __future__ import absolute_import, division, unicode_literals
from types import ModuleType
from six import text_type
try:
import xml.etree.cElementTree as default_etree
except ImportError:
@ -9,7 +11,26 @@ except ImportError:
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
"surrogatePairToCodepoint", "moduleFactoryFactory"]
"surrogatePairToCodepoint", "moduleFactoryFactory",
"supports_lone_surrogates"]
# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
# caught by the below test. In general this would be any platform
# using UTF-16 as its encoding of unicode strings, such as
# Jython. This is because UTF-16 itself is based on the use of such
# surrogates, and there is no mechanism to further escape such
# escapes.
try:
_x = eval('"\\uD800"')
if not isinstance(_x, text_type):
# We need this with u"" because of http://bugs.jython.org/issue2039
_x = eval('u"\\uD800"')
assert isinstance(_x, text_type)
except:
supports_lone_surrogates = False
else:
supports_lone_surrogates = True
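
The eval() dance exists only so this module still parses on UTF-16 builds, where a lone-surrogate literal is a syntax-level problem; the flag itself is queried at runtime. A minimal sketch:

    from html5lib.utils import supports_lone_surrogates

    if supports_lone_surrogates:
        # CPython: a lone surrogate round-trips through str
        assert len("\uD800") == 1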
class MethodDispatcher(dict):

View file

@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: ox
Version: 2.1.unknown
Version: 2.3.x
Summary: python-ox - the web in a dict
Home-page: http://code.0x2620.org/python-ox
Author: 0x2620

View file

@ -42,7 +42,7 @@ ox/django/api/urls.py
ox/django/api/views.py
ox/torrent/__init__.py
ox/torrent/bencode.py
ox/torrent/btformats.py
ox/torrent/bencode3.py
ox/torrent/makemetafile.py
ox/web/__init__.py
ox/web/abebooks.py
@ -74,6 +74,7 @@ ox/web/piratecinema.py
ox/web/rottentomatoes.py
ox/web/siteparser.py
ox/web/spiegel.py
ox/web/startpage.py
ox/web/thepiratebay.py
ox/web/torrent.py
ox/web/tv.py

View file

@ -1,162 +1,164 @@
../ox/image.py
../ox/location.py
../ox/__init__.py
../ox/api.py
../ox/cache.py
../ox/net.py
../ox/utils.py
../ox/jsonc.py
../ox/normalize.py
../ox/file.py
../ox/fixunicode.py
../ox/form.py
../ox/format.py
../ox/__init__.py
../ox/movie.py
../ox/text.py
../ox/geo.py
../ox/api.py
../ox/fixunicode.py
../ox/oembed.py
../ox/html.py
../ox/file.py
../ox/srt.py
../ox/js.py
../ox/image.py
../ox/iso.py
../ox/django/http.py
../ox/django/utils.py
../ox/django/monitor.py
../ox/js.py
../ox/jsonc.py
../ox/location.py
../ox/movie.py
../ox/net.py
../ox/normalize.py
../ox/oembed.py
../ox/srt.py
../ox/text.py
../ox/utils.py
../ox/django/__init__.py
../ox/django/middleware.py
../ox/django/decorators.py
../ox/django/fields.py
../ox/django/shortcuts.py
../ox/django/views.py
../ox/django/http.py
../ox/django/middleware.py
../ox/django/monitor.py
../ox/django/query.py
../ox/django/shortcuts.py
../ox/django/utils.py
../ox/django/views.py
../ox/django/widgets.py
../ox/django/api/__init__.py
../ox/django/api/actions.py
../ox/django/api/urls.py
../ox/django/api/views.py
../ox/django/api/actions.py
../ox/torrent/__init__.py
../ox/torrent/makemetafile.py
../ox/torrent/bencode.py
../ox/torrent/btformats.py
../ox/web/oxdb.py
../ox/web/lyricsfly.py
../ox/web/spiegel.py
../ox/web/allmovie.py
../ox/web/twitter.py
../ox/web/siteparser.py
../ox/web/ubu.py
../ox/web/epguides.py
../ox/torrent/bencode3.py
../ox/torrent/makemetafile.py
../ox/web/__init__.py
../ox/web/archive.py
../ox/web/freebase.py
../ox/web/vimeo.py
../ox/web/thepiratebay.py
../ox/web/auth.py
../ox/web/duckduckgo.py
../ox/web/flixter.py
../ox/web/rottentomatoes.py
../ox/web/criterion.py
../ox/web/lookupbyisbn.py
../ox/web/wikipedia.py
../ox/web/abebooks.py
../ox/web/allmovie.py
../ox/web/amazon.py
../ox/web/impawards.py
../ox/web/tv.py
../ox/web/dailymotion.py
../ox/web/movieposterdb.py
../ox/web/filmsdivision.py
../ox/web/arsenalberlin.py
../ox/web/youtube.py
../ox/web/google.py
../ox/web/itunes.py
../ox/web/piratecinema.py
../ox/web/opensubtitles.py
../ox/web/mininova.py
../ox/web/imdb.py
../ox/web/apple.py
../ox/web/torrent.py
../ox/web/archive.py
../ox/web/arsenalberlin.py
../ox/web/auth.py
../ox/web/criterion.py
../ox/web/dailymotion.py
../ox/web/duckduckgo.py
../ox/web/epguides.py
../ox/web/filmsdivision.py
../ox/web/flixter.py
../ox/web/freebase.py
../ox/web/google.py
../ox/web/imdb.py
../ox/web/impawards.py
../ox/web/itunes.py
../ox/web/lookupbyisbn.py
../ox/web/lyricsfly.py
../ox/web/metacritic.py
../ox/__pycache__/image.cpython-34.pyc
../ox/__pycache__/location.cpython-34.pyc
../ox/web/mininova.py
../ox/web/movieposterdb.py
../ox/web/opensubtitles.py
../ox/web/oxdb.py
../ox/web/piratecinema.py
../ox/web/rottentomatoes.py
../ox/web/siteparser.py
../ox/web/spiegel.py
../ox/web/startpage.py
../ox/web/thepiratebay.py
../ox/web/torrent.py
../ox/web/tv.py
../ox/web/twitter.py
../ox/web/ubu.py
../ox/web/vimeo.py
../ox/web/wikipedia.py
../ox/web/youtube.py
../ox/__pycache__/__init__.cpython-34.pyc
../ox/__pycache__/api.cpython-34.pyc
../ox/__pycache__/cache.cpython-34.pyc
../ox/__pycache__/net.cpython-34.pyc
../ox/__pycache__/utils.cpython-34.pyc
../ox/__pycache__/jsonc.cpython-34.pyc
../ox/__pycache__/normalize.cpython-34.pyc
../ox/__pycache__/file.cpython-34.pyc
../ox/__pycache__/fixunicode.cpython-34.pyc
../ox/__pycache__/form.cpython-34.pyc
../ox/__pycache__/format.cpython-34.pyc
../ox/__pycache__/__init__.cpython-34.pyc
../ox/__pycache__/movie.cpython-34.pyc
../ox/__pycache__/text.cpython-34.pyc
../ox/__pycache__/geo.cpython-34.pyc
../ox/__pycache__/api.cpython-34.pyc
../ox/__pycache__/fixunicode.cpython-34.pyc
../ox/__pycache__/oembed.cpython-34.pyc
../ox/__pycache__/html.cpython-34.pyc
../ox/__pycache__/file.cpython-34.pyc
../ox/__pycache__/srt.cpython-34.pyc
../ox/__pycache__/js.cpython-34.pyc
../ox/__pycache__/image.cpython-34.pyc
../ox/__pycache__/iso.cpython-34.pyc
../ox/django/__pycache__/http.cpython-34.pyc
../ox/django/__pycache__/utils.cpython-34.pyc
../ox/django/__pycache__/monitor.cpython-34.pyc
../ox/__pycache__/js.cpython-34.pyc
../ox/__pycache__/jsonc.cpython-34.pyc
../ox/__pycache__/location.cpython-34.pyc
../ox/__pycache__/movie.cpython-34.pyc
../ox/__pycache__/net.cpython-34.pyc
../ox/__pycache__/normalize.cpython-34.pyc
../ox/__pycache__/oembed.cpython-34.pyc
../ox/__pycache__/srt.cpython-34.pyc
../ox/__pycache__/text.cpython-34.pyc
../ox/__pycache__/utils.cpython-34.pyc
../ox/django/__pycache__/__init__.cpython-34.pyc
../ox/django/__pycache__/middleware.cpython-34.pyc
../ox/django/__pycache__/decorators.cpython-34.pyc
../ox/django/__pycache__/fields.cpython-34.pyc
../ox/django/__pycache__/shortcuts.cpython-34.pyc
../ox/django/__pycache__/views.cpython-34.pyc
../ox/django/__pycache__/http.cpython-34.pyc
../ox/django/__pycache__/middleware.cpython-34.pyc
../ox/django/__pycache__/monitor.cpython-34.pyc
../ox/django/__pycache__/query.cpython-34.pyc
../ox/django/__pycache__/shortcuts.cpython-34.pyc
../ox/django/__pycache__/utils.cpython-34.pyc
../ox/django/__pycache__/views.cpython-34.pyc
../ox/django/__pycache__/widgets.cpython-34.pyc
../ox/django/api/__pycache__/__init__.cpython-34.pyc
../ox/django/api/__pycache__/actions.cpython-34.pyc
../ox/django/api/__pycache__/urls.cpython-34.pyc
../ox/django/api/__pycache__/views.cpython-34.pyc
../ox/django/api/__pycache__/actions.cpython-34.pyc
../ox/torrent/__pycache__/__init__.cpython-34.pyc
../ox/torrent/__pycache__/makemetafile.cpython-34.pyc
../ox/torrent/__pycache__/bencode.cpython-34.pyc
../ox/torrent/__pycache__/btformats.cpython-34.pyc
../ox/web/__pycache__/oxdb.cpython-34.pyc
../ox/web/__pycache__/lyricsfly.cpython-34.pyc
../ox/web/__pycache__/spiegel.cpython-34.pyc
../ox/web/__pycache__/allmovie.cpython-34.pyc
../ox/web/__pycache__/twitter.cpython-34.pyc
../ox/web/__pycache__/siteparser.cpython-34.pyc
../ox/web/__pycache__/ubu.cpython-34.pyc
../ox/web/__pycache__/epguides.cpython-34.pyc
../ox/torrent/__pycache__/bencode3.cpython-34.pyc
../ox/torrent/__pycache__/makemetafile.cpython-34.pyc
../ox/web/__pycache__/__init__.cpython-34.pyc
../ox/web/__pycache__/archive.cpython-34.pyc
../ox/web/__pycache__/freebase.cpython-34.pyc
../ox/web/__pycache__/vimeo.cpython-34.pyc
../ox/web/__pycache__/thepiratebay.cpython-34.pyc
../ox/web/__pycache__/auth.cpython-34.pyc
../ox/web/__pycache__/duckduckgo.cpython-34.pyc
../ox/web/__pycache__/flixter.cpython-34.pyc
../ox/web/__pycache__/rottentomatoes.cpython-34.pyc
../ox/web/__pycache__/criterion.cpython-34.pyc
../ox/web/__pycache__/lookupbyisbn.cpython-34.pyc
../ox/web/__pycache__/wikipedia.cpython-34.pyc
../ox/web/__pycache__/abebooks.cpython-34.pyc
../ox/web/__pycache__/allmovie.cpython-34.pyc
../ox/web/__pycache__/amazon.cpython-34.pyc
../ox/web/__pycache__/impawards.cpython-34.pyc
../ox/web/__pycache__/tv.cpython-34.pyc
../ox/web/__pycache__/dailymotion.cpython-34.pyc
../ox/web/__pycache__/movieposterdb.cpython-34.pyc
../ox/web/__pycache__/filmsdivision.cpython-34.pyc
../ox/web/__pycache__/arsenalberlin.cpython-34.pyc
../ox/web/__pycache__/youtube.cpython-34.pyc
../ox/web/__pycache__/google.cpython-34.pyc
../ox/web/__pycache__/itunes.cpython-34.pyc
../ox/web/__pycache__/piratecinema.cpython-34.pyc
../ox/web/__pycache__/opensubtitles.cpython-34.pyc
../ox/web/__pycache__/mininova.cpython-34.pyc
../ox/web/__pycache__/imdb.cpython-34.pyc
../ox/web/__pycache__/apple.cpython-34.pyc
../ox/web/__pycache__/torrent.cpython-34.pyc
../ox/web/__pycache__/archive.cpython-34.pyc
../ox/web/__pycache__/arsenalberlin.cpython-34.pyc
../ox/web/__pycache__/auth.cpython-34.pyc
../ox/web/__pycache__/criterion.cpython-34.pyc
../ox/web/__pycache__/dailymotion.cpython-34.pyc
../ox/web/__pycache__/duckduckgo.cpython-34.pyc
../ox/web/__pycache__/epguides.cpython-34.pyc
../ox/web/__pycache__/filmsdivision.cpython-34.pyc
../ox/web/__pycache__/flixter.cpython-34.pyc
../ox/web/__pycache__/freebase.cpython-34.pyc
../ox/web/__pycache__/google.cpython-34.pyc
../ox/web/__pycache__/imdb.cpython-34.pyc
../ox/web/__pycache__/impawards.cpython-34.pyc
../ox/web/__pycache__/itunes.cpython-34.pyc
../ox/web/__pycache__/lookupbyisbn.cpython-34.pyc
../ox/web/__pycache__/lyricsfly.cpython-34.pyc
../ox/web/__pycache__/metacritic.cpython-34.pyc
../ox/web/__pycache__/mininova.cpython-34.pyc
../ox/web/__pycache__/movieposterdb.cpython-34.pyc
../ox/web/__pycache__/opensubtitles.cpython-34.pyc
../ox/web/__pycache__/oxdb.cpython-34.pyc
../ox/web/__pycache__/piratecinema.cpython-34.pyc
../ox/web/__pycache__/rottentomatoes.cpython-34.pyc
../ox/web/__pycache__/siteparser.cpython-34.pyc
../ox/web/__pycache__/spiegel.cpython-34.pyc
../ox/web/__pycache__/startpage.cpython-34.pyc
../ox/web/__pycache__/thepiratebay.cpython-34.pyc
../ox/web/__pycache__/torrent.cpython-34.pyc
../ox/web/__pycache__/tv.cpython-34.pyc
../ox/web/__pycache__/twitter.cpython-34.pyc
../ox/web/__pycache__/ubu.cpython-34.pyc
../ox/web/__pycache__/vimeo.cpython-34.pyc
../ox/web/__pycache__/wikipedia.cpython-34.pyc
../ox/web/__pycache__/youtube.cpython-34.pyc
./
dependency_links.txt
PKG-INFO
SOURCES.txt
top_level.txt
requires.txt
dependency_links.txt
top_level.txt
SOURCES.txt

View file

@ -5,7 +5,7 @@ try:
from . import __version
__version__ = __version.VERSION
except:
__version__ = '2.1.x'
__version__ = '2.3.x'
from . import cache
from . import js

View file

@ -1 +0,0 @@
VERSION="2.1.670"

View file

@ -52,6 +52,8 @@ class API(object):
def _add_action(self, action):
def method(self, *args, **kw):
if args and kw:
raise ValueError('pass either a dictionary or kwargs, not both')
if not kw:
if args:
kw = args[0]

View file

@ -62,6 +62,9 @@ def get_headers(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
store.set(url, data, -1, url_headers)
return url_headers
def get_json(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
return json.loads(read_url(url, data, headers, timeout).decode('utf-8'))
class InvalidResult(Exception):
"""Base class for exceptions in this module."""
def __init__(self, result, headers):
@ -113,10 +116,12 @@ def read_url(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, val
result = result.decode(encoding)
return result
get_url=read_url
def save_url(url, filename, overwrite=False):
if not os.path.exists(filename) or overwrite:
dirname = os.path.dirname(filename)
if not os.path.exists(dirname):
if dirname and not os.path.exists(dirname):
os.makedirs(dirname)
data = read_url(url)
with open(filename, 'wb') as f:

View file

@ -10,7 +10,7 @@ from ..shortcuts import render_to_json_response, json_response
from ...utils import json
def autodiscover():
#register api actions from all installed apps
# Register api actions from all installed apps
from django.utils.importlib import import_module
from django.utils.module_loading import module_has_submodule
for app in settings.INSTALLED_APPS:
@ -53,31 +53,24 @@ class ApiActions(dict):
versions = {}
def __init__(self):
def api(request):
def api(request, data):
'''
returns list of all known api actions
param data {
docs: bool
}
if docs is true, action properties contain docstrings
return {
status: {'code': int, 'text': string},
data: {
actions: {
'api': {
cache: true,
doc: 'recursion'
},
'hello': {
cache: true,
..
}
...
}
}
Returns a list of all api actions
takes {
code: boolean, // if true, return source code (optional)
docs: boolean // if true, return doc strings (optional)
}
returns {
actions: {
name: {
cache: boolean, // if false, don't cache results
code: string, // source code
doc: string // doc strings
},
... // more actions
}
}
'''
data = json.loads(request.POST.get('data', '{}'))
docs = data.get('docs', False)
code = data.get('code', False)
version = getattr(request, 'version', None)
@ -134,9 +127,9 @@ class ApiActions(dict):
actions = ApiActions()
def error(request):
def error(request, data):
'''
this action is used to test api error codes, it should return a 503 error
This action is used to test API error codes. It should return a 503 error.
'''
success = error_is_success
return render_to_json_response({})

View file

@ -2,6 +2,8 @@
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division, with_statement
import json
from django.shortcuts import render_to_response
from django.template import RequestContext
from django.conf import settings
@ -16,7 +18,9 @@ def api(request):
'text': 'use POST'}})
response['Access-Control-Allow-Origin'] = '*'
return response
if not 'action' in request.POST:
if request.META['REQUEST_METHOD'] != "POST" or (
'action' not in request.POST and request.META.get('CONTENT_TYPE') != 'application/json'
):
methods = actions.keys()
api = []
for f in sorted(methods):
@ -28,14 +32,20 @@ def api(request):
'sitename': settings.SITENAME
})
return render_to_response('api.html', context)
action = request.POST['action']
if request.META.get('CONTENT_TYPE') == 'application/json':
r = json.loads(request.body)
action = r['action']
data = r.get('data', {})
else:
action = request.POST['action']
data = json.loads(request.POST.get('data', '{}'))
version = getattr(request, 'version', None)
if version:
f = actions.versions.get(version, {}).get(action, actions.get(action))
else:
f = actions.get(action)
if f:
response = f(request)
response = f(request, data)
else:
response = render_to_json_response(json_response(status=400,
text='Unknown action %s' % action))
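
With this change a client can POST one JSON document instead of form-encoded action/data fields. A minimal sketch using requests (host and mount point are assumptions):

    import json
    import requests

    r = requests.post('http://localhost:8000/api/',
                      data=json.dumps({'action': 'api', 'data': {'docs': True}}),
                      headers={'Content-Type': 'application/json'})
    print(r.json()['data']['actions'])     # name -> {cache, doc, ...}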

View file

@ -5,6 +5,7 @@ import datetime
from django.db import models
from django.utils import datetime_safe
from six import string_types
from ox.utils import json
@ -66,7 +67,7 @@ class DictField(models.TextField):
"""Convert our JSON object to a string before we save"""
if value == None:
return value
if isinstance(value, basestring):
if isinstance(value, string_types):
value = eval(value)
assert isinstance(value, dict)
value = json.dumps(value, default=to_json)
@ -92,7 +93,7 @@ class TupleField(models.TextField):
def get_db_prep_save(self, value, connection):
"""Convert our JSON object to a string before we save"""
if isinstance(value, basestring):
if isinstance(value, string_types):
value = eval(value)
if isinstance(value, list):
value = tuple(value)

View file

@ -3,7 +3,7 @@
import os
import mimetypes
from datetime import datetime, timedelta
from urllib import quote
from six.moves.urllib.parse import quote
from django.http import HttpResponse, Http404
from django.conf import settings
@ -26,14 +26,14 @@ def HttpFileResponse(path, content_type=None, filename=None):
url = getattr(settings, PREFIX+'_URL', '')
if root and path.startswith(root):
path = url + path[len(root)+1:]
if isinstance(path, unicode):
if not isinstance(path, bytes):
path = path.encode('utf-8')
response['X-Accel-Redirect'] = path
if content_type:
response['Content-Type'] = content_type
elif getattr(settings, 'XSENDFILE', False):
response = HttpResponse()
if isinstance(path, unicode):
if not isinstance(path, bytes):
path = path.encode('utf-8')
response['X-Sendfile'] = path
if content_type:
@ -42,7 +42,7 @@ def HttpFileResponse(path, content_type=None, filename=None):
else:
response = HttpResponse(open(path), content_type=content_type)
if filename:
if isinstance(filename, unicode):
if not isinstance(filename, bytes):
filename = filename.encode('utf-8')
response['Content-Disposition'] = "attachment; filename*=UTF=8''%s" % quote(filename)

View file

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from shortcuts import HttpErrorJson, render_to_json_response
from .shortcuts import HttpErrorJson, render_to_json_response
class ExceptionMiddleware(object):
def process_exception(self, request, exception):

View file

@ -1,26 +1,27 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
import os
import sys
import time
import signal
import threading
import atexit
import Queue
from six.moves.queue import Queue
_interval = 1.0
_times = {}
_files = []
_running = False
_queue = Queue.Queue()
_queue = Queue()
_lock = threading.Lock()
def _restart(path):
_queue.put(True)
prefix = 'monitor (pid=%d):' % os.getpid()
print >> sys.stderr, '%s Change detected to \'%s\'.' % (prefix, path)
print >> sys.stderr, '%s Triggering process restart.' % prefix
print('%s Change detected to \'%s\'.' % (prefix, path), file=sys.stderr)
print('%s Triggering process restart.' % prefix, file=sys.stderr)
os.kill(os.getpid(), signal.SIGINT)
def _modified(path):
@ -59,7 +60,7 @@ def _monitor():
while 1:
# Check modification times on all files in sys.modules.
for module in sys.modules.values():
for module in list(sys.modules.values()):
if not hasattr(module, '__file__'):
continue
path = getattr(module, '__file__')

View file

@ -1,8 +1,8 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import cookielib
import urllib2
from StringIO import StringIO
from six import StringIO, PY2
from six.moves import urllib
from six.moves import http_cookiejar as cookielib
from celery.utils import get_full_cls_name
from celery.backends import default_backend
@ -49,15 +49,15 @@ def api_proxy(request):
cj = SessionCookieJar()
if 'cj' in request.session:
cj.load(request.session['cj'])
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
opener.addheaders = [
('User-Agent', request.META.get('HTTP_USER_AGENT'))
]
form = ox.MultiPartForm()
for key in request.POST:
form.add_field(key, request.POST[key])
r = urllib2.Request(url)
body = str(form)
r = urllib.request.Request(url)
body = form.body()
r.add_header('Content-type', form.get_content_type())
r.add_header('Content-length', len(body))
r.add_data(body)

View file

@ -9,6 +9,7 @@ import shutil
import struct
import subprocess
import sqlite3
from distutils.spawn import find_executable
from .utils import json
@ -47,7 +48,7 @@ def _get_file_cache():
path = path[3:]
return os.path.join(path, 'files.sqlite')
def cache(filename, type='oshash'):
def cache(filename, type='oshash', update=False):
conn = sqlite3.connect(_get_file_cache(), timeout=10)
conn.row_factory = sqlite3.Row
@ -67,11 +68,12 @@ def cache(filename, type='oshash'):
info = ''
for row in c:
if stat.st_size == row['size'] and int(stat.st_mtime) == int(row['mtime']):
value = row[type]
if value:
if type == 'info':
value = json.loads(value)
return value
if not update:
value = row[type]
if value:
if type == 'info':
value = json.loads(value)
return value
h = row['oshash']
sha1 = row['sha1']
info = row['info']
@ -154,6 +156,8 @@ def avinfo(filename, cached=True):
if cached:
return cache(filename, 'info')
if os.path.getsize(filename):
if find_executable('ffprobe'):
return ffprobe(filename)
ffmpeg2theora = cmd('ffmpeg2theora')
p = subprocess.Popen([ffmpeg2theora], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
info, error = p.communicate()
@ -219,62 +223,71 @@ def ffprobe(filename):
return value
info = {}
for key in ('duration', 'size', 'bit_rate'):
info[{
'bit_rate': 'bitrate'
}.get(key, key)] = fix_value(key, ffinfo['format'][key])
info['audio'] = []
info['video'] = []
info['metadata'] = ffinfo['format'].get('tags', {})
for s in ffinfo['streams']:
tags = s.pop('tags', {})
language = None
for t in tags:
if t == 'language':
language = tags[t]
else:
info['metadata'][t] = tags[t]
if s.get('codec_type') in ('audio', 'video'):
stream = {}
if language and language != 'und':
stream['language'] = language
keys = [
'codec_name',
'width',
'height',
'bit_rate',
'index',
'display_aspect_ratio',
'sample_rate',
'channels',
]
if s['codec_type'] == 'video':
keys += [
'sample_aspect_ratio',
'r_frame_rate',
'pix_fmt',
if 'format' not in ffinfo:
info['error'] = 'badfile'
else:
for key in ('duration', 'size', 'bit_rate'):
if key in ffinfo['format']:
info[{
'bit_rate': 'bitrate'
}.get(key, key)] = fix_value(key, ffinfo['format'][key])
info['audio'] = []
info['video'] = []
info['metadata'] = ffinfo['format'].get('tags', {})
for s in ffinfo['streams']:
tags = s.pop('tags', {})
language = None
for t in tags:
if t == 'language':
language = tags[t]
else:
info['metadata'][t] = tags[t]
if s.get('codec_type') in ('audio', 'video'):
stream = {}
if language and language != 'und':
stream['language'] = language
keys = [
'codec_name',
'width',
'height',
'bit_rate',
'index',
'display_aspect_ratio',
'sample_rate',
'channels',
]
if s['codec_type'] == 'video':
keys += [
'sample_aspect_ratio',
'r_frame_rate',
'pix_fmt',
]
for key in keys:
if key in s:
stream[{
'codec_name': 'codec',
'bit_rate': 'bitrate',
'index': 'id',
'r_frame_rate': 'framerate',
'sample_rate': 'samplerate',
'pix_fmt': 'pixel_format',
}.get(key, key)] = fix_value(key, s[key])
info[s['codec_type']].append(stream)
else:
pass
#print s
for v in info['video']:
if not 'display_aspect_ratio' in v and 'width' in v:
v['display_aspect_ratio'] = '%d:%d' % (v['width'], v['height'])
v['pixel_aspect_ratio'] = '1:1'
for key in keys:
if key in s:
stream[{
'codec_name': 'codec',
'bit_rate': 'bitrate',
'index': 'id',
'r_frame_rate': 'framerate',
'sample_rate': 'samplerate',
'pix_fmt': 'pixel_format',
'sample_aspect_ratio': 'pixel_aspect_ratio',
}.get(key, key)] = fix_value(key, s[key])
info[s['codec_type']].append(stream)
else:
pass
#print s
for v in info['video']:
k = 'display_aspect_ratio'
if k not in v and 'width' in v \
or (k in v and v[k] == '0:1'):
v[k] = '%d:%d' % (v['width'], v['height'])
v['pixel_aspect_ratio'] = '1:1'
info['oshash'] = oshash(filename)
info['path'] = os.path.basename(filename)
info['path'] = filename
if 'size' not in info:
info['size'] = os.path.getsize(filename)
return info
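
avinfo (above) now prefers ffprobe whenever it is on PATH, falling back to ffmpeg2theora otherwise, and cache() grew an update flag to force re-probing of stale rows. A minimal sketch (filename illustrative):

    from ox.file import avinfo

    info = avinfo('clip.mkv', cached=False)    # probe directly, bypass the sqlite cache
    print(info.get('duration'), [v.get('codec') for v in info.get('video', [])])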
def makedirs(path):

View file

@ -6,7 +6,7 @@ from __future__ import print_function
import unicodedata
from six import unichr
from six import unichr, PY3
__all__ = ['fix_bad_unicode']
@ -75,7 +75,7 @@ def fix_bad_unicode(text):
>>> fix_bad_unicode('This text was never Unicode at all\x85')
'This text was never Unicode at all…'
"""
if not isinstance(text, str):
if isinstance(text, bytes):
raise TypeError("This isn't even decoded into Unicode yet. "
"Decode it first.")
if len(text) == 0:
@ -151,7 +151,10 @@ def text_badness(text):
- Improbable single-byte characters, such as ƒ or ¬
- Letters in somewhat rare scripts
'''
assert isinstance(text, str)
if PY3:
assert isinstance(text, str)
else:
assert isinstance(text, unicode)
errors = 0
very_weird_things = 0
weird_things = 0

View file

@ -68,7 +68,7 @@ class MultiPartForm(object):
return body
def body(self):
"""Return a string representing the form data, including attached files."""
"""Return a byte string representing the form data, including attached files."""
# Build a list of lists, each containing "lines" of the
# request. Each part is separated by a boundary string.
# Once the list is built, return a string where each

View file

@ -30,6 +30,8 @@ def toAZ(num):
az = digits[r] + az
return az
encode_base26=toAZ
def fromAZ(num):
"""
Converts a bijective base 26 string to an integer
@ -71,6 +73,8 @@ def to26(q):
converted.insert(0, l)
return "".join(converted) or 'A'
decode_base26=fromAZ
def from26(q):
"""
Converts an base 26 string to an integer
@ -402,6 +406,37 @@ def format_duration(ms, verbosity=0, years=True, hours=True, milliseconds=True):
duration = ' '.join(durations)
return duration
def format_timecode(seconds):
'''
>>> format_timecode(3599.999)
'00:59:59.999'
'''
seconds = float(seconds)
d = int(seconds / 86400)
h = int(seconds % 86400 / 3600)
m = int(seconds % 3600 / 60)
s = float(seconds % 60)
duration = "%s%02d:%02d:%06.3f" % ('%d:' % d if d else '', h, m, s)
return duration
def parse_timecode(string):
'''
Takes a formatted timecode, returns seconds
>>> parse_timecode('1:02:03:04.05')
93784.05
>>> parse_timecode('3')
3.0
>>> parse_timecode('2:')
120.0
>>> parse_timecode('1::')
3600.0
'''
timecode = 0
for i, v in enumerate(list(reversed(string.split(':')))[:4]):
timecode += float(v or 0) * (86400 if i == 3 else pow(60, i))  # empty segments ('2:') count as 0
return timecode
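
The two helpers above are inverses. A minimal sketch:

    from ox.format import format_timecode, parse_timecode

    s = format_timecode(93784.05)              # '1:02:03:04.050'
    assert abs(parse_timecode(s) - 93784.05) < 1e-9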
def ms2runtime(ms, shortenLong=False):
# deprecated - use format_duration
'''

View file

@ -259,6 +259,10 @@ def sanitize_html(html, tags=None, global_attributes=[]):
{'name': 'li'},
{'name': 'ol'},
{'name': 'ul'},
# definition lists
{'name': 'dl'},
{'name': 'dt'},
{'name': 'dd'},
# tables
{'name': 'table'},
{'name': 'tbody'},

View file

@ -25,7 +25,13 @@ def drawText(image, position, text, font_file, font_size, color):
draw = ImageDraw.Draw(image)
font = ImageFont.truetype(font_file, font_size, encoding='unic')
draw.text(position, text, fill=color, font=font)
return draw.textsize(text, font=font)
size = draw.textsize(text, font=font)
version = getattr(Image, 'PILLOW_VERSION', None)
if version and version > '2.1.0' and version < '2.6.1':
offset = font.getoffset(text)
else:
offset = (0, 0)
return (size[0] + offset[0], size[1] + offset[1])
def getHSL(rgb):
rgb = [x / 255 for x in rgb]
@ -141,7 +147,13 @@ def getRGB(hsl):
def getTextSize(image, text, font_file, font_size):
draw = ImageDraw.Draw(image)
font = ImageFont.truetype(font_file, font_size, encoding='unic')
return draw.textsize(text, font=font)
size = draw.textsize(text, font=font)
version = getattr(Image, 'PILLOW_VERSION', None)
if version and version > '2.1.0' and version < '2.6.1':
offset = font.getoffset(text)
else:
offset = (0, 0)
return (size[0] + offset[0], size[1] + offset[1])
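
Both text helpers now compensate for the glyph-offset behaviour of the Pillow versions caught by the check above (between 2.1.0 and 2.6.1). A minimal sketch of measuring a string (the font path is an assumption):

    from PIL import Image
    from ox.image import getTextSize

    image = Image.new('RGB', (1, 1))
    width, height = getTextSize(image, 'Hello',
                                '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', 24)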
def wrapText(text, max_width, max_lines, font_file, font_size):
# wraps text to max_width and max_lines

View file

@ -29,7 +29,7 @@ def format_path(data, directory_key='director'):
director = data['directorSort'] or ['Unknown Director']
title = data['seriesTitle' if data['isEpisode'] else 'title'] or 'Untitled'
year = data['seriesYear' if data['isEpisode'] else 'year'] or None
parts = map(format_underscores, filter(lambda x: x != None, [
parts = list(map(format_underscores, filter(lambda x: x != None, [
u'; '.join(director[:10]),
u'%s%s' % (title, u' (%s)' % year if year else ''),
u'%s%s%s%s%s%s' % (
@ -40,7 +40,7 @@ def format_path(data, directory_key='director'):
u'.%s' % data['language'] if data['language'] else '',
u'.%s' % data['extension'] if data['extension'] else ''
)
]))
])))
if data.get('subdirectory'):
parts.insert(-1, data['subdirectory'])
return unicodedata.normalize('NFD', u'/'.join(parts))
@ -188,8 +188,6 @@ def parse_path(path, directory_key='director'):
# TODO: '.com.avi'
'''
def parse_title(string):
return title, year
def parse_type(string):
for type in EXTENSIONS:
if string in EXTENSIONS[type]:
@ -210,7 +208,7 @@ def parse_path(path, directory_key='director'):
string = re.sub('(?<=\w)_ ', ': ', string)
return string
data = {}
parts = map(lambda x: parse_underscores(x.strip()), path.split('/'))
parts = list(map(lambda x: parse_underscores(x.strip()), unicodedata.normalize('NFD', path).split('/')))
# subdirectory
if len(parts) > 4:
data['subdirectory'] = '/'.join(parts[3:-1])
@ -226,14 +224,14 @@ def parse_path(path, directory_key='director'):
# directorSort, director
data['directorSort'] = data['director'] = []
if director:
data['directorSort'] = filter(
data['directorSort'] = list(filter(
lambda x: x != 'Unknown Director',
director.split('; ')
)
data['director'] = map(
))
data['director'] = list(map(
lambda x: ' '.join(reversed(x.split(', '))),
data['directorSort']
)
))
# title, year
data['title'] = data['year'] = None
if title:
@ -327,7 +325,7 @@ def parse_movie_path(path):
"""
episodeTitle = episodeYear = seriesTitle = None
episodeDirector = []
parts = path.split('/')
parts = unicodedata.normalize('NFD', path).split('/')
#title/year
if len(parts) == 4:

View file

@ -2,20 +2,21 @@
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2008
from __future__ import with_statement, print_function
import os
import gzip
import json
import os
import re
from six import BytesIO, PY3
import struct
from six.moves import urllib
from six import BytesIO, PY3
from six.moves import urllib
from chardet.universaldetector import UniversalDetector
DEBUG = False
# Default headers for HTTP requests.
DEFAULT_HEADERS = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:28.0) Gecko/20100101 Firefox/28.0',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-us,en;q=0.5',
@ -47,9 +48,16 @@ def get_headers(url, data=None, headers=DEFAULT_HEADERS):
headers = e.headers
return dict(headers)
def get_json(url, data=None, headers=DEFAULT_HEADERS):
return json.loads(read_url(url, data, headers).decode('utf-8'))
def open_url(url, data=None, headers=DEFAULT_HEADERS):
if isinstance(url, bytes):
url = url.decode('utf-8')
if PY3:
if isinstance(url, bytes):
url = url.decode('utf-8')
else:
if not isinstance(url, bytes):
url = url.encode('utf-8')
url = url.replace(' ', '%20')
if data and PY3 and not isinstance(data, bytes):
data = data.encode('utf-8')
@ -100,10 +108,12 @@ def detect_encoding(data):
detector.close()
return detector.result['encoding']
get_url=read_url
def save_url(url, filename, overwrite=False):
if not os.path.exists(filename) or overwrite:
dirname = os.path.dirname(filename)
if not os.path.exists(dirname):
if dirname and not os.path.exists(dirname):
os.makedirs(dirname)
data = read_url(url)
with open(filename, 'wb') as f:
@ -135,8 +145,9 @@ def oshash(url):
if filesize > 65536:
tail = get_range(url, filesize-65536, filesize)
if filesize < 65536:
for offset in range(0, filesize, bytesize):
buffer = head[offset:offset+bytesize]
f = BytesIO(head)
for x in range(int(filesize/bytesize)):
buffer = f.read(bytesize)
(l_value,)= struct.unpack(longlongformat, buffer)
hash += l_value
hash = hash & 0xFFFFFFFFFFFFFFFF #cut off 64bit overflow
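
The dirname guard in save_url (above) matters for bare filenames: os.path.dirname('index.html') is '', and os.makedirs('') raises. A minimal sketch (URL illustrative, network access assumed):

    from ox.net import save_url

    save_url('http://example.com/', 'index.html')   # writes into the cwd, no makedirs('')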

View file

@ -87,6 +87,7 @@ UA_REGEXPS = {
'(Camino)\/(\d+)',
'(Chimera)\/(\d+)',
'(chromeframe)\/(\d+)',
'(Edge)\/(\d+)',
'(Epiphany)\/(\d+)', # before Chrome, Chromium and Safari
'(Chromium)\/(\d+)', # before Chrome
'(Chrome)\/(\d+)',
@ -178,6 +179,7 @@ UA_VERSIONS = {
'10.8': '10.8 (Mountain Lion)',
'10.9': '10.9 (Mavericks)',
'10.10': '10.10 (Yosemite)',
'10.11': '10.11 (El Capitan)',
'40': 'Series 40',
'60': 'Series 60',
'NT 3.1': 'NT 3.1 (3.1)',
@ -192,6 +194,7 @@ UA_VERSIONS = {
'NT 6.1': 'NT 6.1 (7)',
'NT 6.2': 'NT 6.2 (8)',
'NT 6.3': 'NT 6.3 (8.1)',
'NT 6.4': 'NT 6.4 (10)',
'16': 'NT 3.1 (3.1)',
'3.1': 'NT 3.1 (3.1)',
'95': 'NT 4.0 (95)',
@ -254,6 +257,8 @@ def get_sort_name(name):
last_names = []
if re.search('^[0-9]+$', first_names[-1]):
add_name()
if re.search('[(\[].+?[)\]]$', first_names[-1]):
add_name()
if find_name(SUFFIXES):
add_name()
add_name()

View file

@ -5,15 +5,19 @@
from threading import Event
from hashlib import sha1
import os
from six import PY2
from .bencode import bencode, bdecode
if PY2:
from .bencode import bencode, bdecode
else:
from .bencode3 import bencode, bdecode
__all__ = ['create_torrent', 'get_info_hash', 'get_torrent_info', 'get_files', 'get_torrent_size']
def create_torrent(file, url, params = {}, flag = Event(),
progress = lambda x: None, progress_percent = 1):
"Creates a torrent for a given file, using url as tracker url"
from makemetafile import make_meta_file
from .makemetafile import make_meta_file
return make_meta_file(file, url, params, flag, progress, progress_percent)
def get_info_hash(torrentFile):

View file

@ -0,0 +1,151 @@
##
#
# bencode.py python3 compatible bencode / bdecode
#
##
def _decode_int(data):
"""
decode integer from bytearray
return int, remaining data
"""
data = data[1:]
end = data.index(b'e')
return int(data[:end],10), data[end+1:]
def _decode_str(data):
"""
decode string from bytearray
return string, remaining data
"""
start = data.index(b':')
l = int(data[:start].decode(),10)
if l <= 0:
raise Exception('invalid string size: %d' % l)
start += 1
ret = bytes(data[start:start+l])
data = data[start+l:]
return ret, data
def _decode_list(data):
"""
decode list from bytearray
return list, remaining data
"""
ls = []
data = data[1:]
while data[0] != ord(b'e'):
elem, data = _decode(data)
ls.append(elem)
return ls, data[1:]
def _decode_dict(data):
"""
decode dict from bytearray
return dict, remaining data
"""
d = {}
data = data[1:]
while data[0] != ord(b'e'):
k, data = _decode_str(data)
v, data = _decode(data)
d[k.decode()] = v
return d, data[1:]
def _decode(data):
"""
decode a bytearray
return deserialized object, remaining data
"""
ch = chr(data[0])
if ch == 'l':
return _decode_list(data)
elif ch == 'i':
return _decode_int(data)
elif ch == 'd':
return _decode_dict(data)
elif ch.isdigit():
return _decode_str(data)
else:
raise Exception('could not deserialize data: %s'%data)
def bdecode(data):
"""
decode a bytearray
return deserialized object
"""
obj , data = _decode(data)
if len(data) > 0:
raise Exception('failed to deserialize, extra data: %s'%data)
return obj
def _encode_str(s,buff):
"""
encode string to a buffer
"""
s = bytearray(s)
l = len(s)
buff.append(bytearray(str(l)+':','utf-8'))
buff.append(s)
def _encode_int(i,buff):
"""
encode integer to a buffer
"""
buff.append(b'i')
buff.append(bytearray(str(i),'ascii'))
buff.append(b'e')
def _encode_list(l,buff):
"""
encode list of elements to a buffer
"""
buff.append(b'l')
for i in l:
_encode(i,buff)
buff.append(b'e')
def _encode_dict(d,buff):
"""
encode dict
"""
buff.append(b'd')
l = list(d.keys())
l.sort()
for k in l:
_encode(str(k),buff)
_encode(d[k],buff)
buff.append(b'e')
def _encode(obj,buff):
"""
encode element obj to a buffer buff
"""
if isinstance(obj,str):
_encode_str(bytearray(obj,'utf-8'),buff)
elif isinstance(obj,bytes):
_encode_str(bytearray(obj),buff)
elif isinstance(obj,bytearray):
_encode_str(obj,buff)
elif str(obj).isdigit():
_encode_int(obj,buff)
elif isinstance(obj,list):
_encode_list(obj,buff)
elif hasattr(obj,'keys') and hasattr(obj,'values'):
_encode_dict(obj,buff)
elif str(obj) in ['True','False']:
_encode_int(int(obj and '1' or '0'),buff)
else:
raise Exception('non serializable object: %s'%obj)
def bencode(obj):
"""
bencode element, return bytearray
"""
buff = []
_encode(obj,buff)
ret = bytearray()
for ba in buff:
ret += ba
return bytes(ret)
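
A round trip through the new module; note the Python 3 semantics: bencode returns bytes, and bdecode gives back str dict keys but bytes values for strings.

    from ox.torrent.bencode3 import bencode, bdecode

    raw = bencode({'announce': 'http://tracker.example/announce',
                   'info': {'length': 42, 'name': 'file.bin'}})
    assert isinstance(raw, bytes)
    decoded = bdecode(raw)
    assert decoded['info']['length'] == 42
    assert decoded['info']['name'] == b'file.bin'   # strings come back as bytes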

View file

@ -1,100 +0,0 @@
# Written by Bram Cohen
# see LICENSE.txt for license information
from types import StringType, LongType, IntType, ListType, DictType
from re import compile
reg = compile(r'^[^/\\.~][^/\\]*$')
ints = (LongType, IntType)
def check_info(info):
if type(info) != DictType:
raise ValueError, 'bad metainfo - not a dictionary'
pieces = info.get('pieces')
if type(pieces) != StringType or len(pieces) % 20 != 0:
raise ValueError, 'bad metainfo - bad pieces key'
piecelength = info.get('piece length')
if type(piecelength) not in ints or piecelength <= 0:
raise ValueError, 'bad metainfo - illegal piece length'
name = info.get('name')
if type(name) != StringType:
raise ValueError, 'bad metainfo - bad name'
if not reg.match(name):
raise ValueError, 'name %s disallowed for security reasons' % name
if info.has_key('files') == info.has_key('length'):
raise ValueError, 'single/multiple file mix'
if info.has_key('length'):
length = info.get('length')
if type(length) not in ints or length < 0:
raise ValueError, 'bad metainfo - bad length'
else:
files = info.get('files')
if type(files) != ListType:
raise ValueError
for f in files:
if type(f) != DictType:
raise ValueError, 'bad metainfo - bad file value'
length = f.get('length')
if type(length) not in ints or length < 0:
raise ValueError, 'bad metainfo - bad length'
path = f.get('path')
if type(path) != ListType or path == []:
raise ValueError, 'bad metainfo - bad path'
for p in path:
if type(p) != StringType:
raise ValueError, 'bad metainfo - bad path dir'
if not reg.match(p):
raise ValueError, 'path %s disallowed for security reasons' % p
for i in xrange(len(files)):
for j in xrange(i):
if files[i]['path'] == files[j]['path']:
raise ValueError, 'bad metainfo - duplicate path'
def check_message(message):
if type(message) != DictType:
raise ValueError
check_info(message.get('info'))
if type(message.get('announce')) != StringType:
raise ValueError
def check_peers(message):
if type(message) != DictType:
raise ValueError
if message.has_key('failure reason'):
if type(message['failure reason']) != StringType:
raise ValueError
return
peers = message.get('peers')
if type(peers) == ListType:
for p in peers:
if type(p) != DictType:
raise ValueError
if type(p.get('ip')) != StringType:
raise ValueError
port = p.get('port')
if type(port) not in ints or p <= 0:
raise ValueError
if p.has_key('peer id'):
id = p['peer id']
if type(id) != StringType or len(id) != 20:
raise ValueError
elif type(peers) != StringType or len(peers) % 6 != 0:
raise ValueError
interval = message.get('interval', 1)
if type(interval) not in ints or interval <= 0:
raise ValueError
minint = message.get('min interval', 1)
if type(minint) not in ints or minint <= 0:
raise ValueError
if type(message.get('tracker id', '')) != StringType:
raise ValueError
npeers = message.get('num peers', 0)
if type(npeers) not in ints or npeers < 0:
raise ValueError
dpeers = message.get('done peers', 0)
if type(dpeers) not in ints or dpeers < 0:
raise ValueError
last = message.get('last', 0)
if type(last) not in ints or last < 0:
raise ValueError

View file

@ -6,9 +6,13 @@ from os.path import getsize, split, join, abspath, isdir
from os import listdir
from hashlib import sha1 as sha
from copy import copy
from string import strip
from bencode import bencode
from btformats import check_info
import re
from six import PY2
if PY2:
from .bencode import bencode
else:
from .bencode3 import bencode
from threading import Event
from time import time
from traceback import print_exc
@ -57,14 +61,63 @@ def print_announcelist_details():
print ('')
print (' httpseeds = optional list of http-seed URLs, in the format:')
print (' url[|url...]')
reg = re.compile(r'^[^/\\.~][^/\\]*$')
def is_number(value):
return isinstance(value, int) or isinstance(value,float)
def check_info(info):
if not isinstance(info, dict):
raise ValueError('bad metainfo - not a dictionary')
pieces = info.get('pieces')
if not isinstance(pieces, bytes) or len(pieces) % 20 != 0:
raise ValueError('bad metainfo - bad pieces key')
piecelength = info.get('piece length')
if not is_number(piecelength) or piecelength <= 0:
raise ValueError('bad metainfo - illegal piece length')
name = info.get('name')
if not isinstance(name, bytes):
raise ValueError('bad metainfo - bad name')
if not reg.match(name.decode('utf-8')):
raise ValueError('name %s disallowed for security reasons' % name)
if ('files' in info) == ('length' in info):
raise ValueError('single/multiple file mix')
if 'length' in info:
length = info.get('length')
if not is_number(length) or length < 0:
raise ValueError('bad metainfo - bad length')
else:
files = info.get('files')
if not isinstance(files, list):
raise ValueError
for f in files:
if not isinstance(f, dict):
raise ValueError('bad metainfo - bad file value')
length = f.get('length')
if not is_number(length) or length < 0:
raise ValueError('bad metainfo - bad length')
path = f.get('path')
if not isinstance(path, list) or path == []:
raise ValueError('bad metainfo - bad path')
for p in path:
if not isinstance(p, bytes):
raise ValueError('bad metainfo - bad path dir')
if not reg.match(p.decode('utf-8')):
raise ValueError('path %s disallowed for security reasons' % p)
for i in range(len(files)):
for j in range(i):
if files[i]['path'] == files[j]['path']:
raise ValueError('bad metainfo - duplicate path')
def make_meta_file(file, url, params = {}, flag = Event(),
progress = lambda x: None, progress_percent = 1):
if params.has_key('piece_size_pow2'):
if 'piece_size_pow2' in params:
piece_len_exp = params['piece_size_pow2']
else:
piece_len_exp = default_piece_len_exp
if params.has_key('target') and params['target'] != '':
if 'target' in params and params['target'] != '':
f = params['target']
else:
a, b = split(file)
@ -75,7 +128,7 @@ def make_meta_file(file, url, params = {}, flag = Event(),
if piece_len_exp == 0: # automatic
size = calcsize(file)
if size > 8L*1024*1024*1024: # > 8 gig =
if size > 8*1024*1024*1024: # > 8 gig =
piece_len_exp = 21 # 2 meg pieces
elif size > 2*1024*1024*1024: # > 2 gig =
piece_len_exp = 20 # 1 meg pieces
@ -92,7 +145,7 @@ def make_meta_file(file, url, params = {}, flag = Event(),
piece_length = 2 ** piece_len_exp
encoding = None
if params.has_key('filesystem_encoding'):
if 'filesystem_encoding' in params:
encoding = params['filesystem_encoding']
if not encoding:
encoding = ENCODING
@ -103,29 +156,29 @@ def make_meta_file(file, url, params = {}, flag = Event(),
if flag.isSet():
return
check_info(info)
h = open(f, 'wb')
data = {'info': info, 'announce': strip(url), 'creation date': long(time())}
h = open(f.encode(encoding), 'wb')
data = {'info': info, 'announce': url.strip(), 'creation date': int(time())}
if params.has_key('comment') and params['comment']:
if 'comment' in params and params['comment']:
data['comment'] = params['comment']
if params.has_key('real_announce_list'): # shortcut for progs calling in from outside
if 'real_announce_list' in params: # shortcut for progs calling in from outside
data['announce-list'] = params['real_announce_list']
elif params.has_key('announce_list') and params['announce_list']:
elif 'announce_list' in params and params['announce_list']:
l = []
for tier in params['announce_list'].split('|'):
l.append(tier.split(','))
data['announce-list'] = l
if params.has_key('real_httpseeds'): # shortcut for progs calling in from outside
if 'real_httpseeds' in params: # shortcut for progs calling in from outside
data['httpseeds'] = params['real_httpseeds']
elif params.has_key('httpseeds') and params['httpseeds']:
elif 'httpseeds' in params and params['httpseeds']:
data['httpseeds'] = params['httpseeds'].split('|')
if params.has_key('url-list') and params['url-list']:
if 'url-list' in params and params['url-list']:
data['url-list'] = params['url-list'].split('|')
if params.has_key('playtime') and params['playtime']:
if 'playtime' in params and params['playtime']:
data['info']['playtime'] = params['playtime']
h.write(bencode(data))
@ -134,7 +187,7 @@ def make_meta_file(file, url, params = {}, flag = Event(),
def calcsize(file):
if not isdir(file):
return getsize(file)
total = 0L
total = 0
for s in subfiles(abspath(file)):
total += getsize(s[1])
return total
@ -151,8 +204,8 @@ def uniconvertl(l, e):
def uniconvert(s, e):
try:
if s.__class__.__name__ != 'unicode':
s = unicode(s,e)
if isinstance(s, bytes):
s = s.decode(e)
except UnicodeError:
raise UnicodeError('bad filename: '+s)
return s.encode('utf-8')
@ -164,15 +217,15 @@ def makeinfo(file, piece_length, encoding, flag, progress, progress_percent=1):
subs.sort()
pieces = []
sh = sha()
done = 0L
done = 0
fs = []
totalsize = 0.0
totalhashed = 0L
totalhashed = 0
for p, f in subs:
totalsize += getsize(f)
for p, f in subs:
pos = 0L
pos = 0
size = getsize(f)
fs.append({'length': size, 'path': uniconvertl(p, encoding)})
h = open(f, 'rb')
@ -196,13 +249,13 @@ def makeinfo(file, piece_length, encoding, flag, progress, progress_percent=1):
h.close()
if done > 0:
pieces.append(sh.digest())
return {'pieces': ''.join(pieces),
return {'pieces': b''.join(pieces),
'piece length': piece_length, 'files': fs,
'name': uniconvert(split(file)[1], encoding) }
else:
size = getsize(file)
pieces = []
p = 0L
p = 0
h = open(file, 'rb')
while p < size:
x = h.read(min(piece_length, size - p))
@ -217,7 +270,7 @@ def makeinfo(file, piece_length, encoding, flag, progress, progress_percent=1):
else:
progress(min(piece_length, size - p))
h.close()
return {'pieces': ''.join(pieces),
return {'pieces': b''.join(pieces),
'piece length': piece_length, 'length': size,
'name': uniconvert(split(file)[1], encoding) }
@ -240,7 +293,7 @@ def completedir(dir, url, params = {}, flag = Event(),
files = listdir(dir)
files.sort()
ext = '.torrent'
if params.has_key('target'):
if 'target' in params:
target = params['target']
else:
target = ''
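
The ported check_info keeps the old validation semantics on bytes/int types. A minimal sketch of a metainfo dict that passes (values illustrative):

    from ox.torrent.makemetafile import check_info

    check_info({
        'pieces': b'\x00' * 20,       # one 20-byte SHA-1 digest
        'piece length': 2 ** 18,
        'name': b'file.bin',          # must not start with / \ . ~
        'length': 42,
    })                                # raises ValueError when malformed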

View file

@ -7,7 +7,7 @@ from six.moves.urllib.parse import quote
from ox import find_re, strip_tags, decode_html
from ox.cache import read_url
import lxml
import lxml.html
def findISBN(title, author):

View file

@ -15,9 +15,14 @@ def get_data(id):
details = cache.read_url('%s?output=json' % url)
details = json.loads(details)
for key in ('title', 'description', 'runtime'):
data[key] = details['metadata'][key]
if isinstance(data[key], list):
data[key] = data[key][0]
if key in details['metadata']:
data[key] = details['metadata'][key]
if isinstance(data[key], list):
data[key] = data[key][0]
if isinstance(data[key], basestring):
data[key] = data[key].strip()
if data[key][0] == '[' and data[key][-1] == ']':
data[key] = data[key][1:-1]
data['url'] = url
data['image'] = 'http://archive.org/download/%s/format=thumbnail' % id
data['ogg'] = 'http://archive.org/download/%s/format=Ogg+video' % id

View file

@ -5,7 +5,7 @@ import re
import ox.cache
from ox.cache import read_url
from ox.html import strip_tags
from ox.html import strip_tags, decode_html
from ox.text import find_re
import imdb
@ -36,14 +36,15 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
html = ox.cache.read_url(data["url"], timeout=timeout)
data["number"] = find_re(html, "<li>Spine #(\d+)")
data["title"] = find_re(html, "<h1 class=\"movietitle\">(.*?)</h1>")
data["title"] = data["title"].split(u' \u2014 The Television Version')[0]
data["title"] = decode_html(find_re(html, "<h1 class=\"movietitle\">(.*?)</h1>"))
data["title"] = data["title"].split(u' \u2014 The Television Version')[0].strip()
data["director"] = strip_tags(find_re(html, "<h2 class=\"director\">(.*?)</h2>"))
results = find_re(html, '<div class="left_column">(.*?)</div>')
results = re.compile("<li>(.*?)</li>").findall(results)
data["country"] = results[0]
data["year"] = results[1]
data["synopsis"] = strip_tags(find_re(html, "<div class=\"content_block last\">.*?<p>(.*?)</p>"))
data["synopsis"] = decode_html(strip_tags(find_re(html,
"<div class=\"content_block last\">.*?<p>(.*?)</p>")))
result = find_re(html, "<div class=\"purchase\">(.*?)</div>")
if 'Blu-Ray' in result or 'Essential Art House DVD' in result:
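
Running the extracted fields through decode_html turns entities such as &amp;amp; back into literal characters, which the raw regex capture leaves encoded. A small demonstration with the same ox helpers the diff imports:

    from ox import find_re, strip_tags, decode_html

    html = '<h2 class="director">Powell &amp; Pressburger</h2>'
    director = strip_tags(find_re(html, '<h2 class="director">(.*?)</h2>'))
    print(director)               # entity still encoded
    print(decode_html(director))  # Powell & Pressburger
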

View file

@@ -6,7 +6,7 @@ import re
import time
import unicodedata
from six.moves import urllib
from six.moves.urllib.parse import urlencode
from six import string_types
from .. import find_re, strip_tags, decode_html
@@ -37,7 +37,7 @@ class Imdb(SiteParser):
'alternativeTitles': {
'page': 'releaseinfo',
're': [
'name="akas".*?<table.*?>(.*?)</table>',
'<table[^>]*?id="akas"[^>]*?>(.*?)</table>',
"td>(.*?)</td>.*?<td>(.*?)</td>"
],
'type': 'list'
@@ -74,7 +74,7 @@ class Imdb(SiteParser):
'type': 'list'
},
'connections': {
'page': 'trivia?tab=mc',
'page': 'movieconnections',
're': '<h4 class="li_group">(.*?)</h4>(.*?)(<\/div>\n <a|<script)',
'type': 'list'
},
@@ -476,9 +476,8 @@ class Imdb(SiteParser):
alt[title].append(c)
self['alternativeTitles'] = []
for t in sorted(alt, key=lambda a: sorted(alt[a])):
if alt[t]:
countries = sorted([normalize_country_name(c) or c for c in alt[t]])
self['alternativeTitles'].append((t, countries))
countries = sorted([normalize_country_name(c) or c for c in alt[t]])
self['alternativeTitles'].append((t, countries))
if not self['alternativeTitles']:
del self['alternativeTitles']
@@ -521,7 +520,7 @@
if len(description) == 2 and description[-1].strip() != '-':
r['description'] = description[-1].strip()
return r
cc[rel] = list(map(get_conn, re.compile('<a href="/title/tt(\d{7})/">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data)))
cc[rel] = list(map(get_conn, re.compile('<a href="/title/tt(\d{7})/?">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data)))
self['connections'] = cc
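
The added /? makes the trailing slash in title links optional, so connection entries match whether or not IMDb emits it. A quick check:

    import re

    pattern = re.compile(r'<a href="/title/tt(\d{7})/?">(.*?)</a>')
    for snippet in ('<a href="/title/tt0133093/">The Matrix</a>',
                    '<a href="/title/tt0133093">The Matrix</a>'):
        print(pattern.findall(snippet))  # both forms match after the change
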
@@ -665,7 +664,7 @@ def get_movie_by_title(title, timeout=-1):
params['q'] = unicodedata.normalize('NFKC', params['q']).encode('latin-1')
except:
params['q'] = params['q'].encode('utf-8')
params = urllib.urlencode(params)
params = urlencode(params)
url = "http://akas.imdb.com/find?" + params
data = read_url(url, timeout=timeout, unicode=True)
#if search results in redirect, get id of current page
@@ -741,7 +740,7 @@ def get_movie_id(title, director='', year='', timeout=-1):
params['q'] = unicodedata.normalize('NFKC', params['q']).encode('latin-1')
except:
params['q'] = params['q'].encode('utf-8')
params = urllib.urlencode(params)
params = urlencode(params)
url = "http://akas.imdb.com/find?" + params
#print url
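
Importing urlencode from six.moves.urllib.parse resolves to urllib.urlencode on Python 2 and urllib.parse.urlencode on Python 3, which is why the call sites lose the module prefix. Usage:

    from six.moves.urllib.parse import urlencode

    params = {'q': 'the matrix', 's': 'tt'}
    url = "http://akas.imdb.com/find?" + urlencode(params)
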

View file

@@ -2,7 +2,7 @@
# encoding: utf-8
from __future__ import print_function
import re
from six.moves import urllib
from six.moves.urllib.parse import urlencode
from ox.cache import read_url
from ox.html import decode_html, strip_tags
@@ -29,7 +29,7 @@ def compose_url(request, parameters):
if request == 'advancedSearch':
url = 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?'
if parameters['media'] == 'music':
url += urllib.urlencode({
url += urlencode({
'albumTerm': parameters['title'],
'allArtistNames': parameters['artist'],
'composerTerm': '',
@@ -42,7 +42,7 @@ def compose_url(request, parameters):
'songTerm': ''
})
elif parameters['media'] == 'movie':
url += urllib.urlencode({
url += urlencode({
'actorTerm': '',
'closedCaption': 0,
'descriptionTerm': '',

View file

@@ -7,12 +7,6 @@ from ox import find_re, strip_tags
def get_url(id=None, imdb=None):
#this would also work but does not cache:
'''
from urllib2 import urlopen
u = urlopen(url)
return u.url
'''
if imdb:
url = "http://www.rottentomatoes.com/alias?type=imdbid&s=%s" % imdb
data = read_url(url)

View file

@@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from six.moves import urllib
import lxml.html
import ox
DEFAULT_MAX_RESULTS = 10
DEFAULT_TIMEOUT = 24*60*60
def read_url(url, data=None, headers=ox.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT):
return ox.cache.read_url(url, data, headers, timeout, unicode=True)
def quote_plus(s):
if not isinstance(s, bytes):
s = s.encode('utf-8')
return urllib.parse.quote_plus(s)
def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
"""
Return max_results tuples with title, url, description
>>> find("The Matrix site:imdb.com", 1)[0][0]
u'The Matrix (1999) - IMDb'
>>> find("The Matrix site:imdb.com", 1)[0][1]
u'http://www.imdb.com/title/tt0133093/'
"""
results = []
url = 'https://eu1.startpage.com/do/search?nosteeraway=1&abp=1&language=english&cmd=process_search&query=%s&x=0&y=0&cat=web&engine0=v1all' % quote_plus(query)
data = read_url(url, timeout=timeout)
doc = lxml.html.document_fromstring(data)
for r in doc.xpath("//div[contains(@class, 'result')]"):
t = r.find('h3')
if t is not None:
title = t.text_content().strip()
url = t.find('a').attrib['href']
description = r.find_class('desc')[0].text_content()
results.append((title, url, description))
if len(results) >= max_results:
break
return results
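
A hedged usage sketch of the new startpage scraper; the module path is an assumption (the diff does not show file names), and live results depend on the search engine's markup staying stable:

    from ox.web import startpage  # assumed module path

    for title, url, description in startpage.find('python', max_results=3):
        print(title, '->', url)
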

View file

@@ -25,7 +25,7 @@ def find_movies(query=None, imdb=None, max_results=10):
if imdb:
query = "tt" + normalize_imdbid(imdb)
results = []
next = ["http://thepiratebay.org/search/%s/0/3/200" % quote(query), ]
next = ["https://thepiratebay.se/search/%s/0/3/200" % quote(query), ]
page_count = 1
while next and page_count < 4:
page_count += 1
@@ -33,12 +33,12 @@ def find_movies(query=None, imdb=None, max_results=10):
if not url.startswith('http'):
if not url.startswith('/'):
url = "/" + url
url = "http://thepiratebay.org" + url
url = "https://thepiratebay.se" + url
data = read_url(url, timeout=cache_timeout, unicode=True)
regexp = '''<tr.*?<td class="vertTh"><a href="/browse/(.*?)".*?<td><a href="(/torrent/.*?)" class="detLink".*?>(.*?)</a>.*?</tr>'''
for row in re.compile(regexp, re.DOTALL).findall(data):
torrentType = row[0]
torrentLink = "http://thepiratebay.org" + row[1]
torrentLink = "https://thepiratebay.se" + row[1]
torrentTitle = decode_html(row[2])
# 201 = Movies , 202 = Movie DVDR, 205 TV Shows
if torrentType in ['201']:
@@ -61,7 +61,7 @@ def get_id(piratebayId):
def exists(piratebayId):
piratebayId = get_id(piratebayId)
return ox.net.exists("http://thepiratebay.org/torrent/%s" % piratebayId)
return ox.net.exists("https://thepiratebay.se/torrent/%s" % piratebayId)
def get_data(piratebayId):
_key_map = {
@@ -75,7 +75,7 @@ def get_data(piratebayId):
torrent = dict()
torrent[u'id'] = piratebayId
torrent[u'domain'] = 'thepiratebay.org'
torrent[u'comment_link'] = 'http://thepiratebay.org/torrent/%s' % piratebayId
torrent[u'comment_link'] = 'https://thepiratebay.se/torrent/%s' % piratebayId
data = read_url(torrent['comment_link'], unicode=True)
torrent[u'title'] = find_re(data, '<title>(.*?) \(download torrent\) - TPB</title>')
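
The thepiratebay.org to thepiratebay.se switch is repeated across four call sites; a sketch of one way to hoist it into a single constant (a refactor suggestion, not something the diff does):

    BASE_URL = 'https://thepiratebay.se'  # single place to change the domain

    def torrent_url(piratebay_id):
        return '%s/torrent/%s' % (BASE_URL, piratebay_id)

    def search_url(query, page=0):
        return '%s/search/%s/%d/3/200' % (BASE_URL, query, page)
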

View file

@@ -3,12 +3,14 @@
from __future__ import print_function
import re
from ox import find_re, strip_tags, decode_html
import lxml.html
from ox import strip_tags, decode_html
from ox.cache import read_url
def get_id(url):
return url.replace('http://www.ubu.com/', '').split('.html')[0]
return url.replace('http://www.ubu.com/', '').split('.html')[0].replace('/./', '/')
def get_url(id):
return 'http://www.ubu.com/%s.html' % id
@@ -22,51 +24,92 @@ def get_data(url):
'url': url,
'type': re.compile('ubu.com/(.*?)/').findall(url)[0]
}
for videourl, title in re.compile('<a href="(http://ubumexico.centro.org.mx/.*?)">(.*?)</a>').findall(data):
if videourl.endswith('.srt'):
m['srt'] = videourl
elif not 'video' in m:
m['video'] = videourl
m['video'] = m['video'].replace('/video/ ', '/video/').replace(' ', '%20')
if m['video'] == 'http://ubumexico.centro.org.mx/video/':
del m['video']
m['title'] = strip_tags(decode_html(title)).strip()
if not 'url' in m:
print(url, 'missing')
if 'title' in m:
m['title'] = re.sub('(.*?) \(\d{4}\)$', '\\1', m['title'])
match = re.compile("flashvars','file=(.*?.flv)'").findall(data)
if match:
m['flv'] = match[0]
m['flv'] = m['flv'].replace('/video/ ', '/video/').replace(' ', '%20')
y = re.compile('\((\d{4})\)').findall(data)
if y:
m['year'] = int(y[0])
d = re.compile('Director: (.+)').findall(data)
if d:
m['director'] = strip_tags(decode_html(d[0])).strip()
a = re.compile('<a href="(.*?)">Back to (.*?)</a>', re.DOTALL).findall(data)
if a:
m['artist'] = strip_tags(decode_html(a[0][1])).strip()
if m['type'] == 'sound':
m['tracks'] = [{
'title': strip_tags(decode_html(t[1])).strip(),
'url': t[0]
} for t in re.compile('"(http.*?.mp3)"[^>]*>(.+)</a', re.IGNORECASE).findall(data)]
else:
a = re.compile('<a href="(.*?)">(.*?) in UbuWeb Film').findall(data)
for videourl, title in re.compile('href="(http://ubumexico.centro.org.mx/.*?)">(.*?)</a>').findall(data):
if videourl.endswith('.srt'):
m['srt'] = videourl
elif not 'video' in m:
m['video'] = videourl
m['video'] = m['video'].replace('/video/ ', '/video/').replace(' ', '%20')
if m['video'] == 'http://ubumexico.centro.org.mx/video/':
del m['video']
if not 'title' in m:
m['title'] = strip_tags(decode_html(title)).strip()
if not 'url' in m:
print(url, 'missing')
if 'title' in m:
m['title'] = re.sub('(.*?) \(\d{4}\)$', '\\1', m['title'])
if not 'title' in m:
match = re.compile('<span id="ubuwork">(.*?)</span>').findall(data)
if match:
m['title'] = strip_tags(decode_html(match[0])).strip()
if not 'title' in m:
match = re.compile("<title>.*?&amp;(.*?)</title>", re.DOTALL).findall(data)
if match:
m['title'] = re.sub('\s+', ' ', match[0]).strip()
if ' - ' in m['title']:
m['title'] = m['title'].split(' - ', 1)[-1]
if 'title' in m:
m['title'] = strip_tags(decode_html(m['title']).strip())
match = re.compile("flashvars','file=(.*?.flv)'").findall(data)
if match:
m['flv'] = match[0]
m['flv'] = m['flv'].replace('/video/ ', '/video/').replace(' ', '%20')
match = re.compile('''src=(.*?) type="video/mp4"''').findall(data)
if match:
m['mp4'] = match[0].strip('"').strip("'").replace(' ', '%20')
if not m['mp4'].startswith('http'):
m['mp4'] = 'http://ubumexico.centro.org.mx/video/' + m['mp4']
elif 'video' in m and (m['video'].endswith('.mp4') or m['video'].endswith('.m4v')):
m['mp4'] = m['video']
doc = lxml.html.document_fromstring(read_url(url))
desc = doc.xpath("//div[contains(@id, 'ubudesc')]")
if len(desc):
txt = []
for part in desc[0].text_content().split('\n\n'):
if part == 'RESOURCES:':
break
if part.strip():
txt.append(part)
if txt:
if len(txt) > 1 and txt[0].strip() == m.get('title'):
txt = txt[1:]
m['description'] = '\n\n'.join(txt).split('RESOURCES')[0].split('RELATED')[0].strip()
y = re.compile('\((\d{4})\)').findall(data)
if y:
m['year'] = int(y[0])
d = re.compile('Director: (.+)').findall(data)
if d:
m['director'] = strip_tags(decode_html(d[0])).strip()
a = re.compile('<a href="(.*?)">Back to (.*?)</a>', re.DOTALL).findall(data)
if a:
m['artist'] = strip_tags(decode_html(a[0][1])).strip()
else:
a = re.compile('<b>(.*?)\(b\..*?\d{4}\)').findall(data)
a = re.compile('<a href="(.*?)">(.*?) in UbuWeb Film').findall(data)
if a:
m['artist'] = strip_tags(decode_html(a[0])).strip()
elif m['id'] == 'film/lawder_color':
m['artist'] = 'Standish Lawder'
if 'artist' in m:
m['artist'] = m['artist'].replace('in UbuWeb Film', '')
m['artist'] = m['artist'].replace('on UbuWeb Film', '').strip()
if m['id'] == 'film/coulibeuf':
m['title'] = 'Balkan Baroque'
m['year'] = 1999
m['artist'] = strip_tags(decode_html(a[0][1])).strip()
else:
a = re.compile('<b>(.*?)\(b\..*?\d{4}\)').findall(data)
if a:
m['artist'] = strip_tags(decode_html(a[0])).strip()
elif m['id'] == 'film/lawder_color':
m['artist'] = 'Standish Lawder'
if 'artist' in m:
m['artist'] = m['artist'].replace('in UbuWeb Film', '')
m['artist'] = m['artist'].replace('on UbuWeb Film', '').strip()
if m['id'] == 'film/coulibeuf':
m['title'] = 'Balkan Baroque'
m['year'] = 1999
return m
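
The new description logic swaps regex scraping for lxml: it takes the text content of the ubudesc div, splits on blank lines, and stops at the RESOURCES marker. A minimal sketch with inline sample markup (an assumption; real pages differ):

    import lxml.html

    html = ('<div id="ubudesc"><p>First paragraph of the description.</p>\n\n'
            'RESOURCES:\n\nlinks that should be cut</div>')
    doc = lxml.html.document_fromstring(html)
    desc = doc.xpath("//div[contains(@id, 'ubudesc')]")
    txt = []
    for part in desc[0].text_content().split('\n\n'):
        if part == 'RESOURCES:':
            break  # everything after the marker is link clutter
        if part.strip():
            txt.append(part)
    print('\n\n'.join(txt))  # -> First paragraph of the description.
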
def get_films():
@@ -98,3 +141,12 @@ def get_ids():
ids.append(u)
ids = [get_id(url) for url in list(set(ids))]
return ids
def get_sound_ids():
data = read_url('http://www.ubu.com/sound/')
ids = []
for url, author in re.compile('<a href="(\./.*?)">(.*?)</a>').findall(data):
url = 'http://www.ubu.com/sound' + url[1:]
ids.append(url)
ids = [get_id(url) for url in sorted(set(ids))]
return ids
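
A hypothetical usage of the new get_sound_ids, assuming the module is importable as ox.web.ubu (network access required):

    from ox.web import ubu  # assumed module path

    ids = ubu.get_sound_ids()
    print(len(ids), 'sound pages; first three:', ids[:3])
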

Some files were not shown because too many files have changed in this diff.