52 lines
2.6 KiB
Cython
52 lines
2.6 KiB
Cython
|
from libc.string cimport const_char
|
||
|
|
||
|
from lxml.includes.tree cimport xmlDoc, xmlDict
|
||
|
from lxml.includes.tree cimport xmlInputReadCallback, xmlInputCloseCallback
|
||
|
from lxml.includes.xmlparser cimport xmlParserCtxt, xmlSAXHandler, xmlSAXHandlerV1
|
||
|
from lxml.includes.xmlerror cimport xmlError
|
||
|
|
||
|
# External C declarations taken from libxml2's "libxml/HTMLparser.h".
# The block-level ``nogil`` marks every function declared below as callable
# without holding the GIL.
cdef extern from "libxml/HTMLparser.h" nogil:

    # Option flags of the HTML parser.
    ctypedef enum htmlParserOption:
        HTML_PARSE_NOERROR    # suppress error reports
        HTML_PARSE_NOWARNING  # suppress warning reports
        HTML_PARSE_PEDANTIC   # pedantic error reporting
        HTML_PARSE_NOBLANKS   # remove blank nodes
        HTML_PARSE_NONET      # forbid network access
        # The remaining flags require libxml2 2.6.21 or later:
        HTML_PARSE_RECOVER    # relaxed parsing
        HTML_PARSE_COMPACT    # compact small text nodes

    # Global SAX (version 1) handler exported by the header.
    xmlSAXHandlerV1 htmlDefaultSAXHandler

    # --- parser context creation -------------------------------------------

    xmlParserCtxt* htmlCreateMemoryParserCtxt(char* buffer, int size)

    xmlParserCtxt* htmlCreateFileParserCtxt(char* filename, char* encoding)

    xmlParserCtxt* htmlCreatePushParserCtxt(
        xmlSAXHandler* sax,
        void* user_data,
        char* chunk,
        int size,
        char* filename,
        int enc)

    # --- parser context management -----------------------------------------

    void htmlFreeParserCtxt(xmlParserCtxt* ctxt)

    void htmlCtxtReset(xmlParserCtxt* ctxt)

    int htmlCtxtUseOptions(xmlParserCtxt* ctxt, int options)

    # --- parsing through an existing context -------------------------------

    int htmlParseDocument(xmlParserCtxt* ctxt)

    int htmlParseChunk(
        xmlParserCtxt* ctxt,
        char* chunk,
        int size,
        int terminate)

    # --- htmlCtxtRead*: take a context plus an input source and options,
    #     and return an xmlDoc pointer -------------------------------------

    xmlDoc* htmlCtxtReadFile(
        xmlParserCtxt* ctxt,
        char* filename,
        const_char* encoding,
        int options)

    xmlDoc* htmlCtxtReadDoc(
        xmlParserCtxt* ctxt,
        char* buffer,
        char* URL,
        const_char* encoding,
        int options)

    xmlDoc* htmlCtxtReadIO(
        xmlParserCtxt* ctxt,
        xmlInputReadCallback ioread,
        xmlInputCloseCallback ioclose,
        void* ioctx,
        char* URL,
        const_char* encoding,
        int options)

    xmlDoc* htmlCtxtReadMemory(
        xmlParserCtxt* ctxt,
        char* buffer,
        int size,
        char* filename,
        const_char* encoding,
        int options)