better ubu parser
This commit is contained in:
parent
47bdf3c897
commit
5c883e19e6
1 changed file with 6 additions and 2 deletions
|
@ -45,6 +45,8 @@ def get_data(url):
|
||||||
m['title'] = match[0].strip()
|
m['title'] = match[0].strip()
|
||||||
if ' - ' in m['title']:
|
if ' - ' in m['title']:
|
||||||
m['title'] = m['title'].split(' - ', 1)[-1]
|
m['title'] = m['title'].split(' - ', 1)[-1]
|
||||||
|
if 'title' in m:
|
||||||
|
m['title'] = strip_tags(decode_html(m['title']).strip())
|
||||||
match = re.compile("flashvars','file=(.*?.flv)'").findall(data)
|
match = re.compile("flashvars','file=(.*?.flv)'").findall(data)
|
||||||
if match:
|
if match:
|
||||||
m['flv'] = match[0]
|
m['flv'] = match[0]
|
||||||
|
@ -52,8 +54,10 @@ def get_data(url):
|
||||||
|
|
||||||
match = re.compile('''src=(.*?) type="video/mp4"''').findall(data)
|
match = re.compile('''src=(.*?) type="video/mp4"''').findall(data)
|
||||||
if match:
|
if match:
|
||||||
m['mp4'] = match[0].strip('"').strip("'")
|
m['mp4'] = match[0].strip('"').strip("'").replace(' ', '%20')
|
||||||
elif 'video' in m and m['video'].endswith('.mp4'):
|
if not m['mp4'].startswith('http'):
|
||||||
|
m['mp4'] = 'http://ubumexico.centro.org.mx/video/' + m['mp4']
|
||||||
|
elif 'video' in m and (m['video'].endswith('.mp4') or m['video'].endswith('.m4v')):
|
||||||
m['mp4'] = m['video']
|
m['mp4'] = m['video']
|
||||||
|
|
||||||
doc = lxml.html.document_fromstring(read_url(url))
|
doc = lxml.html.document_fromstring(read_url(url))
|
||||||
|
|
Loading…
Reference in a new issue