better ubu parser
This commit is contained in:
parent
47bdf3c897
commit
5c883e19e6
1 changed file with 6 additions and 2 deletions
|
@@ -45,6 +45,8 @@ def get_data(url):
|
|||
m['title'] = match[0].strip()
|
||||
if ' - ' in m['title']:
|
||||
m['title'] = m['title'].split(' - ', 1)[-1]
|
||||
if 'title' in m:
|
||||
m['title'] = strip_tags(decode_html(m['title']).strip())
|
||||
match = re.compile("flashvars','file=(.*?.flv)'").findall(data)
|
||||
if match:
|
||||
m['flv'] = match[0]
|
||||
|
@@ -52,8 +54,10 @@ def get_data(url):
|
|||
|
||||
match = re.compile('''src=(.*?) type="video/mp4"''').findall(data)
|
||||
if match:
|
||||
m['mp4'] = match[0].strip('"').strip("'")
|
||||
elif 'video' in m and m['video'].endswith('.mp4'):
|
||||
m['mp4'] = match[0].strip('"').strip("'").replace(' ', '%20')
|
||||
if not m['mp4'].startswith('http'):
|
||||
m['mp4'] = 'http://ubumexico.centro.org.mx/video/' + m['mp4']
|
||||
elif 'video' in m and (m['video'].endswith('.mp4') or m['video'].endswith('.m4v')):
|
||||
m['mp4'] = m['video']
|
||||
|
||||
doc = lxml.html.document_fromstring(read_url(url))
|
||||
|
|
Loading…
Reference in a new issue