Clean up pylint errors and Python 2/3 compatibility issues

2016-06-08 15:32:46 +02:00 · 2016-06-08 15:32:46 +02:00 · 77f8876fca
commit 77f8876fca
parent 4e7898ae57
20 changed files with 232 additions and 197 deletions
--- a/ox/file.py
+++ b/ox/file.py
@ -149,7 +149,7 @@ def oshash(filename, cached=True):
        f.close()
        returnedhash = "%016x" % hash
        return returnedhash
-    except(IOError):
+    except IOError:
        return "IOError"
 def avinfo(filename, cached=True):
@ -160,23 +160,25 @@ def avinfo(filename, cached=True):
            return ffprobe(filename)
        ffmpeg2theora = cmd('ffmpeg2theora')
        p = subprocess.Popen([ffmpeg2theora], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        info, error = p.communicate()
+        stdout, error = p.communicate()
-        version = info.split('\n')[0].split(' - ')[0].split(' ')[-1]
+        stdout = stdout.decode('utf-8')
        version = stdout.split('\n')[0].split(' - ')[0].split(' ')[-1]
        if version < '0.27':
            raise EnvironmentError('version of ffmpeg2theora needs to be 0.27 or later, found %s' % version)
        p = subprocess.Popen([ffmpeg2theora, '--info', filename],
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        info, error = p.communicate()
+        stdout, error = p.communicate()
        try:
-            info = json.loads(info)
+            info = json.loads(stdout)
        except:
            # remove metadata, can be broken
            stdout = stdout.decode('utf-8')
            reg = re.compile('"metadata": {.*?},', re.DOTALL)
-            info = re.sub(reg, '', info)
+            stdout = re.sub(reg, '', stdout)
-            info = json.loads(info)
+            info = json.loads(stdout)
        if 'video' in info:
            for v in info['video']:
-                if not 'display_aspect_ratio' in v and 'width' in v:
+                if 'display_aspect_ratio' not in v and 'width' in v:
                    v['display_aspect_ratio'] = '%d:%d' % (v['width'], v['height'])
                    v['pixel_aspect_ratio'] = '1:1'
        if len(info.get('audio', [])) > 1:
@ -189,6 +191,7 @@ def avinfo(filename, cached=True):
                ffmpeg = cmd('ffmpeg')
                p = subprocess.Popen([ffmpeg, '-i', filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                stdout, stderr = p.communicate()
                stderr = stderr.decode('utf-8')
                languages = [re.compile('\((.+?)\):').findall(l) for l in stderr.split('\n') if 'Stream' in l and 'Audio' in l]
                if len(languages) == len(info['audio']):
                    for i, stream in enumerate(info['audio']):
@ -281,13 +284,13 @@ def ffprobe(filename):
                # print s
        for v in info['video']:
            k = 'display_aspect_ratio'
-            if not k in v and 'width' in v \
+            if k not in v and 'width' in v \
                    or (k in v and v[k] == '0:1'):
                v[k] = '%d:%d' % (v['width'], v['height'])
                v['pixel_aspect_ratio'] = '1:1'
    info['oshash'] = oshash(filename)
    info['path'] = filename
-    if not 'size' in info:
+    if 'size' not in info:
        info['size'] = os.path.getsize(filename)
    return info
--- a/ox/fixunicode.py
+++ b/ox/fixunicode.py
@ -6,7 +6,7 @@ from __future__ import print_function
 import unicodedata
-from six import unichr, PY2
+from six import unichr, text_type
 __all__ = ['fix_bad_unicode']
@ -151,10 +151,7 @@ def text_badness(text):
    - Improbable single-byte characters, such as ƒ or ¬
    - Letters in somewhat rare scripts
    '''
-    if PY2:
+    assert isinstance(text, text_type)
        assert isinstance(text, unicode)
    else:
        assert isinstance(text, str)
    errors = 0
    very_weird_things = 0
    weird_things = 0
--- a/ox/format.py
+++ b/ox/format.py
@ -4,6 +4,7 @@ import math
 import re
 import string
 from six import text_type
 def toAZ(num):
    """
@ -20,7 +21,8 @@ def toAZ(num):
    >>> toAZ(1234567890)
    'CYWOQVJ'
    """
-    if num < 1: raise ValueError("must supply a positive integer")
+    if num < 1:
        raise ValueError("must supply a positive integer")
    digits = string.ascii_uppercase
    az = ''
    while num != 0:
@ -64,7 +66,8 @@ def to26(q):
    >>> to26(347485647)
    'BDGKMAP'
    """
-    if q < 0: raise ValueError("must supply a positive integer")
+    if q < 0:
        raise ValueError("must supply a positive integer")
    base26 = string.ascii_uppercase
    converted = []
    while q != 0:
@ -123,7 +126,8 @@ def to32(q):
    ValueError: must supply a positive integer
    """
-    if q < 0: raise ValueError("must supply a positive integer")
+    if q < 0:
        raise ValueError("must supply a positive integer")
    letters = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
    converted = []
    while q != 0:
@ -210,7 +214,8 @@ def to36(q):
        ...
    ValueError: must supply a positive integer
    """
-    if q < 0: raise ValueError("must supply a positive integer")
+    if q < 0:
        raise ValueError("must supply a positive integer")
    letters = "0123456789abcdefghijklmnopqrstuvwxyz"
    converted = []
    while q != 0:
@ -233,7 +238,7 @@ def int_value(strValue, default=u''):
    u''
    """
    try:
-        val = re.compile('(\d+)').findall(unicode(strValue).strip())[0]
+        val = re.compile('(\d+)').findall(text_type(strValue).strip())[0]
    except:
        val = default
    return val
@ -250,7 +255,7 @@ def float_value(strValue, default=u''):
    u''
    """
    try:
-        val = re.compile('([\d.]+)').findall(unicode(strValue).strip())[0]
+        val = re.compile('([\d.]+)').findall(text_type(strValue).strip())[0]
    except:
        val = default
    return val
@ -339,7 +344,8 @@ def plural(amount, unit, plural='s'):
    if abs(amount) != 1:
        if plural == 's':
            unit = unit + plural
-        else: unit = plural
+        else:
            unit = plural
    return "%s %s" % (format_thousands(amount), unit)
 def format_duration(ms, verbosity=0, years=True, hours=True, milliseconds=True):
@ -482,7 +488,8 @@ def time2ms(timeString):
    p = timeString.split(':')
    for i in range(len(p)):
        _p = p[i]
-        if _p.endswith('.'): _p =_p[:-1]
+        if _p.endswith('.'):
            _p = _p[:-1]
        ms = ms * 60 + float(_p)
    return int(ms * 1000)
--- a/ox/html.py
+++ b/ox/html.py
@ -18,8 +18,8 @@ DOTS = ['&middot;', '*', '\xe2\x80\xa2', '&#149;', '&bull;', '&#8226;']
 unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
 word_split_re = re.compile(r'(\s+)')
-punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \
+punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % (
-    ('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]),
+                            '|'.join([re.escape(x) for x in LEADING_PUNCTUATION]),
                            '|'.join([re.escape(x) for x in TRAILING_PUNCTUATION])))
 simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
 link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
@ -90,13 +90,15 @@ def add_links(text, trim_url_limit=None, nofollow=False):
        match = punctuation_re.match(word)
        if match:
            lead, middle, trail = match.groups()
-            if middle.startswith('www.') or ('@' not in middle and not middle.startswith('http://') and \
+            if middle.startswith('www.') or ('@' not in middle and not middle.startswith('http://') and
-                    len(middle) > 0 and middle[0] in letters + string.digits and \
+                                             len(middle) > 0 and middle[0] in letters + string.digits and
-                    (middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))):
+                                             (middle.endswith('.org') or
                                              middle.endswith('.net') or
                                              middle.endswith('.com'))):
                middle = '<a href="http://%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle))
            if middle.startswith('http://') or middle.startswith('https://'):
                middle = '<a href="%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle))
-            if '@' in middle and not middle.startswith('www.') and not ':' in middle \
+            if '@' in middle and not middle.startswith('www.') and ':' not in middle \
                    and simple_email_re.match(middle):
                middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
            if lead + middle + trail != word:
@ -127,6 +129,7 @@ def clean_html(text):
    # Trim stupid HTML such as <br clear="all">.
    text = html_gunk_re.sub('', text)
    # Convert hard-coded bullets into HTML unordered lists.
    def replace_p_tags(match):
        s = match.group().replace('</p>', '</li>')
        for d in DOTS:
@ -153,6 +156,7 @@ def decode_html(html):
    if isinstance(html, bytes):
        html = html.decode('utf-8')
    uchr = unichr
    def entitydecode(match, uchr=uchr):
        entity = match.group(1)
        if entity == '#x80':
@ -328,15 +332,14 @@ def sanitize_html(html, tags=None, global_attributes=[]):
    for tag in tags:
        valid_attributes[tag['name']] = tag.get('required', []) \
-                + tag.get('optional', []) \
+            + tag.get('optional', []) + global_attributes
                + global_attributes
        required_attributes[tag['name']] = tag.get('required', [])
        validation[tag['name']] = tag.get('validation', {})
    if '[]' in validation:
        html = re.sub(
            re.compile('\[((https?:\/\/|\/).+?) (.+?)\]', re.IGNORECASE),
-            '<a href="\\1">\\3</a>', html);
+            '<a href="\\1">\\3</a>', html)
    parts = split_tags(html)
    for i, part in enumerate(parts):
@ -351,7 +354,7 @@ def sanitize_html(html, tags=None, global_attributes=[]):
            a = attr_re.findall(attributes)
            attrs = dict(a)
-            if not closing and not name in non_closing_tags:
+            if not closing and name not in non_closing_tags:
                level += 1
            if not attrs and attributes or name not in valid_tags:
@ -361,7 +364,7 @@ def sanitize_html(html, tags=None, global_attributes=[]):
                for key in set(attrs) - set(valid_attributes[name]):
                    del attrs[key]
                for key in required_attributes[tag['name']]:
-                    if not key in attrs:
+                    if key not in attrs:
                        valid = False
            if valid:
@ -395,6 +398,7 @@ def sanitize_html(html, tags=None, global_attributes=[]):
 def split_tags(string):
    tags = []
    def collect(match):
        tags.append(match.group(0))
        return '\0'
--- a/ox/js.py
+++ b/ox/js.py
@ -11,9 +11,9 @@ def minify(source, comment=''):
        pass
    # python2 performance with unicode string is terrible
    if PY2:
-        if isinstance(source, unicode):
+        if isinstance(source, unicode):  # pylint: disable=undefined-variable
            source = source.encode('utf-8')
-        if isinstance(comment, unicode):
+        if isinstance(comment, unicode):  # pylint: disable=undefined-variable
            comment = comment.encode('utf-8')
    tokens = tokenize(source)
    length = len(tokens)
@ -30,9 +30,9 @@ def minify(source, comment=''):
            # numbers or strings or unary operators or grouping operators
            # with a single newline, otherwise remove it
            if prevToken and nextToken\
-                    and (prevToken['type'] in ['identifier', 'number', 'string']\
+                    and (prevToken['type'] in ['identifier', 'number', 'string']
                         or prevToken['value'] in ['++', '--', ')', ']', '}']) \
-                    and (nextToken['type'] in ['identifier', 'number', 'string']\
+                    and (nextToken['type'] in ['identifier', 'number', 'string']
                         or nextToken['value'] in ['+', '-', '++', '--', '~', '!', '(', '[', '{']):
                minified += '\n'
        elif token['type'] == 'whitespace':
@ -40,10 +40,10 @@ def minify(source, comment=''):
            # numbers, or between a token that ends with "+" or "-" and one that
            # begins with "+" or "-", with a single space, otherwise remove it
            if prevToken and nextToken \
-                    and ((prevToken['type'] in ['identifier', 'number']\
+                    and ((prevToken['type'] in ['identifier', 'number'] and
-                        and nextToken['type'] in ['identifier', 'number'])
+                          nextToken['type'] in ['identifier', 'number']) or
-                    or (prevToken['value'] in ['+', '-', '++', '--']
+                         (prevToken['value'] in ['+', '-', '++', '--'] and
-                        and nextToken['value'] in ['+', '-', '++', '--'])):
+                          nextToken['value'] in ['+', '-', '++', '--'])):
                minified += ' '
        elif token['type'] != 'comment':
            # remove comments and leave all other tokens untouched
@ -178,7 +178,7 @@ def tokenize(source):
            'value': value
        })
        if type == 'comment':
-            lines = value.split('\n');
+            lines = value.split('\n')
            column = len(lines[-1])
            line += len(lines) - 1
        elif type == 'linebreak':
--- a/ox/jsonc.py
+++ b/ox/jsonc.py
@ -23,7 +23,7 @@ def loads(source):
        try:
            m = re.search(r'line (\d+) column (\d+)', msg)
            if m:
-                (lineno, colno) = map(int, m.groups())
+                (lineno, colno) = [int(n) for n in m.groups()]
        except:
            pass
        if lineno and colno:
--- a/ox/normalize.py
+++ b/ox/normalize.py
@ -18,7 +18,8 @@ _articles = ('the', 'la', 'a', 'die', 'der', 'le', 'el',
 _articlesDict = dict([(x, x) for x in _articles])
 _spArticles = []
 for article in _articles:
-    if article[-1] not in ("'", '-'): article += ' '
+    if article[-1] not in ("'", '-'):
        article += ' '
    _spArticles.append(article)
 _noarticles = (
@ -50,8 +51,10 @@ def canonical_title(title):
    'Los Angeles Plays Itself'
    """
    try:
-        if _articlesDict.has_key(title.split(', ')[-1].lower()): return title
+        if title.split(', ')[-1].lower() in _articlesDict:
-    except IndexError: pass
+            return title
    except IndexError:
        pass
    ltitle = title.lower()
    for start in _noarticles:
        if ltitle.startswith(start):
@ -60,7 +63,8 @@ def canonical_title(title):
        if ltitle.startswith(article):
            lart = len(article)
            title = '%s, %s' % (title[lart:], title[:lart])
-            if article[-1] == ' ': title = title[:-1]
+            if article[-1] == ' ':
                title = title[:-1]
            break
    ## XXX: an attempt using a dictionary lookup.
    ##for artSeparator in (' ', "'", '-'):
@ -82,9 +86,10 @@ def normalize_title(title):
    'The Movie Title'
    """
    stitle = title.split(', ')
-    if len(stitle) > 1 and _articlesDict.has_key(stitle[-1].lower()):
+    if len(stitle) > 1 and stitle[-1].lower() in _articlesDict:
        sep = ' '
        if stitle[-1][-1] in ("'", '-'):
            sep = ''
        if stitle[-1][-1] in ("'", '-'): sep = ''
        title = '%s%s%s' % (stitle[-1], sep, ', '.join(stitle[:-1]))
    return title
@ -139,7 +144,8 @@ def canonical_name(name):
    # Don't convert names already in the canonical format.
    if name in ('Unknown Director', ):
        return name
-    if name.find(', ') != -1: return name
+    if name.find(', ') != -1:
        return name
    sname = name.split(' ')
    snl = len(sname)
    if snl == 2:
@ -147,11 +153,14 @@ def canonical_name(name):
        name = '%s, %s' % (sname[1], sname[0])
    elif snl > 2:
        lsname = [x.lower() for x in sname]
-        if snl == 3: _indexes = (0, snl-2)
+        if snl == 3:
-        else: _indexes = (0, snl-2, snl-3)
+            _indexes = (0, snl-2)
        else:
            _indexes = (0, snl-2, snl-3)
        # Check for common surname prefixes at the beginning and near the end.
        for index in _indexes:
-            if lsname[index] not in _sname_suffixes: continue
+            if lsname[index] not in _sname_suffixes:
                continue
            try:
                # Build the surname.
                surn = '%s %s' % (sname[index], sname[index+1])
@ -194,11 +203,12 @@ def normalize_name(name):
 def normalize_path(path):
    path = path.replace(':', '_').replace('/', '_')
-    if path.endswith('.'): path = path[:-1] + '_'
+    if path.endswith('.'):
        path = path[:-1] + '_'
    return path
 def strip_accents(s):
    if isinstance(s, str):
-        s = unicode(s)
+        s = s.decode('utf-8')
    return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn'))
--- a/ox/oembed.py
+++ b/ox/oembed.py
@ -6,13 +6,16 @@ from . import cache
 from .text import find_re
 from .utils import json, ET
 def get_embed_code(url, maxwidth=None, maxheight=None):
    embed = {}
    header = cache.get_headers(url)
    if header.get('content-type', '').startswith('text/html'):
        html = cache.read_url(url)
-        json_oembed = filter(lambda l: 'json+oembed' in l, re.compile('<link.*?>').findall(html))
+        links = re.compile('<link.*?>').findall(html)
-        xml_oembed = filter(lambda l: 'xml+oembed' in l, re.compile('<link.*?>').findall(html))
+        json_oembed = [l for l in links if 'json+oembed' in l]
        xml_oembed = [l for l in links if 'xml+oembed' in l]
        if json_oembed:
            oembed_url = find_re(json_oembed[0], 'href="(.*?)"')
            if maxwidth:
@ -21,7 +24,7 @@ def get_embed_code(url, maxwidth=None, maxheight=None):
                oembed_url += '&maxheight=%d' % maxheight
            embed = json.loads(cache.read_url(oembed_url))
        elif xml_oembed:
-            oembed_url = find_re(json_oembed[0], 'href="(.*?)"')
+            oembed_url = find_re(xml_oembed[0], 'href="(.*?)"')
            if maxwidth:
                oembed_url += '&maxwidth=%d' % maxwidth
            if maxheight:
--- a/ox/torrent/bencode3.py
+++ b/ox/torrent/bencode3.py
@ -21,7 +21,7 @@ def _decode_str(data):
    start = data.index(b':')
    l = int(data[:start].decode(), 10)
    if l <= 0:
-        raise Exception('invalid string size: %d'%d)
+        raise Exception('invalid string size: %d' % l)
    start += 1
    ret = bytes(data[start:start+l])
    data = data[start+l:]
--- a/ox/web/apple.py
+++ b/ox/web/apple.py
@ -2,6 +2,7 @@ from __future__ import print_function
 import json
 import re
 from six import text_type
 from ox.cache import read_url
 HEADERS = {
@ -16,9 +17,9 @@ USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7) '
 USER_AGENT += 'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 Safari/534.48.3'
 def get_movie_data(title, director):
-    if isinstance(title, unicode):
+    if isinstance(title, text_type):
        title = title.encode('utf-8')
-    if isinstance(director, unicode):
+    if isinstance(director, text_type):
        director = director.encode('utf-8')
    data = {}
    # itunes section (preferred source for link)
@ -45,7 +46,7 @@ def get_movie_data(title, director):
    results = js['results']
    if results:
        url = host + results[0]['location']
-        if not 'link' in data:
+        if 'link' not in data:
            data['link'] = url
        headers = {
            'User-Agent': USER_AGENT
--- a/ox/web/epguides.py
+++ b/ox/web/epguides.py
@ -7,7 +7,7 @@ import time
 from ox import strip_tags, find_re
 from ox.cache import read_url
-import google
+from . import google
 def get_show_url(title):
--- a/ox/web/metacritic.py
+++ b/ox/web/metacritic.py
@ -28,22 +28,32 @@ def get_show_url(title):
 def get_data(url):
    data = read_url(url, unicode=True)
    doc = document_fromstring(data)
-    score = filter(lambda s: s.attrib.get('property') == 'v:average',
+    score = [s for s in doc.xpath('//span[@class="score_value"]')
-                   doc.xpath('//span[@class="score_value"]'))
+             if s.attrib.get('property') == 'v:average']
    if score:
        score = int(score[0].text)
    else:
        score = -1
-    authors = [a.text
+    authors = [
-        for a in doc.xpath('//div[@class="review_content"]//div[@class="author"]//a')]
+        a.text
-    sources = [d.text
+        for a in doc.xpath('//div[@class="review_content"]//div[@class="author"]//a')
-        for d in doc.xpath('//div[@class="review_content"]//div[@class="source"]/a')]
+    ]
-    reviews = [d.text
+    sources = [
-        for d in doc.xpath('//div[@class="review_content"]//div[@class="review_body"]')]
+        d.text
-    scores = [int(d.text.strip())
+        for d in doc.xpath('//div[@class="review_content"]//div[@class="source"]/a')
-        for d in doc.xpath('//div[@class="review_content"]//div[contains(@class, "critscore")]')]
+    ]
-    urls = [a.attrib['href']
+    reviews = [
-        for a in doc.xpath('//div[@class="review_content"]//a[contains(@class, "external")]')]
+        d.text
        for d in doc.xpath('//div[@class="review_content"]//div[@class="review_body"]')
    ]
    scores = [
        int(d.text.strip())
        for d in doc.xpath('//div[@class="review_content"]//div[contains(@class, "critscore")]')
    ]
    urls = [
        a.attrib['href']
        for a in doc.xpath('//div[@class="review_content"]//a[contains(@class, "external")]')
    ]
    metacritics = []
    for i in range(len(authors)):
--- a/ox/web/rottentomatoes.py
+++ b/ox/web/rottentomatoes.py
@ -32,7 +32,7 @@ def get_data(url):
        r['summary'] = get_og(data, 'description')
    meter = re.compile('<span id="all-critics-meter" class="meter(.*?)">(.*?)</span>').findall(data)
-    meter = filter(lambda m: m[1].isdigit(), meter)
+    meter = [m for m in meter if m[1].isdigit()]
    if meter:
        r['tomatometer'] = meter[0][1]
    r['rating'] = find_re(data, 'Average Rating: <span>([\d.]+)/10</span>')
--- a/ox/web/spiegel.py
+++ b/ox/web/spiegel.py
@ -95,7 +95,7 @@ def format_subsection(string):
        'ussports': 'US-Sports',
        'wunderbar': 'wunderBAR'
    }
-    if subsection.has_key(string):
+    if string in subsection:
        return subsection[string].replace(u'\xc3', 'ae')
    return string[:1].upper() + string[1:]
@ -219,8 +219,8 @@ def archive_news():
            else:
                dMax = days[m]
            for d in range(dMax, 0, -1):
-                print('getNews(%d, %d, %d)' % (y, m, d))
+                print('get_news(%d, %d, %d)' % (y, m, d))
-                news = getNews(y, m ,d)
+                news = get_news(y, m, d)
                for new in news:
                    dirname = archivePath + '/' + new['date'][0:4] + '/' + new['date'][5:7] + new['date'][8:10] + '/' + new['date'][11:13] + new['date'][14:16]
                    if not os.path.exists(dirname):
@ -253,7 +253,7 @@ def archive_news():
                    string = strings[3]
                    if len(strings) == 6:
                        string += '/' + strings[4]
-                    if not count.has_key(string):
+                    if string not in count:
                        count[string] = {'count': 1, 'string': '%s %s http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (new['date'], new['date'], new['section'].lower(), y, int(datetime(y, m, d).strftime('%j')))}
                    else:
                        count[string] = {'count': count[string]['count'] + 1, 'string': '%s %s' % (new['date'], count[string]['string'][17:])}
@ -269,12 +269,12 @@ if __name__ == '__main__':
    # spiegel = Spiegel(2008, 8)
    # print(spiegel.getContents())
    # news = News(2001, 9, 10)
-    # output(news.getNews())
+    # output(news.get_news())
    '''
    x = []
    for d in range(10, 30):
        print('2/%d' % d)
-        news = getNews(2008, 2, d)
+        news = get_news(2008, 2, d)
        for new in news:
            strings = new['url'].split('/')
            string = format_section(strings[3])