python-ox/ox/js.py

#!/usr/bin/python
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4

def minify(source, comment=''):
    """Return a minified copy of the JavaScript in source.

    Comments are dropped, and every run of comments, linebreaks and
    whitespace between two other tokens is replaced by at most one
    separator character. Runs at the very beginning or end of the source
    are removed entirely. All other tokens are copied through untouched.

    source   JavaScript source, passed to tokenize()
    comment  optional text; if non-empty it is emitted first, wrapped in
             /* and */

    Returns the minified source as a string.
    """
    # see https://github.com/douglascrockford/JSMin/blob/master/README
    insignificant = ('comment', 'linebreak', 'whitespace')
    words = ('identifier', 'number')
    signs = ('+', '-', '++', '--')

    def separator(before, after, has_linebreak):
        # Pick the character that must separate two neighbouring
        # significant tokens once everything between them is removed.
        if has_linebreak\
                and (before['type'] in ('identifier', 'number', 'string')
                     or before['value'] in ('++', '--', ')', ']', '}'))\
                and (after['type'] in ('identifier', 'number', 'string')
                     or after['value'] in ('+', '-', '++', '--', '~', '!',
                                           '(', '[', '{')):
            # the linebreak may be acting as a statement terminator,
            # so keep exactly one
            return '\n'
        if (before['type'] in words and after['type'] in words)\
                or (before['value'] in signs and after['value'] in signs):
            # "a b" must not become "ab", "+ +" must not become "++"
            return ' '
        return ''

    parts = ['/*' + comment + '*/'] if comment else []
    previous = None        # last significant token that was emitted
    has_gap = False        # anything insignificant seen since previous?
    has_linebreak = False  # ... and did it include a linebreak?
    for token in tokenize(source):
        kind = token['type']
        if kind in insignificant:
            has_gap = True
            if kind == 'linebreak' or (
                kind == 'comment'
                and ('\n' in token['value'] or '\r' in token['value'])
            ):
                # a comment spanning several lines counts as a linebreak
                has_linebreak = True
            continue
        # Look at the real neighbours on both sides of the gap, not at the
        # adjacent raw token, so that comments and mixed whitespace and
        # linebreak runs yield exactly one separator decision.
        if previous is not None and has_gap:
            parts.append(separator(previous, token, has_linebreak))
        parts.append(token['value'])
        previous = token
        has_gap = False
        has_linebreak = False
    return ''.join(parts)

def tokenize(source):
    """Split JavaScript source into a flat list of tokens.

    Returns a list of dicts, in source order, each with two keys:

    'type'   one of 'comment', 'identifier', 'linebreak', 'number',
             'operator', 'regexp', 'string', 'whitespace' or 'unknown'
    'value'  the exact slice of source covered by the token

    Concatenating all values reproduces source. Keywords are reported as
    'identifier'. Characters outside ASCII are treated as identifier
    characters (an approximation of the Unicode identifier rules); any
    other character that fits no category becomes a one-character
    'unknown' token. Unterminated comments, strings and regexps extend to
    the end of the source instead of raising.
    """
    # see https://github.com/mozilla/narcissus/blob/master/lib/jslex.js
    IDENTIFIER = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_'
    LINEBREAK = '\n\r'
    NUMBER = '01234567890'
    OPERATOR = [
        # arithmetic
        '+', '-', '*', '/', '%', '++', '--',
        # assignment
        '=', '+=', '-=', '*=', '/=', '%=',
        '&=', '|=', '^=', '<<=', '>>=', '>>>=',
        # bitwise
        '&', '|', '^', '~', '<<', '>>', '>>>',
        # comparison
        '==', '!=', '===', '!==', '>', '>=', '<', '<=',
        # conditional
        '?', ':',
        # grouping
        '(', ')', '[', ']', '{', '}',
        # logical
        '&&', '||', '!',
        # other
        '.', ',', ';'
    ]
    REGEXP = 'abcdefghijklmnopqrstuvwxyz'
    # keywords that are followed by an expression, so that a forward slash
    # after them starts a regexp and is not a division
    REGEXP_KEYWORD = [
        'case', 'delete', 'do', 'else', 'in', 'instanceof', 'new',
        'return', 'throw', 'typeof', 'void'
    ]
    STRING = '\'"'
    WHITESPACE = ' \t'

    def is_identifier_start(char):
        return char in IDENTIFIER or ord(char) > 127

    def is_regexp():
        # checks if a forward slash is the beginning of a regexp,
        # as opposed to the beginning of an operator
        i = len(tokens) - 1
        # scan back to the previous significant token,
        # or to the beginning of the source
        while i >= 0 and tokens[i]['type'] in ['comment', 'linebreak', 'whitespace']:
            i -= 1
        if i == -1:
            # source begins with forward slash
            return True
        token = tokens[i]
        if token['type'] == 'identifier':
            # keywords are tokenized as identifiers, so compare by value
            return token['value'] in REGEXP_KEYWORD
        return token['type'] == 'operator'\
            and token['value'] not in ['++', '--', ')', ']', '}']

    cursor = 0
    length = len(source)
    tokens = []
    while cursor < length:
        char = source[cursor]
        start = cursor
        cursor += 1
        if char == '/' and cursor < length and source[cursor] in '/*':
            kind = 'comment'
            if source[cursor] == '/':
                # line comment: runs up to, but not including, the linebreak
                while cursor < length and source[cursor] not in LINEBREAK:
                    cursor += 1
            else:
                # block comment: search after the opening "/*", so that
                # "/*/" is not mistaken for a complete comment
                end = source.find('*/', cursor + 1)
                cursor = length if end == -1 else end + 2
        elif is_identifier_start(char):
            kind = 'identifier'
            while cursor < length and (
                is_identifier_start(source[cursor]) or source[cursor] in NUMBER
            ):
                cursor += 1
        elif char in LINEBREAK:
            kind = 'linebreak'
            while cursor < length and source[cursor] in LINEBREAK:
                cursor += 1
        elif char in NUMBER:
            kind = 'number'
            while cursor < length and source[cursor] in NUMBER + '.':
                cursor += 1
        elif char == '/' and is_regexp():
            kind = 'regexp'
            in_class = False
            while cursor < length:
                current = source[cursor]
                if current == '\\':
                    # skip the escaped character
                    cursor += 2
                    continue
                if current == '[':
                    in_class = True
                elif current == ']':
                    in_class = False
                elif current == '/' and not in_class:
                    # a slash inside [...] does not end the regexp
                    break
                cursor += 1
            # closing slash, then flags
            cursor += 1
            while cursor < length and source[cursor] in REGEXP:
                cursor += 1
        elif char in OPERATOR:
            kind = 'operator'
            # extend as long as one more character still forms an operator
            while cursor < length and source[start:cursor + 1] in OPERATOR:
                cursor += 1
        elif char in STRING:
            kind = 'string'
            while cursor < length and source[cursor] != char:
                cursor += (2 if source[cursor] == '\\' else 1)
            # closing quote
            cursor += 1
        elif char in WHITESPACE:
            kind = 'whitespace'
            while cursor < length and source[cursor] in WHITESPACE:
                cursor += 1
        else:
            # do not reuse the type of the previous token
            kind = 'unknown'
        # an unterminated string or regexp may have stepped past the end
        cursor = min(cursor, length)
        tokens.append({
            'type': kind,
            'value': source[start:cursor]
        })
    return tokens
add js module (tokenize, minify) 2011-10-06 20:05:01 +00:00			`#!/usr/bin/python`
			`# -*- coding: utf-8 -*-`
			`# vi:si:et:sw=4:sts=4:ts=4`

			`def minify(source, comment=''):`
			`# see https://github.com/douglascrockford/JSMin/blob/master/README`
			`def get_next_non_whitespace_token():`
			`pass`
			`tokens = tokenize(source)`
			`length = len(tokens)`
			`minified = '/*' + comment + '*/' if comment else ''`
			`for i, token in enumerate(tokens):`
			`if token['type'] in ['linebreak', 'whitespace']:`
			`prevToken = None if i == 0 else tokens[i - 1]`
			`next = i + 1`
			`while next < length and tokens[next]['type'] in ['comment', 'linebreak', 'whitespace']:`
			`next += 1`
			`nextToken = None if next == length else tokens[next]`
			`if token['type'] == 'linebreak':`
			`# replace a linebreak between two tokens that are identifiers or`
			`# numbers or strings or unary operators or grouping operators`
			`# with a single newline, otherwise remove it`
			`if prevToken and nextToken\`
			`and (prevToken['type'] in ['identifier', 'number', 'string']\`
			`or prevToken['value'] in ['++', '--', ')', ']', '}'])\`
			`and (nextToken['type'] in ['identifier', 'number', 'string']\`
			`or nextToken['value'] in ['+', '-', '++', '--', '~', '!', '(', '[', '{']):`
			`minified += '\n'`
			`elif token['type'] == 'whitespace':`
			`# replace whitespace between two tokens that are identifiers or`
			`# numbers, or between a token that ends with "+" or "-" and one that`
			`# begins with "+" or "-", with a single space, otherwise remove it`
			`if prevToken and nextToken\`
			`and ((prevToken['type'] in ['identifier', 'number']\`
			`and nextToken['type'] in ['identifier', 'number'])`
			`or (prevToken['value'] in ['+', '-', '++', '--']`
			`and nextToken['value'] in ['+', '-', '++', '--'])):`
			`minified += ' '`
			`elif token['type'] != 'comment':`
			`# remove comments and leave all other tokens untouched`
			`minified += token['value']`
			`return minified`

			`def tokenize(source):`
			`# see https://github.com/mozilla/narcissus/blob/master/lib/jslex.js`
			`IDENTIFIER = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_'`
			`LINEBREAK = '\n\r'`
			`NUMBER = '01234567890'`
			`OPERATOR = [`
			`# arithmetic`
			`'+', '-', '*', '/', '%', '++', '--',`
			`# assignment`
			`'=', '+=', '-=', '*=', '/=', '%=',`
			`'&=', '|=', '^=', '<<=', '>>=', '>>>=',`
			`# bitwise`
			`'&', '|', '^', '~', '<<', '>>', '>>>',`
			`# comparison`
			`'==', '!=', '===', '!==', '>', '>=', '<', '<=',`
			`# conditional`
			`'?', ':',`
			`# grouping`
			`'(', ')', '[', ']', '{', '}',`
			`# logical`
			`'&&', '||', '!',`
			`# other`
			`'.', ',', ';'`
			`]`
			`REGEXP = 'abcdefghijklmnopqrstuvwxyz'`
			`STRING = '\'"'`
			`WHITESPACE = ' \t'`
			`def is_regexp():`
			`# checks if a forward slash is the beginning of a regexp,`
			`# as opposed to the beginning of an operator`
			`i = len(tokens) - 1`
			`# scan back to the previous significant token,`
			`# or to the beginnig of the source`
			`while i >= 0 and tokens[i]['type'] in ['comment', 'linebreak', 'whitespace']:`
			`i -= 1`
			`if i == -1:`
			`# source begins with forward slash`
			`is_regexp = True`
			`else:`
			`token = tokens[i]`
			`is_regexp = (`
			`token['type'] == 'keyword' and not token['value'] in ['false', 'null', 'true']`
			`) or (`
			`token['type'] == 'operator' and not token['value'] in ['++', '--', ')', ']', '}']`
			`)`
			`return is_regexp`
			`cursor = 0`
			`length = len(source)`
			`tokens = []`
			`while cursor < length:`
			`char = source[cursor]`
			`start = cursor`
			`cursor += 1`
			`if char == '/' and cursor < length - 1 and source[cursor] in '/*':`
			`type = 'comment'`
			`cursor += 1`
			`while cursor < length:`
			`cursor += 1`
			`if source[start + 1] == '/' and source[cursor] == '\n':`
			`break`
			`elif source[start + 1] == '*' and source[cursor:cursor + 2] == '*/':`
			`cursor += 2`
			`break`
			`elif char in IDENTIFIER:`
			`type = 'identifier'`
			`while cursor < length and source[cursor] in IDENTIFIER + NUMBER:`
			`cursor += 1`
			`elif char in LINEBREAK:`
			`type = 'linebreak'`
			`while cursor < length and source[cursor] in LINEBREAK:`
			`cursor += 1`
			`elif char in NUMBER:`
			`type = 'number'`
			`while cursor < length and source[cursor] in NUMBER + '.':`
			`cursor += 1`
			`elif char == '/' and is_regexp():`
			`type = 'regexp'`
			`while cursor < length and source[cursor] != '/':`
			`cursor += (2 if source[cursor] == '\\' else 1)`
			`cursor += 1`
			`while cursor < length and source[cursor] in REGEXP:`
			`cursor += 1`
			`elif char in OPERATOR:`
			`type = 'operator'`
			`string = char + source[cursor]`
			`while cursor < length and string in OPERATOR:`
			`cursor += 1`
			`string += source[cursor]`
			`elif char in STRING:`
			`type = 'string'`
			`while cursor < length and source[cursor] != source[start]:`
			`cursor += (2 if source[cursor] == '\\' else 1)`
			`cursor += 1`
			`elif char in WHITESPACE:`
			`type = 'whitespace'`
			`while cursor < length and source[cursor] in WHITESPACE:`
			`cursor += 1`
			`tokens.append({`
			`'type': type,`
			`'value': source[start:cursor]`
			`})`
			`return tokens`