python-ox/ox/js.py

184 lines
6.8 KiB
Python
Raw Permalink Normal View History

2011-10-06 20:05:01 +00:00
#!/usr/bin/python
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
2014-10-01 08:48:06 +00:00
from .utils import json
2011-10-12 10:19:57 +00:00
2011-10-06 20:05:01 +00:00
def minify(source, comment=''):
    """Return a minified copy of the JavaScript in ``source``.

    Comments are removed, and every run of whitespace, linebreaks and
    comments between two tokens is collapsed to at most one separator:
    a newline where dropping a linebreak could join two statements, a
    single space where the two tokens would otherwise fuse into a
    different token, and nothing otherwise.

    source  -- JavaScript (or JSON with comments) source text
    comment -- optional text; if non-empty, the result starts with this
               text wrapped in ``/*`` and ``*/``

    Returns the minified source as a string.
    """
    # see https://github.com/douglascrockford/JSMin/blob/master/README
    operands = ('identifier', 'number', 'string')
    words = ('identifier', 'number')
    line_enders = ('++', '--', ')', ']', '}')
    line_starters = ('+', '-', '++', '--', '~', '!', '(', '[', '{')
    signs = ('+', '-', '++', '--')

    def needs_newline(before, after):
        # a linebreak is kept between two tokens that are identifiers or
        # numbers or strings or unary operators or grouping operators
        return (
            before['type'] in operands or before['value'] in line_enders
        ) and (
            after['type'] in operands or after['value'] in line_starters
        )

    def needs_space(before, after):
        # a space is kept between two identifiers or numbers, between two
        # "+"/"-" style operators (so "a + +b" does not become "a++b"), and
        # between a division and a regexp (so "a / /x/" does not become a
        # "//" line comment)
        return (
            before['type'] in words and after['type'] in words
        ) or (
            before['value'] in signs and after['value'] in signs
        ) or (
            before['type'] == 'operator' and before['value'] == '/'
            and after['value'].startswith('/')
        )

    pieces = ['/*' + comment + '*/'] if comment else []
    previous = None     # last token that was written to the output
    gap = False         # separator(s) seen since ``previous``
    linebreak = False   # at least one of those separators broke the line
    for token in tokenize(source):
        kind = token['type']
        if kind in ('comment', 'linebreak', 'whitespace'):
            # Separators are not judged one by one: the decision is made
            # once, when the next significant token shows up, so that a
            # comment or a second separator in between cannot hide the
            # real neighbours from each other.
            gap = True
            if kind == 'linebreak' or (
                kind == 'comment'
                and ('\n' in token['value'] or '\r' in token['value'])
            ):
                linebreak = True
            continue
        if previous is not None and gap:
            if linebreak and needs_newline(previous, token):
                pieces.append('\n')
            elif needs_space(previous, token):
                pieces.append(' ')
        # comments were skipped above; all other tokens pass through untouched
        pieces.append(token['value'])
        previous = token
        gap = linebreak = False
    return ''.join(pieces)
2011-10-12 10:19:57 +00:00
def parse_JSONC(source):
    """Parse JSON text that may contain JavaScript comments.

    The text is first passed through minify(), which drops the comments,
    and the result is decoded with json.loads().
    """
    stripped = minify(source)
    return json.loads(stripped)
2011-10-06 20:05:01 +00:00
def tokenize(source):
    """Split JavaScript ``source`` into a list of tokens.

    Each token is a dict with the keys
      'column' -- 1-based column at which the token starts
      'line'   -- 1-based line on which the token starts
      'type'   -- one of 'comment', 'identifier', 'linebreak', 'number',
                  'operator', 'regexp', 'string', 'whitespace', or
                  'unknown' for a character that fits none of these
      'value'  -- the exact source text of the token

    Keywords are reported with type 'identifier'. A line comment does not
    include its terminating linebreak. Unterminated comments, strings and
    regexps extend to the end of the source. Joining the values of all
    tokens gives back ``source``.
    """
    # see https://github.com/mozilla/narcissus/blob/master/lib/jslex.js
    IDENTIFIER = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_'
    KEYWORD = [
        'break',
        'case', 'catch', 'class', 'const', 'continue',
        'debugger', 'default', 'delete', 'do',
        'else', 'enum', 'export', 'extends',
        'finally', 'for', 'function',
        'if', 'implements', 'import', 'in', 'instanceof', 'interface',
        'let', 'module',
        'new',
        'package', 'private', 'protected', 'public',
        'return',
        'super', 'switch', 'static',
        'this', 'throw', 'try', 'typeof',
        'var', 'void',
        'yield',
        'while', 'with'
    ]
    LINEBREAK = '\n\r'
    NUMBER = '0123456789'
    OPERATOR = [
        # arithmetic
        '+', '-', '*', '/', '%', '++', '--',
        # assignment
        '=', '+=', '-=', '*=', '/=', '%=',
        '&=', '|=', '^=', '<<=', '>>=', '>>>=',
        # bitwise
        '&', '|', '^', '~', '<<', '>>', '>>>',
        # comparison
        '==', '!=', '===', '!==', '>', '>=', '<', '<=',
        # conditional
        '?', ':',
        # grouping
        '(', ')', '[', ']', '{', '}',
        # logical
        '&&', '||', '!',
        # other
        '.', ',', ';'
    ]
    REGEXP = 'abcdefghijklmnopqrstuvwxyz'
    STRING = '\'"`'
    WHITESPACE = ' \t'

    def is_regexp():
        # checks if a forward slash is the beginning of a regexp,
        # as opposed to the beginning of an operator
        # scan back to the previous significant token,
        # or to the beginning of the source
        for token in reversed(tokens):
            if token['type'] in ('comment', 'linebreak', 'whitespace'):
                continue
            if token['type'] == 'identifier':
                return token['value'] in KEYWORD
            if token['type'] == 'operator':
                return token['value'] not in ('++', '--', ')', ']', '}')
            return False
        # source begins with forward slash
        return True

    column = 1
    cursor = 0
    length = len(source)
    tokens = []
    line = 1
    while cursor < length:
        char = source[cursor]
        start = cursor
        cursor += 1
        if char == '/' and cursor < length and source[cursor] in '/*':
            kind = 'comment'
            if source[cursor] == '/':
                # line comment: runs up to, but does not include, the
                # linebreak (or to the end of the source)
                cursor += 1
                while cursor < length and source[cursor] not in LINEBREAK:
                    cursor += 1
            else:
                # block comment: the closing "*/" may not reuse the "*"
                # of the opening "/*", hence the search from cursor + 1
                end = source.find('*/', cursor + 1)
                cursor = length if end == -1 else end + 2
        elif char in IDENTIFIER:
            kind = 'identifier'
            while cursor < length and source[cursor] in IDENTIFIER + NUMBER:
                cursor += 1
        elif char in LINEBREAK:
            kind = 'linebreak'
            while cursor < length and source[cursor] in LINEBREAK:
                cursor += 1
        elif char in NUMBER:
            kind = 'number'
            while cursor < length and source[cursor] in NUMBER + '.':
                cursor += 1
        elif char == '/' and is_regexp():
            kind = 'regexp'
            in_class = False
            while cursor < length:
                current = source[cursor]
                if current == '\\':
                    # skip the escaped character
                    cursor += 2
                    continue
                if current == '[':
                    in_class = True
                elif current == ']':
                    in_class = False
                elif current == '/' and not in_class:
                    # a slash inside [...] does not end the regexp
                    break
                cursor += 1
            # step over the closing slash, if there is one
            cursor = min(cursor + 1, length)
            while cursor < length and source[cursor] in REGEXP:
                cursor += 1
        elif char in OPERATOR:
            kind = 'operator'
            # extend to the longest operator that starts here
            while cursor < length and source[start:cursor + 1] in OPERATOR:
                cursor += 1
        elif char in STRING:
            kind = 'string'
            while cursor < length and source[cursor] != char:
                cursor += (2 if source[cursor] == '\\' else 1)
            # step over the closing quote, if there is one
            cursor = min(cursor + 1, length)
        elif char in WHITESPACE:
            kind = 'whitespace'
            while cursor < length and source[cursor] in WHITESPACE:
                cursor += 1
        else:
            # a character outside all known classes becomes a token of its
            # own instead of silently taking the type of the previous token
            kind = 'unknown'
        value = source[start:cursor]
        tokens.append({
            'column': column,
            'line': line,
            'type': kind,
            'value': value
        })
        # advance the position past the token; "\r\n" counts as one linebreak
        breaks = value.count('\n') + value.count('\r') - value.count('\r\n')
        if breaks:
            line += breaks
            # column of the character following the last linebreak
            column = len(value) - max(value.rfind('\n'), value.rfind('\r'))
        else:
            column += len(value)
    return tokens