oxjs/source/Ox/js/String.js

'use strict';

Ox.basename = function(string) {
    /*
    fixme: deprecate
    Returns what follows the last slash or backslash of a path
    >>> Ox.basename("foo/bar/foo.bar")
    "foo.bar"
    >>> Ox.basename("foo.bar")
    "foo.bar"
    */
    // greedy match, from the start up to and including the last separator
    var leadingDirectories = /^.*[\/\\]/g;
    return string.replace(leadingDirectories, '');
};

/*@
Ox.char <f> Alias for String.fromCharCode
@*/
// Not a wrapper: the native function object itself is assigned, so
// Ox.char(code[, code...]) behaves exactly like String.fromCharCode
Ox.char = String.fromCharCode;

/*@
Ox.clean <f> Remove leading, trailing and double whitespace from a string
    > Ox.clean("foo  bar")
    "foo bar"
    > Ox.clean(" foo  bar ")
    "foo bar"
    > Ox.clean(" foo \n bar ")
    "foo\nbar"
    > Ox.clean(" \nfoo\n\nbar\n ")
    "foo\nbar"
    > Ox.clean(" foo\tbar ")
    "foo bar"
@*/
Ox.clean = function(string) {
    // lines are normalized one by one, so that line breaks survive
    var lines = Ox.map(string.split('\n'), function(line) {
        // collapse runs of whitespace, then drop leading and trailing space
        return line.replace(/\s+/g, ' ').trim();
    });
    // Ox.filter is called without a test function; going by the examples
    // in the documentation, this is what drops lines that are now empty
    return Ox.filter(lines).join('\n');
};

/*@
Ox.endsWith <f> Checks if a string ends with a given substring
    If the substring is a string literal (and not a variable),
    <code>/sub$/.test(str)</code> or <code>!!/sub$/.exec(str)</code>
    is shorter than <code>Ox.ends(str, sub)</code>.
    > Ox.endsWith('foobar', 'bar')
    true
@*/
Ox.ends = Ox.endsWith = function(string, substring) {
    // fixme: rename to ends
    var haystack = string.toString(),
        needle = substring.toString(),
        // index at which the needle has to begin in order to be a suffix
        offset = haystack.length - needle.length;
    // a negative offset means that the needle is longer than the haystack
    return offset >= 0 && haystack.indexOf(needle, offset) == offset;
};

/*@
Ox.highlight <f> Highlight matches in a string
    > Ox.highlight('foobar', 'foo', 'match')
    '<span class="match">foo</span>bar'
@*/
// Wraps every case-insensitive occurrence of str in txt in a
// <span class="classname"> element and returns the result.
// str is matched literally: regular expression metacharacters are escaped
// before the pattern is compiled, so that queries like '+', '(' or '.'
// neither throw nor match unrelated characters.
// If str is empty or missing, txt is returned unchanged.
// Neither txt nor classname are HTML-escaped here.
Ox.highlight = function(txt, str, classname) {
    var pattern;
    if (!str || !str.length) {
        return txt;
    }
    pattern = String(str).replace(/[\\^$.*+?()[\]{}|]/g, '\\$&');
    return txt.replace(new RegExp(pattern, 'ig'), function(match) {
        // with a replacer function, '$' sequences in classname stay literal
        return '<span class="' + classname + '">' + match + '</span>';
    });
};

/*@
Ox.highlightHTML <f> Highlight matches in an HTML string
    > Ox.highlightHTML('<b>foo</b>bar', 'foobar', 'h')
    '<b><span class="h">foo</span></b><span class="h">bar</span>'
    > Ox.highlightHTML('<a href="/foo">foo</a>bar', 'foobar', 'h')
    '<a href="/foo"><span class="h">foo</span></a><span class="h">bar</span>'
    > Ox.highlightHTML('foo<br>bar', 'foobar', 'h')
    'foo<br>bar'
    > Ox.highlightHTML('AT&amp;T', 'AT&T', 'h')
    '<span class="h">AT&amp;T</span>'
    > Ox.highlightHTML('AT&amp;T', 'amp', 'h')
    'AT&amp;T'
    > Ox.highlightHTML('a &lt;b&gt; c', '<b>', 'h')
    'a <span class="h">&lt;b&gt;</span> c'
    > Ox.highlightHTML('a <br> c', 'b', 'h')
    'a <br> c'
@*/
Ox.highlightHTML = function(html, str, classname, tags) {
    // Scans html character by character, looks for occurrences of str
    // outside of tags and entities, wraps them in
    // <span class="classname"> elements and returns the resulting HTML.
    // tags is an optional array of tag names that are recognized as tags
    // in addition to the built-in list below.
    var count = 0, // number of characters of str matched so far
        isEntity = false, // true from '&' up to the next ';'
        isTag = false, // true from the '<' of a known tag up to the next '>'
        position, // index in html at which the current match began
        positions = []; // [start, end] index pairs of complete matches
    //fixme: default tags should be same as in parseHTML
    tags = (tags || []).concat([ 
        // inline formatting
        'b', 'code', 'i', 's', 'sub', 'sup', 'u',
        // block formatting
        'blockquote', 'h1', 'h2', 'h3', 'p', 'pre',
        // lists
        'li', 'ol', 'ul',
        // tables
        'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr',
        // other
        'a', 'br', 'img',
    ]);
    // the query is entity-encoded and lowercased, the scan below runs over
    // the lowercased HTML, which makes the comparison case-insensitive
    // NOTE(review): characters inside an entity are skipped below and never
    // compared, so it looks like a query whose encoded form contains an
    // entity (like 'AT&T') cannot match -- confirm against the doc tests
    str = Ox.encodeHTML(str).toLowerCase();
    Ox.forEach(html.toLowerCase(), function(chr, i) {
        // check for entity or tag start
        if (!isEntity && chr == '&') {
            isEntity = true;
        } else if (!isTag && chr == '<') {
            // a '<' only opens a tag if it is followed by one of the known
            // tag names (optionally preceded by '/'), otherwise it is text
            // NOTE(review): the test runs against the original html, not
            // the lowercased copy, and without the 'i' flag, so uppercase
            // tags are presumably not recognized -- confirm
            Ox.forEach(tags, function(tag) {
                if (html.slice(i + 1).match(new RegExp('^/?' + tag + '\\W'))) {
                    isTag = true;
                    // presumably ends the inner Ox.forEach early -- confirm
                    Ox.Break();
                }
            });
        }
        // if outside entity or tag
        if (!isEntity && !isTag) {
            // if character matches
            if (chr == str[count]) {
                if (count == 0) {
                    position = i;
                }
                count++;
                if (count == str.length) {
                    // make sure matches are last to first
                    // NOTE(review): count is not reset here, only by the
                    // mismatch on the next character, which is then not
                    // considered as the start of another match
                    positions.unshift([position, i + 1]);
                }
            } else {
                // the mismatching character itself is not re-tested as a
                // possible first character of a new match
                count = 0;
            }
        }
        // check for entity or tag end
        if (isEntity && chr == ';') {
            isEntity = false;
        } else if (isTag && chr == '>') {
            isTag = false;
        }
    });
    // positions are ordered last to first, so that inserting markup for one
    // match does not shift the indices of the matches still to be handled
    positions.forEach(function(position) {
        // a match may span tags: close the span before each tag inside the
        // match, and reopen it right after
        var match = '<span class="' + classname + '">'
            + html.slice(position[0], position[1])
                .replace(/(<.*?>)/g, '</span>$1<span class="' + classname + '">')
            + '</span>';
        html = html.slice(0, position[0]) + match + html.slice(position[1]);
    });
    return html;
}

/*@
Ox.isValidEmail <f> Tests if a string is a valid e-mail address
    (str) -> <b> True if the string is a valid e-mail address
    str <s> Any string
    > Ox.isValidEmail("foo@bar.com")
    true
    > Ox.isValidEmail("foo.bar@foobar.co.uk")
    true
    > Ox.isValidEmail("foo@bar")
    false
    > Ox.isValidEmail("foo@bar..com")
    false
@*/
Ox.isValidEmail = function(string) {
    // local part: one or more letters, digits, '.', '+', '-' or '_'
    // domain: one or more non-empty labels, each followed by a dot, then a
    // top-level domain of 2 to 63 letters (63 is the maximum length of a
    // DNS label; the previous limit of 6 rejected valid longer TLDs)
    return /^[0-9A-Z\.\+\-_]+@(?:[0-9A-Z\-]+\.)+[A-Z]{2,63}$/i.test(string);
};

/*@
Ox.pad <f> Pad a string to a given length
    > Ox.pad(1, 2)
    "01"
    > Ox.pad("abc", -6, ".")
    "abc..."
    > Ox.pad("foobar", -3, ".")
    "foo"
    > Ox.pad("abc", -6, "123456")
    "abc123"
    > Ox.pad("abc", 6, "123456")
    "456abc"
@*/
Ox.pad = function(string, length, padding) {
    // fixme: slightly obscure signature
    // fixme: weird for negative numbers
    // the sign of length selects the side: a positive length pads (and
    // cuts) on the left, anything else pads (and cuts) on the right
    var size = Math.abs(length),
        isLeft = length / size == 1,
        text = string.toString().slice(0, size),
        // always long enough, cut down to size below
        filler = Ox.repeat(padding || '0', size - text.length),
        padded;
    if (isLeft) {
        padded = (filler + text).slice(-size);
    } else {
        padded = (text + filler).slice(0, size);
    }
    return padded;
};

/*@
Ox.parsePath <f> Returns the components of a path
    (str) -> <o> Path
        extension <s> File extension
        filename <s> Filename
        pathname <s> Pathname
    > Ox.parsePath('/foo/bar/foo.bar')
    {extension: 'bar', filename: 'foo.bar', pathname: '/foo/bar/'}
    > Ox.parsePath('foo/')
    {extension: '', filename: '', pathname: 'foo/'}
    > Ox.parsePath('foo')
    {extension: '', filename: 'foo', pathname: ''}
    > Ox.parsePath('.foo')
    {extension: '', filename: '.foo', pathname: ''}
@*/
Ox.parsePath = function(string) {
    // [1] pathname: everything up to and including the last slash (this
    //     may be the slash alone, as in '/foo')
    // [2] filename: the rest
    // [3] extension, with its dot: from the first dot of the filename that
    //     is not its first character
    // [\s\S] is used instead of '.' so that the expression matches any
    // string, including strings that contain line breaks
    var matches = /^([\s\S]*\/)?([\s\S]+?(\.[\s\S]+)?)?$/.exec(string);
    return {
        pathname: matches[1] || '',
        filename: matches[2] || '',
        extension: matches[3] ? matches[3].slice(1) : ''
    };
};

/*@
Ox.parseSRT <f> Parses an srt subtitle file
    (str) -> <o> Parsed subtitles
        in <n> In point (sec)
        out <n> Out point (sec)
        text <s> Text
    str <s> Contents of an srt subtitle file
    > Ox.parseSRT('1\n01:02:00,000 --> 01:02:03,400\nHello World')
    [{'in': 3720, out: 3723.4, text: 'Hello World'}]
@*/
Ox.parseSRT = function(string, fps) {
    // string: contents of an srt file
    // fps: optional frame rate; if given, in and out points are rounded to
    // the nearest frame
    // returns an array of {in, out, text} objects, one per subtitle
    // seconds per field of an "hh:mm:ss,mmm" timecode
    var factors = [3600, 60, 1, 0.001];
    return string
        // normalize "\r\n" and lone "\r" line breaks
        .replace(/\r\n?/g, '\n')
        // ignore blank lines before the first and after the last block
        .replace(/^\n+|\n+$/g, '')
        // blocks are separated by one or more blank lines
        .split(/\n{2,}/)
        .filter(function(block) {
            // empty input yields a single empty block
            return block.length > 0;
        })
        .map(function(block) {
            var lines = block.split('\n'), points;
            // the first line is the running number, which is not needed
            lines.shift();
            // the second line holds the in and out timecodes
            points = lines.shift().split(' --> ').map(function(point) {
                return point.replace(',', ':').split(':')
                    .reduce(function(previous, current, index) {
                        return previous + parseInt(current, 10) *
                            factors[index];
                    }, 0);
            });
            if (fps) {
                points = points.map(function(point) {
                    return Math.round(point * fps) / fps;
                });
            }
            // all remaining lines are the text
            return {
                'in': points[0],
                out: points[1],
                text: lines.join('\n')
            };
        });
};

/*@
Ox.parseUserAgent <f> Parses a user agent string
    (userAgent) -> <o> User agent data
        browser <o> Browser (name, string and version)
        system <o> Operating system (name, string and version)
    userAgent <s> User agent string
@*/
Ox.parseUserAgent = function(userAgent) {
    // For both 'browser' and 'system', the result holds a name, a version
    // and a display string made of both. All three are empty strings if
    // nothing is recognized.
    var aliases = {
            // products that are reported under the name of another one
            browser: {
                'Firefox': /(Fennec|Firebird|Iceweasel|Minefield|Namoroka|Phoenix|SeaMonkey|Shiretoko)/
            },
            system: {
                'BSD': /(FreeBSD|NetBSD|OpenBSD)/,
                'Linux': /(CrOS|MeeGo|webOS)/,
                'Unix': /(AIX|HP-UX|IRIX|SunOS)/
            }
        },
        names = {
            // display names, by matched name
            browser: {
                'chromeframe': 'Chrome Frame',
                'MSIE': 'Internet Explorer'
            },
            system: {
                'CPU OS': 'iOS',
                'iPhone OS': 'iOS',
                'Macintosh': 'Mac OS X'
            }
        },
        regexps = {
            // tested in order, the first match wins
            // group 1 is the name and group 2 the version, unless group 1
            // starts with a digit or group 2 is 'Linux'
            browser: [
                /(Camino)\/(\d+)/,
                /(chromeframe)\/(\d+)/,
                /(Chrome)\/(\d+)/,
                /(Epiphany)\/(\d+)/,
                /(Firefox)\/(\d+)/,
                /(Galeon)\/(\d+)/,
                /(Googlebot)\/(\d+)/,
                /(Konqueror)\/(\d+)/,
                /(MSIE) (\d+)/,
                /(Netscape)\d?\/(\d+)/,
                /(NokiaBrowser)\/(\d+)/,
                /(Opera) (\d+)/,
                /(Opera)\/.+Version\/(\d+)/,
                /Version\/(\d+).+(Safari)/
            ],
            system: [
                /(Android) (\d+)/,
                /(BeOS)/,
                /(BlackBerry) (\d+)/,
                /(Darwin)/,
                /(BSD) (FreeBSD|NetBSD|OpenBSD)/,
                /(CPU OS) (\d+)/,
                /(iPhone OS) (\d+)/,
                /(Linux).+(CentOS|CrOS|Debian|Fedora|Gentoo|Mandriva|MeeGo|Mint|Red Hat|SUSE|Ubuntu|webOS)/,
                /(CentOS|CrOS|Debian|Fedora|Gentoo|Mandriva|MeeGo|Mint|Red Hat|SUSE|Ubuntu|webOS).+(Linux)/,
                /(Linux)/,
                // the unescaped dot is needed, the separator can be '.' or
                // '_'; the minor version can have more than one digit
                /(Mac OS X) (10.\d+)/,
                /(Mac OS X)/,
                /(Macintosh)/,
                /(SymbianOS)\/(\d+)/,
                /(SymbOS)/,
                /(OS\/2)/,
                /(Unix) (AIX|HP-UX|IRIX|SunOS)/,
                /(Unix)/,
                /(Windows) (NT \d\.\d)/,
                /(Windows) (95|98|2000|2003|ME|NT|XP)/, // Opera
                /(Windows).+(Win 9x 4\.90)/, // Firefox
                /(Windows).+(Win9\d)/, // Firefox
                /(Windows).+(WinNT4.0)/ // Firefox
            ]
        },
        versions = {
            // display versions, by matched version
            browser: {},
            system: {
                '10.0': '10.0 (Cheetah)',
                '10.1': '10.1 (Puma)',
                '10.2': '10.2 (Jaguar)',
                '10.3': '10.3 (Panther)',
                '10.4': '10.4 (Tiger)',
                '10.5': '10.5 (Leopard)',
                '10.6': '10.6 (Snow Leopard)',
                '10.7': '10.7 (Lion)',
                '10.8': '10.8 (Mountain Lion)',
                'CrOS': 'Chrome OS',
                'NT 4.0': 'NT 4.0 (Windows NT)',
                'NT 4.1': 'NT 4.1 (Windows 98)',
                'Win 9x 4.90': 'NT 4.9 (Windows ME)',
                'NT 5.0': 'NT 5.0 (Windows 2000)',
                'NT 5.1': 'NT 5.1 (Windows XP)',
                'NT 5.2': 'NT 5.2 (Windows 2003)',
                'NT 6.0': 'NT 6.0 (Windows Vista)',
                'NT 6.1': 'NT 6.1 (Windows 7)',
                'NT 6.2': 'NT 6.2 (Windows 8)',
                '95': 'NT 4.0 (Windows 95)',
                'NT': 'NT 4.0 (Windows NT)',
                '98': 'NT 4.1 (Windows 98)',
                'ME': 'NT 4.9 (Windows ME)',
                '2000': 'NT 5.0 (Windows 2000)',
                '2003': 'NT 5.2 (Windows 2003)',
                'XP': 'NT 5.1 (Windows XP)',
                'Win95': 'NT 4.0 (Windows 95)',
                'WinNT4.0': 'NT 4.0 (Windows NT)',
                'Win98': 'NT 4.1 (Windows 98)'
            }
        },
        userAgentData = {};
    Object.keys(regexps).forEach(function(key) {
        userAgentData[key] = {name: '', string: '', version: ''};
        // put the alias in place of (browser) or in front of (system) the
        // first occurrence of an aliased product
        Object.keys(aliases[key]).forEach(function(alias) {
            userAgent = userAgent.replace(
                aliases[key][alias], key == 'browser' ? alias : alias + ' $1'
            );
        });
        // some() stops at the first callback that returns true
        regexps[key].some(function(regexp) {
            var matches = userAgent.match(regexp),
                name, string, swap, version;
            if (!matches) {
                return false;
            }
            // not every expression has a version group
            matches[2] = matches[2] || '';
            // in some expressions, the version comes before the name
            swap = matches[1].match(/^\d/) || matches[2] == 'Linux';
            name = matches[swap ? 2 : 1];
            version = matches[swap ? 1 : 2].replace('_', '.');
            name = names[key][name] || name;
            version = versions[key][version] || version;
            string = name;
            if (version) {
                // for these, the "version" is a distribution or flavor
                string += ' ' + (
                    ['BSD', 'Linux', 'Unix'].indexOf(name) > -1
                    ? '(' + version + ')'
                    : version
                );
            }
            userAgentData[key] = {
                name: name,
                string: string,
                version: version
            };
            return true;
        });
    });
    return userAgentData;
};

/*@
Ox.repeat <f> Repeat a value multiple times
    Works for arrays, numbers and strings
    > Ox.repeat(1, 3)
    "111"
    > Ox.repeat("foo", 3)
    "foofoofoo"
    > Ox.repeat([1, 2], 3)
    [1, 2, 1, 2, 1, 2]
    > Ox.repeat([{k: "v"}], 3)
    [{k: "v"}, {k: "v"}, {k: "v"}]
@*/
Ox.repeat = function(value, times) {
    var repeated;
    if (!Ox.isArray(value)) {
        // joining times + 1 empty slots places the string between them,
        // which makes exactly times copies
        return times >= 1 ? new Array(times + 1).join(value.toString()) : '';
    }
    // arrays: append the elements once per iteration
    repeated = [];
    Ox.loop(times, function() {
        repeated = repeated.concat(value);
    });
    return repeated;
};

/*@
Ox.startsWith <f> Checks if a string starts with a given substring
    If the substring is a string literal (and not a variable),
    <code>/^sub/.test(str)</code> or <code>!!/^sub/.exec(str)</code>
    is shorter than <code>Ox.starts(str, sub)</code>.
    > Ox.startsWith('foobar', 'foo')
    true
@*/
Ox.starts = Ox.startsWith = function(string, substring) {
    // fixme: rename to starts
    var haystack = string.toString(),
        needle = substring.toString();
    // a backwards search that begins at index 0 can only match at index 0
    return haystack.lastIndexOf(needle, 0) == 0;
};

/*@
Ox.stripTags <f> Strips HTML tags from a string
    > Ox.stripTags('f<span>o</span>o')
    'foo'
@*/
Ox.stripTags = function(string) {
    // [^>] (unlike '.') also matches line breaks, so that tags that span
    // more than one line are removed too
    // note: this removes markup, it does not make untrusted HTML safe
    return string.replace(/<[^>]*>/g, '');
};

/*@
Ox.toCamelCase <f> Takes a string with '-', '/' or '_', returns a camelCase string
    > Ox.toCamelCase('foo-bar-baz')
    'fooBarBaz'
    > Ox.toCamelCase('foo/bar/baz')
    'fooBarBaz'
    > Ox.toCamelCase('foo_bar_baz')
    'fooBarBaz'
@*/
Ox.toCamelCase = function(string) {
    // each match is a separator followed by a lowercase letter:
    // drop the separator, capitalize the letter
    function capitalize(match) {
        return match.charAt(1).toUpperCase();
    }
    return string.replace(/[\-\/_][a-z]/g, capitalize);
};

/*@
Ox.toDashes <f> Takes a camelCase string, returns a string with dashes
    > Ox.toDashes('fooBarBaz')
    'foo-bar-baz'
@*/
Ox.toDashes = function(string) {
    // an uppercase letter becomes a dash and the lowercase letter
    function dash(letter) {
        return '-' + letter.toLowerCase();
    }
    return string.replace(/[A-Z]/g, dash);
};

/*@
Ox.toSlashes <f> Takes a camelCase string, returns a string with slashes
    > Ox.toSlashes('fooBarBaz')
    'foo/bar/baz'
@*/
Ox.toSlashes = function(string) {
    // an uppercase letter becomes a slash and the lowercase letter
    function slash(letter) {
        return '/' + letter.toLowerCase();
    }
    return string.replace(/[A-Z]/g, slash);
};

/*@
Ox.toTitleCase <f> Returns a string with capitalized words
    > Ox.toTitleCase('foo')
    'Foo'
    > Ox.toTitleCase('Apple releases iPhone, IBM stock plummets')
    'Apple Releases iPhone, IBM Stock Plummets'
@*/
Ox.toTitleCase = function(string) {
    var words = string.split(' '),
        titled = [],
        i, rest, word;
    for (i = 0; i < words.length; i++) {
        word = words[i];
        rest = word.slice(1);
        // words that have uppercase letters after the first character
        // (like "iPhone" or "IBM") are left as they are
        titled.push(
            rest == rest.toLowerCase()
            ? word.charAt(0).toUpperCase() + rest
            : word
        );
    }
    return titled.join(' ');
};

/*@
Ox.toUnderscores <f> Takes a camelCase string, returns string with underscores
    > Ox.toUnderscores('fooBarBaz')
    'foo_bar_baz'
@*/
Ox.toUnderscores = function(string) {
    // an uppercase letter becomes an underscore and the lowercase letter
    function underscore(letter) {
        return '_' + letter.toLowerCase();
    }
    return string.replace(/[A-Z]/g, underscore);
};

/*@
Ox.truncate <f> Truncate a string to a given length
    (string, length) -> <s> Truncated string
    (string, length, padding) -> <s> Truncated string
    (string, length, padding, position) -> <s> Truncated string
    string <s> Any string
    length <n> Maximum length of the result, including the padding
    padding <s> Stands in for the omitted part (default: '...')
    position <s> Part to omit: 'left', 'center' or 'right' (default: 'right')
    > Ox.truncate('anticonstitutionellement', 16)
    'anticonstitut...'
    > Ox.truncate('anticonstitutionellement', 16, '...', 'left')
    '...utionellement'
    > Ox.truncate('anticonstitutionellement', 16, '>')
    'anticonstitutio>'
    > Ox.truncate('anticonstitutionellement', 16, '...', 'center')
    'anticon...lement'
    > Ox.truncate('anticonstitutionellement', 4, '...', 'center')
    'a...'
@*/
Ox.truncate = function(string, length, padding, position) {
    var available, head, tail;
    padding = padding || '...';
    position = position || 'right';
    // strings that fit, and unknown positions, are returned unchanged
    if (string.length > length) {
        // number of characters of the string that can be kept; if the
        // padding alone is longer than length, only the padding is returned
        available = Math.max(length - padding.length, 0);
        if (position == 'left') {
            string = padding + string.slice(string.length - available);
        } else if (position == 'center') {
            // if available is odd, the extra character goes to the left
            head = Math.ceil(available / 2);
            tail = Math.floor(available / 2);
            // slicing from the end by index (and not with slice(-tail))
            // yields an empty string, not the whole string, if tail is 0
            string = string.slice(0, head)
                + padding
                + string.slice(string.length - tail);
        } else if (position == 'right') {
            string = string.slice(0, available) + padding;
        }
    }
    return string;
};

/*@
Ox.words <f> Splits a string into words, removing punctuation
    (string) -> <[s]> Array of words
    string <s> Any string
    > Ox.words('Let\'s "split" array-likes into key/value pairs--okay?')
    ["let's", "split", "array-likes", "into", "key", "value", "pairs", "okay"]
@*/
Ox.words = function(string) {
    var parts = string.toLowerCase().split(/\b/),
        count = parts.length,
        // splitting at word boundaries makes words and non-words alternate,
        // so the first element tells which positions hold the words
        wordParity = /\w/.test(parts[0]) ? 0 : 1,
        i;
    // a lone "-" or "'" that is neither the first nor the last element is
    // merged, along with the word before it, into the word after it
    for (i = 1; i < count - 1; i++) {
        if (parts[i] == '-' || parts[i] == '\'') {
            parts[i + 1] = parts[i - 1] + parts[i] + parts[i + 1];
            parts[i - 1] = parts[i] = '';
        }
    }
    return parts.filter(function(part) {
        // drop what has been emptied above (always two elements at a time,
        // so that words and non-words keep alternating)
        return part.length;
    }).filter(function(part, index) {
        // keep the words, drop spaces and punctuation
        return index % 2 == wordParity;
    });
};

/*@
Ox.wordwrap <f> Wrap a string at word boundaries
    > Ox.wordwrap("Anticonstitutionellement, Paris s'eveille", 25, '<br/>')
    "Anticonstitutionellement, <br/>Paris s'eveille"
    > Ox.wordwrap("Anticonstitutionellement, Paris s'eveille", 16, '<br/>')
    "Anticonstitution<br/>ellement, Paris <br/>s'eveille"
    > Ox.wordwrap('These are short words', 16, '<br/>', true)
    'These are <br/>short words'
@*/
Ox.wordwrap = function(str, len, sep, bal, spa) {
    // fixme: bad API, sep/bal/spa should be in options object
    // str: the string to wrap (null and undefined count as '')
    // len: maximum line length (default: 80)
    // sep: line separator (default: '<br/>')
    // bal: if true, lines are balanced, i.e. wrapped at the shortest
    //      length that does not add lines (default: false)
    // spa: if false, whitespace at the beginning and end of each line is
    //      removed (default: true)
    var lines, longest, words;
    str = str == null ? '' : str.toString();
    len = Number(len) > 0 ? Number(len) : 80;
    sep = sep || '<br/>';
    bal = bal || false;
    spa = spa === undefined ? true : spa;
    words = str.split(' ');
    if (bal) {
        // balance lines: test if same number of lines
        // can be achieved with a shorter line length
        lines = Ox.wordwrap(str, len, sep, false).split(sep);
        if (lines.length > 1) {
            // test shorter line, unless
            // that means cutting a word
            longest = Math.max.apply(null, words.map(function(word) {
                return word.length;
            }));
            while (len > longest) {
                len--;
                if (
                    Ox.wordwrap(str, len, sep, false).split(sep).length
                    > lines.length
                ) {
                    len++;
                    break;
                }
            }
        }
    }
    lines = [''];
    words.forEach(function(word) {
        var last = lines.length - 1, pos;
        // lines include a trailing space, which does not count
        if ((lines[last] + word + ' ').length <= len + 1) {
            // word fits in current line
            lines[last] += word + ' ';
        } else if (word.length <= len) {
            // word fits in next line
            lines.push(word + ' ');
        } else {
            // word is longer than line: fill up the current line, then cut
            // the rest of the word into pieces of len characters
            // because of its trailing space, the current line can already
            // be longer than len, hence the lower bound of 0
            pos = Math.max(len - lines[last].length, 0);
            lines[last] += word.slice(0, pos);
            for (; pos < word.length; pos += len) {
                lines.push(word.substr(pos, len));
            }
            lines[lines.length - 1] += ' ';
        }
    });
    if (!spa) {
        lines = lines.map(function(line) {
            return line.trim();
        });
    }
    return lines.join(sep).trim();
};