oilarchive/oilarchive/utils.py

# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2

import re

'''
Returns the given HTML with all unencoded ampersands encoded correctly
'''
# Matches an '&' that does NOT begin a character reference.  The negative
# lookahead accepts the three reference forms and leaves them untouched:
#   &name;   named entity          (\w+)
#   &#39;    decimal reference     (#\d+)
#   &#x27;   hexadecimal reference (#[xX][0-9a-fA-F]+)
unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+|#[xX][0-9a-fA-F]+);)')
def fix_ampersands(value):
    '''
    Return value with every bare ampersand replaced by '&amp;'.

    Ampersands that already start a named, decimal or hexadecimal
    character reference (e.g. '&amp;', '&#39;', '&#x27;') are left as is,
    so running the function twice gives the same result as running it once.
    '''
    return unencoded_ampersands_re.sub('&amp;', value)


'''
  Highlight search term in text, skipping HTML tags and script elements
'''
def insideTag(s):
  '''
  Tell whether the end of s lies inside an unclosed tag.

  True when s contains a '<' and no '>' occurs after the last '<'.
  '''
  lastOpen = s.rfind('<')
  if lastOpen == -1:
    # No tag was ever opened.
    return False
  # The last '<' is still open unless a '>' follows it.
  return s.rfind('>') <= lastOpen

def insideScript(s):
  '''
  Tell whether the end of s lies inside an unclosed script element.

  True when s contains '<script' and no '/script>' occurs after the last
  '<script'.  The comparison is case-sensitive.
  '''
  lastStart = s.rfind('<script')
  if lastStart == -1:
    # No script element was ever opened.
    return False
  # The last '<script' is still open unless a '/script>' follows it.
  return s.rfind('/script>') <= lastStart

def _lowerKeepingLength(text):
  '''Lower-case text while keeping every character at its original index.'''
  lowered = text.lower()
  if len(lowered) == len(text):
    return lowered
  # Unicode case mapping may expand one character into several (U+0130
  # lowers to two code points).  Offsets found in such a string would not
  # line up with text, so keep any character whose lower-case form is not
  # exactly one character long.
  return u''.join(low if len(low) == 1 else ch
                  for ch, low in ((c, c.lower()) for c in text))

def _insideMarkup(lowered, end):
  '''True when offset end of lowered is inside a tag or a script element.'''
  # Same rules as insideTag()/insideScript() applied to lowered[:end], but
  # using bounded rfind() so no copy of the prefix is made for every match.
  lastOpen = lowered.rfind('<', 0, end)
  if lastOpen != -1 and lowered.rfind('>', 0, end) < lastOpen:
    return True
  lastScript = lowered.rfind('<script', 0, end)
  return lastScript != -1 and lowered.rfind('/script>', 0, end) < lastScript

def highlightText(text, term):
  '''
  Wrap each case-insensitive occurrence of term in text in a
  <span class="textHighlight"> element.

  Occurrences that start inside a tag or inside a script element are copied
  through unchanged.  Matches are non-overlapping and found left to right;
  the matched text keeps its original case.

  text -- markup to search
  term -- search term; when empty or only whitespace, text is returned as is

  Returns the text with the highlight markup inserted.
  '''
  if not term.strip():
    return text

  highlightStart = u'<span class="textHighlight">'
  highlightEnd = u'</span>'
  needle = term.lower()
  needleLength = len(needle)
  # Lower-case once; every offset below is valid for both strings.
  haystack = _lowerKeepingLength(text)

  pieces = []
  pos = 0
  while True:
    hit = haystack.find(needle, pos)
    if hit == -1:
      # No further match: keep the remainder untouched.
      pieces.append(text[pos:])
      break
    matchEnd = hit + needleLength
    if _insideMarkup(haystack, hit):
      pieces.append(text[pos:matchEnd])
    else:
      pieces.append(text[pos:hit])
      pieces.append(highlightStart)
      pieces.append(text[hit:matchEnd])
      pieces.append(highlightEnd)
    pos = matchEnd
  return u''.join(pieces)

'''
  Format the value like a 'human-readable' file size (i.e. 13 KB, 4.1 MB, 102
  bytes, etc).
'''
def formatFileSize(bytes):
  '''
  Render a byte count as a short human-readable size string.

  bytes -- anything float() accepts (number or numeric string)

  Below 1024 the result is a whole number of bytes ("1 byte", "102 bytes").
  Larger values are scaled by powers of 1024: KB as a truncated integer,
  MB with one decimal, GB with two, and everything beyond that as TB with
  three decimals.
  '''
  size = float(bytes)
  step = 1024

  if size < step:
    plural = '' if size == 1 else 's'
    return "%d byte%s" % (size, plural)

  # (exclusive upper limit, divisor, output template) for each scaled unit.
  scaledUnits = (
    (step * step, step, "%d KB"),
    (step * step * step, step * step, "%.1f MB"),
    (step * step * step * step, step * step * step, "%.2f GB"),
  )
  for limit, divisor, template in scaledUnits:
    if size < limit:
      return template % (size / divisor)

  # Everything else, however large, is reported in TB.
  return "%.3f TB" % (size / (step * step * step * step))