# -*- test-case-name: twisted.test.test_text -*- # # Copyright (c) Twisted Matrix Laboratories. # See LICENSE for details. """ Miscellany of text-munging functions. """ def stringyString(object, indentation=''): """ Expansive string formatting for sequence types. C{list.__str__} and C{dict.__str__} use C{repr()} to display their elements. This function also turns these sequence types into strings, but uses C{str()} on their elements instead. Sequence elements are also displayed on seperate lines, and nested sequences have nested indentation. """ braces = '' sl = [] if type(object) is dict: braces = '{}' for key, value in object.items(): value = stringyString(value, indentation + ' ') if isMultiline(value): if endsInNewline(value): value = value[:-len('\n')] sl.append("%s %s:\n%s" % (indentation, key, value)) else: # Oops. Will have to move that indentation. sl.append("%s %s: %s" % (indentation, key, value[len(indentation) + 3:])) elif type(object) is tuple or type(object) is list: if type(object) is tuple: braces = '()' else: braces = '[]' for element in object: element = stringyString(element, indentation + ' ') sl.append(element.rstrip() + ',') else: sl[:] = map(lambda s, i=indentation: i + s, str(object).split('\n')) if not sl: sl.append(indentation) if braces: sl[0] = indentation + braces[0] + sl[0][len(indentation) + 1:] sl[-1] = sl[-1] + braces[-1] s = "\n".join(sl) if isMultiline(s) and not endsInNewline(s): s = s + '\n' return s def isMultiline(s): """ Returns C{True} if this string has a newline in it. """ return (s.find('\n') != -1) def endsInNewline(s): """ Returns C{True} if this string ends in a newline. """ return (s[-len('\n'):] == '\n') def greedyWrap(inString, width=80): """ Given a string and a column width, return a list of lines. Caveat: I'm use a stupid greedy word-wrapping algorythm. I won't put two spaces at the end of a sentence. I don't do full justification. And no, I've never even *heard* of hypenation. """ outLines = [] #eww, evil hacks to allow paragraphs delimited by two \ns :( if inString.find('\n\n') >= 0: paragraphs = inString.split('\n\n') for para in paragraphs: outLines.extend(greedyWrap(para, width) + ['']) return outLines inWords = inString.split() column = 0 ptr_line = 0 while inWords: column = column + len(inWords[ptr_line]) ptr_line = ptr_line + 1 if (column > width): if ptr_line == 1: # This single word is too long, it will be the whole line. pass else: # We've gone too far, stop the line one word back. ptr_line = ptr_line - 1 (l, inWords) = (inWords[0:ptr_line], inWords[ptr_line:]) outLines.append(' '.join(l)) ptr_line = 0 column = 0 elif not (len(inWords) > ptr_line): # Clean up the last bit. outLines.append(' '.join(inWords)) del inWords[:] else: # Space column = column + 1 # next word return outLines wordWrap = greedyWrap def removeLeadingBlanks(lines): ret = [] for line in lines: if ret or line.strip(): ret.append(line) return ret def removeLeadingTrailingBlanks(s): lines = removeLeadingBlanks(s.split('\n')) lines.reverse() lines = removeLeadingBlanks(lines) lines.reverse() return '\n'.join(lines)+'\n' def splitQuoted(s): """ Like a string split, but don't break substrings inside quotes. >>> splitQuoted('the "hairy monkey" likes pie') ['the', 'hairy monkey', 'likes', 'pie'] Another one of those "someone must have a better solution for this" things. This implementation is a VERY DUMB hack done too quickly. """ out = [] quot = None phrase = None for word in s.split(): if phrase is None: if word and (word[0] in ("\"", "'")): quot = word[0] word = word[1:] phrase = [] if phrase is None: out.append(word) else: if word and (word[-1] == quot): word = word[:-1] phrase.append(word) out.append(" ".join(phrase)) phrase = None else: phrase.append(word) return out def strFile(p, f, caseSensitive=True): """ Find whether string C{p} occurs in a read()able object C{f}. @rtype: C{bool} """ buf = "" buf_len = max(len(p), 2**2**2**2) if not caseSensitive: p = p.lower() while 1: r = f.read(buf_len-len(p)) if not caseSensitive: r = r.lower() bytes_read = len(r) if bytes_read == 0: return False l = len(buf)+bytes_read-buf_len if l <= 0: buf = buf + r else: buf = buf[l:] + r if buf.find(p) != -1: return True