2014-09-05 17:40:34 +02:00
from __future__ import absolute_import
2013-10-11 19:28:32 +02:00
import os
import re
import sys
from distutils import log
import xml.dom.pulldom
import shlex
import locale
import codecs
import unicodedata
import warnings
2014-08-12 10:44:01 +02:00
from setuptools.compat import unicode, PY2
2013-10-11 19:28:32 +02:00
from setuptools.py31compat import TemporaryDirectory
from xml.sax.saxutils import unescape
import urlparse
except ImportError:
import urllib.parse as urlparse
from subprocess import Popen as _Popen, PIPE as _PIPE
# NOTE: Use of the command line options requires SVN 1.3 or newer
#       (December 2005), and SVN 1.3 hasn't been supported by its developers
#       since mid 2008.

# subprocess is called several times with shell=(sys.platform == 'win32');
# see the following for more information:
#       http://bugs.python.org/issue8557
#       http://stackoverflow.com/questions/5658622/
#              python-subprocess-popen-environment-path
def _run_command(args, stdout=_PIPE, stderr=_PIPE, encoding=None, stream=0):
    """
    Run an external command and return ``(returncode, output)``.

    ``args`` is the argument list handed to ``Popen``; ``stdout`` and
    ``stderr`` are passed straight through to it.  ``stream`` selects which
    element of ``communicate()``'s result is returned (0 is stdout, 1 is
    stderr).  The selected output is decoded with ``decode_as_string`` using
    ``encoding``.

    If the process cannot be started (``OSError``), ``(1, '')`` is returned
    instead of raising.
    """
    # regarding the shell argument, see: http://bugs.python.org/issue8557
    try:
        proc = _Popen(args, stdout=stdout, stderr=stderr,
                      shell=(sys.platform == 'win32'))
        data = proc.communicate()[stream]
    except OSError:
        return 1, ''

    data = decode_as_string(data, encoding)

    # communicate() waits for the process, so returncode is populated here
    return proc.returncode, data
def _get_entry_schedule(entry):
schedule = entry.getElementsByTagName('schedule')[0]
return "".join([t.nodeValue
for t in schedule.childNodes
if t.nodeType == t.TEXT_NODE])
def _get_target_property(target):
property_text = target.getElementsByTagName('property')[0]
return "".join([t.nodeValue
for t in property_text.childNodes
if t.nodeType == t.TEXT_NODE])
def _get_xml_data(decoded_str):
    """
    Return ``decoded_str`` in the form handed to the XML parser.

    On Python 2 the text is encoded to UTF-8 bytes; on other versions it is
    returned unchanged.
    """
    if PY2:
        # old versions want an encoded string
        return decoded_str.encode('utf-8')
    return decoded_str
def joinpath(prefix, *suffix):
    """
    Join ``suffix`` components onto ``prefix`` with ``os.path.join``.

    An empty prefix or ``'.'`` is dropped, so the result is not prefixed
    with a redundant current-directory component.
    """
    if not prefix or prefix == '.':
        return os.path.join(*suffix)
    return os.path.join(prefix, *suffix)
def determine_console_encoding():
    """
    Work out which encoding to use for console output.

    The preferred locale encoding is tried first, falling back to the
    default locale's encoding when the former is empty or "US-ASCII".  A
    name that the ``codecs`` module cannot look up is discarded.  With no
    usable encoding the result is "utf-8" on OS X and "US-ASCII" elsewhere.
    """
    try:
        # try for the preferred encoding
        encoding = locale.getpreferredencoding()

        # see if the locale.getdefaultlocale returns null
        # some versions of python\platforms return US-ASCII
        # when it cannot determine an encoding
        if not encoding or encoding == "US-ASCII":
            encoding = locale.getdefaultlocale()[1]

        if encoding:
            codecs.lookup(encoding)  # make sure a lookup error is not made

    except (locale.Error, LookupError, ValueError):
        # ValueError: getdefaultlocale() raises it for an unknown locale name
        encoding = None

    is_osx = sys.platform == "darwin"
    if not encoding:
        return "utf-8" if is_osx else "US-ASCII"
    if is_osx and encoding.startswith("mac-"):
        # certain versions of python would return mac-roman as default
        # OSX as a left over of earlier mac versions.
        return "utf-8"
    return encoding


_console_encoding = determine_console_encoding()
def decode_as_string(text, encoding=None):
    """
    Decode the console or file output explicitly using getpreferredencoding.
    The text parameter should be an encoded string; if it is already
    unicode, no decode occurs.

    If no encoding is given, the console encoding determined at import time
    is used.  If encoding is specified, that is used instead.  This would be
    needed for SVN --xml output.  Unicode is explicitly put in composed NFC
    form.

    --xml should be UTF-8 (SVN Issue 2938); the discussion on the Subversion
    DEV List from 2007 seems to indicate the same.
    """
    # text should be a byte string
    if encoding is None:
        encoding = _console_encoding

    if not isinstance(text, unicode):
        text = text.decode(encoding)

    return unicodedata.normalize('NFC', text)
def parse_dir_entries(decoded_str):
    '''
    Parse the entries from a recursive info xml.

    Returns a list of ``(path, kind)`` tuples taken from the attributes of
    each ``<entry>`` element whose schedule does not start with "delete".
    The first collected entry (the root directory) is dropped.
    '''
    doc = xml.dom.pulldom.parseString(_get_xml_data(decoded_str))
    entries = list()

    for event, node in doc:
        if event == 'START_ELEMENT' and node.nodeName == 'entry':
            # pulldom yields bare start nodes; expand to get the children
            doc.expandNode(node)
            if not _get_entry_schedule(node).startswith('delete'):
                entries.append((node.getAttribute('path'),
                                node.getAttribute('kind')))

    return entries[1:]  # do not want the root directory
def parse_externals_xml(decoded_str, prefix=''):
    '''
    Parse a propget svn:externals xml.

    Each ``<target>`` element's ``path`` attribute is normalised and, when
    it starts with the normalised ``prefix``, made relative to it.  Every
    external listed in the target's property text is joined onto that path.
    Returns the resulting list of paths.
    '''
    prefix = os.path.normcase(os.path.normpath(prefix))

    doc = xml.dom.pulldom.parseString(_get_xml_data(decoded_str))
    externals = list()

    for event, node in doc:
        if event == 'START_ELEMENT' and node.nodeName == 'target':
            # pulldom yields bare start nodes; expand to get the children
            doc.expandNode(node)
            path = os.path.normpath(node.getAttribute('path'))

            if os.path.normcase(path).startswith(prefix):
                # +1 also drops the path separator following the prefix
                path = path[len(prefix)+1:]

            data = _get_target_property(node)
            # data should be decoded already
            for external in parse_external_prop(data):
                externals.append(joinpath(path, external))

    return externals
def parse_external_prop(lines):
    """
    Parse the value of a retrieved svn:externals entry.

    Possible token setups (with quoting and backslash escaping in later
    versions):
        URL[@#] EXT_FOLDERNAME
        [-r#] URL EXT_FOLDERNAME
        EXT_FOLDERNAME [-r#] URL

    Returns the list of external folder names, one per non-blank line.
    """
    externals = []
    for line in lines.splitlines():
        line = line.lstrip()  # there might be a "\ "
        if not line:
            continue

        if PY2:
            # shlex handles NULLs just fine and shlex in 2.7 tries to encode
            # as ascii automatically
            line = line.encode('utf-8')
        line = shlex.split(line)
        if PY2:
            line = [x.decode('utf-8') for x in line]

        # EXT_FOLDERNAME is either the first or last depending on where
        # the URL falls
        if urlparse.urlsplit(line[-1])[0]:
            external = line[0]
        else:
            external = line[-1]

        external = decode_as_string(external, encoding="utf-8")
        externals.append(os.path.normpath(external))

    return externals
def parse_prop_file(filename, key):
    """
    Return the value stored under ``key`` in an svn property file.

    The file is read as a sequence of header lines of the form
    ``K <length>`` / ``V <length>``, each followed by ``<length>`` characters
    of payload.  The payload of the first ``V`` record that follows a ``K``
    record equal to ``key`` is returned.  An empty string is returned when
    the key is not present.
    """
    found = False
    with open(filename, 'rt') as f:
        # readline() is used explicitly: iterating the file object directly
        # cannot be mixed with the read() calls below.
        for line in iter(f.readline, ''):
            parts = line.split()
            if len(parts) != 2:
                continue
            kind, length = parts
            data = f.read(int(length))
            if kind == 'K' and data == key:
                found = True
            elif kind == 'V' and found:
                return data
    return ''
class SvnInfo(object):
    '''
    Generic svn_info object.  It has little knowledge of how to extract
    information; ``get_entries`` and ``get_externals`` return nothing here.
    Use cls.load to instantiate the class matching the svn version.

    Paths are not filesystem encoded.
    '''

    @staticmethod
    def get_svn_version():
        '''
        Return the version string printed by the ``svn`` command, or ``''``
        when the command fails or prints nothing.
        '''
        # Temp config directory should be enough to check for repository
        # This is needed because .svn always creates .subversion and
        # some operating systems do not handle dot directory correctly.
        # Real queries in real svn repos will be concerned with its creation
        with TemporaryDirectory() as tempdir:
            code, data = _run_command(['svn',
                                       '--config-dir', tempdir,
                                       '--version',
                                       '--quiet'])

        if code == 0 and data:
            return data.strip()
        return ''

    # svnversion return values (previous implementations return max revision)
    #   4123:4168     mixed revision working copy
    #   4168M         modified working copy
    #   4123S         switched working copy
    #   4123:4168MS   mixed revision, modified, switched working copy
    revision_re = re.compile(r'(?:([\-0-9]+):)?(\d+)([a-z]*)\s*$', re.I)

    @classmethod
    def load(cls, dirname=''):
        '''
        Return the info object appropriate for ``dirname``.

        A plain ``SvnInfo`` is returned when ``dirname`` is not an svn
        working directory; ``SvnFileInfo`` (with a DeprecationWarning) when
        no svn 1.3+ command is usable; otherwise ``Svn13Info`` for svn
        older than 1.5 and ``Svn15Info`` for anything newer.
        '''
        normdir = os.path.normpath(dirname)

        # Temp config directory should be enough to check for repository
        # This is needed because .svn always creates .subversion and
        # some operating systems do not handle dot directory correctly.
        # Real queries in real svn repos will be concerned with its creation
        with TemporaryDirectory() as tempdir:
            code, data = _run_command(['svn',
                                       '--config-dir', tempdir,
                                       'info', normdir])

        # Must check for some contents, as some use empty directories
        # in testcases, however only entries is needed also the info
        # command above MUST have worked
        svn_dir = os.path.join(normdir, '.svn')
        is_svn_wd = (not code or
                     os.path.isfile(os.path.join(svn_dir, 'entries')))

        svn_version = tuple(cls.get_svn_version().split('.'))

        try:
            base_svn_version = tuple(int(x) for x in svn_version[:2])
        except ValueError:
            # empty or non-numeric version output
            base_svn_version = tuple()

        if not is_svn_wd:
            # return an instance of this NO-OP class
            return SvnInfo(dirname)

        if code or not base_svn_version or base_svn_version < (1, 3):
            warnings.warn(("No SVN 1.3+ command found: falling back "
                           "on pre 1.7 .svn parsing"), DeprecationWarning)
            return SvnFileInfo(dirname)

        if base_svn_version < (1, 5):
            return Svn13Info(dirname)

        return Svn15Info(dirname)

    def __init__(self, path=''):
        self.path = path
        # lazily filled caches behind the entries/externals properties
        self._entries = None
        self._externals = None

    def get_revision(self):
        'Retrieve the directory revision information using svnversion'
        code, data = _run_command(['svnversion', '-c', self.path])
        if code:
            log.warn("svnversion failed")
            return 0

        parsed = self.revision_re.match(data)
        if parsed:
            return int(parsed.group(2))
        return 0

    @property
    def entries(self):
        'Cached result of get_entries()'
        if self._entries is None:
            self._entries = self.get_entries()
        return self._entries

    @property
    def externals(self):
        'Cached result of get_externals()'
        if self._externals is None:
            self._externals = self.get_externals()
        return self._externals

    def iter_externals(self):
        '''
        Iterate over the svn:external references in the repository path.
        '''
        for item in self.externals:
            yield item

    def iter_files(self):
        '''
        Iterate over the non-deleted file entries in the repository path
        '''
        for item, kind in self.entries:
            if kind.lower() == 'file':
                yield item

    def iter_dirs(self, include_root=True):
        '''
        Iterate over the non-deleted directory entries in the repository
        path, starting with the path itself when include_root is true.
        '''
        if include_root:
            yield self.path
        for item, kind in self.entries:
            if kind.lower() == 'dir':
                yield item

    def get_entries(self):
        'Return the (path, kind) entries; none for this no-op class'
        return []

    def get_externals(self):
        'Return the external paths; none for this no-op class'
        return []
class Svn13Info(SvnInfo):
    'SvnInfo backed by the svn command line client'

    def get_entries(self):
        'Return the entries parsed from ``svn info -R --xml``'
        code, data = _run_command(['svn', 'info', '-R', '--xml', self.path],
                                  encoding="utf-8")

        if code:
            log.debug("svn info failed")
            return []

        return parse_dir_entries(data)

    def get_externals(self):
        '''
        Return the externals of every directory, queried one folder at a
        time with ``svn propget``.  If any query fails, an empty list is
        returned.
        '''
        # Previous to 1.5 --xml was not supported for svn propget and the -R
        # output format breaks the shlex compatible semantics.
        cmd = ['svn', 'propget', 'svn:externals']
        result = []
        for folder in self.iter_dirs():
            code, lines = _run_command(cmd + [folder], encoding="utf-8")
            if code != 0:
                log.warn("svn propget failed")
                return []
            # lines should be a str
            for external in parse_external_prop(lines):
                if folder:
                    external = os.path.join(folder, external)
                result.append(os.path.normpath(external))

        return result
class Svn15Info(Svn13Info):
    'Svn13Info variant that reads externals from a single recursive query'

    def get_externals(self):
        '''
        Return the externals parsed from ``svn propget -R --xml``, or an
        empty list when the command fails.
        '''
        cmd = ['svn', 'propget', 'svn:externals', self.path, '-R', '--xml']
        code, lines = _run_command(cmd, encoding="utf-8")
        if code:
            log.debug("svn propget failed")
            return []
        prefix = os.path.abspath(self.path)
        return parse_externals_xml(lines, prefix=prefix)
class SvnFileInfo(SvnInfo):
    'SvnInfo that reads the .svn administrative files directly'

    def __init__(self, path=''):
        super(SvnFileInfo, self).__init__(path)
        self._directories = None
        self._revision = None

    def _walk_svn(self, base):
        '''
        Yield ``(path, isfile, revision)`` tuples for ``base`` and, through
        recursion, everything recorded below it.  Directories carry the
        revision parsed from their entries file; files carry ``None``.
        Nothing is yielded when ``base`` has no ``.svn/entries`` file.
        '''
        entry_file = joinpath(base, '.svn', 'entries')
        if os.path.isfile(entry_file):
            entries = SVNEntriesFile.load(base)
            yield (base, False, entries.parse_revision())
            for path in entries.get_undeleted_records():
                path = decode_as_string(path)
                path = joinpath(base, path)
                if os.path.isfile(path):
                    yield (path, True, None)
                elif os.path.isdir(path):
                    for item in self._walk_svn(path):
                        yield item

    def _build_entries(self):
        'Walk the working copy once, caching the entries and max revision'
        entries = list()

        rev = 0
        for path, isfile, dir_rev in self._walk_svn(self.path):
            if isfile:
                entries.append((path, 'file'))
            else:
                entries.append((path, 'dir'))
                # only directories carry a revision (files yield None)
                rev = max(rev, dir_rev)

        self._entries = entries
        self._revision = rev

    def get_entries(self):
        'Return the cached (path, kind) entries, building them if needed'
        if self._entries is None:
            self._build_entries()
        return self._entries

    def get_revision(self):
        'Return the cached revision, building it if needed'
        if self._revision is None:
            self._build_entries()
        return self._revision

    def get_externals(self):
        '''
        Return the externals listed in the property file of each directory.
        When both candidate property files exist, the later one in
        ``prop_files`` is used.
        '''
        prop_files = [['.svn', 'dir-prop-base'],
                      ['.svn', 'dir-props']]
        externals = []

        for dirname in self.iter_dirs():
            prop_file = None
            for rel_parts in prop_files:
                filename = joinpath(dirname, *rel_parts)
                if os.path.isfile(filename):
                    prop_file = filename

            if prop_file is not None:
                ext_prop = parse_prop_file(prop_file, 'svn:externals')
                # ext_prop should be utf-8 coming from svn:externals
                ext_prop = decode_as_string(ext_prop, encoding="utf-8")
                externals.extend(parse_external_prop(ext_prop))

        return externals
def svn_finder(dirname=''):
    """
    Yield the file paths recorded for ``dirname``, followed by the file
    paths recorded for each of its externals.
    """
    # combined externals due to common interface
    # combined externals and entries due to lack of dir_props in 1.7
    info = SvnInfo.load(dirname)
    for path in info.iter_files():
        yield path

    for external_path in info.iter_externals():
        sub_info = SvnInfo.load(external_path)
        for sub_path in sub_info.iter_files():
            yield sub_path
class SVNEntriesFile(object):
    """
    Base class for a parsed ``.svn/entries`` file.

    Subclasses provide ``parse_revision_numbers`` (and the other parsing
    methods); ``load``/``read`` choose the subclass from the file content.
    """

    def __init__(self, data):
        self.data = data

    @classmethod
    def load(class_, base):
        "Read ``<base>/.svn/entries`` and return the matching parser object"
        filename = os.path.join(base, '.svn', 'entries')
        # the context manager closes the file even if read() raises
        with open(filename) as f:
            return SVNEntriesFile.read(f)

    @classmethod
    def read(class_, fileobj):
        "Return an XML or text parser object for the content of ``fileobj``"
        data = fileobj.read()
        if data.startswith('<?xml'):
            return SVNEntriesFileXML(data)
        return SVNEntriesFileText(data)

    def parse_revision(self):
        "Return the highest revision number found, or 0 when there is none"
        all_revs = self.parse_revision_numbers() + [0]
        return max(all_revs)
class SVNEntriesFileText(SVNEntriesFile):
    "Parser for the plain-text entries file format"

    known_svn_versions = {
        '1.4.x': 8,
        '1.5.x': 9,
        '1.6.x': 10,
    }

    def __get_cached_sections(self):
        return self.sections

    def get_sections(self):
        """
        Split the data into sections, each a list of lines, and cache them.

        The leading format-version line is removed from the first section;
        an unknown version only logs a warning.  An empty list is returned
        (and nothing is cached) when that line is missing or not a number.
        """
        # NOTE(review): form feed + newline is assumed to be the record
        # separator of the text entries format -- confirm against svn docs.
        SECTION_DIVIDER = '\f\n'
        sections = [section.splitlines()
                    for section in self.data.split(SECTION_DIVIDER)]
        try:
            # remove the SVN version number from the first line
            svn_version = int(sections[0].pop(0))
        except (ValueError, IndexError):
            return []
        if svn_version not in self.known_svn_versions.values():
            log.warn("Unknown subversion verson %d", svn_version)
        self.sections = sections
        # later calls skip the parsing and return the cached sections
        self.get_sections = self.__get_cached_sections
        return self.sections

    def is_valid(self):
        return bool(self.get_sections())

    def get_url(self):
        return self.get_sections()[0][4]

    def parse_revision_numbers(self):
        "Return the revision number of every section that records one"
        revision_line_number = 9
        rev_numbers = [
            int(section[revision_line_number])
            for section in self.get_sections()
            if (len(section) > revision_line_number
                and section[revision_line_number])
        ]
        return rev_numbers

    def get_undeleted_records(self):
        "Return the name of every section not marked as 'delete'"
        undeleted = lambda s: s and s[0] and (len(s) < 6 or s[5] != 'delete')
        result = [
            section[0]
            for section in self.get_sections()
            if undeleted(section)
        ]
        return result
class SVNEntriesFileXML(SVNEntriesFile):
    "Parser for the XML entries file format (regex based)"

    def is_valid(self):
        return True

    def get_url(self):
        "Get repository URL"
        urlre = re.compile('url="([^"]+)"')
        return urlre.search(self.data).group(1)

    def parse_revision_numbers(self):
        "Return every committed-rev attribute value as an int"
        revre = re.compile(r'committed-rev="(\d+)"')
        return [
            int(m.group(1))
            for m in revre.finditer(self.data)
        ]

    def get_undeleted_records(self):
        "Return the unescaped name of every entry not marked deleted"
        # the lookahead rejects a name followed, within the same tag,
        # by deleted="true"
        entries_pattern = \
            re.compile(r'name="([^"]+)"(?![^>]+deleted="true")', re.I)
        results = [
            unescape(match.group(1))
            for match in entries_pattern.finditer(self.data)
        ]
        return results
if __name__ == '__main__':
    # print every file svn knows about below the directory given as argv[1]
    for name in svn_finder(sys.argv[1]):
        print(name)