131 lines
3.9 KiB
Python
131 lines
3.9 KiB
Python
|
#!/usr/bin/env python3
|
||
|
|
||
|
"""\
|
||
|
List python source files.
|
||
|
|
||
|
There are three functions to check whether a file is a Python source, listed
|
||
|
here with increasing complexity:
|
||
|
|
||
|
- has_python_ext() checks whether a file name ends in '.py[w]'.
|
||
|
- looks_like_python() checks whether the file is not binary and either has
|
||
|
the '.py[w]' extension or the first line contains the word 'python'.
|
||
|
- can_be_compiled() checks whether the file can be compiled by compile().
|
||
|
|
||
|
For the latter two checks the file also must be of appropriate size - not bigger than a megabyte.
|
||
|
|
||
|
walk_python_files() recursively lists all Python files under the given directories.
|
||
|
"""
|
||
|
__author__ = "Oleg Broytmann, Georg Brandl"
|
||
|
|
||
|
# Names exported by a star-import of this module; the print_debug() and
# _open() helpers are deliberately left out.
__all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"]
|
||
|
|
||
|
|
||
|
import os, re
|
||
|
|
||
|
# Matches any byte in 0x00-0x08, 0x0E-0x1F or 0x7F.  The bytes 0x09-0x0D
# (tab, LF, VT, FF, CR) are not matched.  looks_like_python() treats a first
# line containing a matching byte as a sign that the file is binary.
binary_re = re.compile(br'[\x00-\x08\x0E-\x1F\x7F]')
|
||
|
|
||
|
# Module-level switch read by print_debug(): diagnostics are printed only
# while this is true.
debug = False
|
||
|
|
||
|
def print_debug(msg):
    """Print *msg* to stdout, but only while the module-level ``debug`` flag is true."""
    if not debug:
        return
    print(msg)
|
||
|
|
||
|
|
||
|
def _open(fullpath):
    """Open *fullpath* for binary reading, or return None if it should be skipped.

    A file is skipped (and a debug message is emitted) when its size cannot
    be determined, when it is larger than one megabyte, or when opening it
    fails.  On success the caller owns the returned file object and is
    responsible for closing it.
    """
    size_limit = 1024 * 1024  # one megabyte

    try:
        nbytes = os.path.getsize(fullpath)
    except OSError as exc:  # could not stat the file - ignore it
        print_debug("%s: permission denied: %s" % (fullpath, exc))
        return None

    if nbytes > size_limit:
        print_debug("%s: the file is too big: %d bytes" % (fullpath, nbytes))
        return None

    try:
        stream = open(fullpath, "rb")
    except OSError as exc:  # access denied, or a special file - ignore it
        print_debug("%s: access denied: %s" % (fullpath, exc))
        return None
    return stream
|
||
|
|
||
|
def has_python_ext(fullpath):
    """Return True if the name *fullpath* ends in '.py' or '.pyw'.

    Only the name is inspected; the file is never opened and the comparison
    is case-sensitive.
    """
    return fullpath.endswith((".py", ".pyw"))
|
||
|
|
||
|
def looks_like_python(fullpath):
    """Return True if *fullpath* looks like a Python source file.

    Only the first line of the file is read.  The file is rejected when it
    cannot be opened by _open() (unreadable or larger than a megabyte) or
    when that first line contains a byte matched by ``binary_re``.
    Otherwise it is accepted if its name has a Python extension (see
    has_python_ext()) or if the first line contains the bytes ``python``,
    which catches scripts without an extension such as CGI programs.
    """
    infile = _open(fullpath)
    if infile is None:
        return False

    with infile:
        first_line = infile.readline()

    if binary_re.search(first_line):
        # file appears to be binary
        print_debug("%s: appears to be binary" % fullpath)
        return False

    # Reuse the shared extension check instead of duplicating it here, so
    # both functions always agree on what a Python extension is.
    return has_python_ext(fullpath) or b"python" in first_line
|
||
|
|
||
|
def can_be_compiled(fullpath):
    """Return True if the contents of *fullpath* compile as a Python module.

    The file is opened through _open(), so unreadable files and files larger
    than a megabyte yield False.  The whole file is read as bytes and handed
    to compile() in "exec" mode; any exception raised by compile() is
    reported through print_debug() and turned into a False result.
    """
    stream = _open(fullpath)
    if stream is None:
        return False

    with stream:
        source = stream.read()

    compiled = True
    try:
        compile(source, fullpath, "exec")
    except Exception as exc:
        print_debug("%s: cannot compile: %s" % (fullpath, exc))
        compiled = False
    return compiled
|
||
|
|
||
|
|
||
|
def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
    """\
    Recursively yield all Python source files below the given paths.

    paths: a list of files and/or directories to be checked.  A file is
        yielded if is_python() accepts it; a directory is walked with
        os.walk() and every accepted file below it is yielded.  Anything
        that is neither a file nor a directory is skipped.
    is_python: a function that takes a file name and checks whether it is a
        Python source file
    exclude_dirs: a list of directory base names that should be excluded in
        the search
    """
    if exclude_dirs is None:
        exclude_dirs = []

    for path in paths:
        print_debug("testing: %s" % path)

        if os.path.isfile(path):
            if is_python(path):
                yield path
            continue

        if not os.path.isdir(path):
            print_debug(" unknown type")
            continue

        print_debug(" it is a directory")
        for dirpath, dirnames, filenames in os.walk(path):
            # Pruning dirnames in place stops os.walk() from descending
            # into the excluded directories.
            for unwanted in exclude_dirs:
                try:
                    dirnames.remove(unwanted)
                except ValueError:
                    pass  # not present at this level

            for filename in filenames:
                candidate = os.path.join(dirpath, filename)
                print_debug("testing: %s" % candidate)
                if is_python(candidate):
                    yield candidate
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
    # Two simple examples/tests, both run against the current directory.

    # 1. Default check: files accepted by looks_like_python().
    for source_path in walk_python_files(['.']):
        print(source_path)

    print("----------")

    # 2. Stricter check: only files that compile() accepts.
    for source_path in walk_python_files(['.'], is_python=can_be_compiled):
        print(source_path)
|