586 lines
14 KiB
Python
586 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
|
|
from argparse import ArgumentParser
|
|
from pathlib import Path
|
|
import pathspec # use this for git style ignore
|
|
import sys
|
|
import os
|
|
import shutil
|
|
import re
|
|
|
|
# Name of the bookkeeping directory that is joined onto the source path.
metadatastore = ".draincleaner"
# Name of the JSON file kept inside the bookkeeping directory.
metadatafile = "draincleaner.json"
# Name of the git-style ignore file that is read from the source path.
ignorefile = ".ignorecomments"
|
|
|
|
|
|
def import_file(file):
    """Return the full text content of ``file``.

    Args:
        file: Path (str or path-like) of the text file to read.

    Returns:
        The file content as a single string.
    """
    # The context manager closes the handle; the previous version leaked it.
    # The platform default encoding is kept on purpose so existing callers
    # see exactly the same decoded text as before.
    with open(file, "r") as handle:
        return handle.read()
|
|
|
|
import tokenize
|
|
from io import StringIO
|
|
|
|
def remove_special_comment_lines(path, prefix="# ¤"):
    """Return the source of ``path`` with all ``prefix`` comments removed.

    A line consisting only of a matching comment is dropped entirely. A
    matching comment that trails code is cut off, the code is right-stripped
    and terminated with a newline. Text inside string literals is never
    touched because comments are located with the tokenizer.

    The file itself is not modified; the cleaned text is returned.

    Args:
        path: Python source file to read.
        prefix: A comment is removed when its text starts with this string.

    Returns:
        The cleaned source as one string.
    """
    # tokenize.open decodes the file using its coding cookie / BOM.
    with tokenize.open(path) as f:
        code = f.read()

    lines = code.splitlines(keepends=True)

    # Feed the tokenizer the very same list of lines so that token row
    # numbers index straight into `lines`. Returning "" signals EOF.
    line_iter = iter(lines)

    def readline():
        return next(line_iter, "")

    lines_to_skip = set()
    trailing_comment_col = {}  # line number -> column where the comment starts

    for tok in tokenize.generate_tokens(readline):
        if tok.type != tokenize.COMMENT or not tok.string.startswith(prefix):
            continue
        start_line, start_col = tok.start
        if lines[start_line - 1][:start_col].strip() == "":
            # Nothing but whitespace before the comment: drop the whole line.
            lines_to_skip.add(start_line)
        else:
            trailing_comment_col[start_line] = start_col

    new_lines = []
    for i, line in enumerate(lines, start=1):
        if i in lines_to_skip:
            continue
        if i in trailing_comment_col:
            line = line[:trailing_comment_col[i]].rstrip() + "\n"
        new_lines.append(line)

    return "".join(new_lines)
|
|
|
|
def find_print_statements(filepath):
    """
    Scans a Python source file and returns all print(...) calls
    along with their line numbers.

    A hit is a NAME token ``print`` immediately followed by an opening
    parenthesis, so a bare reference such as ``x = print`` is not reported.

    Args:
        filepath: Path to the Python source file.

    Returns:
        A list of (line_number, source_line) tuples in file order, where
        source_line is the stripped physical line reported by the tokenizer.
    """
    with open(filepath, "rb") as f:
        tokens = list(tokenize.tokenize(f.readline))

    # Pair every token with its successor; the final token has no successor
    # and can never start a call, so it is safe to leave it out.
    return [
        (tok.start[0], tok.line.strip())
        for tok, next_tok in zip(tokens, tokens[1:])
        if tok.type == tokenize.NAME
        and tok.string == "print"
        and next_tok.type == tokenize.OP
        and next_tok.string == "("
    ]
|
|
|
|
def find_strings(filepath):
    """
    Collect every STRING token of a Python source file.

    Args:
        filepath: Path to the Python source file.

    Returns:
        A list of (line_number, string_value) tuples in file order.
        string_value is the literal exactly as written in the source
        (quotes and prefix included); line_number is the line on which
        the literal starts.
    """
    # NOTE(review): only tokens of type STRING are collected; whether
    # f-strings arrive as STRING depends on the interpreter's tokenizer.
    with open(filepath, "rb") as source:
        return [
            (token.start[0], token.string)
            for token in tokenize.tokenize(source.readline)
            if token.type == tokenize.STRING
        ]
|
|
|
|
def strip_ansi(text):
    """Return ``text`` with ANSI escape sequences removed."""
    # ESC followed by either a single control character or a CSI sequence
    # ("[" + parameter bytes + intermediate bytes + final byte).
    ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
    return ansi_escape.sub('', text)
|
|
|
|
def _emit(message, colored):
    """Print ``message``, stripping ANSI escape codes unless ``colored`` is true."""
    print(message if colored else strip_ansi(message))


def _print_listing(heading, rows, colored):
    """Print ``heading`` and then one right-aligned ``line: text`` row per entry.

    ``rows`` must be a non-empty list of (line_number, text) tuples; the
    column width is taken from the line number of the last row.
    """
    _emit(heading, colored)
    places = len(str(rows[-1][0]))
    for line_number, text in rows:
        label = ' ' + str(line_number).rjust(places)
        _emit(f'{orange}{label}:{reset} {blue}{text}{reset}', colored)
    print('')


def _write_output(item, target, text, args):
    """Write ``text`` to ``target`` unless this is a dry run; report when verbose."""
    if not args.dry_run:
        with open(target, "w", encoding="utf-8") as f:
            f.write(text)
    if args.verbose:
        _emit(f'{white}Writing {item.name} => {target}{reset}', args.colored)


def main(args):
    """Mirror ``args.sourcepath`` into ``args.targetpath`` with comments stripped.

    Every path below the source root is visited. Paths inside the metadata
    directory and paths matched by the module-level ``spec`` are skipped.
    Directories are created in the target, ``.py`` and ``.md`` files are
    written with their (prefixed or all) comments removed, and every other
    file is copied unchanged. Unless ``args.replace_all`` is set, only files
    listed in the module-level ``modified_files`` set are written or copied.

    Args:
        args: Parsed command line namespace. The attributes read are
            sourcepath, targetpath, follow_symlinks, verbose, colored,
            dry_run, replace_all, remove_all, show_prints and show_strings.
    """
    python_normal = '# '
    python_prefix = '# ¤'

    sourcepath = Path(args.sourcepath)
    targetpath = Path(args.targetpath)

    for item in sourcepath.glob('**/*', recurse_symlinks=args.follow_symlinks):

        sourcepath_relative = item.relative_to(sourcepath)
        target = targetpath.joinpath(sourcepath_relative)

        # Exact component match. The old substring test
        # (`part in metadatastore`) skipped any path that had a component
        # such as "e" or "drain", including components of the source root.
        if metadatastore in sourcepath_relative.parts:
            continue
        if spec.match_file(sourcepath_relative):
            continue

        if item.is_dir(follow_symlinks=args.follow_symlinks) and not target.exists():
            if args.verbose:
                _emit(f'{target} does not exist, creating...', args.colored)
            if not args.dry_run:
                os.makedirs(target, exist_ok=True)

        if not item.is_file():
            continue

        # modified_files holds POSIX-style relative paths, so compare in that
        # form; str() would use backslashes on Windows and never match.
        selected = args.replace_all or sourcepath_relative.as_posix() in modified_files

        if item.suffix not in ('.py', '.md'):
            if selected:
                if args.verbose:
                    if args.replace_all:
                        output_copy = f'{green}Copying {item.name} => {target}{reset}'
                    else:
                        output_copy = f'{green}Copying{reset} {item.name} => {target}{reset}'
                    _emit(output_copy, args.colored)
                # A dry run must not touch the target tree.
                if not args.dry_run:
                    shutil.copy2(item.resolve(), target, follow_symlinks=args.follow_symlinks)
            continue

        if item.suffix == '.py':
            if selected:
                if args.remove_all:
                    finalform = remove_special_comment_lines(item, prefix=python_normal)
                    output_removal = f'{red}Removing all comments from {item.name} => {item}{reset}'
                else:
                    finalform = remove_special_comment_lines(item, prefix=python_prefix)
                    output_removal = f'{red}Removing prefixed comments from {item.name} => {item}{reset}'

                # The "Removing ..." line is only reported in replace-all
                # mode, as before.
                if args.replace_all and args.verbose:
                    _emit(output_removal, args.colored)

                _write_output(item, target, finalform, args)

            if args.show_prints:
                printlines = find_print_statements(item)
                if printlines:
                    _print_listing(
                        f'\n{yellow}print statements in {item}:{reset}\n',
                        printlines,
                        args.colored,
                    )

            if args.show_strings:
                stringlines = find_strings(item)
                if stringlines:
                    _print_listing(
                        f'\n{yellow}Strings in {item}:{reset}\n',
                        stringlines,
                        args.colored,
                    )
            continue

        # Remaining case: markdown. Comments are HTML comments, optionally
        # restricted to the ones carrying the ¤ prefix.
        if selected:
            markdownstring = import_file(item)
            if args.remove_all:
                finalform = re.sub(r'<!--.*?-->', '', markdownstring, flags=re.DOTALL)
            else:
                finalform = re.sub(r'<!--¤.*?-->', '', markdownstring, flags=re.DOTALL)
            _write_output(item, target, finalform, args)
|
|
|
|
|
|
from random import choice
|
|
from info import logo, pink, green, yellow, red, orange, blue, white, reset, randomcolor
|
|
|
|
# Palette that randomcolor() picks from; the names are imported from `info`.
colors = [
    pink,
    green,
    yellow,
    red,
    orange,
    blue,
    white,
]
|
|
|
|
def randomcolor(colorlist):
    """Return one randomly chosen element of ``colorlist``."""
    picked = choice(colorlist)
    return picked
|
|
|
|
|
|
# Program title (with logo); it is handed to argparse as `prog`.
title = f'draincleaner - Source file comment removal utility\n{logo}\n'

# The help description starts with a randomly picked colour and ends with
# the reset code.
description_list = [
    f'{randomcolor(colors)}Will ignore paths specified in file named ".ignorecomments" in source path.\n',
    'A directory ".draincleaner" is created in the source directory containing metadata.\n\n\n',
    f'draincleaner can run without arguments, when doing so in the root path it will use the same source and target path as last run.\n\n\n{reset}',
]

description_string = ''.join(description_list)

parser = ArgumentParser(prog=title, description=description_string)

# Boolean switches, registered in the order they appear in the help text.
for short_flag, long_flag, help_text in (
    ('-v', '--verbose', 'Show verbose output'),
    ('-c', '--colored', 'Show colored output text'),
    ('-d', '--dry-run', 'Run utility without actually making any changes'),
    ('-r', '--remove-all', 'Remove all comments'),
    ('-a', '--replace-all', 'Replace everything in target path'),
    ('-p', '--show-prints', 'List all print statements'),
    ('-s', '--show-strings', 'List all strings'),
    ('-f', '--follow-symlinks', 'Walk symlinked paths'),
):
    parser.add_argument(short_flag, long_flag, action='store_true', help=help_text)

# Required positional paths.
for positional in ('sourcepath', 'targetpath'):
    parser.add_argument(positional, type=Path)
|
|
|
|
|
|
import json
|
|
|
|
def load_metadata():
    """Load the stored metadata from the module-level ``metadata_filepath``.

    Returns:
        The decoded JSON document, or ``{"files": {}}`` when no metadata
        file exists yet.
    """
    # Open the file directly instead of testing the directory first: the
    # directory may exist without the JSON file in it, which used to raise
    # FileNotFoundError.
    try:
        with open(metadata_filepath, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return {"files": {}}
|
|
|
|
def save_metadata(data):
    """Write ``data`` as indented JSON to the module-level ``metadata_filepath``.

    The metadata directory (``metadatapath``) is created first when it does
    not exist; in verbose mode that is announced on stdout.

    Args:
        data: JSON-serialisable object to store.
    """
    if not metadatapath.exists():
        if arguments.verbose:
            notice = f'Initialize metadata storage: {metadatapath}'
            print(notice if arguments.colored else strip_ansi(notice))

        os.makedirs(metadatapath, exist_ok=False)

    with metadata_filepath.open('w') as handle:
        json.dump(data, handle, indent=4)
|
|
|
|
def build_snapshot(source_root):
    """Record modification time and size of every tracked file under ``source_root``.

    Files inside the metadata directory, the ignore file itself and files
    matched by the module-level ``spec`` are left out.

    Args:
        source_root: Directory (``pathlib.Path``) to scan recursively.

    Returns:
        A dict mapping the POSIX-style path relative to ``source_root`` to
        ``{"mtime_ns": ..., "size": ...}``.
    """
    snapshot = {}

    for path in source_root.rglob('*', recurse_symlinks=arguments.follow_symlinks):
        if not path.is_file():
            continue

        # Work relative to the argument, not the global `sourcepath`, so the
        # function is correct for whatever root it is called with.
        sourcepath_relative = path.relative_to(source_root)

        if metadatastore in sourcepath_relative.parts or ignorefile in sourcepath_relative.parts:
            continue
        if spec.match_file(sourcepath_relative):
            continue

        stat = path.stat()
        snapshot[sourcepath_relative.as_posix()] = {
            "mtime_ns": stat.st_mtime_ns,
            "size": stat.st_size,
        }

    return snapshot
|
|
|
|
|
|
|
|
def _announce(message):
    """Print a change report, stripping ANSI codes unless --colored was given."""
    print(message if arguments.colored else strip_ansi(message))


if len(sys.argv) == 1:

    parser.print_help()

else:

    arguments = parser.parse_args()

    sourcepath = Path(arguments.sourcepath)
    targetpath = Path(arguments.targetpath)

    # A missing ignore file simply means "ignore nothing"; it used to abort
    # the run with FileNotFoundError.
    path_ignorefile = sourcepath.joinpath(ignorefile)
    try:
        ignore_patterns = path_ignorefile.read_text().splitlines()
    except FileNotFoundError:
        ignore_patterns = []
    spec = pathspec.PathSpec.from_lines("gitwildmatch", ignore_patterns)

    metadatapath = sourcepath.joinpath(metadatastore)
    metadata_filepath = metadatapath.joinpath(metadatafile)

    metadata = load_metadata()

    old_files = metadata.get('files', {})
    new_files = build_snapshot(sourcepath)

    # Relative POSIX paths of files that are new or changed since the last
    # run; main() consults this set.
    modified_files = set()

    for rel, info in new_files.items():

        old_file = old_files.get(rel)

        if old_file is None:
            # new file
            _announce(f'{green}NEW FILE: {Path(rel).name} in {rel}{reset}')
            modified_files.add(rel)

        elif (info['mtime_ns'] != old_file.get('mtime_ns')
              or info['size'] != old_file.get('size')):
            # modified file: the recorded size is checked as well as the
            # modification time
            _announce(f'{yellow}MODIFIED: {Path(rel).name} in {rel}{reset}')
            modified_files.add(rel)

    # check removed paths
    for item in old_files:
        if item not in new_files:
            _announce(f'{red}REMOVED: {Path(item).name} in {item}{reset}')

    metadata['files'] = new_files

    main(arguments)

    # Persist the snapshot only after main() finished, so a run that fails
    # half-way does not mark unprocessed files as up to date.
    if not arguments.dry_run:
        save_metadata(metadata)
|
|
|
|
|