draincleaner/draincleaner.py

#!/usr/bin/env python3
from argparse import ArgumentParser
from pathlib import Path
import pathspec # use this for git style ignore
import sys
import os
import shutil
import re

metadatastore = '.draincleaner'
metadatafile = 'draincleaner.json'
ignorefile = '.ignorecomments'


def import_file(file):
	"""Return the entire contents of ``file`` as a string.

	Args:
		file: Path of the text file to read.

	Returns:
		The decoded file contents.
	"""
	# Decode as UTF-8 to match the encoding this module uses when writing its
	# output, and use a context manager so the handle is always closed.
	with open(file, "r", encoding="utf-8") as handle:
		return handle.read()

import tokenize
from io import StringIO

def remove_special_comment_lines(path, prefix="# ¤"):
	"""Return the source of ``path`` with ``prefix``-marked comments removed.

	The file is tokenized so that only real comment tokens are considered;
	text inside a string literal that merely looks like a comment is kept.
	A line holding nothing but a matching comment is dropped entirely. A
	matching comment that trails code is cut off and the whitespace left in
	front of it is trimmed.

	Args:
		path: Path of the Python source file to read.
		prefix: Text a comment token must start with to be removed.

	Returns:
		The filtered source as one string. The file on disk is not modified.
		Errors raised by the tokenizer for source it cannot tokenize are
		propagated to the caller.
	"""
	# tokenize.open decodes the file using its detected source encoding.
	with tokenize.open(path) as f:
		code = f.read()

	lines = code.splitlines(keepends=True)

	lines_to_skip = set()  # 1-based numbers of comment-only lines
	cut_columns = {}       # 1-based line number -> column where the comment starts

	for tok in tokenize.generate_tokens(iter(lines).__next__):
		if tok.type != tokenize.COMMENT or not tok.string.startswith(prefix):
			continue

		start_line, start_col = tok.start

		if lines[start_line - 1][:start_col].strip() == "":
			# Nothing but whitespace before the comment: drop the whole line.
			lines_to_skip.add(start_line)
		else:
			cut_columns[start_line] = start_col

	last_line_number = len(lines)
	new_lines = []

	for number, line in enumerate(lines, start=1):
		if number in lines_to_skip:
			continue

		if number in cut_columns:
			# Do not invent a newline on a final line that never had one.
			unterminated = number == last_line_number and not line.endswith("\n")
			ending = "" if unterminated else "\n"
			line = line[:cut_columns[number]].rstrip() + ending

		new_lines.append(line)

	return "".join(new_lines)

def find_print_statements(filepath):
	"""
	Locate calls to print() in a Python source file.

	The file is tokenized and every NAME token spelled "print" that is
	directly followed by an opening parenthesis is reported, so a bare
	reference to the name (without a call) is not listed.

	Args:
		filepath: Path to the Python source file.

	Returns:
		A list of (line_number, source_line) tuples, where source_line is
		the stripped text of the line the token was found on.
	"""
	with open(filepath, "rb") as source:
		token_stream = list(tokenize.tokenize(source.readline))

	def is_call_to_print(current, following):
		# NAME "print" followed by OP "(" means a call, not just a mention.
		return (
			current.type == tokenize.NAME
			and current.string == "print"
			and following.type == tokenize.OP
			and following.string == "("
		)

	# Pair each token with its successor; the final token has no successor
	# and therefore can never be reported.
	return [
		(current.start[0], current.line.strip())
		for current, following in zip(token_stream, token_stream[1:])
		if is_call_to_print(current, following)
	]

def find_strings(filepath):
	"""
	Collect the string literal tokens of a Python source file.

	Args:
		filepath: Path to the Python source file.

	Returns:
		A list of (line_number, string_value) tuples in token order, where
		line_number is the line the literal starts on and string_value is
		the literal exactly as written in the source, quotes included.
	"""
	with open(filepath, "rb") as source:
		token_stream = list(tokenize.tokenize(source.readline))

	return [
		(token.start[0], token.string)
		for token in token_stream
		if token.type == tokenize.STRING
	]

# Matches an ESC byte followed by either a single-character escape or a
# bracketed sequence of parameter, intermediate and final bytes.
_ANSI_ESCAPE_PATTERN = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')


def strip_ansi(text):
	"""Return ``text`` with every ANSI escape sequence removed."""
	return _ANSI_ESCAPE_PATTERN.sub('', text)

def _emit(message, colored):
	"""Print ``message``, stripping ANSI escape codes unless ``colored`` is set."""
	print(message if colored else strip_ansi(message))


def _write_text(target, text, dry_run):
	"""Write ``text`` to ``target`` as UTF-8; do nothing on a dry run."""
	if dry_run:
		return

	# The '**/*' glob in main() never yields the source root itself, so the
	# target root is not created by main()'s directory handling.
	target.parent.mkdir(parents=True, exist_ok=True)

	with open(target, "w", encoding="utf-8") as f:
		f.write(text)


def _list_matches(heading, matches, colored):
	"""Print ``heading`` and the ``(line_number, text)`` pairs, numbers right-aligned."""
	if not matches:
		return

	_emit(heading, colored)

	# Matches arrive in token order, so the last one carries the widest number.
	width = len(str(matches[-1][0]))

	for line_number, text in matches:
		_emit(f'{orange} {str(line_number).rjust(width)}:{reset} {blue}{text}{reset}', colored)

	print('')


def main(args):
	"""Mirror the source tree into the target tree, stripping comments on the way.

	Every entry below ``args.sourcepath`` is visited. Entries inside the
	metadata directory or matched by the module-level ``spec`` are skipped.
	Directories are created in the target. A file is processed when
	``args.replace_all`` is set or its relative path is in the module-level
	``modified_files``:

	* ``.py`` files are written with prefixed comments removed, or with every
	  ``'# '`` comment removed when ``args.remove_all`` is set.
	* ``.md`` files are written with prefixed HTML comments removed, or with
	  every HTML comment removed when ``args.remove_all`` is set.
	* every other file is copied unchanged.

	With ``args.dry_run`` nothing is created, written or copied; verbose
	messages are still printed. ``args.show_prints`` and ``args.show_strings``
	list print() calls and string literals of every visited ``.py`` file.

	Args:
		args: Parsed command-line namespace providing ``sourcepath``,
			``targetpath``, ``follow_symlinks``, ``verbose``, ``colored``,
			``dry_run``, ``replace_all``, ``remove_all``, ``show_prints``
			and ``show_strings``.
	"""
	python_normal = '# '
	python_prefix = '# ¤'

	sourcepath = Path(args.sourcepath)
	targetpath = Path(args.targetpath)

	for item in sourcepath.glob('**/*', recurse_symlinks=args.follow_symlinks):

		sourcepath_relative = item.relative_to(sourcepath)
		target = targetpath.joinpath(sourcepath_relative)

		# Compare whole path components: a substring test against the store
		# name would also skip entries named e.g. "a", "drain" or "clean".
		if metadatastore in item.parts:
			continue

		if spec.match_file(sourcepath_relative):
			continue

		if item.is_dir(follow_symlinks=args.follow_symlinks):
			if not target.exists():
				if args.verbose:
					_emit(f'{target} does not exist, creating...', args.colored)

				if not args.dry_run:
					os.makedirs(target, exist_ok=True)

		if not item.is_file():
			continue

		# The change set is keyed by POSIX-style relative paths (see
		# build_snapshot), so compare in that form on every platform.
		selected = args.replace_all or sourcepath_relative.as_posix() in modified_files

		if item.suffix == '.py':

			if selected:
				if args.remove_all:
					prefix = python_normal
					removal_note = f'{red}Removing all comments from {item.name}  => {item}{reset}'
				else:
					prefix = python_prefix
					removal_note = f'{red}Removing prefixed comments from {item.name}  => {item}{reset}'

				finalform = remove_special_comment_lines(item, prefix=prefix)

				# The removal note is only shown on full replacement runs.
				if args.replace_all and args.verbose:
					_emit(removal_note, args.colored)

				_write_text(target, finalform, args.dry_run)

				if args.verbose:
					_emit(f'{white}Writing {item.name}  =>  {target}{reset}', args.colored)

			if args.show_prints:
				_list_matches(
					f'\n{yellow}print statements in {item}:{reset}\n',
					find_print_statements(item),
					args.colored,
				)

			if args.show_strings:
				_list_matches(
					f'\n{yellow}Strings in {item}:{reset}\n',
					find_strings(item),
					args.colored,
				)

		elif item.suffix == '.md':

			if selected:
				# Either every HTML comment (multi-line ones included) or only
				# the prefixed ones.
				pattern = r'<!--.*?-->' if args.remove_all else r'<!--¤.*?-->'
				finalform = re.sub(pattern, '', import_file(item), flags=re.DOTALL)

				_write_text(target, finalform, args.dry_run)

				if args.verbose:
					_emit(f'{white}Writing {item.name}  =>  {target}{reset}', args.colored)

		elif selected:

			if args.verbose:
				# Message wording is kept exactly as each mode printed it before.
				if args.replace_all:
					output_copy = f'{green}Copying {item.name}  =>  {target}{reset}'
				else:
					output_copy = f'{green}Copying{reset} {item.name}  =>  {target}{reset}'

				_emit(output_copy, args.colored)

			# A dry run must not touch the target tree.
			if not args.dry_run:
				target.parent.mkdir(parents=True, exist_ok=True)
				shutil.copy2(item.resolve(), target, follow_symlinks=args.follow_symlinks)


from random import choice
from info import logo, pink, green, yellow, red, orange, blue, white, reset, randomcolor

# Colour values imported from info; randomcolor(colors) picks one of them
# for the help description.
colors = [pink, green, yellow, red, orange, blue, white]

def randomcolor(colorlist):
	"""Return one randomly chosen element of ``colorlist``.

	Note: this definition replaces the ``randomcolor`` name imported from
	``info`` above.
	"""
	picked = choice(colorlist)
	return picked


# Passed to argparse as ``prog``: a one-line title followed by the logo.
title = f'draincleaner - Source file comment removal utility\n{logo}\n'

# Help description; it opens with a randomly picked colour code and ends
# with the reset code.
description_list = [
	f'{randomcolor(colors)}Will ignore paths specified in file named ".ignorecomments" in source path.\n',
	'A directory ".draincleaner" is created in the source directory containing metadata.\n\n\n',
	f'draincleaner can run without arguments, when doing so in the root path it will use the same source and target path as last run.\n\n\n{reset}',
]

description_string = ''.join(description_list)


def _build_parser():
	"""Create the command-line parser with all switches and both positional paths."""
	built = ArgumentParser(prog=title, description=description_string)

	# (short flag, long flag, help text) of every boolean switch, in the
	# order they are registered.
	switches = (
		('-v', '--verbose', 'Show verbose output'),
		('-c', '--colored', 'Show colored output text'),
		('-d', '--dry-run', 'Run utility without actually making any changes'),
		('-r', '--remove-all', 'Remove all comments'),
		('-a', '--replace-all', 'Replace everything in target path'),
		('-p', '--show-prints', 'List all print statements'),
		('-s', '--show-strings', 'List all strings'),
		('-f', '--follow-symlinks', 'Walk symlinked paths'),
	)

	for short_flag, long_flag, help_text in switches:
		built.add_argument(short_flag, long_flag, action='store_true', help=help_text)

	for positional in ('sourcepath', 'targetpath'):
		built.add_argument(positional, type=Path)

	return built


parser = _build_parser()


import json

def load_metadata():
	"""Load the metadata stored by a previous run.

	Reads the module-level ``metadata_filepath``.

	Returns:
		The decoded JSON document, or ``{"files": {}}`` when no metadata
		file exists.
	"""
	# Test the file itself rather than its directory: an existing metadata
	# directory without the JSON file counts as "no previous run" instead
	# of raising FileNotFoundError.
	if not metadata_filepath.is_file():
		return {"files": {}}

	with metadata_filepath.open("r") as f:
		return json.load(f)

def save_metadata(data):
	"""Store ``data`` as indented JSON in the metadata file.

	The metadata directory is created first when it does not exist, which
	is announced on verbose runs. Uses the module-level ``metadatapath``,
	``metadata_filepath`` and ``arguments``.

	Args:
		data: JSON-serialisable object to store.
	"""
	store_exists = metadatapath.exists()

	if not store_exists:

		if arguments.verbose == True:
			notice = f'Initialize metadata storage: {metadatapath}'
			print(notice if arguments.colored == True else strip_ansi(notice))

		# exist_ok=False: raise if the directory appeared after the check.
		os.makedirs(metadatapath, exist_ok=False)

	with metadata_filepath.open('w') as handle:
		json.dump(data, handle, indent=4)

def build_snapshot(source_root):
	"""Record modification time and size of every tracked file below ``source_root``.

	Files are left out when a component of their path equals the metadata
	directory name or the ignore file name, or when the module-level
	``spec`` matches their path relative to ``source_root``. Symlinked
	directories are walked according to ``arguments.follow_symlinks``.

	Args:
		source_root: Directory to scan, as a ``pathlib.Path``.

	Returns:
		A dict mapping each file's POSIX-style path relative to
		``source_root`` to ``{"mtime_ns": ..., "size": ...}``.
	"""
	snapshot = {}

	for path in source_root.rglob('*', recurse_symlinks=arguments.follow_symlinks):

		if not path.is_file():
			continue

		if metadatastore in path.parts or ignorefile in path.parts:
			continue

		# Keys are relative to the directory that was passed in, not to the
		# module-level source path.
		relative = path.relative_to(source_root)

		if spec.match_file(relative):
			continue

		stat = path.stat()
		snapshot[relative.as_posix()] = {
			"mtime_ns": stat.st_mtime_ns,
			"size": stat.st_size
		}

	return snapshot


def _announce(message):
	"""Print a status line, stripping ANSI codes unless --colored was given."""
	print(message if arguments.colored else strip_ansi(message))


# Script body: with no command-line arguments only the help text is shown.
if len(sys.argv) == 1:

	parser.print_help()

else:

	arguments = parser.parse_args()

	sourcepath = Path(arguments.sourcepath)
	targetpath = Path(arguments.targetpath)

	# A missing ignore file means "ignore nothing" rather than a crash.
	path_ignorefile = sourcepath.joinpath(ignorefile)
	if path_ignorefile.is_file():
		ignore_patterns = path_ignorefile.read_text().splitlines()
	else:
		ignore_patterns = []

	spec = pathspec.PathSpec.from_lines("gitwildmatch", ignore_patterns)

	metadatapath = sourcepath.joinpath(metadatastore)
	metadata_filepath = metadatapath.joinpath(metadatafile)

	metadata = load_metadata()

	old_files = metadata.get('files', {})
	new_files = build_snapshot(sourcepath)

	# Relative paths (POSIX style) of files that are new or changed since
	# the stored snapshot; main() reads this set.
	modified_files = set()

	for rel, info in new_files.items():

		old_file = old_files.get(rel)

		if old_file is None:

			# new file
			_announce(f'{green}NEW FILE: {Path(rel).name} in {rel}{reset}')
			modified_files.add(rel)

		elif (info['mtime_ns'] != old_file.get('mtime_ns')
				or info['size'] != old_file.get('size')):

			# modified file: the timestamp or the recorded size differs
			_announce(f'{yellow}MODIFIED: {Path(rel).name} in {rel}{reset}')
			modified_files.add(rel)

	# check removed paths
	for item in old_files:
		if item not in new_files:
			_announce(f'{red}REMOVED: {Path(item).name} in {item}{reset}')

	metadata['files'] = new_files

	main(arguments)

	# Store the snapshot only after main() has finished: if processing
	# fails, the same files are detected as changed again on the next run.
	if arguments.dry_run != True:
		save_metadata(metadata)