# Compiled once at import time; strip_ansi() runs for every uncoloured message.
_ANSI_ESCAPE = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')


def import_file(file):
    """Return the entire text content of ``file``.

    Args:
        file: Path of the text file to read.

    Returns:
        The file content as one string.
    """
    # The context manager closes the handle (it used to be left open), and
    # UTF-8 mirrors the encoding used when the processed text is written to
    # the target file.
    with open(file, "r", encoding="utf-8") as handle:
        return handle.read()


def remove_special_comment_lines(path, prefix="# ¤"):
    """Return the source of ``path`` with every ``prefix`` comment removed.

    A line that holds nothing but a matching comment is dropped entirely.
    A matching comment that trails code is cut off together with the
    whitespace in front of it.  Comments are located with ``tokenize``, so a
    ``#`` inside a string literal is never touched.  The file on disk is not
    modified; the caller decides where the result is written.

    Args:
        path: Python source file to read; its encoding is detected by
            ``tokenize.open``.
        prefix: Text a comment token must start with in order to be removed.

    Returns:
        The filtered source code as a single string.

    Errors raised by the tokenizer for source it cannot process propagate to
    the caller.
    """
    with tokenize.open(path) as f:
        code = f.read()

    # Split on "\n" only.  str.splitlines() also breaks on form feed, U+2028
    # and similar characters, which would hand the tokenizer pieces that are
    # not real source lines (for example half of a string literal).
    lines = StringIO(code).readlines()

    whole_line_comments = set()
    trailing_comment_col = {}  # 1-based line number -> column of the comment

    for tok in tokenize.generate_tokens(StringIO(code).readline):
        if tok.type != tokenize.COMMENT or not tok.string.startswith(prefix):
            continue
        row, col = tok.start
        if lines[row - 1][:col].strip() == "":
            whole_line_comments.add(row)
        else:
            trailing_comment_col[row] = col

    kept = []
    for number, line in enumerate(lines, start=1):
        if number in whole_line_comments:
            continue
        col = trailing_comment_col.get(number)
        if col is not None:
            line = line[:col].rstrip() + "\n"
        kept.append(line)

    return "".join(kept)


def find_print_statements(filepath):
    """Locate calls to the built-in ``print`` in a Python source file.

    A hit is a ``print`` name token immediately followed by ``(``.  Attribute
    calls such as ``obj.print(...)`` and definitions (``def print(...)``) are
    skipped because they are not calls to the built-in.

    Args:
        filepath: Path to the Python source file.

    Returns:
        A list of ``(line_number, source_line)`` tuples in file order, where
        ``source_line`` is the stripped physical line containing the call.
    """
    with open(filepath, "rb") as f:
        tokens = list(tokenize.tokenize(f.readline))

    results = []
    for index, tok in enumerate(tokens[:-1]):
        if tok.type != tokenize.NAME or tok.string != "print":
            continue
        following = tokens[index + 1]
        if following.type != tokenize.OP or following.string != "(":
            continue
        # tokens[0] is always the ENCODING token, so index - 1 is valid here.
        before = tokens[index - 1]
        if before.type == tokenize.OP and before.string == ".":
            continue
        if before.type == tokenize.NAME and before.string == "def":
            continue
        results.append((tok.start[0], tok.line.strip()))

    return results


def _source_between(lines, start, end):
    """Return the source text between two tokenizer ``(row, col)`` positions."""
    (start_row, start_col), (end_row, end_col) = start, end
    if start_row == end_row:
        return lines[start_row - 1][start_col:end_col]
    pieces = [lines[start_row - 1][start_col:]]
    pieces.extend(lines[start_row:end_row - 1])
    pieces.append(lines[end_row - 1][:end_col])
    return "".join(pieces)


def find_strings(filepath):
    """List every string literal in a Python source file.

    Plain, byte and raw literals arrive from the tokenizer as single
    ``STRING`` tokens.  From Python 3.12 on, f-strings are instead split into
    ``FSTRING_START`` / ``FSTRING_MIDDLE`` / ``FSTRING_END`` tokens, so they
    are stitched back together from the source text here; otherwise they
    would be missing from the result.  Literals nested inside the replacement
    fields of an f-string are not reported separately.

    Args:
        filepath: Path to the Python source file.

    Returns:
        A list of ``(line_number, literal_source)`` tuples in file order,
        where ``line_number`` is the 1-based line on which the literal starts.
    """
    with tokenize.open(filepath) as f:
        source = f.read()
    lines = StringIO(source).readlines()

    # These token types only exist on newer interpreters; look them up
    # defensively so the function keeps working on older ones.
    opening_types = set()
    closing_types = set()
    for family in ("FSTRING", "TSTRING"):
        if hasattr(tokenize, family + "_START"):
            opening_types.add(getattr(tokenize, family + "_START"))
            closing_types.add(getattr(tokenize, family + "_END"))

    results = []
    open_literals = []  # start positions of the f-strings currently open
    for tok in tokenize.generate_tokens(StringIO(source).readline):
        if tok.type in opening_types:
            open_literals.append(tok.start)
        elif tok.type in closing_types and open_literals:
            start = open_literals.pop()
            if not open_literals:
                literal = _source_between(lines, start, tok.end)
                results.append((start[0], literal))
        elif tok.type == tokenize.STRING and not open_literals:
            results.append((tok.start[0], tok.string))

    return results


def strip_ansi(text):
    """Return ``text`` with ANSI escape sequences (colour codes etc.) removed."""
    return _ANSI_ESCAPE.sub('', text)
+ + if args.colored != True: + print(strip_ansi(output_gendir)) + else: + print(output_gendir) + + if args.dry_run != True: + + os.makedirs(target, exist_ok=True) + + if item.is_file() == True: # item is a file + + if (item.suffix != '.py') and (item.suffix != '.md'): + + if args.replace_all != True: + + if str(sourcepath_relative) in modified_files: + shutil.copy2(item.resolve(), target, follow_symlinks=args.follow_symlinks) + print(item.resolve()) + + if args.verbose: + + output_copy = f'{green}Copying{reset} {item.name} => {target}{reset}' + + if args.colored != True: + + print(strip_ansi(output_copy)) + + else: + print(output_copy) + else: + + if args.verbose: + + output_copy = f'{green}Copying {item.name} => {target}{reset}' + + if args.colored != True: + + print(strip_ansi(output_copy)) + + else: + + print(output_copy) + + shutil.copy2(item.resolve(), target, follow_symlinks=args.follow_symlinks) + + + elif item.suffix == '.py': + + if args.replace_all != True: + + if str(sourcepath_relative) in modified_files: # check if new/modified + + if args.remove_all == True: + finalform = remove_special_comment_lines(item, prefix=python_normal) + + if args.remove_all == False: + finalform = remove_special_comment_lines(item, prefix=python_prefix) + + if args.dry_run != True: + + with open(target, "w", encoding="utf-8") as f: + f.writelines(''.join(finalform)) + + else: + pass + + if args.verbose: + output_writing = f'{white}Writing {item.name} => {target}{reset}' + + if args.colored != True: + + print(strip_ansi(output_writing)) + + else: + print(output_writing) + + else: + + # Remove all comments + if args.remove_all == True: + finalform = remove_special_comment_lines(item, prefix=python_normal) + + if args.verbose: + + output_commentremoval = f'{red}Removing all comments from {item.name} => {item}{reset}' + + if args.colored != True: + print(strip_ansi(output_commentremoval)) + else: + print(output_commentremoval) + + if args.remove_all == False: + + finalform = 
remove_special_comment_lines(item, prefix=python_prefix) + + if args.verbose: + + output_prefixremoval = f'{red}Removing prefixed comments from {item.name} => {item}{reset}' + + if args.colored != True: + print(strip_ansi(output_prefixremoval)) + + else: + print(output_prefixremoval) + + if args.dry_run != True: + + with open(target, "w", encoding="utf-8") as f: + f.writelines(''.join(finalform)) + + else: + pass + + if args.verbose: + + output_writing = f'{white}Writing {item.name} => {target}{reset}' + + if args.colored != True: + + print(strip_ansi(output_writing)) + + else: + print(output_writing) + + if args.show_prints: + printlines = find_print_statements(item) + + if printlines != []: + + outputpath = f'\n{yellow}print statements in {item}:{reset}\n' + + if args.colored != True: + + print(strip_ansi(outputpath)) + + else: + print(outputpath) + + lastprint = printlines[-1:] + lastline = lastprint[0][0] + places = len(str(lastline)) + + for line in printlines: + + numplaces = len(str(line[0])) + zeros = places - numplaces + + linespacing = ' ' + + for p in range(zeros): + linespacing += ' ' + + outputprint = f'{orange}{linespacing + str(line[0])}:{reset} {blue}{line[1]}{reset}' + + if args.colored != True: + + print(strip_ansi(outputprint)) + + else: + print(outputprint) + + print('') + + if args.show_strings: + + stringlines = find_strings(item) + + if stringlines != []: + + outputpath = f'\n{yellow}Strings in {item}:{reset}\n' + + if args.colored != True: + print(strip_ansi(outputpath)) + + else: + + print(outputpath) + + laststring = stringlines[-1:] + lastline = laststring[0][0] + places = len(str(lastline)) + + for line in stringlines: + + numplaces = len(str(line[0])) + zeros = places - numplaces + + linespacing = ' ' + + for p in range(zeros): + linespacing += ' ' + + + outputstring = f'{orange}{linespacing + str(line[0])}:{reset} {blue}{line[1]}{reset}' + + if args.colored != True: + print(strip_ansi(outputstring)) + else: + print(outputstring) + + 
print('') + + + if item.suffix == '.md': + + if args.replace_all != True: + + + if str(sourcepath_relative) in modified_files: # check if new/modified + + markdownstring = import_file(item) + + if args.remove_all != True: + + # remove comments with prefix + finalform = re.sub(r'', '', markdownstring, flags=re.DOTALL) + + else: + + # Remove all HTML comments, including multiline ones + finalform = re.sub(r'', '', markdownstring, flags=re.DOTALL) + + if args.dry_run != True: + + with open(target, "w", encoding="utf-8") as f: + f.writelines(''.join(finalform)) + else: + pass + + if args.verbose: + + output_writing = f'{white}Writing {item.name} => {target}{reset}' + + if args.colored != True: + print(strip_ansi(output_writing)) + else: + print(output_writing) + else: + + markdownstring = import_file(item) + + if args.remove_all != True: + + finalform = re.sub(r'', '', markdownstring, flags=re.DOTALL) + + else: + + finalform = re.sub(r'', '', markdownstring, flags=re.DOTALL) + + if args.dry_run != True: + + with open(target, "w", encoding="utf-8") as f: + f.writelines(''.join(finalform)) + + else: + pass + + if args.verbose: + + output_writing = f'{white}Writing {item.name} => {target}{reset}' + + if args.colored != True: + + print(strip_ansi(output_writing)) + + else: + + print(output_writing) + + +from random import choice +from info import logo, pink, green, yellow, red, orange, blue, white, reset, randomcolor + +colors = [pink, green, yellow, red, orange, blue, white] + +def randomcolor(colorlist): + return choice(colorlist) + + +title = f'draincleaner - Source file comment removal utility\n{logo}\n' + +description_list = [ + f'{randomcolor(colors)}Will ignore paths specified in file named ".ignorecomments" in source path.\n', + f'A directory ".draincleaner" is created in the source directory containing metadata.\n\n\n', + f'draincleaner can run without arguments, when doing so in the root path it will use the same source and target path as last run.\n\n\n{reset}' 
def load_metadata():
    """Load the snapshot metadata written by a previous run.

    Reads the module-level ``metadata_filepath``.

    Returns:
        The parsed JSON document, or ``{"files": {}}`` when no metadata file
        exists yet.  A file that exists but is not valid JSON still raises,
        so a corrupted store is not silently treated as empty.
    """
    # Try the file itself instead of testing the directory first: the
    # directory can exist while the JSON file is missing.
    try:
        with open(metadata_filepath, "r") as f:
            return json.load(f)
    except FileNotFoundError:
        return {"files": {}}


def save_metadata(data):
    """Write ``data`` as JSON to the module-level ``metadata_filepath``.

    Creates the metadata directory (``metadatapath``) on first use and, when
    ``arguments.verbose`` is set, announces that it is doing so.

    Args:
        data: JSON-serialisable metadata document.
    """
    if not metadatapath.exists():
        if arguments.verbose:
            output_initmetadata = f'Initialize metadata storage: {metadatapath}'
            if arguments.colored:
                print(output_initmetadata)
            else:
                print(strip_ansi(output_initmetadata))

        # exist_ok=True: tolerate the directory appearing between the check
        # above and this call.
        os.makedirs(metadatapath, exist_ok=True)

    with metadata_filepath.open('w') as f:
        json.dump(data, f, indent=4)


def build_snapshot(source_root):
    """Record modification time and size of every tracked file below a root.

    Files inside the metadata directory, the ignore file itself, and anything
    matched by the module-level ``spec`` are left out.  Symlinked directories
    are walked only when ``arguments.follow_symlinks`` is set.

    Args:
        source_root: ``pathlib.Path`` of the directory to scan.

    Returns:
        A dict mapping each file's POSIX-style path relative to
        ``source_root`` to ``{"mtime_ns": ..., "size": ...}``.
    """
    snapshot = {}

    for path in source_root.rglob('*', recurse_symlinks=arguments.follow_symlinks):
        if not path.is_file():
            continue
        if metadatastore in path.parts or ignorefile in path.parts:
            continue

        # Relative to the root that was passed in, not to a module global,
        # so the function works for whichever directory the caller supplies.
        relative = path.relative_to(source_root)
        if spec.match_file(relative):
            continue

        stat = path.stat()
        snapshot[relative.as_posix()] = {
            "mtime_ns": stat.st_mtime_ns,
            "size": stat.st_size
        }

    return snapshot
"\x1b[38;2;255;130;0m" +blue = "\x1b[38;2;40;160;255m" +white = "\x1b[38;2;255;255;255m" +reset = "\x1b[0m" + +def randomcolor(): + return choice([pink, green, yellow, red, orange, blue]) + +logo0 = f"""{randomcolor()} + ▎▎ + ▊█▉▊▍▍▍▎▏▍▋▁▂▋ + ▏▃█▋▋▅▇████▆▉▎ + ▏▅██▄▂▄▆▇▇██▅▍ + ▏▅██████▋▉▁▉▊▃▊ + ▏▃████▄▃▆▄▅▅▍▏▅▂ + ▏▍▎▎ ▎▄████▅▅▉▊▉▉▏▎▊▉▌ ▏ + ▂▏▎▎▍ ▏▅█████▅▊▌▏▏▎▍▍▋▎ ▏▏▏▏ + ▅██▇▎ ▏▃██████▁▉▉▌ ▏▎▎▌ ▏ + ▅▇▃▅▏ ▁███████▃▁▋▏ ▍▏▏▌▁▂▁▁▁▁▊ + ▂███▋ ▊███████▆▄▇▇▄▍▎▎▏▊▂▆▇▅▆██▄▂█▄▏ + ▍███▅ ▍████████████▊ ▎▋▃███▆▇█▇▌▌█▆▍ + ▁███▁▏ ▎▆████████████▄▌▎ ▏▎▄███▆▆██▅███▌ + ▃███▄▍ ▏▄█████████▇▉▏ ▁█▅▁▃▅█████▇██████▁ + ▉▇███▅▁▄████████▃▁▆▃▊ ▊████████████████▊ + ▎▉▇███████████▆▃▃▋▏▏ ▊▆████████▇▁▋▄▇▂▌ + ▏▋▅██████████▇▉▎▏ ▎▎▏▃█████▃▌ ▋▂▌▎ + ▍███████▆▅▇▆▋▎▏ ▏▎ ▌▉▉▊▏ ▇▌▌ + ▎██████▇▂▋▉▅▊ ▍▋ ▂▄▋ + ▅██████▆▉ ▋▏ ▋▃▃▉ ▋█▋▎ + ▍██████▂▌▏▋▌▂▇▂▁▆▍ ▏▎▌█▂▋▍▍▍▎▍▏▏ + ▏█████▄▉▂▇███▅▃▃▍ ▏▎▎▎▎▎▍▌▋▆▁▁▉▊▊▋▋▌▍▎▎▎▎▎▏ + ▁███▇▆▆▄▅██▆▇▂▋▏ ▏▎▎▏▎▉▃▆████▇▆▃███████▇▅▂▋▏▎▎▎ + ▊██▅▅▂▍ ▎▃█▄▋▋▎▏ ▍▎ ▌▅█████████▇▉████████████▁▏▏▍▏ + ▊▊▍▏▍▎ ▍▂▎ ▏▎▎▍ ▏▍ ▍████████████▋█████████████▃ ▌ + ▍▎▏ ▎▎▍▏ ▏▎▎▍▎▌▌ ▏▍ ▏▅█████████▇█▃▅████████████▊ ▏▍ + ▍▍▎▍▎▋▎ ▎▍▏▏▊▅█████▆▌▏▆▂▋▎▎▂▆▅▇███▇▂▎ ▎▍ + ▏▏▏ ▎▎▎▎▏▋▉▃▃▃▃▃▊▌▊▁▂▁▂▃▂▁▊▍▏▎▎▎▏ + ▏▎▎▎▎▎▎▎▎▎▎▎▎▎▎▎▎▍▎▎▎▏ + ▏▏▏▏▏▏ + {reset}{randomcolor()}A basic tool for making your sources presentable{reset}""" + +logo1 = f""" + ,--, + ,---, ,--, ,--.'| + ,---.'| __ ,-. ,--.'| ,---, | | : ,---, __ ,-. + | | :,' ,'/ /| | |, ,-+-. / | : : ' ,-+-. / | ,' ,'/ /| + | | |' | |' | ,--.--. `--'_ ,--.'|' | ,---. | ' | ,---. ,--.--. ,--.'|' | ,---. ' | |' | + ,--.__| || | ,'/ \ ,' ,'| | | ,"' | / \ ' | | / \ / \ | | ,"' | / \ | | ,' + / ,' |' : / .--. .-. | ' | | | | / | | / / ' | | : / / | .--. .-. | | | / | | / / |' : / +. ' / || | ' \__\/: . . | | : | | | | |. ' / ' : |__ . ' / | \__\/: . . | | | | |. ' / || | ' +' ; |: |; : | ," .--.; | ' : |__ | | | |/ ' ; :__ | | '.'|' ; /| ," .--.; | | | | |/ ' ; /|; : | +| | '/ '| , ; / / ,. | | | '.'|| | |--' ' | '.'|; : ;' | / | / / ,. | | | |--' ' | / || , ; +| : :| ---' ; : .' 
\; : ;| |/ | : :| , / | : |; : .' \| |/ | : | ---' + \ \ / | , .-./| , / '---' \ \ / ---`-' \ \ / | , .-./'---' \ \ / + `----' `--`---' ---`-' `----' `----' `--`---' `----' + +""" + +logo2 = f"""{randomcolor()} + _______ ________ ________ ________ ________ ________ _______ ________ ________ ________ ________ ________ + _/ \/ \/ \ / \/ / \/ \/ \ / \/ \/ / \/ \/ \ + / / / /_/ // / / // / / / / / +/ / _/ // / / --/ // _/ / / _/ _/ +\________/\____/___/\___/____/ \________/\__/_____/\________/\________/\________/\___/____/\__/_____/\________/\____/___/ +""" + +logo3 = f"""{randomcolor()} + + ▎▌▌▎▏▏ ▏▌▉▊▎▏ + ▊▉▁▌▏ ▎▍▍▍▍▎▏ ▆████▇▅▁▍ + ▊▋▃█▁ ▎▏ ▎▉▍▍▍▉▆███▉ + ▆▎▇▂▎▌▊▍▏ ▏▍▍▏ ▎▋▍▋▄▉▏▇████▏ + ▊▂▍▏▁████▅▂▋▎▏ ▏ ▏▌▌▊▆▌ ▌▆▃▆█▅▎ + ▏▃▅▇▇▇▆██▄██▂▆▃▉▊▌▎▌▁▋ ▏▋▋▁▅▂▏ + ▁▏▃▁▎▌▁▂▄▇▃▃▁▏ ▎▊▁▂▂▂▃▉█▁▏ + ▊ ▊▊ ▍▍▋▋▋▊▊▂▊▆▉▏▏ ▏▍▉▊▌ + ▊ ▏▊▉▋▁▌▊▉▃▆▃ ▏▎▁▋▂▎ ▏▎ ▉▏▏▋▊▌▎ + ▉▎ ▎▂▎▂▉▏▍▏▄▋ ▏▉▋▁▊▌▍▂▄▅▉▉ ▍▉▍ + ▎▌▉▄▅▆▇▄▃▃▃▇▁ ▊ ▏▎ ▁▍ + ▃▆ ▌▏ ▋▂ ▂ + ▎█▋ ▉ ▏▄██▆▊▎▏ ▎▂ + ▇▇▊▏ ▉▋ ▎▊██▃ ▄▂▊▊▌▌▂ + ▄█▁▏▏▎▉▄▋▍▁▉▎▁███▃▎▊▏ ▊▆▊ + ▌██▇▄▇▄▆█▇▃▄████▇▊▊▏ ▏▌▌ + ▎████▂▄▅▉███▇▃▁▁▉▎ ▏▋▍▏ + ▎▌▋▊▊▋▋▍▏▉█████▇▇███▃▁▅▌ ▏▌▍ + ▏▄▉▁▁▅▆▉▊▊▆██████████████▉▋▋ + ▏▋▋▁█▇▆▅▉▆████████████████▊ + ▆███████▉▃███████▊ + ▆██████▊ ▁██████▋ + ▇█████▂ ▊██████ + ▍█████▇▏ ▅█████▊ + ▎▌▁▄███████▅ ▏▂██████▋ + ▊██████▇▂▉▊▊▎ ▉▆█████▇▃▎ + ▎▌▌▌▌▌▎ ▄█████▇▁▏ + ▍▋▋▋▎▏ + + {reset}{randomcolor()}Keep guttermouths clean with something basic{reset} +""" + +logo = choice([logo0, logo1, logo2, logo3]) +