Code Appendix Generator
Generating a Code Appendix for Academic Papers
The Problem
When writing my A-level Computer Science project, I had written thousands of lines of code, all of which needed to be included in a code appendix. Rather than manually copying and pasting each file, I decided to write a small and somewhat inefficient Python script to automate the process.
import os

directory = '.'
output_name = 'output.txt'
# Resolved once so the appendix itself can be recognised during the walk;
# without this check the script would read its own output back into itself.
output_path = os.path.abspath(output_name)

with open(output_name, 'a') as output_file:
    for root, dirs, files in os.walk(directory):
        for file in files:
            path = os.path.join(root, file)
            if os.path.abspath(path) == output_path:
                continue
            try:
                with open(path, 'r', encoding='utf-8') as f:
                    # Build the whole entry first so a failed read writes nothing.
                    output_file.write(f"File: {file}\nDirectory: {root}\n" + f.read() + '\n')
            except Exception as e:
                # Best effort: report the file and carry on with the next one.
                print(f"Error decoding file: {path}: {e}")
The simple script above was able to iterate through all files in the current directory and its subdirectories, and append the contents of each file to a single output file. This was a great start, but I wanted to improve the script to make it more efficient and user-friendly. My first goal was to implement a “blacklist” file (.apdxignore), which would exclude any files matching the regex patterns in the blacklist.
Expanding the Script
After I had written the script to the point where I was happy with it, I uploaded it to GitHub and shared it with my classmates. I received a lot of positive feedback, and I was encouraged to continue developing the script. I then added more features, as requested by classmates, and below is where the script stands today.
import logging
import os
import argparse
import re
# Banner lines written before and after each file's contents. Both come out
# 58 characters wide, so the opening and closing rules line up.
HEADER = "{0}Contents{0}".format("-" * 25)
FOOTER = "{}/Contents{}".format("-" * 24, "-" * 25)
# Name of the pattern file, looked up relative to the working directory.
APDX = ".apdx"
# Size threshold (1 MiB) above which a file is copied in pieces.
CHUNK_SIZE = 1024 * 1024
def load_apdxignore(path=None):
    """Return the regex patterns listed in the pattern file, one per line.

    Blank and whitespace-only lines are dropped: an empty pattern matches
    every path, so a single stray blank line would otherwise exclude (or, in
    whitelist mode, include) every file.

    Args:
        path: Pattern file to read. Defaults to the module-level ``APDX``.

    Returns:
        A list of pattern strings, or an empty list when the file is missing.
    """
    if path is None:
        path = APDX
    try:
        with open(path, 'r') as f:
            lines = f.read().splitlines()
    except (FileNotFoundError, NotADirectoryError):
        # A missing pattern file simply means "no patterns".
        return []
    return [line for line in lines if line.strip()]
def check_file(file_path, ignore_files, blacklist):
    """Decide whether ``file_path`` belongs in the appendix.

    Args:
        file_path: Path tested against each pattern with ``re.search``.
        ignore_files: Iterable of regex pattern strings.
        blacklist: When truthy, a match excludes the file; when falsy the
            patterns act as a whitelist and a match is required.

    Returns:
        True if the file should be written, False otherwise.
    """
    matched = False
    for pattern in ignore_files:
        if re.search(pattern, file_path):
            matched = True
            break
    return (not matched) if blacklist else matched
def log_error(file_path, e):
    """Log, at ERROR level on the root logger, that ``file_path`` failed.

    Args:
        file_path: Path of the file being processed when the error occurred.
        e: The exception (or any object) whose string form is reported.
    """
    logging.error("Error decoding file: %s: %s", file_path, e)
def write_to_file(output_file, file, root, file_path):
    """Append one source file to the appendix, framed by HEADER and FOOTER.

    The source is opened and its first read completes before anything is
    written. A file that cannot be opened, or that is not valid UTF-8 at the
    start (typical for binary files), therefore raises without leaving an
    orphaned header in the appendix. For files larger than ``CHUNK_SIZE`` a
    decode error beyond the first piece can still interrupt the entry.

    Args:
        output_file: Writable text file object receiving the appendix.
        file: Base name of the source file, shown in the entry heading.
        root: Directory containing the file, shown in the entry heading.
        file_path: Path used to open the source file.

    Raises:
        OSError: If the source file cannot be opened or read.
        UnicodeDecodeError: If the source file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        # Small files are read in one go; large ones are streamed so the
        # whole file never has to sit in memory at once.
        is_large = os.path.getsize(file_path) > CHUNK_SIZE
        first_part = source.read(CHUNK_SIZE) if is_large else source.read()
        output_file.write(f"File: {file}\nDirectory: {root}\n{HEADER}\n")
        output_file.write(first_part)
        if is_large:
            # read() returns '' at end of file, which stops the iterator.
            for chunk in iter(lambda: source.read(CHUNK_SIZE), ''):
                output_file.write(chunk)
    output_file.write(f"\n{FOOTER}\n\n")
def process_files(directory, ignore_files, output_file, blacklist):
    """Walk ``directory`` and append every selected file to ``output_file``.

    The appendix file itself is skipped when it lies inside the walked tree;
    otherwise it would be copied into itself, and once it grows past the
    streaming threshold the copy loop could keep feeding on its own output.

    Failures on individual files are logged and the walk continues.

    Args:
        directory: Root directory to walk recursively.
        ignore_files: Regex patterns passed to ``check_file``.
        output_file: Open, writable text file object for the appendix.
        blacklist: True to treat the patterns as exclusions, False to treat
            them as a whitelist.
    """
    # File objects from open() expose the path as .name; other writers (or
    # files opened from a descriptor) may not, in which case nothing is skipped.
    output_name = getattr(output_file, 'name', None)
    output_real = None
    if isinstance(output_name, str):
        output_real = os.path.normcase(os.path.realpath(output_name))

    for root, _dirs, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            if output_real is not None and os.path.normcase(os.path.realpath(file_path)) == output_real:
                continue
            try:
                if check_file(file_path, ignore_files, blacklist):
                    write_to_file(output_file, file, root, file_path)
            except UnicodeDecodeError as e:
                log_error(file_path, e)
            except UnicodeEncodeError as e:
                logging.error(f"Error encoding file: {file_path}: {e}")
            except OSError as e:
                # Covers IOError and PermissionError; these are read failures,
                # not decoding problems, so say so in the log.
                logging.error(f"Error reading file: {file_path}: {e}")
def main(blacklist=True, output_file_path="output.txt", directory=".", overwrite=True):
    """Generate the appendix file for ``directory``.

    Args:
        blacklist: True to exclude files matching the patterns in the pattern
            file, False to include only matching files.
        output_file_path: Path of the appendix file to write.
        directory: Root directory whose files are collected.
        overwrite: True to replace an existing appendix, False to append to it.
    """
    logging.basicConfig(level=logging.INFO)
    logging.info(f"Parameters: apdx_operating_mode={'Blacklist' if blacklist else 'Whitelist'}, output_file_path={output_file_path}, directory={directory}, overwrite={overwrite}")

    # load_apdxignore() already returns [] when the pattern file is missing.
    ignore_files = load_apdxignore()
    if not blacklist and not ignore_files:
        logging.warning(f"Whitelist mode with no patterns in {APDX}: no files will be included")

    # Sources are read as UTF-8, so the appendix is written as UTF-8 too;
    # the platform default encoding may not be able to represent them.
    mode = 'w' if overwrite else 'a'
    with open(output_file_path, mode, encoding='utf-8') as output_file:
        process_files(directory, ignore_files, output_file, blacklist)
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    cli = argparse.ArgumentParser(description='Generate an appendix.')
    cli.add_argument('--blacklist', dest='apdx_blacklist', action='store_true', help='Use .apdx as a blacklist')
    cli.add_argument('--whitelist', dest='apdx_whitelist', action='store_true', help='Use .apdx as a whitelist')
    cli.add_argument('--output', dest='output_file_path', default='output.txt', help='Output file path')
    cli.add_argument('--dir', dest='directory', default='.', help='Directory to process')
    cli.add_argument('--no-overwrite', dest='overwrite', action='store_false', help='Do not overwrite existing output file')
    options = cli.parse_args()

    # Blacklist is the default mode. Whitelist applies only when --whitelist
    # is given without --blacklist; if both are passed, blacklist wins.
    use_blacklist = options.apdx_blacklist or not options.apdx_whitelist
    main(use_blacklist, options.output_file_path, options.directory, options.overwrite)
All of the code above is available on GitHub, and I encourage you to check it out and contribute if you have any ideas for improvements. I hope this script can be useful to others who are writing academic papers and need to include a code appendix.