dotfiles/tools/rtapp-copyright-headers

#!/usr/bin/env python
# coding: utf8
#
## Idempotently add copyright headers to source files.
#
# To check that all files have required headers:
# $ copyright-headers check
#
# To add/update the copyright headers:
# $ copyright-headers update
#
# To ignore every file in a directory and all of its subdirectories, add a
# marker file and track it with git (only tracked markers are honoured):
# $ touch some/directory/.autocopyrightignore
# $ git add some/directory/.autocopyrightignore
#
# Originally from https://github.com/ghuntley/ghuntley/blob/trunk/third_party/copyright-headers/copyright-headers.
# SPDX-License

import os
import re
import string
import subprocess
import sys

#########################
# File-type configuration
#
# Maps a file extension (without the leading dot) to its comment style:
# start:      The start marker of the block. When 'line' is empty it is
#             combined with 'end' to wrap every single notice line.
# line:       The prefix for the notice text.
# end:        The end marker of the block.
# empty_line: Optional text emitted for an empty notice line; when the key
#             is absent the value of 'line' is used instead.
# skip:       Optional prefix for lines to skip when adding header initially.

filetypes = {
  'hs': dict(start='-' * 80, line='--', end='', empty_line='', skip='#!'),
  'lhs': dict(start='-' * 80, line='--', end='', empty_line=''),
  'chs': dict(start='-' * 80, line='--', end='', empty_line=''),
  'js': dict(start='', line='//', end='', empty_line=''),
  'ts': dict(start='', line='//', end='', empty_line=''),
  'tsx': dict(start='', line='//', end='', empty_line=''),
  'java': dict(start='', line='//', end='', empty_line=''),
  'scala': dict(start='', line='//', end='', empty_line=''),
  'rst': dict(start='', line='..', end='', empty_line=''),
  'tex': dict(start='%' * 80, line='%', end='', empty_line=''),
  'py': dict(start='#' * 80, line='#', end='', empty_line='', skip='#!'),
  'proto': dict(start='/*', line='//', end='', empty_line=''),
  'c': dict(start='/*', line='//', end='', empty_line=''),
  'cpp': dict(start='/*', line='//', end='', empty_line=''),
  'cc': dict(start='/*', line='//', end='', empty_line=''),
  'h': dict(start='/*', line='//', end='', empty_line=''),
  'hpp': dict(start='/*', line='//', end='', empty_line=''),
  'hh': dict(start='/*', line='//', end='', empty_line=''),
  'groovy': dict(start='/*', line='//', end='', empty_line=''),
  'sql': dict(start='/*', line='--', end='', empty_line=''),
  'sh': dict(start='#' * 80, line='#', end='', empty_line='', skip='#!'),
  'bats': dict(start='#' * 80, line='#', end='', empty_line='', skip='#!'),
  'html': dict(start='<!--', line='', end='-->', skip='<!'),
  'bat': dict(start='::', line='::', end='', empty_line=''),
  'cmd': dict(start='::', line='::', end='', empty_line=''),
  'bazel': dict(start='#' * 80, line='#', end='', empty_line='', skip='#!'),
  'bzl': dict(start='#' * 80, line='#', end='', empty_line='', skip='#!'),
  'rb': dict(start='#' * 80, line='#', end='', empty_line=''),
  'tf': dict(start='#' * 80, line='#', end='', empty_line=''),
  'css': dict(start='/*', line='', end=' */'),
  'scss': dict(start='/*', line='', end=' */'),
  'yml': dict(start='#' * 80, line='#', end='', empty_line=''),
  'yaml': dict(start='#' * 80, line='#', end='', empty_line=''),
  'nix': dict(start='#' * 80, line='#', end='', empty_line=''),
  'conf': dict(start='#' * 80, line='#', end='', empty_line=''),
  'patch': dict(start='@' * 80, line='@', end='', empty_line=''),
  'cs': dict(start='//' * 80, line='//', end='', empty_line=''),
}

#########################

# Ask git for the top-level directory of the repository that contains the
# current working directory, then make that directory the working directory.
# Raises subprocess.CalledProcessError when the git command fails.
def chdir_to_toplevel():
  git_command = ["git", "rev-parse", "--show-toplevel"]
  reported_root = subprocess.check_output(git_command, universal_newlines=True)
  # git terminates the path with a newline; remove it before using the path.
  os.chdir(reported_root.strip())

# List the files git tracks under the given directory (defaults to '.'),
# leaving out
#   * every file located in, or below, a directory that contains a tracked
#     '.autocopyrightignore' marker file, and
#   * symbolic links.
#
# dir: path handed to 'git ls-files', or None for the current directory.
#
# Returns a list of paths exactly as printed by 'git ls-files'; the list is
# empty when git reports no files.
# Raises subprocess.CalledProcessError when the git command fails.
#
# NOTE(review): a marker at the top of the listing has the directory name ''
# and therefore the prefix '/', which no relative path starts with -- confirm
# whether a top-level marker is meant to ignore everything.
def list_files(dir):
  if dir is None:
    dir = "."

  listing = subprocess.check_output(["git", "ls-files", dir], universal_newlines=True)

  # One path per line. Empty pieces are dropped so that an empty listing
  # gives [] instead of [''], and no whitespace that belongs to a file name
  # is trimmed away.
  all_files = [ f for f in listing.split("\n") if f ]

  # Directory prefixes (with trailing slash) that hold a marker file.
  ignores = tuple(os.path.dirname(f) + "/" for f in all_files
                  if os.path.basename(f) == '.autocopyrightignore')

  # str.startswith accepts a tuple of prefixes; an empty tuple matches nothing.
  return [ f for f in all_files
           if not f.startswith(ignores) and not os.path.islink(f) ]

# Check the copyright header of one file and, unless check_only is set,
# rewrite the file so that it carries the current header.
#
# The expected header is built from the module-level LEGAL_NOTICES, rendered
# in the comment style registered in `filetypes` for the file's extension.
# Files whose extension is not registered are accepted without being opened.
#
# filename:   path of the file to process.
# check_only: when true, only report problems; the file is never written.
#
# Returns True when the header is already correct, when the extension is not
# registered, or when the file has been rewritten. Returns False only in
# check mode, for a header that is missing or out of date.
#
# Content that is not part of a header is never dropped or altered:
#   * the file is written with the same byte-preserving codec it is read with;
#   * the blank line appended for scanning purposes is never written back;
#   * a line that ends a stale header but is itself real content is kept.
#
# NOTE(review): an existing header is only recognised by the literal text
# " Copyright (c) 2020" at the start of its first line -- confirm that the
# first line of .copyrightheader begins with "Copyright (c) 2020".
# NOTE(review): LEGAL_NOTICES is assumed to be non-empty.
def process_file(filename, check_only):
  (_, ext) = os.path.splitext(filename)
  ext = ext[1:]
  if ext not in filetypes:
    return True

  style = filetypes[ext]
  line_mark = style['line']
  empty_line_mark = style.get('empty_line', line_mark)
  end_mark = style['end'] + "\n"

  # ISO-8859-1 maps every byte to exactly one character, so reading and
  # writing with it reproduces untouched content byte for byte.
  # NOTE(review): the notice is encoded with this codec as well, so non-ASCII
  # notice text ends up as Latin-1 bytes -- confirm the notice is ASCII.
  file_encoding = "ISO-8859-1"

  # Render one line of notice text as a comment in this file's style.
  def comment(line):
    if line == '':
      return empty_line_mark
    if line_mark == '':
      # No line prefix: wrap each line in its own start/end pair.
      return style['start'] + ' ' + line + ' ' + style['end']
    return line_mark + ' ' + line

  # True for a non-blank line that does not look like part of a comment.
  # (Always False for styles whose line prefix is empty.)
  def is_content(line):
    return line.strip() != '' \
      and not line.startswith((line_mark, ' *', '    '))

  def report_mismatch(actual, expected):
    print("{0}: notice text mismatch:\n  actual: {1}\n  expected: {2}" \
          .format(filename, actual.strip(), expected.strip()))

  notice_lines = [ comment(text) for text in LEGAL_NOTICES ]
  header = "\n".join(notice_lines) + "\n"

  # Read all lines from the file
  with open(filename, 'r', encoding=file_encoding) as fp:
    lines = fp.readlines()
  # Pretend that the file has an extra empty line at the end to handle files
  # which consist only of the copyright notice and nothing else. The extra
  # line takes part in the scan but is never stored, so it cannot leak into
  # the rewritten file.
  lines.append('\n')
  sentinel_at = len(lines) - 1

  # Process the file:
  # If we find an existing copyright header, compare against it. If no
  # copyright header exists, or it is different, the new header is written
  # in place of / in front of it.
  state = 'start'
  index = 0          # Next notice line to compare against
  before_header = [] # Lines in the file before a possible copyright header
  after_header = []  # Lines in the file after the header
  for position, line in enumerate(lines):
    is_sentinel = position == sentinel_at

    # looking for the first line of a copyright notice
    if state == 'start':
      if line.strip(style['line']).strip(style['start'])[0:19] == " Copyright (c) 2020":
        if line != notice_lines[0] + "\n":
          report_mismatch(line, notice_lines[0])
          state = 'find_end_and_fail'
        else:
          index = 1
          # A one-line notice has no second line to confirm against.
          state = 'notice' if len(notice_lines) > 1 else 'compare'
      elif not is_sentinel:
        before_header.append(line)

    # confirming the block with the second notice line
    elif state == 'notice':
      if line != notice_lines[1] + "\n":
        # Not a copyright block, switch back to looking for a start. The
        # first line was taken from the file, so it goes back unchanged.
        state = 'start'
        index = 0
        before_header.append(notice_lines[0] + "\n")
        if not is_sentinel:
          before_header.append(line)
      else:
        # match, found the copyright block, start comparing.
        index = 2
        state = 'compare'

    # comparing notice texts
    elif state == 'compare':
      if index == len(notice_lines):
        if line != end_mark and not line.endswith(end_mark) and line != empty_line_mark + "\n":
          print(f"end marker invalid: {line} vs {end_mark}")
          state = 'fail'
          if not is_sentinel:
            # The line is not a terminator, so it is file content: keep it,
            # separated from the header by the blank line that was missing.
            after_header.append('\n')
            after_header.append(line)
        else:
          state = 'ok'

      elif line != notice_lines[index] + "\n":
        report_mismatch(line, notice_lines[index])
        if is_content(line):
          # The header stops short and real content starts here.
          state = 'fail'
          after_header.append(line)
        else:
          state = 'find_end_and_fail'

      else:
        index += 1

    # skip comment lines until the end of the stale header, then fail
    elif state == 'find_end_and_fail':
      if not line.startswith((line_mark, ' *', '    ')) \
              or line == end_mark \
              or line.endswith(end_mark) \
              or line == empty_line_mark + "\n":
        state = 'fail'
        if is_content(line):
          # Only stale comment lines and blank lines may be discarded.
          after_header.append(line)

    elif state == 'fail' or state == 'ok':
      if not is_sentinel:
        after_header.append(line)

    else:
      sys.exit("abort: unknown state %s" % state)

  if state == 'ok':
    return True

  if check_only:
    if state == 'fail':
      print(f"{filename}: copyright header out-of-date")
    else:
      print(f"{filename}: copyright header missing")
    return False

  print(f"{filename}: copyright header missing or out-of-date, updating.")
  if state == 'fail':
    # copyright header existed, but was out of date, replace it.
    pieces = before_header + [header] + after_header
  else:
    # no copyright header: insert it at the top, below any leading lines
    # that carry the file type's 'skip' prefix.
    kept = 0
    if 'skip' in style:
      skip_prefix = style['skip']
      while kept < len(before_header) and before_header[kept].startswith(skip_prefix):
        kept += 1
    pieces = before_header[:kept] + [header + "\n"] + before_header[kept:]

  payload = "".join(pieces)
  # Encode once up front: if the notice cannot be encoded this raises
  # UnicodeEncodeError before the file has been truncated.
  payload.encode(file_encoding)
  with open(filename, 'w', encoding=file_encoding) as fp:
    fp.write(payload)
  return True

#########################
# Main

# Print the command-line help to standard output and terminate the script
# with exit status 1. Never returns.
def usage():
  print("usage: copyright-headers <check|update> [DIRECTORY]")
  print("  If DIRECTORY is provided, checks only in that directory")
  # Without DIRECTORY the file listing starts at '.', i.e. it covers the
  # whole repository; the help text used to name 'pkgs' instead.
  print("  Otherwise, checks the whole repository.")
  print("  Note that the provided directory must be relative to the")
  print("  root of the git repository.")
  sys.exit(1)

# Validate the command line before touching git or the file system, so that
# a usage error is reported as such even outside a repository or when the
# '.copyrightheader' file is missing.
if len(sys.argv) != 2 and len(sys.argv) != 3:
  usage()

if sys.argv[1] == "update":
  check_only = False
elif sys.argv[1] == "check":
  check_only = True
else:
  usage()

dir = None
if len(sys.argv) > 2:
  dir = sys.argv[2]

# All paths below are relative to the top of the repository.
chdir_to_toplevel()

# The notice text, one entry per line of '.copyrightheader', with the
# surrounding whitespace removed. Read by process_file as a module global.
with open('.copyrightheader', 'r') as f:
  LEGAL_NOTICES = [ notice.strip() for notice in f.readlines() ]

failed=False
for file in list_files(dir):
  if not process_file(file, check_only):
    failed=True

if failed and check_only:
  print("\nCopyright header check failed.\nPlease update copyright headers by running 'copyright-headers update'")

# Exit status 1 when at least one file failed the check, 0 otherwise.
sys.exit(failed)