aboutsummaryrefslogtreecommitdiff
path: root/bdep-util/git-pre-commit-copyright-check.in
blob: 496f4406032b6946a434c193ebfd7a4390eac86a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#! /usr/bin/env bash

# file      : bdep-util/git-pre-commit-copyright-check.in
# license   : MIT; see accompanying LICENSE file

# Check copyright notices in the COPYRIGHT and LICENSE files and issue a
# warning if they don't include the current year.
#
# Specifically, look first for COPYRIGHT and then LICENSE in the root
# directory and all subdirectories in a project, excluding git submodule
# subdirectories as well as the COPYRIGHT/LICENSE files that are symlinks
# pointing into git submodule subdirectories.
#
# Then in each file look for the first line matching the:
#
# ^Copyright (\([cC]\))? ...
#
# Regex, where "..."  matches various year lists/ranges (e.g., "2010", "2010,
# 2011", "2010-2011", and their combinations; see the pattern below for
# details). Specifically, we don't consider Copyright notices that:
#
#   - don't start at the very beginning of a line (indented, etc)
#   - contain something other than years prior to the last year (names, etc)
#   - wrap over multiple lines (long year list, etc)
#
# Note also that the current year is obtained in the UTC timezone.
#
trap 'exit 1' ERR           # Exit with status 1 when a command fails.
set -o errtrace -o pipefail # Inherit the trap in functions and subshells;
                            # fail a pipeline if any of its commands fails.
shopt -s lastpipe nullglob  # Run the last pipeline command in the current
                            # shell; expand no-match globs to nothing.

# Print the arguments, joined into a single line, to stderr.
#
# Note that printf is used instead of echo so that messages which look like
# echo options (-n, -e, etc) are printed verbatim rather than interpreted.
#
function info () { printf '%s\n' "$*" 1>&2; }

# Print the arguments to stderr (see info()) and exit with status 1.
#
function error () { info "$*"; exit 1; }

# The project repository root, assumed to be the current directory.
#
# Use the physical path (symlinks resolved) since it is compared below to the
# `git rev-parse --show-toplevel` output, which is a resolved path. With the
# logical path the comparison would never succeed if the repository was
# entered via a symlink and all the files would be silently skipped.
#
repo="$(pwd -P)"

# Print the list of project files with the specified name, one path per line
# (as printed by find, that is, relative to the current directory and with
# the leading ./). Skip files in the submodule subdirectories and symlinks
# which refer to such files. Also skip everything inside the .git/
# directories: those are not project files and git cannot determine the
# top-level directory for them (think of a branch called LICENSE).
#
# The project repository root is expected in the repo global variable.
#
function project_files () # <name>
{
  local name="$1"

  # Note: read the paths verbatim (no backslash processing, no whitespace
  # trimming), so that they still refer to the existing filesystem entries.
  #
  local f
  while IFS= read -r f; do

    # Resolve the target file path.
    #
    # Specifically, if this is a symlink, then try to resolve it into the
    # target, recursively, and skip it if it is dangling. Otherwise, this is a
    # regular file and it is a target.
    #
    local t
    if [[ -L "$f" ]]; then

      # Skip the dangling symlink trying to resolve it recursively.
      #
      # Note that readlink silently fails if any of the path components is a
      # dangling symlink, except for the last component.
      #
      # Also note that while readlink is not POSIX, it is present on both
      # Linux and FreeBSD (which we only care about, currently).
      #
      if ! t="$(readlink -f "$f")"; then
        continue
      fi

      # Check that the target exists and is a regular file.
      #
      if [[ ! -f "$t" ]]; then
        continue
      fi
    else
      t="$f"
    fi

    # Check that the resolved target parent directory belongs to the project
    # repository and skip it if that's not the case.
    #
    local r
    r="$(git -C "$(dirname "$t")" rev-parse --show-toplevel)"

    if [[ "$r" != "$repo" ]]; then
      continue
    fi

    printf '%s\n' "$f"
  done < <(find . -name .git -prune -o \
                \( -type f -o -type l \) -name "$name" -print)
}

# Recursively collect the COPYRIGHT and LICENSE files, skipping the LICENSE
# files in directories that contain the COPYRIGHT file.
#
# Note that the project_files() output is captured first (so that its failure
# is still detected) and then split strictly into lines. Relying on the
# unquoted expansion instead would break paths containing whitespaces and,
# with nullglob enabled, drop paths containing glob characters.
#
files=()

fs=()
s="$(project_files COPYRIGHT)"

if [[ -n "$s" ]]; then
  readarray -t fs <<<"$s"
fi

for f in "${fs[@]}"; do
  files+=("$f")
done

fs=()
s="$(project_files LICENSE)"

if [[ -n "$s" ]]; then
  readarray -t fs <<<"$s"
fi

for f in "${fs[@]}"; do
  d="$(dirname "$f")"

  if [[ ! -f "$d/COPYRIGHT" ]]; then
    files+=("$f")
  fi
done

# Grep for the Copyright notice in the collected files and issue the warning
# if it is present and is outdated.
#
# @@ We should probably skip the COPYRIGHT/LICENSE files whose parent
#    directories don't (recursively) contain staged files (think of projects
#    with multiple packages, bundled third-party code, etc). Note that we can
#    obtain the staged file list with the `git diff --name-only --cached`
#    command.
#
current_year="$(date -u +'%Y')"

# Terminal escape sequences used to print the 'WARNING:' prefix in bold red,
# so it doesn't go unnoticed.
#
bw=$'\033[1m'    # Bold.
nw=$'\033[0m'    # No weight.
rc=$'\033[0;31m' # Red.
nc=$'\033[0m'    # No color.

for f in "${files[@]}"; do

  # Extract the last year from the Copyright notice: the first sed prints it
  # for every matching line and the second one only keeps the first result.
  #
  year="$(sed -n -re 's%^Copyright( +\([cC]\))?[ 0-9,-]*[ ,-]([0-9]{4}).*$%\2%p' "$f" | sed -n -e '1p')"

  # Note that the path and the year are passed as the printf arguments rather
  # than being embedded into the format, so that backslashes they may contain
  # are printed as is.
  #
  if [[ -n "$year" && "$year" != "$current_year" ]]; then
    printf "%s%sWARNING:%s%s last copyright year in '%s' is %s\n" \
           "$rc" "$bw" "$nw" "$nc" "${f#./}" "$year" 1>&2
  fi
done