aboutsummaryrefslogtreecommitdiff
path: root/libbuild2/version/snapshot-git.cxx
blob: 30dea6dcf718c7f205e16460c1675d7e18a79e2f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
// file      : libbuild2/version/snapshot-git.cxx -*- C++ -*-
// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
// license   : MIT; see accompanying LICENSE file

#include <ctime> // time_t

#include <libbutl/sha1.mxx>

#include <libbuild2/version/snapshot.hxx>

using namespace std;
using namespace butl;

namespace build2
{
  namespace version
  {
    // Extract the snapshot information for the git repository at src_root.
    //
    // The result is populated as follows:
    //
    // - committed is true if `git status --porcelain` prints nothing (clean
    //   working directory).
    //
    // - sn is the committer timestamp of HEAD represented as YYYYMMDDhhmmss
    //   (plus one second if the working directory is not clean).
    //
    // - id is the 12-character abbreviated commit id, calculated locally as
    //   the SHA1 of the commit object (only set if committed).
    //
    // If `git cat-file commit HEAD` exits with non-zero code (presumably a
    // new repository without any commits), then return an uncommitted
    // snapshot with the UNIX epoch as the timestamp. Issue diagnostics and
    // fail if the commit date cannot be extracted.
    //
    snapshot
    extract_snapshot_git (const dir_path& src_root)
    {
      snapshot r;
      const char* d (src_root.string ().c_str ());

      // On startup git prepends the PATH environment variable value with the
      // computed directory path where its sub-programs are supposedly located
      // (--exec-path option, GIT_EXEC_PATH environment variable, etc; see
      // cmd_main() in git's git.c for details).
      //
      // Then, when git needs to run itself or one of its components as a
      // child process, it resolves the full executable path searching in
      // directories listed in PATH (see locate_in_PATH() in git's
      // run-command.c for details).
      //
      // On Windows we install git and its components into a place where it is
      // not expected to be, which results in the wrong path in PATH as set by
      // git (for example, c:/build2/libexec/git-core) which in turn may lead
      // to running some other git that appears in the PATH variable. To
      // prevent this we pass the git's exec directory via the --exec-path
      // option explicitly.
      //
      path p ("git");
      process_path pp (run_search (p, true /* init */));

#ifdef _WIN32
      string ep ("--exec-path=" + pp.effect.directory ().string ());
#endif

      // The args_i variable is the index of the first reserved slot. It is
      // bumped by one in the array initializer below if the --exec-path
      // option is present.
      //
      size_t args_i (3); // First reserved.
      const char* args[] {
        pp.recall_string (),
#ifdef _WIN32
        (++args_i, ep.c_str ()),
#endif
        "-C",
        d,
        nullptr, nullptr, nullptr, // Reserve.
        nullptr};

      // First check whether the working directory is clean. There doesn't
      // seem to be a way to do everything in a single invocation (the
      // porcelain v2 gives us the commit id but not timestamp).
      //

      // If git status --porcelain returns anything, then the working
      // directory is not clean.
      //
      args[args_i    ] = "status";
      args[args_i + 1] = "--porcelain";
      args[args_i + 2] = nullptr;

      r.committed = run<string> (
        3 /* verbosity */,
        pp,
        args,
        [](string& s, bool) {return move (s);}).empty ();

      // Now extract the commit id and date. One might think that would be
      // easy... Commit id is a SHA1 hash of the commit object. And commit
      // object looks like this:
      //
      // commit <len>\0
      // <data>
      //
      // Where <len> is the size of <data> and <data> is the output of:
      //
      // git cat-file commit HEAD
      //
      // There is also one annoying special case: new repository without any
      // commits. In this case the above command will fail (with diagnostics
      // and non-zero exit code) because there is no HEAD. Of course, it can
      // also fail for other reasons (like a broken repository) which would be
      // hard to distinguish. Note, however, that we just ran git status and
      // it would have most likely failed if this were the case. So here we
      // (reluctantly) assume that the only reason git cat-file fails is if
      // there is no HEAD (which we equate with the "new repository" condition
      // which, strictly speaking, might not be the case either). So we
      // suppress any diagnostics, and handle non-zero exit code.
      //
      string data;

      args[args_i    ] = "cat-file";
      args[args_i + 1] = "commit";
      args[args_i + 2] = "HEAD";
      args[args_i + 3] = nullptr;

      process pr (run_start (3     /* verbosity */,
                             pp,
                             args,
                             0     /* stdin  */,
                             -1    /* stdout */,
                             false /* error  */));

      string l;
      try
      {
        ifdstream is (move (pr.in_ofd), ifdstream::badbit);

        while (!eof (getline (is, l)))
        {
          data += l;

          // Only restore the newline if getline() has actually consumed one.
          // If the last line ends at EOF without the delimiter, then eofbit
          // is set (without failbit) and appending the newline would make
          // both the object length and the resulting hash incorrect.
          //
          if (!is.eof ())
            data += '\n';

          // Use the first committer line (the commit header precedes the
          // commit message).
          //
          if (r.sn == 0 && l.compare (0, 10, "committer ") == 0)
          try
          {
            // The line format is:
            //
            // committer <noise> <timestamp> <timezone>
            //
            // For example:
            //
            // committer John Doe <john@example.org> 1493117819 +0200
            //
            // The timestamp is in seconds since UNIX epoch. The timezone
            // appears to be always numeric (+0000 for UTC). Note that
            // timestamp appears to be already in UTC with timezone being just
            // for information it seems.
            //
            size_t p1 (l.rfind (' ')); // Can't be npos.

            size_t p2 (l.rfind (' ', p1 - 1));
            if (p2 == string::npos)
              throw invalid_argument ("missing timestamp");

            string ts (l, p2 + 1, p1 - p2 - 1);
            time_t t (static_cast<time_t> (stoull (ts)));

            // Disabled since the timestamp appears to be already in UTC (see
            // above).
            //
#if 0
            string tz (l, p1 + 1);

            if (tz.size () != 5)
              throw invalid_argument ("invalid timezone");

            unsigned long h (stoul (string (tz, 1, 2)));
            unsigned long m (stoul (string (tz, 3, 2)));
            unsigned long s (h * 3600 + m * 60);

            // The timezone indicates where the timestamp was generated so to
            // convert to UTC we need to invert the sign.
            //
            switch (tz[0])
            {
            case '+': t -= s; break;
            case '-': t += s; break;
            default: throw invalid_argument ("invalid timezone sign");
            }
#endif
            // Represent as YYYYMMDDhhmmss.
            //
            r.sn = stoull (to_string (system_clock::from_time_t (t),
                                      "%Y%m%d%H%M%S",
                                      false /* special */,
                                      false /* local (already in UTC) */));
          }
          catch (const invalid_argument& e)
          {
            fail << "unable to extract git commit date from '" << l << "': "
                 << e;
          }
          catch (const std::out_of_range& e)
          {
            // Note that stoull() reports a value that does not fit into the
            // result type with out_of_range rather than invalid_argument.
            //
            fail << "unable to extract git commit date from '" << l << "': "
                 << e;
          }
        }

        is.close ();
      }
      catch (const io_error&)
      {
        // Presumably the child process failed. Let run_finish() deal with
        // that.
      }

      if (!run_finish_code (args, pr, l))
      {
        // Presumably new repository without HEAD. Return uncommitted snapshot
        // with UNIX epoch as timestamp.
        //
        r.sn = 19700101000000ULL;
        r.committed = false;
        return r;
      }

      if (r.sn == 0)
        fail << "unable to extract git commit id/date for " << src_root;

      if (r.committed)
      {
        // Hash the "commit <len>\0" header followed by the commit data.
        //
        sha1 cs;
        cs.append ("commit " + to_string (data.size ())); // Includes '\0'.
        cs.append (data.c_str (), data.size ());
        r.id.assign (cs.string (), 12); // 12-characters abbreviated commit id.
      }
      else
        r.sn++; // Add a second.

      return r;
    }
  }
}