aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bbot/agent/agent.cxx46
1 files changed, 26 insertions, 20 deletions
diff --git a/bbot/agent/agent.cxx b/bbot/agent/agent.cxx
index 5df470f..0b57208 100644
--- a/bbot/agent/agent.cxx
+++ b/bbot/agent/agent.cxx
@@ -1076,7 +1076,7 @@ enumerate_machines (const dir_path& machines)
{
tracer trace ("enumerate_machines", machines.string ().c_str ());
- size_t dir_iter_retry (0); // Directory iteration retry (see below).
+ size_t dir_iter_retries (0); // Directory iteration retry count (see below).
for (;;) // From-scratch retry loop for after bootstrap (see below).
{
@@ -1152,6 +1152,7 @@ enumerate_machines (const dir_path& machines)
//
try
{
+ bool dir_iter_retry (false);
for (const dir_entry& ve:
dir_iterator (machines, dir_iterator::no_follow))
{
@@ -1426,31 +1427,36 @@ enumerate_machines (const dir_path& machines)
}
catch (const system_error& e)
{
- fail << "unable to iterate over " << vd << ": " << e;
+ // Once in a while we get ENOENT while iterating over the machines
+ // volume directory. This directory contains the machine directories
+ // (not btrfs subvolumes) and is not being changed when we get this
+ // error. Maybe this is due to directory sizes/timestamps changes,
+ // but then we would expect to get this error a lot more often..? So
+ // this feels like a btrfs bug which we are going to retry a few
+ // times. See GH issue #349 for additional information.
+ //
+ dir_iter_retry = (dir_iter_retries++ != 3);
+
+ (dir_iter_retry
+ ? warn
+ : error) << "unable to iterate over " << vd << ": " << e;
+
+ if (dir_iter_retry)
+ break;
+ else
+ throw failed ();
}
} // Outer dir_iterator loop.
- }
- catch (const system_error& e)
- {
- // Once in a while we get ENOENT while iterating over the machines
- // directory. This directory contains the machine directories (not
- // subvolumes) and is not being changed when we get this error. Maybe
- // this is due to directory sizes/timestamps changes, but then we would
- // expect to get this error a lot more often..? So this feels like a
- // btrfs bug which we are going to retry a few times. See GH issue #349
- // for additional information.
- //
- bool retry (dir_iter_retry++ != 2);
- (retry ? warn : error) << "unable to iterate over " << machines
- << ": " << e;
- if (retry)
+ if (dir_iter_retry)
continue; // Re-enumerate from scratch.
else
- throw failed ();
+ dir_iter_retries = 0; // Reset for re-enumeration due to other reasons.
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to iterate over " << machines << ": " << e;
}
-
- dir_iter_retry = 0; // Reset for re-enumeration due to other reasons.
// See if there is a pending bootstrap and whether we can perform it.
//