2023-02-22
§
|
11:45 |
<jynus@cumin1001> |
dbctl commit (dc=all): 'Depool db1166, seen mw errors', diff saved to https://phabricator.wikimedia.org/P44726 and previous config saved to /var/cache/conftool/dbconfig/20230222-114515-jynus.json |
[production] |
11:26 |
<moritzm> |
installing git security updates |
[production] |
11:24 |
<cgoubert@cumin1001> |
END (PASS) - Cookbook sre.switchdc.mediawiki.09-run-puppet-on-db-masters (exit_code=0) |
[production] |
11:18 |
<cgoubert@cumin1001> |
START - Cookbook sre.switchdc.mediawiki.09-run-puppet-on-db-masters |
[production] |
11:17 |
<cgoubert@cumin1001> |
END (PASS) - Cookbook sre.switchdc.mediawiki.09-restore-ttl (exit_code=0) |
[production] |
11:16 |
<cgoubert@cumin1001> |
START - Cookbook sre.switchdc.mediawiki.09-restore-ttl |
[production] |
11:16 |
<cgoubert@cumin1001> |
END (PASS) - Cookbook sre.switchdc.mediawiki.08-start-maintenance (exit_code=0) |
[production] |
11:15 |
<cgoubert@cumin1001> |
START - Cookbook sre.switchdc.mediawiki.08-start-maintenance |
[production] |
11:14 |
<cgoubert@cumin1001> |
END (PASS) - Cookbook sre.switchdc.mediawiki.08-restart-envoy-on-jobrunners (exit_code=0) |
[production] |
11:14 |
<cgoubert@cumin1001> |
START - Cookbook sre.switchdc.mediawiki.08-restart-envoy-on-jobrunners |
[production] |
11:13 |
<cgoubert@cumin1001> |
END (PASS) - Cookbook sre.switchdc.mediawiki.07-set-readwrite (exit_code=0) |
[production] |
11:13 |
<cgoubert@cumin1001> |
[DRY-RUN] MediaWiki read-only period ends at: 2023-02-22 11:13:51.466468 |
[production] |
11:13 |
<cgoubert@cumin1001> |
START - Cookbook sre.switchdc.mediawiki.07-set-readwrite |
[production] |
11:13 |
<cgoubert@cumin1001> |
END (PASS) - Cookbook sre.switchdc.mediawiki.06-set-db-readwrite (exit_code=0) |
[production] |
11:13 |
<cgoubert@cumin1001> |
START - Cookbook sre.switchdc.mediawiki.06-set-db-readwrite |
[production] |
11:13 |
<eoghan@cumin2002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1:00:00 on gitlab2002.wikimedia.org with reason: Running failover to gitlab1003 - T329930 |
[production] |
11:13 |
<cgoubert@cumin1001> |
END (PASS) - Cookbook sre.switchdc.mediawiki.04-switch-mediawiki (exit_code=0) |
[production] |
11:13 |
<cgoubert@cumin1001> |
START - Cookbook sre.switchdc.mediawiki.04-switch-mediawiki |
[production] |
11:13 |
<eoghan@cumin2002> |
START - Cookbook sre.hosts.downtime for 1:00:00 on gitlab2002.wikimedia.org with reason: Running failover to gitlab1003 - T329930 |
[production] |
11:04 |
<cgoubert@cumin1001> |
END (FAIL) - Cookbook sre.switchdc.mediawiki.03-set-db-readonly (exit_code=99) |
[production] |
11:03 |
<cgoubert@cumin1001> |
START - Cookbook sre.switchdc.mediawiki.03-set-db-readonly |
[production] |
11:03 |
<cgoubert@cumin1001> |
END (PASS) - Cookbook sre.switchdc.mediawiki.02-set-readonly (exit_code=0) |
[production] |
11:03 |
<cgoubert@cumin1001> |
[DRY-RUN] MediaWiki read-only period starts at: 2023-02-22 11:03:19.149671 |
[production] |
11:03 |
<cgoubert@cumin1001> |
START - Cookbook sre.switchdc.mediawiki.02-set-readonly |
[production] |
11:02 |
<cgoubert@cumin1001> |
END (PASS) - Cookbook sre.switchdc.mediawiki.01-stop-maintenance (exit_code=0) |
[production] |
11:02 |
<cgoubert@cumin1001> |
START - Cookbook sre.switchdc.mediawiki.01-stop-maintenance |
[production] |
11:01 |
<cgoubert@cumin1001> |
END (PASS) - Cookbook sre.switchdc.mediawiki.00-downtime-db-readonly-checks (exit_code=0) |
[production] |
11:01 |
<cgoubert@cumin1001> |
START - Cookbook sre.switchdc.mediawiki.00-downtime-db-readonly-checks |
[production] |
11:01 |
<cgoubert@cumin1001> |
END (PASS) - Cookbook sre.switchdc.mediawiki.00-disable-puppet (exit_code=0) |
[production] |
11:01 |
<cgoubert@cumin1001> |
START - Cookbook sre.switchdc.mediawiki.00-disable-puppet |
[production] |
10:47 |
<elukey@cumin1001> |
END (PASS) - Cookbook sre.hosts.reimage (exit_code=0) for host kubernetes2019.codfw.wmnet with OS bullseye |
[production] |
10:45 |
<elukey@cumin1001> |
END (PASS) - Cookbook sre.hosts.reimage (exit_code=0) for host kubernetes2021.codfw.wmnet with OS bullseye |
[production] |
10:40 |
<elukey@cumin1001> |
END (FAIL) - Cookbook sre.hosts.reimage (exit_code=99) for host kubernetes2020.codfw.wmnet with OS bullseye |
[production] |
10:39 |
<elukey@cumin1001> |
END (FAIL) - Cookbook sre.hosts.reimage (exit_code=99) for host kubernetes2018.codfw.wmnet with OS bullseye |
[production] |
10:35 |
<cgoubert@cumin1001> |
END (PASS) - Cookbook sre.switchdc.mediawiki.00-reduce-ttl (exit_code=0) |
[production] |
10:35 |
<cgoubert@cumin1001> |
START - Cookbook sre.switchdc.mediawiki.00-reduce-ttl |
[production] |
10:28 |
<elukey@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on kubernetes2019.codfw.wmnet with reason: host reimage |
[production] |
10:26 |
<elukey@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on kubernetes2021.codfw.wmnet with reason: host reimage |
[production] |
10:24 |
<elukey@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on kubernetes2020.codfw.wmnet with reason: host reimage |
[production] |
10:22 |
<nfraison@cumin1001> |
END (PASS) - Cookbook sre.hosts.reimage (exit_code=0) for host an-presto1005.eqiad.wmnet with OS bullseye |
[production] |
10:21 |
<elukey@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on kubernetes2018.codfw.wmnet with reason: host reimage |
[production] |
10:21 |
<elukey@cumin1001> |
END (FAIL) - Cookbook sre.hosts.reimage (exit_code=99) for host kubernetes2017.codfw.wmnet with OS bullseye |
[production] |
10:20 |
<elukey@cumin1001> |
START - Cookbook sre.hosts.downtime for 2:00:00 on kubernetes2021.codfw.wmnet with reason: host reimage |
[production] |
10:19 |
<elukey@cumin1001> |
START - Cookbook sre.hosts.downtime for 2:00:00 on kubernetes2020.codfw.wmnet with reason: host reimage |
[production] |
10:18 |
<elukey@cumin1001> |
START - Cookbook sre.hosts.downtime for 2:00:00 on kubernetes2019.codfw.wmnet with reason: host reimage |
[production] |
10:18 |
<elukey@cumin1001> |
START - Cookbook sre.hosts.downtime for 2:00:00 on kubernetes2018.codfw.wmnet with reason: host reimage |
[production] |
10:13 |
<cgoubert@cumin1001> |
END (PASS) - Cookbook sre.switchdc.mediawiki.00-reduce-ttl (exit_code=0) |
[production] |
10:08 |
<claime> |
Starting sre.switchdc.mediawiki live test preparation steps |
[production] |
10:07 |
<cgoubert@cumin1001> |
START - Cookbook sre.switchdc.mediawiki.00-reduce-ttl |
[production] |
10:05 |
<elukey@cumin1001> |
START - Cookbook sre.hosts.reimage for host kubernetes2021.codfw.wmnet with OS bullseye |
[production] |