2022-10-03
§
|
12:02 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'Depool db2123', diff saved to https://phabricator.wikimedia.org/P35303 and previous config saved to /var/cache/conftool/dbconfig/20221003-120208-root.json |
[production] |
12:01 |
<marostegui@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 8:00:00 on db2123.codfw.wmnet with reason: Cloning |
[production] |
12:01 |
<marostegui@cumin1001> |
START - Cookbook sre.hosts.downtime for 8:00:00 on db2123.codfw.wmnet with reason: Cloning |
[production] |
12:00 |
<marostegui@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on db1116.eqiad.wmnet with reason: Reboot |
[production] |
12:00 |
<marostegui@cumin1001> |
START - Cookbook sre.hosts.downtime for 2:00:00 on db1116.eqiad.wmnet with reason: Reboot |
[production] |
11:54 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'db2157 (re)pooling @ 1%: After upgrade', diff saved to https://phabricator.wikimedia.org/P35302 and previous config saved to /var/cache/conftool/dbconfig/20221003-115449-root.json |
[production] |
11:54 |
<marostegui@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on db1117.eqiad.wmnet with reason: Reboot |
[production] |
11:54 |
<marostegui@cumin1001> |
START - Cookbook sre.hosts.downtime for 2:00:00 on db1117.eqiad.wmnet with reason: Reboot |
[production] |
11:28 |
<hnowlan@puppetmaster1001> |
conftool action : set/pooled=true; selector: dnsdisc=sessionstore,name=eqiad |
[production] |
11:28 |
<hnowlan@deploy1002> |
helmfile [eqiad] DONE helmfile.d/services/sessionstore: sync |
[production] |
11:27 |
<hnowlan@cumin1001> |
END (PASS) - Cookbook sre.hosts.reimage (exit_code=0) for host sessionstore1003.eqiad.wmnet with OS buster |
[production] |
11:27 |
<hnowlan@deploy1002> |
helmfile [eqiad] START helmfile.d/services/sessionstore: sync |
[production] |
11:20 |
<hnowlan@puppetmaster1001> |
conftool action : set/pooled=false; selector: dnsdisc=sessionstore,name=eqiad |
[production] |
11:08 |
<hnowlan@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on sessionstore1003.eqiad.wmnet with reason: host reimage |
[production] |
11:04 |
<hnowlan@cumin1001> |
START - Cookbook sre.hosts.downtime for 2:00:00 on sessionstore1003.eqiad.wmnet with reason: host reimage |
[production] |
10:52 |
<hnowlan@cumin1001> |
START - Cookbook sre.hosts.reimage for host sessionstore1003.eqiad.wmnet with OS buster |
[production] |
10:49 |
<hnowlan@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1:00:00 on sessionstore1003.eqiad.wmnet with reason: Prep for reimage |
[production] |
10:48 |
<hnowlan@cumin1001> |
START - Cookbook sre.hosts.downtime for 1:00:00 on sessionstore1003.eqiad.wmnet with reason: Prep for reimage |
[production] |
10:41 |
<hnowlan@puppetmaster1001> |
conftool action : set/pooled=true; selector: dnsdisc=sessionstore,name=eqiad |
[production] |
10:41 |
<hnowlan@cumin1001> |
END (PASS) - Cookbook sre.hosts.reimage (exit_code=0) for host sessionstore1002.eqiad.wmnet with OS buster |
[production] |
10:40 |
<hnowlan@deploy1002> |
helmfile [eqiad] DONE helmfile.d/services/sessionstore: sync |
[production] |
10:40 |
<hnowlan@deploy1002> |
helmfile [eqiad] START helmfile.d/services/sessionstore: sync |
[production] |
10:39 |
<hnowlan> |
starting cassandra on reimaged sessionstore1002 |
[production] |
10:37 |
<_joe_> |
remove stale druid.svc.eqiad.wmnet certificate from the puppetmaster CA; it was expired anyways |
[production] |
10:32 |
<hnowlan@puppetmaster1001> |
conftool action : set/pooled=false; selector: dnsdisc=sessionstore,name=eqiad |
[production] |
10:31 |
<jelto@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 0:20:00 on gitlab1004.wikimedia.org with reason: upgrade gitlab1004 to new version |
[production] |
10:31 |
<jelto@cumin1001> |
START - Cookbook sre.hosts.downtime for 0:20:00 on gitlab1004.wikimedia.org with reason: upgrade gitlab1004 to new version |
[production] |
10:19 |
<hnowlan@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on sessionstore1002.eqiad.wmnet with reason: host reimage |
[production] |
10:16 |
<hnowlan@cumin1001> |
START - Cookbook sre.hosts.downtime for 2:00:00 on sessionstore1002.eqiad.wmnet with reason: host reimage |
[production] |
10:05 |
<hnowlan@cumin1001> |
START - Cookbook sre.hosts.reimage for host sessionstore1002.eqiad.wmnet with OS buster |
[production] |
10:00 |
<hnowlan> |
c-foreach-nt drain on sessionstore1002 |
[production] |
10:00 |
<hnowlan@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1:00:00 on sessionstore1002.eqiad.wmnet with reason: Prep for reimage |
[production] |
10:00 |
<hnowlan@cumin1001> |
START - Cookbook sre.hosts.downtime for 1:00:00 on sessionstore1002.eqiad.wmnet with reason: Prep for reimage |
[production] |
09:25 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'db1200 (re)pooling @ 100%: After upgrade', diff saved to https://phabricator.wikimedia.org/P35300 and previous config saved to /var/cache/conftool/dbconfig/20221003-092519-root.json |
[production] |
09:22 |
<ayounsi@cumin1001> |
END (PASS) - Cookbook sre.network.peering (exit_code=0) with action 'email' for AS: 31133 |
[production] |
09:21 |
<ayounsi@cumin1001> |
START - Cookbook sre.network.peering with action 'email' for AS: 31133 |
[production] |
09:11 |
<ayounsi@cumin1001> |
END (PASS) - Cookbook sre.network.peering (exit_code=0) with action 'email' for AS: 62044 |
[production] |
09:11 |
<ayounsi@cumin1001> |
START - Cookbook sre.network.peering with action 'email' for AS: 62044 |
[production] |
09:10 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'db1200 (re)pooling @ 75%: After upgrade', diff saved to https://phabricator.wikimedia.org/P35299 and previous config saved to /var/cache/conftool/dbconfig/20221003-091014-root.json |
[production] |
08:59 |
<marostegui@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 8:00:00 on db[2157,2178].codfw.wmnet with reason: Reclone |
[production] |
08:59 |
<marostegui@cumin1001> |
START - Cookbook sre.hosts.downtime for 8:00:00 on db[2157,2178].codfw.wmnet with reason: Reclone |
[production] |
08:58 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'Depool db2157', diff saved to https://phabricator.wikimedia.org/P35297 and previous config saved to /var/cache/conftool/dbconfig/20221003-085840-root.json |
[production] |
08:55 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'db1200 (re)pooling @ 50%: After upgrade', diff saved to https://phabricator.wikimedia.org/P35296 and previous config saved to /var/cache/conftool/dbconfig/20221003-085509-root.json |
[production] |
08:54 |
<ayounsi@cumin1001> |
END (PASS) - Cookbook sre.network.peering (exit_code=0) with action 'configure' for AS: 12975 |
[production] |
08:53 |
<ayounsi@cumin1001> |
START - Cookbook sre.network.peering with action 'configure' for AS: 12975 |
[production] |
08:50 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'db2175 (re)pooling @ 100%: After upgrade', diff saved to https://phabricator.wikimedia.org/P35295 and previous config saved to /var/cache/conftool/dbconfig/20221003-085007-root.json |
[production] |
08:40 |
<vgutierrez@cumin1001> |
END (FAIL) - Cookbook sre.hosts.decommission (exit_code=1) for hosts cp5001.eqsin.wmnet |
[production] |
08:40 |
<vgutierrez@cumin1001> |
END (PASS) - Cookbook sre.dns.netbox (exit_code=0) |
[production] |
08:40 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'db1200 (re)pooling @ 25%: After upgrade', diff saved to https://phabricator.wikimedia.org/P35294 and previous config saved to /var/cache/conftool/dbconfig/20221003-084004-root.json |
[production] |
08:39 |
<ayounsi@cumin1001> |
END (FAIL) - Cookbook sre.network.peering (exit_code=99) with action 'email' for AS: 3303 |
[production] |