2024-06-05
§
|
10:52 |
<jmm@cumin2002> |
START - Cookbook sre.ldap.roll-restart-reboot-replica rolling reboot on A:ldap-replicas-codfw |
[production] |
10:50 |
<mvernon@cumin2002> |
START - Cookbook sre.hosts.reboot-single for host ms-be2059.codfw.wmnet |
[production] |
10:50 |
<mvernon@cumin2002> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host ms-be2058.codfw.wmnet |
[production] |
10:47 |
<ladsgroup@cumin1002> |
dbctl commit (dc=all): 'db1184 (re)pooling @ 75%: Maint over', diff saved to https://phabricator.wikimedia.org/P64096 and previous config saved to /var/cache/conftool/dbconfig/20240605-104757-ladsgroup.json |
[production] |
10:46 |
<mvernon@cumin1002> |
START - Cookbook sre.hosts.reboot-single for host ms-be1060.eqiad.wmnet |
[production] |
10:46 |
<mvernon@cumin1002> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host ms-be1059.eqiad.wmnet |
[production] |
10:42 |
<mvernon@cumin2002> |
START - Cookbook sre.hosts.reboot-single for host ms-be2058.codfw.wmnet |
[production] |
10:40 |
<mvernon@cumin2002> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host ms-be2057.codfw.wmnet |
[production] |
10:39 |
<klausman@cumin2002> |
END (PASS) - Cookbook sre.ganeti.reboot-vm (exit_code=0) for VM ml-etcd1003.eqiad.wmnet |
[production] |
10:38 |
<marostegui@cumin1002> |
dbctl commit (dc=all): 'db1227 (re)pooling @ 75%: Repooling', diff saved to https://phabricator.wikimedia.org/P64094 and previous config saved to /var/cache/conftool/dbconfig/20240605-103854-root.json |
[production] |
10:37 |
<klausman@cumin2002> |
START - Cookbook sre.ganeti.reboot-vm for VM ml-etcd1003.eqiad.wmnet |
[production] |
10:37 |
<hnowlan@cumin1002> |
END (PASS) - Cookbook sre.hosts.reimage (exit_code=0) for host wikikube-worker1012.eqiad.wmnet with OS bullseye |
[production] |
10:35 |
<klausman@cumin2002> |
END (PASS) - Cookbook sre.k8s.reboot-nodes (exit_code=0) rolling reboot on A:ml-serve-worker-codfw |
[production] |
10:34 |
<hnowlan@cumin1002> |
END (PASS) - Cookbook sre.hosts.reimage (exit_code=0) for host wikikube-worker1010.eqiad.wmnet with OS bullseye |
[production] |
10:32 |
<ladsgroup@cumin1002> |
dbctl commit (dc=all): 'db1184 (re)pooling @ 25%: Maint over', diff saved to https://phabricator.wikimedia.org/P64093 and previous config saved to /var/cache/conftool/dbconfig/20240605-103251-ladsgroup.json |
[production] |
10:32 |
<mvernon@cumin1002> |
START - Cookbook sre.hosts.reboot-single for host ms-be1059.eqiad.wmnet |
[production] |
10:32 |
<mvernon@cumin2002> |
START - Cookbook sre.hosts.reboot-single for host ms-be2057.codfw.wmnet |
[production] |
10:31 |
<mvernon@cumin2002> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host ms-be2056.codfw.wmnet |
[production] |
10:30 |
<hnowlan@cumin1002> |
END (PASS) - Cookbook sre.hosts.reimage (exit_code=0) for host wikikube-worker1008.eqiad.wmnet with OS bullseye |
[production] |
10:30 |
<mvernon@cumin1002> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host ms-be1058.eqiad.wmnet |
[production] |
10:27 |
<jmm@cumin2002> |
END (PASS) - Cookbook sre.netbox.restart-reboot (exit_code=0) rolling reboot on A:netbox |
[production] |
10:23 |
<marostegui@cumin1002> |
dbctl commit (dc=all): 'db1227 (re)pooling @ 50%: Repooling', diff saved to https://phabricator.wikimedia.org/P64091 and previous config saved to /var/cache/conftool/dbconfig/20240605-102348-root.json |
[production] |
10:22 |
<ladsgroup@cumin1002> |
dbctl commit (dc=all): 'Depooling db2207 (T352010)', diff saved to https://phabricator.wikimedia.org/P64090 and previous config saved to /var/cache/conftool/dbconfig/20240605-102252-ladsgroup.json |
[production] |
10:22 |
<ladsgroup@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on db2207.codfw.wmnet with reason: Maintenance |
[production] |
10:22 |
<ladsgroup@cumin1002> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on db2207.codfw.wmnet with reason: Maintenance |
[production] |
10:22 |
<mvernon@cumin1002> |
START - Cookbook sre.hosts.reboot-single for host ms-be1058.eqiad.wmnet |
[production] |
10:22 |
<mvernon@cumin2002> |
START - Cookbook sre.hosts.reboot-single for host ms-be2056.codfw.wmnet |
[production] |
10:21 |
<mvernon@cumin2002> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host ms-be2055.codfw.wmnet |
[production] |
10:21 |
<mvernon@cumin1002> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host ms-be1057.eqiad.wmnet |
[production] |
10:18 |
<hnowlan@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on wikikube-worker1012.eqiad.wmnet with reason: host reimage |
[production] |
10:17 |
<ladsgroup@cumin1002> |
dbctl commit (dc=all): 'db1184 (re)pooling @ 10%: Maint over', diff saved to https://phabricator.wikimedia.org/P64088 and previous config saved to /var/cache/conftool/dbconfig/20240605-101744-ladsgroup.json |
[production] |
10:16 |
<marostegui@cumin1002> |
END (PASS) - Cookbook sre.hosts.reimage (exit_code=0) for host db1152.eqiad.wmnet with OS bookworm |
[production] |
10:15 |
<ladsgroup@cumin1002> |
dbctl commit (dc=all): 'Depooling db2203 (T352010)', diff saved to https://phabricator.wikimedia.org/P64087 and previous config saved to /var/cache/conftool/dbconfig/20240605-101521-ladsgroup.json |
[production] |
10:15 |
<ladsgroup@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on db2203.codfw.wmnet with reason: Maintenance |
[production] |
10:15 |
<hnowlan@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on wikikube-worker1010.eqiad.wmnet with reason: host reimage |
[production] |
10:15 |
<ladsgroup@cumin1002> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on db2203.codfw.wmnet with reason: Maintenance |
[production] |
10:13 |
<dcaro@cumin1002> |
END (ERROR) - Cookbook sre.hosts.reboot-single (exit_code=97) for host cloudcephosd1031.eqiad.wmnet |
[production] |
10:13 |
<hnowlan@cumin1002> |
START - Cookbook sre.hosts.downtime for 2:00:00 on wikikube-worker1012.eqiad.wmnet with reason: host reimage |
[production] |
10:11 |
<hnowlan@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on wikikube-worker1008.eqiad.wmnet with reason: host reimage |
[production] |
10:10 |
<marostegui@cumin1002> |
dbctl commit (dc=all): 'Promote db1152 back to x2 eqiad master T366677', diff saved to https://phabricator.wikimedia.org/P64086 and previous config saved to /var/cache/conftool/dbconfig/20240605-101019-root.json |
[production] |
10:09 |
<hnowlan@cumin1002> |
START - Cookbook sre.hosts.downtime for 2:00:00 on wikikube-worker1010.eqiad.wmnet with reason: host reimage |
[production] |
10:09 |
<hnowlan@cumin1002> |
START - Cookbook sre.hosts.downtime for 2:00:00 on wikikube-worker1008.eqiad.wmnet with reason: host reimage |
[production] |
10:08 |
<marostegui@cumin1002> |
dbctl commit (dc=all): 'db1227 (re)pooling @ 25%: Repooling', diff saved to https://phabricator.wikimedia.org/P64085 and previous config saved to /var/cache/conftool/dbconfig/20240605-100842-root.json |
[production] |
10:08 |
<marostegui@cumin1002> |
dbctl commit (dc=all): 'db1186 (re)pooling @ 100%: Repooling', diff saved to https://phabricator.wikimedia.org/P64084 and previous config saved to /var/cache/conftool/dbconfig/20240605-100810-root.json |
[production] |
10:01 |
<marostegui@cumin1002> |
dbctl commit (dc=all): 'db2207 (re)pooling @ 100%: Repooling', diff saved to https://phabricator.wikimedia.org/P64083 and previous config saved to /var/cache/conftool/dbconfig/20240605-100117-root.json |
[production] |
10:00 |
<fabfur> |
disabling puppet on cp4037 to test Benthos performances (T358109) |
[production] |
10:00 |
<hnowlan@cumin1002> |
START - Cookbook sre.hosts.reimage for host wikikube-worker1012.eqiad.wmnet with OS bullseye |
[production] |
10:00 |
<mvernon@cumin1002> |
START - Cookbook sre.hosts.reboot-single for host ms-be1057.eqiad.wmnet |
[production] |
10:00 |
<hnowlan@cumin1002> |
START - Cookbook sre.hosts.reimage for host wikikube-worker1011.eqiad.wmnet with OS bullseye |
[production] |
10:00 |
<mvernon@cumin2002> |
START - Cookbook sre.hosts.reboot-single for host ms-be2055.codfw.wmnet |
[production] |