|
2026-05-19
§
|
| 12:08 |
<mvernon@cumin2002> |
START - Cookbook sre.hosts.reboot-single for host apus-be2004.codfw.wmnet |
[production] |
| 12:07 |
<brouberol@deploy1003> |
helmfile [dse-k8s-eqiad] START helmfile.d/admin 'apply'. |
[production] |
| 12:07 |
<marostegui@cumin1003> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on db1288.eqiad.wmnet with reason: host reimage |
[production] |
| 12:07 |
<mvernon@cumin2002> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host apus-be2005.codfw.wmnet |
[production] |
| 12:06 |
<jmm@cumin2002> |
START - Cookbook sre.hosts.reboot-single for host ganeti7004.magru.wmnet |
[production] |
| 12:06 |
<jmm@cumin2002> |
START - Cookbook sre.hosts.reboot-single for host ganeti5007.eqsin.wmnet |
[production] |
| 12:06 |
<marostegui@cumin1003> |
END (PASS) - Cookbook sre.hosts.reimage (exit_code=0) for host db1284.eqiad.wmnet with OS trixie |
[production] |
| 12:06 |
<tappof@cumin1003> |
START - Cookbook sre.hosts.reboot-single for host prometheus6002.drmrs.wmnet |
[production] |
| 12:06 |
<tappof@cumin1003> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host prometheus2007.codfw.wmnet |
[production] |
| 12:04 |
<marostegui@cumin1003> |
END (PASS) - Cookbook sre.hosts.reimage (exit_code=0) for host db1277.eqiad.wmnet with OS trixie |
[production] |
| 12:03 |
<marostegui@cumin1003> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on db1281.eqiad.wmnet with reason: host reimage |
[production] |
| 12:01 |
<mvernon@cumin2002> |
START - Cookbook sre.hosts.reboot-single for host apus-be2005.codfw.wmnet |
[production] |
| 12:00 |
<mvernon@cumin2002> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host apus-be2006.codfw.wmnet |
[production] |
| 12:00 |
<marostegui@cumin1003> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on db1283.eqiad.wmnet with reason: host reimage |
[production] |
| 11:58 |
<jmm@cumin2002> |
START - Cookbook sre.ganeti.drain-node for draining ganeti node ganeti7004.magru.wmnet |
[production] |
| 11:58 |
<jmm@cumin2002> |
START - Cookbook sre.ganeti.drain-node for draining ganeti node ganeti5007.eqsin.wmnet |
[production] |
| 11:57 |
<taavi@cloudcumin1001> |
END (PASS) - Cookbook wmcs.openstack.cloudweb.safe_reboot (exit_code=0) on hosts matched by 'P{O:wmcs::openstack::eqiad1::cloudweb}' |
[admin] |
| 11:57 |
<taavi@cumin1003> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host cloudidp2001-dev.codfw.wmnet |
[production] |
| 11:56 |
<marostegui@cumin1003> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on db1282.eqiad.wmnet with reason: host reimage |
[production] |
| 11:56 |
<cgoubert@cumin1003> |
END (PASS) - Cookbook sre.kafka.roll-restart-reboot-brokers (exit_code=0) rolling reboot on A:kafka-main-eqiad |
[production] |
| 11:56 |
<mvernon@cumin2002> |
START - Cookbook sre.hosts.reboot-single for host apus-be2006.codfw.wmnet |
[production] |
| 11:56 |
<tappof@cumin1003> |
START - Cookbook sre.hosts.reboot-single for host prometheus2007.codfw.wmnet |
[production] |
| 11:56 |
<tappof@cumin1003> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host prometheus2005.codfw.wmnet |
[production] |
| 11:53 |
<taavi@cumin1003> |
START - Cookbook sre.hosts.reboot-single for host cloudidp2001-dev.codfw.wmnet |
[production] |
| 11:52 |
<marostegui@cumin1003> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on db1284.eqiad.wmnet with reason: host reimage |
[production] |
| 11:52 |
<taavi@cloudcumin1001> |
START - Cookbook wmcs.openstack.cloudweb.safe_reboot on hosts matched by 'P{O:wmcs::openstack::eqiad1::cloudweb}' |
[admin] |
| 11:51 |
<taavi@cloudcumin1001> |
END (PASS) - Cookbook wmcs.openstack.cloudweb.safe_reboot (exit_code=0) on hosts matched by 'P{O:wmcs::openstack::codfw1dev::cloudweb}' |
[admin] |
| 11:50 |
<jynus@cumin1003> |
DONE (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 6:00:00 on 18 hosts with reason: restart |
[production] |
| 11:49 |
<marostegui@cumin1003> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on db1277.eqiad.wmnet with reason: host reimage |
[production] |
| 11:49 |
<marostegui@cumin1003> |
START - Cookbook sre.hosts.downtime for 2:00:00 on db1287.eqiad.wmnet with reason: host reimage |
[production] |
| 11:49 |
<marostegui@cumin1003> |
START - Cookbook sre.hosts.downtime for 2:00:00 on db1288.eqiad.wmnet with reason: host reimage |
[production] |
| 11:48 |
<taavi@cloudcumin1001> |
START - Cookbook wmcs.openstack.cloudweb.safe_reboot on hosts matched by 'P{O:wmcs::openstack::codfw1dev::cloudweb}' |
[admin] |
| 11:48 |
<marostegui@cumin1003> |
START - Cookbook sre.hosts.downtime for 2:00:00 on db1286.eqiad.wmnet with reason: host reimage |
[production] |
| 11:48 |
<marostegui@cumin1003> |
START - Cookbook sre.hosts.downtime for 2:00:00 on db1284.eqiad.wmnet with reason: host reimage |
[production] |
| 11:47 |
<marostegui@cumin1003> |
START - Cookbook sre.hosts.downtime for 2:00:00 on db1283.eqiad.wmnet with reason: host reimage |
[production] |
| 11:47 |
<marostegui@cumin1003> |
START - Cookbook sre.hosts.downtime for 2:00:00 on db1282.eqiad.wmnet with reason: host reimage |
[production] |
| 11:46 |
<tappof@cumin1003> |
START - Cookbook sre.hosts.reboot-single for host prometheus2005.codfw.wmnet |
[production] |
| 11:46 |
<tappof@cumin1003> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host prometheus5003.eqsin.wmnet |
[production] |
| 11:45 |
<marostegui@cumin1003> |
START - Cookbook sre.hosts.downtime for 2:00:00 on db1281.eqiad.wmnet with reason: host reimage |
[production] |
| 11:45 |
<marostegui@cumin1003> |
START - Cookbook sre.hosts.downtime for 2:00:00 on db1280.eqiad.wmnet with reason: host reimage |
[production] |
| 11:44 |
<marostegui@cumin1003> |
START - Cookbook sre.hosts.downtime for 2:00:00 on db1279.eqiad.wmnet with reason: host reimage |
[production] |
| 11:44 |
<marostegui@cumin1003> |
START - Cookbook sre.hosts.downtime for 2:00:00 on db1278.eqiad.wmnet with reason: host reimage |
[production] |
| 11:44 |
<marostegui@cumin1003> |
START - Cookbook sre.hosts.downtime for 2:00:00 on db1277.eqiad.wmnet with reason: host reimage |
[production] |
| 11:42 |
<mvernon@cumin2002> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host moss-be2003.codfw.wmnet |
[production] |
| 11:39 |
<tappof@cumin1003> |
START - Cookbook sre.hosts.reboot-single for host prometheus5003.eqsin.wmnet |
[production] |
| 11:39 |
<tappof@cumin1003> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host prometheus1008.eqiad.wmnet |
[production] |
| 11:39 |
<jynus@cumin1003> |
DONE (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 6:00:00 on ms-backup[1003-1004].eqiad.wmnet with reason: restart |
[production] |
| 11:37 |
<moritzm> |
failover Ganeti cluster in eqsin to ganeti5004 |
[production] |
| 11:37 |
<moritzm> |
failover Ganeti cluster in magru to ganeti7001 |
[production] |
| 11:36 |
<marostegui@cumin1003> |
START - Cookbook sre.hosts.reimage for host db1288.eqiad.wmnet with OS trixie |
[production] |