|
2023-03-08
§
|
| 16:02 |
<bking@cumin2002> |
START - Cookbook sre.hosts.provision for host elastic1061.mgmt.eqiad.wmnet with reboot policy GRACEFUL |
[production] |
| 16:00 |
<bking@cumin2002> |
START - Cookbook sre.hosts.reboot-single for host elastic1062.eqiad.wmnet |
[production] |
| 16:00 |
<bking@cumin2002> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host elastic1061.eqiad.wmnet |
[production] |
| 15:59 |
<bking@cumin2002> |
END (FAIL) - Cookbook sre.hardware.upgrade-firmware (exit_code=1) upgrade firmware for hosts ['elastic1062.eqiad.wmnet'] |
[production] |
| 15:55 |
<bking@cumin2002> |
START - Cookbook sre.hardware.upgrade-firmware upgrade firmware for hosts ['elastic1063.eqiad.wmnet'] |
[production] |
| 15:54 |
<bking@cumin2002> |
END (FAIL) - Cookbook sre.hardware.upgrade-firmware (exit_code=99) upgrade firmware for hosts ['elastic1063.eqiad.wmnet'] |
[production] |
| 15:54 |
<bking@cumin2002> |
START - Cookbook sre.hosts.reboot-single for host elastic1061.eqiad.wmnet |
[production] |
| 15:52 |
<bking@cumin2002> |
START - Cookbook sre.hardware.upgrade-firmware upgrade firmware for hosts ['elastic1062.eqiad.wmnet'] |
[production] |
| 15:52 |
<bking@cumin2002> |
END (FAIL) - Cookbook sre.hardware.upgrade-firmware (exit_code=99) upgrade firmware for hosts ['elastic1062.eqiad.wmnet'] |
[production] |
| 15:46 |
<bking@cumin2002> |
START - Cookbook sre.hardware.upgrade-firmware upgrade firmware for hosts ['elastic1063.eqiad.wmnet'] |
[production] |
| 15:42 |
<bking@cumin2002> |
START - Cookbook sre.hardware.upgrade-firmware upgrade firmware for hosts ['elastic1062.eqiad.wmnet'] |
[production] |
| 15:33 |
<bking@cumin2002> |
END (FAIL) - Cookbook sre.hardware.upgrade-firmware (exit_code=99) upgrade firmware for hosts ['elastic1061.eqiad.wmnet'] |
[production] |
| 15:31 |
<bking@cumin2002> |
END (FAIL) - Cookbook sre.hardware.upgrade-firmware (exit_code=1) upgrade firmware for hosts ['elastic1060.eqiad.wmnet'] |
[production] |
| 15:26 |
<bking@cumin2002> |
START - Cookbook sre.hardware.upgrade-firmware upgrade firmware for hosts ['elastic1061.eqiad.wmnet'] |
[production] |
| 15:23 |
<bking@cumin2002> |
START - Cookbook sre.hardware.upgrade-firmware upgrade firmware for hosts ['elastic1060.eqiad.wmnet'] |
[production] |
| 14:25 |
<inflatador> |
bking@cumin2002 powering down elastic1060-66 for re-rack T322082 |
[production] |
|
2023-03-07
§
|
| 21:58 |
<inflatador> |
bking@cumin2002 depool elastic row D hosts to prepare for T322082 |
[production] |
| 21:57 |
<bking@cumin2002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on 7 hosts with reason: re-rack |
[production] |
| 21:56 |
<bking@cumin2002> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on 7 hosts with reason: re-rack |
[production] |
| 21:41 |
<inflatador> |
bking@cumin2002 ban elastic row D hosts to prepare for T322082 |
[production] |
| 21:07 |
<bking@deploy2002> |
Finished deploy [airflow-dags/search@d533716]: initial deployment to search platform airflow 2 instance-bk (duration: 00m 41s) |
[production] |
| 21:07 |
<bking@deploy2002> |
Started deploy [airflow-dags/search@d533716]: initial deployment to search platform airflow 2 instance-bk |
[production] |
| 20:17 |
<bking@deploy2002> |
Finished deploy [airflow-dags/search@9924c93]: initial deployment to search platform airflow 2 instance-bk (duration: 01m 18s) |
[production] |
| 20:16 |
<bking@deploy2002> |
Started deploy [airflow-dags/search@9924c93]: initial deployment to search platform airflow 2 instance-bk |
[production] |
| 17:51 |
<inflatador> |
bking@cumin2002 repool wdqs hosts post-maintenance T329073 |
[production] |
| 16:58 |
<bking@cumin2002> |
conftool action : set/pooled=true; selector: dnsdisc=wdqs,name=eqiad |
[production] |
| 14:56 |
<inflatador> |
bking@cumin2002 unban production row A elastic nodes from all clusters T329073 |
[production] |
| 14:52 |
<inflatador> |
bking@cumin2002 unban row A cloudelastic nodes T329073 |
[production] |
|
2023-03-06
§
|
| 23:20 |
<bking@cumin2002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on wcqs1001.eqiad.wmnet,wdqs[1003-1004,1006,1011].eqiad.wmnet with reason: switch maintenance |
[production] |
| 23:20 |
<bking@cumin2002> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on wcqs1001.eqiad.wmnet,wdqs[1003-1004,1006,1011].eqiad.wmnet with reason: switch maintenance |
[production] |
| 23:19 |
<bking@cumin2002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on 12 hosts with reason: switch maintenance |
[production] |
| 23:19 |
<bking@cumin2002> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on 12 hosts with reason: switch maintenance |
[production] |
| 23:16 |
<inflatador> |
bking@cumin2002 ban row A cloudelastic hosts T329073 |
[production] |
| 23:04 |
<inflatador> |
bking@cumin2002 depool wcqs and wdqs row A hosts T329073 |
[production] |
|
2023-03-03
§
|
| 20:58 |
<inflatador> |
bking@cumin2002 persistently unban all elastic nodes in eqiad T322082 |
[production] |
| 20:55 |
<bking@cumin2002> |
END (PASS) - Cookbook sre.puppet.sync-netbox-hiera (exit_code=0) generate netbox hiera data: "Update location of elastic1059 - bking@cumin2002 - T322082" |
[production] |
| 20:52 |
<bking@cumin2002> |
START - Cookbook sre.puppet.sync-netbox-hiera generate netbox hiera data: "Update location of elastic1059 - bking@cumin2002 - T322082" |
[production] |
| 20:41 |
<bking@cumin2002> |
END (PASS) - Cookbook sre.hosts.provision (exit_code=0) for host elastic1059.mgmt.eqiad.wmnet with reboot policy GRACEFUL |
[production] |
| 20:33 |
<bking@cumin2002> |
START - Cookbook sre.hosts.provision for host elastic1059.mgmt.eqiad.wmnet with reboot policy GRACEFUL |
[production] |
| 20:25 |
<bking@cumin2002> |
END (PASS) - Cookbook sre.puppet.sync-netbox-hiera (exit_code=0) generate netbox hiera data: "Update location of elastic1058 - bking@cumin2002 - T322082" |
[production] |
| 20:23 |
<bking@cumin2002> |
START - Cookbook sre.puppet.sync-netbox-hiera generate netbox hiera data: "Update location of elastic1058 - bking@cumin2002 - T322082" |
[production] |
| 20:13 |
<bking@cumin2002> |
END (PASS) - Cookbook sre.hosts.provision (exit_code=0) for host elastic1058.mgmt.eqiad.wmnet with reboot policy GRACEFUL |
[production] |
| 20:05 |
<bking@cumin2002> |
START - Cookbook sre.hosts.provision for host elastic1058.mgmt.eqiad.wmnet with reboot policy GRACEFUL |
[production] |
| 19:51 |
<bking@cumin2002> |
END (PASS) - Cookbook sre.puppet.sync-netbox-hiera (exit_code=0) generate netbox hiera data: "Update location of elastic hosts - bking@cumin2002 - T322082" |
[production] |
| 19:49 |
<bking@cumin2002> |
START - Cookbook sre.puppet.sync-netbox-hiera generate netbox hiera data: "Update location of elastic hosts - bking@cumin2002 - T322082" |
[production] |
| 19:48 |
<bking@cumin2002> |
END (PASS) - Cookbook sre.hosts.provision (exit_code=0) for host elastic1057.mgmt.eqiad.wmnet with reboot policy GRACEFUL |
[production] |
| 19:40 |
<bking@cumin2002> |
START - Cookbook sre.hosts.provision for host elastic1057.mgmt.eqiad.wmnet with reboot policy GRACEFUL |
[production] |
| 19:39 |
<bking@cumin2002> |
END (FAIL) - Cookbook sre.puppet.sync-netbox-hiera (exit_code=99) generate netbox hiera data: "Update location of elastic1055 - bking@cumin2002 - T322082" |
[production] |
| 19:36 |
<bking@cumin2002> |
START - Cookbook sre.puppet.sync-netbox-hiera generate netbox hiera data: "Update location of elastic1055 - bking@cumin2002 - T322082" |
[production] |
| 19:36 |
<bking@cumin2002> |
END (ERROR) - Cookbook sre.puppet.sync-netbox-hiera (exit_code=97) generate netbox hiera data: "Update location of elastic1055 - bking@cumin2002 - T322082" |
[production] |