2022-07-05
§
|
07:03 |
<marostegui@cumin1001> |
START - Cookbook sre.hosts.downtime for 1:00:00 on 8 hosts with reason: codfw s3 sanitarium master switch |
[production] |
07:00 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'Decommission db2073 T311837', diff saved to https://phabricator.wikimedia.org/P30822 and previous config saved to /var/cache/conftool/dbconfig/20220705-070019-marostegui.json |
[production] |
06:57 |
<marostegui@cumin1001> |
END (PASS) - Cookbook sre.hosts.decommission (exit_code=0) for hosts db2073.codfw.wmnet |
[production] |
06:55 |
<marostegui@cumin1001> |
END (PASS) - Cookbook sre.dns.netbox (exit_code=0) |
[production] |
06:53 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'db1131 (re)pooling @ 2%: After maintenance', diff saved to https://phabricator.wikimedia.org/P30821 and previous config saved to /var/cache/conftool/dbconfig/20220705-065352-root.json |
[production] |
06:50 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'db1132 (re)pooling @ 2%: After restart', diff saved to https://phabricator.wikimedia.org/P30820 and previous config saved to /var/cache/conftool/dbconfig/20220705-065035-root.json |
[production] |
06:50 |
<marostegui@cumin1001> |
START - Cookbook sre.dns.netbox |
[production] |
06:46 |
<marostegui@cumin1001> |
START - Cookbook sre.hosts.decommission for hosts db2073.codfw.wmnet |
[production] |
06:38 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'db1131 (re)pooling @ 1%: After maintenance', diff saved to https://phabricator.wikimedia.org/P30819 and previous config saved to /var/cache/conftool/dbconfig/20220705-063848-root.json |
[production] |
06:35 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'db1132 (re)pooling @ 1%: After restart', diff saved to https://phabricator.wikimedia.org/P30818 and previous config saved to /var/cache/conftool/dbconfig/20220705-063531-root.json |
[production] |
06:34 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'Depool db1132', diff saved to https://phabricator.wikimedia.org/P30817 and previous config saved to /var/cache/conftool/dbconfig/20220705-063402-root.json |
[production] |
06:09 |
<marostegui> |
dbmaint s6@eqiad T298557 |
[production] |
06:05 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'Depool db1131 T311522', diff saved to https://phabricator.wikimedia.org/P30816 and previous config saved to /var/cache/conftool/dbconfig/20220705-060526-root.json |
[production] |
06:01 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'Promote db1173 to s6 primary and set section read-write T311522', diff saved to https://phabricator.wikimedia.org/P30815 and previous config saved to /var/cache/conftool/dbconfig/20220705-060139-root.json |
[production] |
06:01 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'Set s6 eqiad as read-only for maintenance - T311522', diff saved to https://phabricator.wikimedia.org/P30814 and previous config saved to /var/cache/conftool/dbconfig/20220705-060111-marostegui.json |
[production] |
06:00 |
<marostegui> |
Starting s6 eqiad failover from db1131 to db1173 - T311522 |
[production] |
05:59 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1:00:00 on db1130.eqiad.wmnet with reason: Maintenance |
[production] |
05:58 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 1:00:00 on db1130.eqiad.wmnet with reason: Maintenance |
[production] |
05:58 |
<TimStarling> |
deploying multi-DC support g 801621, manual puppet run on cp1080 |
[production] |
05:22 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'Set db1173 with weight 0 T311522', diff saved to https://phabricator.wikimedia.org/P30813 and previous config saved to /var/cache/conftool/dbconfig/20220705-052219-marostegui.json |
[production] |
05:21 |
<marostegui@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1:00:00 on 23 hosts with reason: Primary switchover s6 T311522 |
[production] |
05:21 |
<marostegui@cumin1001> |
START - Cookbook sre.hosts.downtime for 1:00:00 on 23 hosts with reason: Primary switchover s6 T311522 |
[production] |
02:32 |
<mwdebug-deploy@deploy1002> |
helmfile [codfw] DONE helmfile.d/services/mwdebug: apply |
[production] |
02:31 |
<mwdebug-deploy@deploy1002> |
helmfile [codfw] START helmfile.d/services/mwdebug: apply |
[production] |
02:31 |
<mwdebug-deploy@deploy1002> |
helmfile [eqiad] DONE helmfile.d/services/mwdebug: apply |
[production] |
02:31 |
<mwdebug-deploy@deploy1002> |
helmfile [eqiad] START helmfile.d/services/mwdebug: apply |
[production] |
02:11 |
<mwdebug-deploy@deploy1002> |
helmfile [codfw] DONE helmfile.d/services/mwdebug: apply |
[production] |
02:08 |
<mwdebug-deploy@deploy1002> |
helmfile [codfw] START helmfile.d/services/mwdebug: apply |
[production] |
02:08 |
<mwdebug-deploy@deploy1002> |
helmfile [eqiad] DONE helmfile.d/services/mwdebug: apply |
[production] |
02:07 |
<mwdebug-deploy@deploy1002> |
helmfile [eqiad] START helmfile.d/services/mwdebug: apply |
[production] |
2022-07-04
§
|
20:09 |
<andrew@cumin1001> |
END (FAIL) - Cookbook sre.hosts.reboot-single (exit_code=1) for host cloudcontrol1004.wikimedia.org |
[production] |
19:53 |
<andrew@cumin1001> |
START - Cookbook sre.hosts.reboot-single for host cloudcontrol1004.wikimedia.org |
[production] |
19:40 |
<andrew@cumin1001> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host cloudcontrol2004-dev.wikimedia.org |
[production] |
19:38 |
<andrew@cumin1001> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host cloudcontrol1005.wikimedia.org |
[production] |
19:31 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1:00:00 on dbstore1003.eqiad.wmnet with reason: Maintenance |
[production] |
19:31 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 1:00:00 on dbstore1003.eqiad.wmnet with reason: Maintenance |
[production] |
19:30 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1:00:00 on db1150.eqiad.wmnet with reason: Maintenance |
[production] |
19:30 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 1:00:00 on db1150.eqiad.wmnet with reason: Maintenance |
[production] |
19:30 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on 8 hosts with reason: Maintenance |
[production] |
19:30 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 2:00:00 on 8 hosts with reason: Maintenance |
[production] |
19:30 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1:00:00 on db2123.codfw.wmnet with reason: Maintenance |
[production] |
19:30 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 1:00:00 on db2123.codfw.wmnet with reason: Maintenance |
[production] |
19:29 |
<ladsgroup@cumin1001> |
dbctl commit (dc=all): 'Repooling after maintenance db1096:3315 (T312027)', diff saved to https://phabricator.wikimedia.org/P30811 and previous config saved to /var/cache/conftool/dbconfig/20220704-192955-ladsgroup.json |
[production] |
19:28 |
<andrew@cumin1001> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host cloudcontrol2003-dev.wikimedia.org |
[production] |
19:27 |
<andrew@cumin1001> |
START - Cookbook sre.hosts.reboot-single for host cloudcontrol2004-dev.wikimedia.org |
[production] |
19:26 |
<andrew@cumin1001> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host cloudcontrol1004.wikimedia.org |
[production] |
19:26 |
<andrew@cumin1001> |
START - Cookbook sre.hosts.reboot-single for host cloudcontrol1005.wikimedia.org |
[production] |
19:17 |
<andrew@cumin1001> |
START - Cookbook sre.hosts.reboot-single for host cloudcontrol2003-dev.wikimedia.org |
[production] |
19:15 |
<andrew@cumin1001> |
START - Cookbook sre.hosts.reboot-single for host cloudcontrol1004.wikimedia.org |
[production] |
19:15 |
<andrew@cumin1001> |
END (FAIL) - Cookbook sre.hosts.reboot-single (exit_code=1) for host cloudcontrol2001-dev.wikimedia.org |
[production] |