2022-07-21
§
|
08:54 |
<klausman@deploy1002> |
helmfile [ml-serve-eqiad] DONE helmfile.d/admin 'sync'. |
[production] |
08:54 |
<klausman@deploy1002> |
helmfile [ml-serve-eqiad] START helmfile.d/admin 'sync'. |
[production] |
08:54 |
<klausman@deploy1002> |
helmfile [ml-serve-codfw] DONE helmfile.d/admin 'sync'. |
[production] |
08:54 |
<klausman@deploy1002> |
helmfile [ml-serve-codfw] START helmfile.d/admin 'sync'. |
[production] |
08:54 |
<klausman@deploy1002> |
helmfile [ml-staging-codfw] DONE helmfile.d/admin 'sync'. |
[production] |
08:54 |
<klausman@deploy1002> |
helmfile [ml-staging-codfw] START helmfile.d/admin 'sync'. |
[production] |
08:53 |
<klausman@deploy1002> |
helmfile [staging-codfw] START helmfile.d/admin 'sync'. |
[production] |
08:49 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'Depooling db1175 (T312990)', diff saved to https://phabricator.wikimedia.org/P31597 and previous config saved to /var/cache/conftool/dbconfig/20220721-084935-marostegui.json |
[production] |
08:49 |
<marostegui@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 6:00:00 on db1175.eqiad.wmnet with reason: Maintenance |
[production] |
08:49 |
<marostegui@cumin1001> |
START - Cookbook sre.hosts.downtime for 6:00:00 on db1175.eqiad.wmnet with reason: Maintenance |
[production] |
08:41 |
<jmm@cumin2002> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host ganeti2026.codfw.wmnet |
[production] |
08:33 |
<jmm@cumin2002> |
START - Cookbook sre.hosts.reboot-single for host ganeti2026.codfw.wmnet |
[production] |
08:31 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'Add db2169 to s6 and s7 T311493', diff saved to https://phabricator.wikimedia.org/P31595 and previous config saved to /var/cache/conftool/dbconfig/20220721-083147-marostegui.json |
[production] |
08:24 |
<marostegui@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 12:00:00 on 6 hosts with reason: Maintenance |
[production] |
08:24 |
<marostegui@cumin1001> |
START - Cookbook sre.hosts.downtime for 12:00:00 on 6 hosts with reason: Maintenance |
[production] |
08:24 |
<marostegui@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 6:00:00 on db2105.codfw.wmnet with reason: Maintenance |
[production] |
08:23 |
<marostegui@cumin1001> |
START - Cookbook sre.hosts.downtime for 6:00:00 on db2105.codfw.wmnet with reason: Maintenance |
[production] |
08:18 |
<moritzm> |
installing containerd security updates in Kubernetes eqiad workers |
[production] |
08:15 |
<marostegui@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 6:00:00 on dbstore1007.eqiad.wmnet with reason: Maintenance |
[production] |
08:14 |
<marostegui@cumin1001> |
START - Cookbook sre.hosts.downtime for 6:00:00 on dbstore1007.eqiad.wmnet with reason: Maintenance |
[production] |
08:14 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'Repooling after maintenance db1179 (T312990)', diff saved to https://phabricator.wikimedia.org/P31594 and previous config saved to /var/cache/conftool/dbconfig/20220721-081449-marostegui.json |
[production] |
07:59 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'Repooling after maintenance db1179', diff saved to https://phabricator.wikimedia.org/P31593 and previous config saved to /var/cache/conftool/dbconfig/20220721-075944-marostegui.json |
[production] |
07:57 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'db1181 (re)pooling @ 100%: After restart', diff saved to https://phabricator.wikimedia.org/P31592 and previous config saved to /var/cache/conftool/dbconfig/20220721-075757-root.json |
[production] |
07:57 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'db1120 (re)pooling @ 100%: After maintenance', diff saved to https://phabricator.wikimedia.org/P31591 and previous config saved to /var/cache/conftool/dbconfig/20220721-075745-root.json |
[production] |
07:46 |
<ladsgroup@deploy1002> |
Synchronized portals: Wikimedia Portals Update: [[gerrit:815895|Adding Wikiquote to the new portals (T273179)]] (duration: 03m 10s) |
[production] |
07:44 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'Repooling after maintenance db1179', diff saved to https://phabricator.wikimedia.org/P31590 and previous config saved to /var/cache/conftool/dbconfig/20220721-074439-marostegui.json |
[production] |
07:43 |
<ladsgroup@deploy1002> |
Synchronized portals/wikipedia.org/assets: Wikimedia Portals Update: [[gerrit:815895|Adding Wikiquote to the new portals (T273179)]] (duration: 03m 08s) |
[production] |
07:42 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'db1181 (re)pooling @ 75%: After restart', diff saved to https://phabricator.wikimedia.org/P31589 and previous config saved to /var/cache/conftool/dbconfig/20220721-074253-root.json |
[production] |
07:42 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'db1120 (re)pooling @ 75%: After maintenance', diff saved to https://phabricator.wikimedia.org/P31588 and previous config saved to /var/cache/conftool/dbconfig/20220721-074242-root.json |
[production] |
07:39 |
<mwdebug-deploy@deploy1002> |
helmfile [codfw] DONE helmfile.d/services/mwdebug: apply |
[production] |
07:38 |
<mwdebug-deploy@deploy1002> |
helmfile [codfw] START helmfile.d/services/mwdebug: apply |
[production] |
07:38 |
<mwdebug-deploy@deploy1002> |
helmfile [eqiad] DONE helmfile.d/services/mwdebug: apply |
[production] |
07:37 |
<mwdebug-deploy@deploy1002> |
helmfile [eqiad] START helmfile.d/services/mwdebug: apply |
[production] |
07:35 |
<ladsgroup@cumin1001> |
dbctl commit (dc=all): 'Depooling db1168 (T312863)', diff saved to https://phabricator.wikimedia.org/P31587 and previous config saved to /var/cache/conftool/dbconfig/20220721-073502-ladsgroup.json |
[production] |
07:34 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on db1168.eqiad.wmnet with reason: Maintenance |
[production] |
07:34 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on db1168.eqiad.wmnet with reason: Maintenance |
[production] |
07:32 |
<ladsgroup@cumin1001> |
dbctl commit (dc=all): 'Depooling db1179 (T312863)', diff saved to https://phabricator.wikimedia.org/P31586 and previous config saved to /var/cache/conftool/dbconfig/20220721-073251-ladsgroup.json |
[production] |
07:32 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on db1179.eqiad.wmnet with reason: Maintenance |
[production] |
07:32 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on db1179.eqiad.wmnet with reason: Maintenance |
[production] |
07:32 |
<ladsgroup@cumin1001> |
dbctl commit (dc=all): 'Depooling db1110 (T312863)', diff saved to https://phabricator.wikimedia.org/P31585 and previous config saved to /var/cache/conftool/dbconfig/20220721-073217-ladsgroup.json |
[production] |
07:32 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on db1110.eqiad.wmnet with reason: Maintenance |
[production] |
07:32 |
<jmm@cumin2002> |
END (PASS) - Cookbook sre.hosts.reimage (exit_code=0) for host ganeti2026.codfw.wmnet with OS bullseye |
[production] |
07:31 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on db1110.eqiad.wmnet with reason: Maintenance |
[production] |
07:31 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2 days, 0:00:00 on 8 hosts with reason: Maintenance |
[production] |
07:31 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 2 days, 0:00:00 on 8 hosts with reason: Maintenance |
[production] |
07:31 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on db2123.codfw.wmnet with reason: Maintenance |
[production] |
07:31 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on db2123.codfw.wmnet with reason: Maintenance |
[production] |
07:31 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on dbstore1003.eqiad.wmnet with reason: Maintenance |
[production] |
07:31 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on dbstore1003.eqiad.wmnet with reason: Maintenance |
[production] |
07:30 |
<jmm@cumin2002> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host ganeti2009.codfw.wmnet |
[production] |