2023-01-23
§
|
07:25 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'db1206 (re)pooling @ 5%: After changing s1 sanitarium master', diff saved to https://phabricator.wikimedia.org/P43214 and previous config saved to /var/cache/conftool/dbconfig/20230123-072530-root.json |
[production] |
07:25 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'db1106 (re)pooling @ 5%: After changing s1 sanitarium master', diff saved to https://phabricator.wikimedia.org/P43213 and previous config saved to /var/cache/conftool/dbconfig/20230123-072520-root.json |
[production] |
07:23 |
<ladsgroup@cumin1001> |
dbctl commit (dc=all): 'db2107 (re)pooling @ 10%: Maint done', diff saved to https://phabricator.wikimedia.org/P43212 and previous config saved to /var/cache/conftool/dbconfig/20230123-072309-ladsgroup.json |
[production] |
07:13 |
<marostegui@cumin1001> |
dbctl commit (dc=all): 'Depool db1106 db1206 T326669', diff saved to https://phabricator.wikimedia.org/P43211 and previous config saved to /var/cache/conftool/dbconfig/20230123-071323-marostegui.json |
[production] |
07:09 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 8:00:00 on db2107.codfw.wmnet with reason: Maintenance |
[production] |
07:09 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 8:00:00 on db2107.codfw.wmnet with reason: Maintenance |
[production] |
07:08 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 6:00:00 on db2107.codfw.wmnet with reason: Maintenance |
[production] |
07:08 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 6:00:00 on db2107.codfw.wmnet with reason: Maintenance |
[production] |
06:58 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on db2113.codfw.wmnet with reason: Maintenance |
[production] |
06:58 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on db2113.codfw.wmnet with reason: Maintenance |
[production] |
06:23 |
<kart_> |
Updated cxserver to 2023-01-20-051603-production (T323840, T326236) |
[production] |
06:19 |
<kartik@deploy1002> |
helmfile [eqiad] DONE helmfile.d/services/cxserver: apply |
[production] |
06:18 |
<kartik@deploy1002> |
helmfile [eqiad] START helmfile.d/services/cxserver: apply |
[production] |
06:18 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on db2107.codfw.wmnet with reason: Maintenance |
[production] |
06:18 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on db2107.codfw.wmnet with reason: Maintenance |
[production] |
06:17 |
<kartik@deploy1002> |
helmfile [codfw] DONE helmfile.d/services/cxserver: apply |
[production] |
06:17 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 6:00:00 on db2107.codfw.wmnet with reason: Maintenance |
[production] |
06:17 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 6:00:00 on db2107.codfw.wmnet with reason: Maintenance |
[production] |
06:16 |
<kartik@deploy1002> |
helmfile [codfw] START helmfile.d/services/cxserver: apply |
[production] |
06:12 |
<kartik@deploy1002> |
helmfile [staging] DONE helmfile.d/services/cxserver: apply |
[production] |
06:12 |
<kartik@deploy1002> |
helmfile [staging] START helmfile.d/services/cxserver: apply |
[production] |
05:07 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 12:00:00 on db2113.codfw.wmnet with reason: Maintenance |
[production] |
05:07 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 12:00:00 on db2113.codfw.wmnet with reason: Maintenance |
[production] |
05:01 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on db2113.codfw.wmnet with reason: Maintenance |
[production] |
05:01 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on db2113.codfw.wmnet with reason: Maintenance |
[production] |
04:59 |
<ladsgroup@cumin1001> |
dbctl commit (dc=all): 'Depool db2113 T327611', diff saved to https://phabricator.wikimedia.org/P43210 and previous config saved to /var/cache/conftool/dbconfig/20230123-045939-ladsgroup.json |
[production] |
04:57 |
<ladsgroup@cumin1001> |
dbctl commit (dc=all): 'Promote db2123 to s5 primary T327611', diff saved to https://phabricator.wikimedia.org/P43209 and previous config saved to /var/cache/conftool/dbconfig/20230123-045740-ladsgroup.json |
[production] |
04:57 |
<Amir1> |
Starting s5 codfw failover from db2113 to db2123 - T327611 |
[production] |
04:53 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on db2107.codfw.wmnet with reason: Maintenance |
[production] |
04:53 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on db2107.codfw.wmnet with reason: Maintenance |
[production] |
04:33 |
<ladsgroup@cumin1001> |
dbctl commit (dc=all): 'Set db2123 with weight 0 T327611', diff saved to https://phabricator.wikimedia.org/P43208 and previous config saved to /var/cache/conftool/dbconfig/20230123-043324-ladsgroup.json |
[production] |
04:33 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1:00:00 on 25 hosts with reason: Primary switchover s5 T327611 |
[production] |
04:32 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 1:00:00 on 25 hosts with reason: Primary switchover s5 T327611 |
[production] |
04:02 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 12:00:00 on db2107.codfw.wmnet with reason: Maintenance |
[production] |
04:02 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 12:00:00 on db2107.codfw.wmnet with reason: Maintenance |
[production] |
03:56 |
<ladsgroup@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on db2107.codfw.wmnet with reason: Maintenance |
[production] |
03:56 |
<ladsgroup@cumin1001> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on db2107.codfw.wmnet with reason: Maintenance |
[production] |
03:54 |
<ladsgroup@cumin1001> |
dbctl commit (dc=all): 'Depool db2107 T327609', diff saved to https://phabricator.wikimedia.org/P43207 and previous config saved to /var/cache/conftool/dbconfig/20230123-035458-ladsgroup.json |
[production] |
03:52 |
<Amir1> |
Starting s2 codfw failover from db2107 to db2104 - T327609 |
[production] |
2023-01-20
§
|
18:22 |
<jynus> |
deploying new grants for backups on m1 T327155 |
[production] |
16:15 |
<isaranto@deploy1002> |
helmfile [ml-staging-codfw] Ran 'sync' command on namespace 'revscoring-editquality-reverted' for release 'main' . |
[production] |
16:15 |
<isaranto@deploy1002> |
helmfile [ml-staging-codfw] Ran 'sync' command on namespace 'revscoring-editquality-goodfaith' for release 'main' . |
[production] |
16:15 |
<isaranto@deploy1002> |
helmfile [ml-staging-codfw] Ran 'sync' command on namespace 'revscoring-editquality-damaging' for release 'main' . |
[production] |
16:14 |
<isaranto@deploy1002> |
helmfile [ml-staging-codfw] Ran 'sync' command on namespace 'revscoring-drafttopic' for release 'main' . |
[production] |
16:14 |
<isaranto@deploy1002> |
helmfile [ml-staging-codfw] Ran 'sync' command on namespace 'revscoring-draftquality' for release 'main' . |
[production] |
16:14 |
<isaranto@deploy1002> |
helmfile [ml-staging-codfw] Ran 'sync' command on namespace 'revscoring-articletopic' for release 'main' . |
[production] |
16:14 |
<isaranto@deploy1002> |
helmfile [ml-staging-codfw] Ran 'sync' command on namespace 'revscoring-articlequality' for release 'main' . |
[production] |
14:28 |
<elukey@deploy1002> |
helmfile [ml-serve-codfw] DONE helmfile.d/admin 'sync'. |
[production] |
14:27 |
<elukey@deploy1002> |
helmfile [ml-serve-codfw] START helmfile.d/admin 'sync'. |
[production] |
14:24 |
<elukey@deploy1002> |
helmfile [ml-serve-eqiad] DONE helmfile.d/admin 'sync'. |
[production] |