2024-03-14
§
|
09:09 |
<marostegui> |
enable eqiad -> codfw replication on s5 T358199 |
[production] |
09:09 |
<marostegui@cumin1002> |
START - Cookbook sre.hosts.downtime for 0:30:00 on 27 hosts with reason: Enabling circular replication |
[production] |
09:06 |
<marostegui@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 0:30:00 on 27 hosts with reason: Enabling circular replication |
[production] |
09:06 |
<marostegui@cumin1002> |
START - Cookbook sre.hosts.downtime for 0:30:00 on 27 hosts with reason: Enabling circular replication |
[production] |
09:06 |
<marostegui> |
enable eqiad -> codfw replication on s6 T358199 |
[production] |
09:04 |
<logmsgbot> |
@deploy2002 helmfile [eqiad] DONE helmfile.d/services/cirrus-streaming-updater: apply |
[production] |
09:04 |
<logmsgbot> |
@deploy2002 helmfile [eqiad] START helmfile.d/services/cirrus-streaming-updater: apply |
[production] |
08:55 |
<arnaudb@cumin1002> |
dbctl commit (dc=all): 'db2115 (re)pooling @ 100%: Post reimage', diff saved to https://phabricator.wikimedia.org/P58794 and previous config saved to /var/cache/conftool/dbconfig/20240314-085530-arnaudb.json |
[production] |
08:55 |
<marostegui@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 0:30:00 on 31 hosts with reason: Enabling circular replication |
[production] |
08:54 |
<marostegui@cumin1002> |
START - Cookbook sre.hosts.downtime for 0:30:00 on 31 hosts with reason: Enabling circular replication |
[production] |
08:54 |
<marostegui> |
enable eqiad -> codfw replication on s7 T358199 |
[production] |
08:51 |
<marostegui@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 0:30:00 on 34 hosts with reason: Enabling circular replication |
[production] |
08:51 |
<marostegui@cumin1002> |
START - Cookbook sre.hosts.downtime for 0:30:00 on 34 hosts with reason: Enabling circular replication |
[production] |
08:50 |
<marostegui> |
enable eqiad -> codfw replication on s8 T358199 |
[production] |
08:40 |
<arnaudb@cumin1002> |
dbctl commit (dc=all): 'db2115 (re)pooling @ 75%: Post reimage', diff saved to https://phabricator.wikimedia.org/P58793 and previous config saved to /var/cache/conftool/dbconfig/20240314-084024-arnaudb.json |
[production] |
08:38 |
<marostegui@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 0:30:00 on 16 hosts with reason: Enabling circular replication |
[production] |
08:38 |
<logmsgbot> |
@deploy2002 helmfile [eqiad] DONE helmfile.d/services/cirrus-streaming-updater: apply |
[production] |
08:38 |
<logmsgbot> |
@deploy2002 helmfile [eqiad] START helmfile.d/services/cirrus-streaming-updater: apply |
[production] |
08:37 |
<marostegui@cumin1002> |
START - Cookbook sre.hosts.downtime for 0:30:00 on 16 hosts with reason: Enabling circular replication |
[production] |
08:37 |
<marostegui> |
enable eqiad -> codfw replication on x1 T358199 |
[production] |
08:32 |
<logmsgbot> |
@deploy2002 helmfile [eqiad] DONE helmfile.d/services/cirrus-streaming-updater: apply |
[production] |
08:32 |
<logmsgbot> |
@deploy2002 helmfile [eqiad] START helmfile.d/services/cirrus-streaming-updater: apply |
[production] |
08:05 |
<marostegui@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 0:30:00 on 6 hosts with reason: Enabling circular replication |
[production] |
08:05 |
<marostegui@cumin1002> |
START - Cookbook sre.hosts.downtime for 0:30:00 on 6 hosts with reason: Enabling circular replication |
[production] |
07:59 |
<arnaudb@cumin1002> |
END (PASS) - Cookbook sre.hosts.reimage (exit_code=0) for host db2115.codfw.wmnet with OS bookworm |
[production] |
07:38 |
<arnaudb@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on db2115.codfw.wmnet with reason: host reimage |
[production] |
07:35 |
<arnaudb@cumin1002> |
START - Cookbook sre.hosts.downtime for 2:00:00 on db2115.codfw.wmnet with reason: host reimage |
[production] |
07:20 |
<arnaudb@cumin1002> |
START - Cookbook sre.hosts.reimage for host db2115.codfw.wmnet with OS bookworm |
[production] |
07:17 |
<arnaudb@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on db2115.codfw.wmnet with reason: Silence for reimaging |
[production] |
07:17 |
<arnaudb@cumin1002> |
START - Cookbook sre.hosts.downtime for 2:00:00 on db2115.codfw.wmnet with reason: Silence for reimaging |
[production] |
07:15 |
<kart_> |
Updated cxserver to 2024-03-14-065833-production (T350773) |
[production] |
07:14 |
<kartik@deploy2002> |
helmfile [eqiad] DONE helmfile.d/services/cxserver: apply |
[production] |
07:13 |
<kartik@deploy2002> |
helmfile [eqiad] START helmfile.d/services/cxserver: apply |
[production] |
07:13 |
<kartik@deploy2002> |
helmfile [codfw] DONE helmfile.d/services/cxserver: apply |
[production] |
07:12 |
<kartik@deploy2002> |
helmfile [codfw] START helmfile.d/services/cxserver: apply |
[production] |
07:06 |
<kartik@deploy2002> |
helmfile [staging] DONE helmfile.d/services/cxserver: apply |
[production] |
07:05 |
<kartik@deploy2002> |
helmfile [staging] START helmfile.d/services/cxserver: apply |
[production] |
06:48 |
<kartik@deploy2002> |
helmfile [staging] DONE helmfile.d/services/cxserver: apply |
[production] |
06:48 |
<kartik@deploy2002> |
helmfile [staging] START helmfile.d/services/cxserver: apply |
[production] |
06:45 |
<marostegui@cumin1002> |
dbctl commit (dc=all): 'Promote db2196 to x1 primary and set section read-write T359919', diff saved to https://phabricator.wikimedia.org/P58789 and previous config saved to /var/cache/conftool/dbconfig/20240314-064513-root.json |
[production] |
06:31 |
<logmsgbot> |
@deploy2002 helmfile [eqiad] DONE helmfile.d/services/cirrus-streaming-updater: apply |
[production] |
06:31 |
<logmsgbot> |
@deploy2002 helmfile [eqiad] START helmfile.d/services/cirrus-streaming-updater: apply |
[production] |
06:23 |
<arnaudb> |
Starting x1 codfw failover from db2115 to db2196 - T359919 |
[production] |
06:06 |
<arnaudb@cumin1002> |
dbctl commit (dc=all): 'Set db2196 with weight 0 T359919', diff saved to https://phabricator.wikimedia.org/P58788 and previous config saved to /var/cache/conftool/dbconfig/20240314-060644-arnaudb.json |
[production] |
06:06 |
<arnaudb@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1:00:00 on 16 hosts with reason: Primary switchover x1 T359919 |
[production] |
06:06 |
<arnaudb@cumin1002> |
START - Cookbook sre.hosts.downtime for 1:00:00 on 16 hosts with reason: Primary switchover x1 T359919 |
[production] |
05:52 |
<kartik@deploy2002> |
helmfile [staging] DONE helmfile.d/services/cxserver: apply |
[production] |
05:52 |
<kartik@deploy2002> |
helmfile [staging] START helmfile.d/services/cxserver: apply |
[production] |
04:23 |
<tstarling@deploy2002> |
Synchronized wmf-config/CommonSettings.php: reverting for now due to slow query T355034 (duration: 12m 28s) |
[production] |
02:04 |
<logmsgbot> |
@deploy2002 helmfile [eqiad] DONE helmfile.d/services/cirrus-streaming-updater: apply |
[production] |