2024-09-09
§
|
10:30 |
<jelto@deploy1003> |
helmfile [staging-eqiad] START helmfile.d/admin 'apply'. |
[production] |
10:19 |
<jelto@deploy1003> |
helmfile [staging-codfw] DONE helmfile.d/admin 'apply'. |
[production] |
10:18 |
<jelto@deploy1003> |
helmfile [staging-codfw] START helmfile.d/admin 'apply'. |
[production] |
09:44 |
<isaranto@deploy1003> |
helmfile [ml-serve-eqiad] Ran 'sync' command on namespace 'article-models' for release 'main' . |
[production] |
09:44 |
<isaranto@deploy1003> |
helmfile [ml-serve-codfw] Ran 'sync' command on namespace 'article-models' for release 'main' . |
[production] |
09:42 |
<isaranto@deploy1003> |
helmfile [ml-staging-codfw] Ran 'sync' command on namespace 'article-models' for release 'main' . |
[production] |
09:38 |
<brouberol@deploy1003> |
helmfile [dse-k8s-eqiad] DONE helmfile.d/admin 'apply'. |
[production] |
09:38 |
<brouberol@deploy1003> |
helmfile [dse-k8s-eqiad] START helmfile.d/admin 'apply'. |
[production] |
09:27 |
<arnaudb@cumin1002> |
START - Cookbook sre.mysql.clone of db2127.codfw.wmnet onto db2227.codfw.wmnet |
[production] |
09:25 |
<moritzm> |
removing libssl1.1 from prometheus hosts which were dist-upgraded from bullseye to bookworm |
[production] |
09:24 |
<arnaudb@cumin1002> |
dbctl commit (dc=all): 'Cloning db2127 in db2227 for T373579', diff saved to https://phabricator.wikimedia.org/P68745 and previous config saved to /var/cache/conftool/dbconfig/20240909-092404-arnaudb.json |
[production] |
09:21 |
<arnaudb@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on db2227.codfw.wmnet with reason: provisionning db2227.codfw.wmnet - T373579 |
[production] |
09:21 |
<arnaudb@cumin1002> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on db2227.codfw.wmnet with reason: provisionning db2227.codfw.wmnet - T373579 |
[production] |
09:21 |
<arnaudb@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on db2127.codfw.wmnet with reason: provisionning db2227.codfw.wmnet - T373579 |
[production] |
09:21 |
<arnaudb@cumin1002> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on db2127.codfw.wmnet with reason: provisionning db2227.codfw.wmnet - T373579 |
[production] |
09:18 |
<jmm@cumin2002> |
END (PASS) - Cookbook sre.debmonitor.remove-hosts (exit_code=0) for 1 hosts: gerrit1004.wikimedia.org |
[production] |
09:18 |
<jmm@cumin2002> |
START - Cookbook sre.debmonitor.remove-hosts for 1 hosts: gerrit1004.wikimedia.org |
[production] |
09:07 |
<urbanecm> |
[urbanecm@mwmaint1002 ~]$ mwscript namespaceDupes.php --wiki=mnwiki --add-prefix=BROKEN --fix # T366271 |
[production] |
08:57 |
<moritzm> |
restarting postfix on mx-in/mx-out to pick up openssl updates |
[production] |
08:54 |
<elukey@cumin2002> |
END (PASS) - Cookbook sre.hosts.reimage (exit_code=0) for host dragonfly-supernode2001.codfw.wmnet with OS bookworm |
[production] |
08:51 |
<arnaudb@cumin1002> |
dbctl commit (dc=all): 'API/vslow/dump T374086', diff saved to https://phabricator.wikimedia.org/P68744 and previous config saved to /var/cache/conftool/dbconfig/20240909-085122-arnaudb.json |
[production] |
08:48 |
<arnaudb@cumin1002> |
dbctl commit (dc=all): 'Promote db2213 to s5 primary T374086', diff saved to https://phabricator.wikimedia.org/P68743 and previous config saved to /var/cache/conftool/dbconfig/20240909-084810-arnaudb.json |
[production] |
08:47 |
<arnaudb> |
Starting s5 codfw failover from db2123 to db2213 - T374086 |
[production] |
08:41 |
<jelto@cumin1002> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host lists2001.wikimedia.org |
[production] |
08:40 |
<elukey@cumin2002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on dragonfly-supernode2001.codfw.wmnet with reason: host reimage |
[production] |
08:39 |
<arnaudb@cumin1002> |
dbctl commit (dc=all): 'Remove db2213 from API/vslow/dump T374086', diff saved to https://phabricator.wikimedia.org/P68742 and previous config saved to /var/cache/conftool/dbconfig/20240909-083910-arnaudb.json |
[production] |
08:38 |
<arnaudb@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1:00:00 on 24 hosts with reason: Primary switchover s5 T374086 |
[production] |
08:38 |
<arnaudb@cumin1002> |
START - Cookbook sre.hosts.downtime for 1:00:00 on 24 hosts with reason: Primary switchover s5 T374086 |
[production] |
08:37 |
<jmm@cumin2002> |
END (PASS) - Cookbook sre.debmonitor.remove-hosts (exit_code=0) for 1 hosts: kubernetes2054.codfw.wmnet |
[production] |
08:37 |
<jmm@cumin2002> |
START - Cookbook sre.debmonitor.remove-hosts for 1 hosts: kubernetes2054.codfw.wmnet |
[production] |
08:36 |
<elukey@cumin2002> |
START - Cookbook sre.hosts.downtime for 2:00:00 on dragonfly-supernode2001.codfw.wmnet with reason: host reimage |
[production] |
08:36 |
<jmm@cumin2002> |
END (PASS) - Cookbook sre.debmonitor.remove-hosts (exit_code=0) for 1 hosts: kubernetes2055.codfw.wmnet |
[production] |
08:36 |
<jmm@cumin2002> |
START - Cookbook sre.debmonitor.remove-hosts for 1 hosts: kubernetes2055.codfw.wmnet |
[production] |
08:35 |
<jmm@cumin2002> |
END (PASS) - Cookbook sre.debmonitor.remove-hosts (exit_code=0) for 1 hosts: kubernetes2057.codfw.wmnet |
[production] |
08:35 |
<jmm@cumin2002> |
START - Cookbook sre.debmonitor.remove-hosts for 1 hosts: kubernetes2057.codfw.wmnet |
[production] |
08:35 |
<jmm@cumin2002> |
END (PASS) - Cookbook sre.debmonitor.remove-hosts (exit_code=0) for 1 hosts: kubernetes2055.codfw.wmnet |
[production] |
08:35 |
<jmm@cumin2002> |
START - Cookbook sre.debmonitor.remove-hosts for 1 hosts: kubernetes2055.codfw.wmnet |
[production] |
08:35 |
<jelto@cumin1002> |
START - Cookbook sre.hosts.reboot-single for host lists2001.wikimedia.org |
[production] |
08:35 |
<jmm@cumin2002> |
END (PASS) - Cookbook sre.debmonitor.remove-hosts (exit_code=0) for 1 hosts: kubernetes2035.codfw.wmnet |
[production] |
08:35 |
<jmm@cumin2002> |
START - Cookbook sre.debmonitor.remove-hosts for 1 hosts: kubernetes2035.codfw.wmnet |
[production] |
08:35 |
<jmm@cumin2002> |
END (PASS) - Cookbook sre.debmonitor.remove-hosts (exit_code=0) for 1 hosts: kubernetes2033.codfw.wmnet |
[production] |
08:35 |
<jmm@cumin2002> |
START - Cookbook sre.debmonitor.remove-hosts for 1 hosts: kubernetes2033.codfw.wmnet |
[production] |
08:34 |
<jmm@cumin2002> |
END (PASS) - Cookbook sre.debmonitor.remove-hosts (exit_code=0) for 1 hosts: kubernetes2029.codfw.wmnet |
[production] |
08:34 |
<jmm@cumin2002> |
START - Cookbook sre.debmonitor.remove-hosts for 1 hosts: kubernetes2029.codfw.wmnet |
[production] |
08:34 |
<jmm@cumin2002> |
END (PASS) - Cookbook sre.debmonitor.remove-hosts (exit_code=0) for 1 hosts: kubernetes2028.codfw.wmnet |
[production] |
08:34 |
<jmm@cumin2002> |
START - Cookbook sre.debmonitor.remove-hosts for 1 hosts: kubernetes2028.codfw.wmnet |
[production] |
08:34 |
<jmm@cumin2002> |
END (PASS) - Cookbook sre.debmonitor.remove-hosts (exit_code=0) for 1 hosts: kubernetes2027.codfw.wmnet |
[production] |
08:34 |
<jmm@cumin2002> |
START - Cookbook sre.debmonitor.remove-hosts for 1 hosts: kubernetes2027.codfw.wmnet |
[production] |
08:33 |
<jmm@cumin2002> |
END (PASS) - Cookbook sre.debmonitor.remove-hosts (exit_code=0) for 1 hosts: kubernetes2025.codfw.wmnet |
[production] |
08:33 |
<jmm@cumin2002> |
START - Cookbook sre.debmonitor.remove-hosts for 1 hosts: kubernetes2025.codfw.wmnet |
[production] |