2024-06-12
ยง
|
10:17 <jayme@deploy1002> helmfile [eqiad] DONE helmfile.d/services/machinetranslation: apply [production]
10:16 <jayme@deploy1002> helmfile [staging] DONE helmfile.d/services/shellbox-timeline: apply [production]
10:16 <jayme@deploy1002> helmfile [staging] START helmfile.d/services/shellbox-timeline: apply [production]
10:16 <jayme@deploy1002> helmfile [staging] DONE helmfile.d/services/shellbox-syntaxhighlight: apply [production]
10:16 <jayme@deploy1002> helmfile [staging] START helmfile.d/services/shellbox-syntaxhighlight: apply [production]
10:16 <jayme@deploy1002> helmfile [staging] DONE helmfile.d/services/shellbox-media: apply [production]
10:16 <jayme@deploy1002> helmfile [staging] START helmfile.d/services/shellbox-media: apply [production]
10:16 <jayme@deploy1002> helmfile [staging] DONE helmfile.d/services/shellbox-constraints: apply [production]
10:15 <cgoubert@cumin1002> END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 14 days, 0:00:00 on 9 hosts with reason: decommissioning [production]
10:15 <jayme@deploy1002> helmfile [staging] START helmfile.d/services/shellbox-constraints: apply [production]
10:15 <jayme@deploy1002> helmfile [staging] DONE helmfile.d/services/shellbox: apply [production]
10:15 <jayme@deploy1002> helmfile [staging] START helmfile.d/services/shellbox: apply [production]
10:15 <cgoubert@cumin1002> START - Cookbook sre.hosts.downtime for 14 days, 0:00:00 on 9 hosts with reason: decommissioning [production]
10:14 <jayme@deploy1002> helmfile [staging] DONE helmfile.d/services/shellbox: apply [production]
10:14 <jayme@deploy1002> helmfile [staging] START helmfile.d/services/shellbox: apply [production]
10:10 <claime> Depooling mw2281.codfw.wmnet,mw22[83-90].codfw.wmnet for decommission - T367275 [production]
10:10 <marostegui@cumin1002> dbctl commit (dc=all): 'Repooling after maintenance db1189 (T367261)', diff saved to https://phabricator.wikimedia.org/P64683 and previous config saved to /var/cache/conftool/dbconfig/20240612-101032-marostegui.json [production]
10:08 <jayme@deploy1002> helmfile [eqiad] START helmfile.d/services/machinetranslation: apply [production]
10:07 <jayme@deploy1002> helmfile [codfw] DONE helmfile.d/services/machinetranslation: apply [production]
10:07 <jayme@deploy1002> helmfile [codfw] START helmfile.d/services/machinetranslation: apply [production]
10:07 <zabe> zabe@mwmaint1002:~$ foreachwikiindblist 'all - s4' refreshImageMetadata.php --mime image/webp # T364680 [production]
09:48 <fabfur> disabling puppet on cp4037 to test benthos configuration (T360454) [production]
09:47 <fabfur> disabling puppet on cp4037 to test benthos configuration [production]
09:47 <marostegui@cumin1002> dbctl commit (dc=all): 'Repooling after maintenance db1175', diff saved to https://phabricator.wikimedia.org/P64680 and previous config saved to /var/cache/conftool/dbconfig/20240612-094738-marostegui.json [production]
09:47 <_joe_> running dump_cloud_ip_ranges on puppetmaster1001 to test fixed script [production]
09:43 <fnegri@cumin1002> conftool action : set/pooled=no; selector: name=clouddb1018.eqiad.wmnet,service=s7 [production]
09:43 <fnegri@cumin1002> conftool action : set/pooled=no; selector: name=clouddb1018.eqiad.wmnet,service=s2 [production]
09:33 <jayme@deploy1002> helmfile [staging-eqiad] DONE helmfile.d/admin 'apply'. [production]
09:32 <marostegui@cumin1002> dbctl commit (dc=all): 'Repooling after maintenance db1175', diff saved to https://phabricator.wikimedia.org/P64679 and previous config saved to /var/cache/conftool/dbconfig/20240612-093231-marostegui.json [production]
09:32 <jayme@deploy1002> helmfile [staging-eqiad] START helmfile.d/admin 'apply'. [production]
09:17 <marostegui@cumin1002> dbctl commit (dc=all): 'Repooling after maintenance db1175 (T367261)', diff saved to https://phabricator.wikimedia.org/P64678 and previous config saved to /var/cache/conftool/dbconfig/20240612-091724-marostegui.json [production]
09:11 <moritzm> failover ganeti cluster for eqsin to ganeti5004 [production]
09:10 <marostegui@cumin1002> dbctl commit (dc=all): 'Depooling db1175 (T367261)', diff saved to https://phabricator.wikimedia.org/P64677 and previous config saved to /var/cache/conftool/dbconfig/20240612-090959-marostegui.json [production]
09:09 <marostegui@cumin1002> END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 12:00:00 on db1175.eqiad.wmnet with reason: Maintenance [production]
09:09 <marostegui@cumin1002> START - Cookbook sre.hosts.downtime for 12:00:00 on db1175.eqiad.wmnet with reason: Maintenance [production]
09:09 <marostegui@cumin1002> dbctl commit (dc=all): 'Repooling after maintenance db1166 (T367261)', diff saved to https://phabricator.wikimedia.org/P64676 and previous config saved to /var/cache/conftool/dbconfig/20240612-090937-marostegui.json [production]
09:08 <ladsgroup@cumin1002> dbctl commit (dc=all): 'db2214 (re)pooling @ 100%: Maint over', diff saved to https://phabricator.wikimedia.org/P64675 and previous config saved to /var/cache/conftool/dbconfig/20240612-090834-ladsgroup.json [production]
09:06 <kamila@cumin1002> END (PASS) - Cookbook sre.hosts.reimage (exit_code=0) for host wikikube-ctrl1002.eqiad.wmnet with OS bullseye [production]
09:04 <Lucas_WMDE> START lucaswerkmeister-wmde@mwmaint1002:~$ time mwscript extensions/DiscussionTools/maintenance/persistRevisionThreadItems.php --wiki enwiki --current --all --touched-after=20240524120000 --start '["55386869"]' 2>&1 | tee -a ~/T315510-enwiki-9; date [production]
09:04 <ladsgroup@cumin1002> dbctl commit (dc=all): 'db1223 (re)pooling @ 100%: Maint over', diff saved to https://phabricator.wikimedia.org/P64674 and previous config saved to /var/cache/conftool/dbconfig/20240612-090435-ladsgroup.json [production]
09:04 <Lucas_WMDE> STOPPED lucaswerkmeister-wmde@mwmaint1002:~$ time mwscript extensions/DiscussionTools/maintenance/persistRevisionThreadItems.php --wiki enwiki --current --all --touched-after=20240524120000 --start '["55019880"]' 2>&1 | tee -a ~/T315510-enwiki-8; date # Ctrl+C, had become very slow, trying restart [production]
08:54 <marostegui@cumin1002> dbctl commit (dc=all): 'Repooling after maintenance db1166', diff saved to https://phabricator.wikimedia.org/P64673 and previous config saved to /var/cache/conftool/dbconfig/20240612-085430-marostegui.json [production]
08:53 <ladsgroup@cumin1002> dbctl commit (dc=all): 'db2214 (re)pooling @ 75%: Maint over', diff saved to https://phabricator.wikimedia.org/P64672 and previous config saved to /var/cache/conftool/dbconfig/20240612-085329-ladsgroup.json [production]
08:52 <jmm@cumin2002> END (PASS) - Cookbook sre.ganeti.drain-node (exit_code=0) for draining ganeti node ganeti5006.eqsin.wmnet [production]
08:52 <jmm@cumin2002> END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host ganeti5006.eqsin.wmnet [production]
08:49 <ladsgroup@cumin1002> dbctl commit (dc=all): 'db1223 (re)pooling @ 75%: Maint over', diff saved to https://phabricator.wikimedia.org/P64671 and previous config saved to /var/cache/conftool/dbconfig/20240612-084929-ladsgroup.json [production]
08:45 <jmm@cumin2002> START - Cookbook sre.hosts.reboot-single for host ganeti5006.eqsin.wmnet [production]
08:42 <kamila@cumin1002> END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on wikikube-ctrl1002.eqiad.wmnet with reason: host reimage [production]
08:42 <zabe> zabe@mwmaint1002:~$ mwscript refreshImageMetadata.php commonswiki --mime image/webp # T364680 [production]
08:39 <slyngshede@cumin1002> END (PASS) - Cookbook sre.idm.logout (exit_code=0) Logging Mike Pham out of all services on: 2200 hosts [production]