2024-06-12
§
|
10:14 |
<jayme@deploy1002> |
helmfile [staging] START helmfile.d/services/shellbox: apply |
[production] |
10:10 |
<claime> |
Depooling mw2281.codfw.wmnet,mw22[83-90].codfw.wmnet for decommission - T367275 |
[production] |
10:10 |
<marostegui@cumin1002> |
dbctl commit (dc=all): 'Repooling after maintenance db1189 (T367261)', diff saved to https://phabricator.wikimedia.org/P64683 and previous config saved to /var/cache/conftool/dbconfig/20240612-101032-marostegui.json |
[production] |
10:08 |
<jayme@deploy1002> |
helmfile [eqiad] START helmfile.d/services/machinetranslation: apply |
[production] |
10:07 |
<jayme@deploy1002> |
helmfile [codfw] DONE helmfile.d/services/machinetranslation: apply |
[production] |
10:07 |
<jayme@deploy1002> |
helmfile [codfw] START helmfile.d/services/machinetranslation: apply |
[production] |
10:07 |
<zabe> |
zabe@mwmaint1002:~$ foreachwikiindblist 'all - s4' refreshImageMetadata.php --mime image/webp # T364680 |
[production] |
09:48 |
<fabfur> |
disabling puppet on cp4037 to test benthos configuration (T360454) |
[production] |
09:47 |
<fabfur> |
disabling puppet on cp4037 to test benthos configuration |
[production] |
09:47 |
<marostegui@cumin1002> |
dbctl commit (dc=all): 'Repooling after maintenance db1175', diff saved to https://phabricator.wikimedia.org/P64680 and previous config saved to /var/cache/conftool/dbconfig/20240612-094738-marostegui.json |
[production] |
09:47 |
<_joe_> |
running dump_cloud_ip_ranges on puppetmaster1001 to test fixed script |
[production] |
09:43 |
<fnegri@cumin1002> |
conftool action : set/pooled=no; selector: name=clouddb1018.eqiad.wmnet,service=s7 |
[production] |
09:43 |
<fnegri@cumin1002> |
conftool action : set/pooled=no; selector: name=clouddb1018.eqiad.wmnet,service=s2 |
[production] |
09:33 |
<jayme@deploy1002> |
helmfile [staging-eqiad] DONE helmfile.d/admin 'apply'. |
[production] |
09:32 |
<marostegui@cumin1002> |
dbctl commit (dc=all): 'Repooling after maintenance db1175', diff saved to https://phabricator.wikimedia.org/P64679 and previous config saved to /var/cache/conftool/dbconfig/20240612-093231-marostegui.json |
[production] |
09:32 |
<jayme@deploy1002> |
helmfile [staging-eqiad] START helmfile.d/admin 'apply'. |
[production] |
09:17 |
<marostegui@cumin1002> |
dbctl commit (dc=all): 'Repooling after maintenance db1175 (T367261)', diff saved to https://phabricator.wikimedia.org/P64678 and previous config saved to /var/cache/conftool/dbconfig/20240612-091724-marostegui.json |
[production] |
09:11 |
<moritzm> |
failover ganeti cluster for eqsin to ganeti5004 |
[production] |
09:10 |
<marostegui@cumin1002> |
dbctl commit (dc=all): 'Depooling db1175 (T367261)', diff saved to https://phabricator.wikimedia.org/P64677 and previous config saved to /var/cache/conftool/dbconfig/20240612-090959-marostegui.json |
[production] |
09:09 |
<marostegui@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 12:00:00 on db1175.eqiad.wmnet with reason: Maintenance |
[production] |
09:09 |
<marostegui@cumin1002> |
START - Cookbook sre.hosts.downtime for 12:00:00 on db1175.eqiad.wmnet with reason: Maintenance |
[production] |
09:09 |
<marostegui@cumin1002> |
dbctl commit (dc=all): 'Repooling after maintenance db1166 (T367261)', diff saved to https://phabricator.wikimedia.org/P64676 and previous config saved to /var/cache/conftool/dbconfig/20240612-090937-marostegui.json |
[production] |
09:08 |
<ladsgroup@cumin1002> |
dbctl commit (dc=all): 'db2214 (re)pooling @ 100%: Maint over', diff saved to https://phabricator.wikimedia.org/P64675 and previous config saved to /var/cache/conftool/dbconfig/20240612-090834-ladsgroup.json |
[production] |
09:06 |
<kamila@cumin1002> |
END (PASS) - Cookbook sre.hosts.reimage (exit_code=0) for host wikikube-ctrl1002.eqiad.wmnet with OS bullseye |
[production] |
09:04 |
<Lucas_WMDE> |
START lucaswerkmeister-wmde@mwmaint1002:~$ time mwscript extensions/DiscussionTools/maintenance/persistRevisionThreadItems.php --wiki enwiki --current --all --touched-after=20240524120000 --start '["55386869"]' 2>&1 | tee -a ~/T315510-enwiki-9; date |
[production] |
09:04 |
<ladsgroup@cumin1002> |
dbctl commit (dc=all): 'db1223 (re)pooling @ 100%: Maint over', diff saved to https://phabricator.wikimedia.org/P64674 and previous config saved to /var/cache/conftool/dbconfig/20240612-090435-ladsgroup.json |
[production] |
09:04 |
<Lucas_WMDE> |
STOPPED lucaswerkmeister-wmde@mwmaint1002:~$ time mwscript extensions/DiscussionTools/maintenance/persistRevisionThreadItems.php --wiki enwiki --current --all --touched-after=20240524120000 --start '["55019880"]' 2>&1 | tee -a ~/T315510-enwiki-8; date # Ctrl+C, had become very slow, trying restart |
[production] |
08:54 |
<marostegui@cumin1002> |
dbctl commit (dc=all): 'Repooling after maintenance db1166', diff saved to https://phabricator.wikimedia.org/P64673 and previous config saved to /var/cache/conftool/dbconfig/20240612-085430-marostegui.json |
[production] |
08:53 |
<ladsgroup@cumin1002> |
dbctl commit (dc=all): 'db2214 (re)pooling @ 75%: Maint over', diff saved to https://phabricator.wikimedia.org/P64672 and previous config saved to /var/cache/conftool/dbconfig/20240612-085329-ladsgroup.json |
[production] |
08:52 |
<jmm@cumin2002> |
END (PASS) - Cookbook sre.ganeti.drain-node (exit_code=0) for draining ganeti node ganeti5006.eqsin.wmnet |
[production] |
08:52 |
<jmm@cumin2002> |
END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host ganeti5006.eqsin.wmnet |
[production] |
08:49 |
<ladsgroup@cumin1002> |
dbctl commit (dc=all): 'db1223 (re)pooling @ 75%: Maint over', diff saved to https://phabricator.wikimedia.org/P64671 and previous config saved to /var/cache/conftool/dbconfig/20240612-084929-ladsgroup.json |
[production] |
08:45 |
<jmm@cumin2002> |
START - Cookbook sre.hosts.reboot-single for host ganeti5006.eqsin.wmnet |
[production] |
08:42 |
<kamila@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on wikikube-ctrl1002.eqiad.wmnet with reason: host reimage |
[production] |
08:42 |
<zabe> |
zabe@mwmaint1002:~$ mwscript refreshImageMetadata.php commonswiki --mime image/webp # T364680 |
[production] |
08:39 |
<slyngshede@cumin1002> |
END (PASS) - Cookbook sre.idm.logout (exit_code=0) Logging Mike Pham out of all services on: 2200 hosts |
[production] |
08:39 |
<kamila@cumin1002> |
START - Cookbook sre.hosts.downtime for 2:00:00 on wikikube-ctrl1002.eqiad.wmnet with reason: host reimage |
[production] |
08:39 |
<marostegui@cumin1002> |
dbctl commit (dc=all): 'Repooling after maintenance db1166', diff saved to https://phabricator.wikimedia.org/P64670 and previous config saved to /var/cache/conftool/dbconfig/20240612-083923-marostegui.json |
[production] |
08:38 |
<slyngshede@cumin1002> |
START - Cookbook sre.idm.logout Logging Mike Pham out of all services on: 2200 hosts |
[production] |
08:38 |
<ladsgroup@cumin1002> |
dbctl commit (dc=all): 'db2214 (re)pooling @ 50%: Maint over', diff saved to https://phabricator.wikimedia.org/P64669 and previous config saved to /var/cache/conftool/dbconfig/20240612-083824-ladsgroup.json |
[production] |
08:36 |
<Lucas_WMDE> |
lucaswerkmeister-wmde@deploy1002 ~ $ mwscript-k8s --comment 'T367174, P12703' extensions/Wikibase/repo/maintenance/changePropertyDataType.php wikidatawiki -- --property-id P12703 --new-data-type external-id --summary '[[phabricator:T367174|T367174]]' # succeeded |
[production] |
08:35 |
<Lucas_WMDE> |
lucaswerkmeister-wmde@deploy1002 ~ $ mwscript-k8s --comment 'T367174, P12583' extensions/Wikibase/repo/maintenance/changePropertyDataType.php wikidatawiki -- --property-id P12583 --new-data-type external-id --summary '[[phabricator:T367174|T367174]]' # succeeded |
[production] |
08:34 |
<ladsgroup@cumin1002> |
dbctl commit (dc=all): 'db1223 (re)pooling @ 50%: Maint over', diff saved to https://phabricator.wikimedia.org/P64668 and previous config saved to /var/cache/conftool/dbconfig/20240612-083424-ladsgroup.json |
[production] |
08:28 |
<brouberol@cumin2002> |
END (FAIL) - Cookbook sre.wdqs.data-reload (exit_code=99) reloading wikidata_full on wdqs2023.codfw.wmnet from DumpsSource.HDFS (hdfs:///wmf/discovery/wdqs-reload-cookbook-test-T349069/ using stat1009.eqiad.wmnet) |
[production] |
08:27 |
<ladsgroup@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on db2123.codfw.wmnet with reason: Maintenance |
[production] |
08:27 |
<ladsgroup@cumin1002> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on db2123.codfw.wmnet with reason: Maintenance |
[production] |
08:27 |
<marostegui@cumin1002> |
dbctl commit (dc=all): 'Depool db2123', diff saved to https://phabricator.wikimedia.org/P64667 and previous config saved to /var/cache/conftool/dbconfig/20240612-082702-marostegui.json |
[production] |
08:26 |
<fabfur@cumin1002> |
START - Cookbook sre.cdn.roll-reboot rolling reboot on A:cp-upload_codfw |
[production] |
08:26 |
<fabfur> |
start rebooting all cp-upload_codfw hosts for T366555 (spaced 1.5 hrs) |
[production] |
08:25 |
<kamila@cumin1002> |
START - Cookbook sre.hosts.reimage for host wikikube-ctrl1002.eqiad.wmnet with OS bullseye |
[production] |