2025-10-01
§
|
14:18 |
<elukey@cumin2002> |
START - Cookbook sre.hardware.upgrade-firmware upgrade firmware for hosts ['cp2048.codfw.wmnet'] |
[production] |
14:18 |
<cgoubert@deploy2002> |
helmfile [eqiad] START helmfile.d/services/developer-portal: apply |
[production] |
14:18 |
<cgoubert@deploy2002> |
helmfile [eqiad] DONE helmfile.d/services/data-gateway: apply |
[production] |
14:17 |
<cgoubert@deploy2002> |
helmfile [eqiad] START helmfile.d/services/data-gateway: apply |
[production] |
14:16 |
<cgoubert@cumin1003> |
conftool action : set/pooled=false; selector: dnsdisc=thumbor.*,name=codfw |
[production] |
14:16 |
<cgoubert@cumin1003> |
conftool action : set/pooled=true; selector: dnsdisc=swift.*,name=eqiad |
[production] |
14:16 |
<cgoubert@cumin1003> |
conftool action : set/pooled=true; selector: dnsdisc=thumbor.*,name=eqiad |
[production] |
14:16 |
<cgoubert@deploy2002> |
helmfile [eqiad] DONE helmfile.d/services/cxserver: apply |
[production] |
14:15 |
<cgoubert@deploy2002> |
helmfile [eqiad] START helmfile.d/services/cxserver: apply |
[production] |
14:14 |
<elukey@cumin2002> |
END (PASS) - Cookbook sre.hardware.upgrade-firmware (exit_code=0) upgrade firmware for hosts ['cp2048.codfw.wmnet'] |
[production] |
14:14 |
<cgoubert@deploy2002> |
helmfile [eqiad] DONE helmfile.d/services/commons-impact-analytics: apply |
[production] |
14:14 |
<elukey@cumin2002> |
START - Cookbook sre.hardware.upgrade-firmware upgrade firmware for hosts ['cp2048.codfw.wmnet'] |
[production] |
14:13 |
<cgoubert@deploy2002> |
helmfile [eqiad] START helmfile.d/services/commons-impact-analytics: apply |
[production] |
14:12 |
<cgoubert@deploy2002> |
helmfile [eqiad] DONE helmfile.d/services/citoid: apply |
[production] |
14:11 |
<cgoubert@deploy2002> |
helmfile [eqiad] START helmfile.d/services/citoid: apply |
[production] |
14:11 |
<cgoubert@deploy2002> |
helmfile [eqiad] DONE helmfile.d/services/cirrus-streaming-updater: apply |
[production] |
14:11 |
<cgoubert@deploy2002> |
helmfile [eqiad] START helmfile.d/services/cirrus-streaming-updater: apply |
[production] |
14:11 |
<cgoubert@deploy2002> |
helmfile [eqiad] DONE helmfile.d/services/chart-renderer: apply |
[production] |
14:09 |
<cgoubert@deploy2002> |
helmfile [eqiad] START helmfile.d/services/chart-renderer: apply |
[production] |
14:08 |
<cgoubert@deploy2002> |
helmfile [eqiad] DONE helmfile.d/services/changeprop-jobqueue: apply |
[production] |
14:08 |
<cgoubert@deploy2002> |
helmfile [eqiad] START helmfile.d/services/changeprop-jobqueue: apply |
[production] |
14:08 |
<cgoubert@deploy2002> |
helmfile [eqiad] DONE helmfile.d/services/changeprop: apply |
[production] |
14:06 |
<cgoubert@deploy2002> |
helmfile [eqiad] DONE helmfile.d/services/thumbor: apply |
[production] |
14:06 |
<cgoubert@deploy2002> |
helmfile [eqiad] START helmfile.d/services/changeprop: apply |
[production] |
14:06 |
<cgoubert@deploy2002> |
helmfile [eqiad] DONE helmfile.d/services/api-gateway: apply |
[production] |
14:06 |
<cgoubert@deploy2002> |
helmfile [eqiad] DONE helmfile.d/services/kartotherian: apply |
[production] |
14:05 |
<fceratto@cumin1002> |
dbctl commit (dc=all): 'Repooling after maintenance db1259 (T401906)', diff saved to https://phabricator.wikimedia.org/P83572 and previous config saved to /var/cache/conftool/dbconfig/20251001-140538-fceratto.json |
[production] |
14:05 |
<cgoubert@deploy2002> |
helmfile [eqiad] START helmfile.d/services/api-gateway: apply |
[production] |
14:05 |
<cgoubert@deploy2002> |
helmfile [eqiad] DONE helmfile.d/services/apertium: apply |
[production] |
14:04 |
<cgoubert@deploy2002> |
helmfile [eqiad] START helmfile.d/services/kartotherian: apply |
[production] |
14:04 |
<fceratto@cumin1002> |
dbctl commit (dc=all): 'Depooling db1259 (T401906)', diff saved to https://phabricator.wikimedia.org/P83571 and previous config saved to /var/cache/conftool/dbconfig/20251001-140422-fceratto.json |
[production] |
14:04 |
<fceratto@cumin1002> |
DONE (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 4:00:00 on db1259.eqiad.wmnet with reason: Maintenance |
[production] |
14:04 |
<fceratto@cumin1002> |
dbctl commit (dc=all): 'Repooling after maintenance db1254 (T401906)', diff saved to https://phabricator.wikimedia.org/P83570 and previous config saved to /var/cache/conftool/dbconfig/20251001-140400-fceratto.json |
[production] |
14:03 |
<cgoubert@deploy2002> |
helmfile [eqiad] START helmfile.d/services/thumbor: apply |
[production] |
14:02 |
<cgoubert@deploy2002> |
helmfile [eqiad] START helmfile.d/services/apertium: apply |
[production] |
14:01 |
<cgoubert@cumin1003> |
conftool action : set/pooled=true; selector: name=eqiad,dnsdisc=toolhub.* |
[production] |
14:00 |
<cgoubert@deploy2002> |
helmfile [eqiad] DONE helmfile.d/services/toolhub: apply |
[production] |
13:58 |
<cgoubert@deploy2002> |
helmfile [eqiad] START helmfile.d/services/toolhub: apply |
[production] |
13:56 |
<bking@cumin2002> |
conftool action : set/weight=10:pooled=yes; selector: name=wdqs2016\.codfw\.wmnet |
[production] |
13:53 |
<cgoubert@deploy2002> |
helmfile [eqiad] DONE helmfile.d/admin 'sync'. |
[production] |
13:51 |
<jelto@cumin1003> |
DONE (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 0:30:00 on 239 hosts with reason: eqiad Wikikube kubernetes cluster upgrade to 1.31 - T405703 |
[production] |
13:48 |
<fceratto@cumin1002> |
dbctl commit (dc=all): 'Repooling after maintenance db1254', diff saved to https://phabricator.wikimedia.org/P83569 and previous config saved to /var/cache/conftool/dbconfig/20251001-134852-fceratto.json |
[production] |
13:46 |
<cgoubert@deploy2002> |
helmfile [eqiad] START helmfile.d/admin 'sync'. |
[production] |
13:44 |
<SandraEbele_> |
Deployed refinery-source using jenkins(weekly deployment train) |
[analytics] |
13:44 |
<SandraEbele_> |
Deployed refinery-source using jenkins(weekly deployment train) |
[production] |
13:44 |
<cgoubert@cumin1003> |
END (FAIL) - Cookbook sre.k8s.pool-depool-node (exit_code=99) pool for host wikikube-ctrl[1001-1004].eqiad.wmnet |
[production] |
13:44 |
<cgoubert@cumin1003> |
START - Cookbook sre.k8s.pool-depool-node pool for host wikikube-ctrl[1001-1004].eqiad.wmnet |
[production] |
13:35 |
<cgoubert@cumin1003> |
END (FAIL) - Cookbook sre.k8s.wipe-cluster (exit_code=99) Wipe the K8s cluster wikikube-eqiad: eqiad Wikikube kubernetes cluster upgrade to 1.31 - T405703 |
[production] |
13:35 |
<cgoubert@deploy2002> |
helmfile [aux-k8s-codfw] DONE helmfile.d/admin 'apply'. |
[production] |
13:34 |
<cgoubert@deploy2002> |
helmfile [aux-k8s-codfw] START helmfile.d/admin 'apply'. |
[production] |