2024-11-20
§
|
16:35 |
<klausman@deploy2002> |
helmfile [ml-staging-codfw] DONE helmfile.d/admin 'apply'. |
[production] |
16:35 |
<jiji@deploy2002> |
helmfile [staging] START helmfile.d/services/eventstreams: apply |
[production] |
16:34 |
<klausman@deploy2002> |
helmfile [ml-staging-codfw] START helmfile.d/admin 'apply'. |
[production] |
16:28 |
<jiji@deploy2002> |
helmfile [staging] START helmfile.d/services/eventgate-main: apply |
[production] |
16:26 |
<jiji@deploy2002> |
helmfile [eqiad] DONE helmfile.d/services/changeprop-jobqueue: apply |
[production] |
16:25 |
<aikochou@deploy2002> |
helmfile [ml-serve-codfw] Ran 'sync' command on namespace 'revision-models' for release 'main'. |
[production] |
16:24 |
<jiji@deploy2002> |
helmfile [eqiad] START helmfile.d/services/changeprop-jobqueue: apply |
[production] |
16:23 |
<jiji@deploy2002> |
helmfile [staging] DONE helmfile.d/services/changeprop-jobqueue: apply |
[production] |
16:22 |
<jiji@deploy2002> |
helmfile [staging] START helmfile.d/services/changeprop-jobqueue: apply |
[production] |
16:22 |
<jiji@deploy2002> |
helmfile [staging] DONE helmfile.d/services/benthos-cache-invalidator: apply |
[production] |
16:21 |
<jiji@deploy2002> |
helmfile [staging] START helmfile.d/services/benthos-cache-invalidator: apply |
[production] |
16:15 |
<aikochou@deploy2002> |
helmfile [ml-staging-codfw] Ran 'sync' command on namespace 'revision-models' for release 'main'. |
[production] |
16:10 |
<jmm@cumin2002> |
END (PASS) - Cookbook sre.ganeti.drain-node (exit_code=0) for draining ganeti node ganeti1017.eqiad.wmnet |
[production] |
15:51 |
<apine@deploy2002> |
helmfile [eqiad] DONE helmfile.d/services/wikifunctions: apply |
[production] |
15:50 |
<apine@deploy2002> |
helmfile [eqiad] START helmfile.d/services/wikifunctions: apply |
[production] |
15:50 |
<apine@deploy2002> |
helmfile [codfw] DONE helmfile.d/services/wikifunctions: apply |
[production] |
15:49 |
<apine@deploy2002> |
helmfile [codfw] START helmfile.d/services/wikifunctions: apply |
[production] |
15:48 |
<dancy@deploy2002> |
Finished scap sync-world: no-op deployment for testing. (duration: 03m 21s) |
[production] |
15:44 |
<dancy@deploy2002> |
Started scap sync-world: no-op deployment for testing. |
[production] |
15:44 |
<apine@deploy2002> |
helmfile [staging] DONE helmfile.d/services/wikifunctions: apply |
[production] |
15:44 |
<apine@deploy2002> |
helmfile [staging] START helmfile.d/services/wikifunctions: apply |
[production] |
15:37 |
<apine@deploy2002> |
helmfile [staging] START helmfile.d/services/wikifunctions: apply |
[production] |
15:37 |
<apine@deploy2002> |
helmfile [staging] DONE helmfile.d/services/wikifunctions: apply |
[production] |
15:33 |
<arnaudb@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on db1206.eqiad.wmnet with reason: host overworked by dumps - T368098 |
[production] |
15:33 |
<arnaudb@cumin1002> |
START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on db1206.eqiad.wmnet with reason: host overworked by dumps - T368098 |
[production] |
15:31 |
<jynus> |
starting resharding of commons backup files into new host backup2010 T376892 |
[production] |
15:27 |
<apine@deploy2002> |
helmfile [staging] START helmfile.d/services/wikifunctions: apply |
[production] |
15:23 |
<apine@deploy2002> |
helmfile [codfw] DONE helmfile.d/services/wikifunctions: apply |
[production] |
15:23 |
<apine@deploy2002> |
helmfile [codfw] START helmfile.d/services/wikifunctions: apply |
[production] |
15:22 |
<apine@deploy2002> |
helmfile [eqiad] DONE helmfile.d/services/wikifunctions: apply |
[production] |
15:22 |
<apine@deploy2002> |
helmfile [eqiad] START helmfile.d/services/wikifunctions: apply |
[production] |
15:19 |
<apine@deploy2002> |
helmfile [staging] DONE helmfile.d/services/wikifunctions: apply |
[production] |
15:19 |
<apine@deploy2002> |
helmfile [staging] START helmfile.d/services/wikifunctions: apply |
[production] |
15:15 |
<apine@deploy2002> |
helmfile [eqiad] DONE helmfile.d/services/wikifunctions: apply |
[production] |
15:14 |
<apine@deploy2002> |
helmfile [eqiad] START helmfile.d/services/wikifunctions: apply |
[production] |
15:13 |
<apine@deploy2002> |
helmfile [codfw] DONE helmfile.d/services/wikifunctions: apply |
[production] |
15:13 |
<apine@deploy2002> |
helmfile [codfw] START helmfile.d/services/wikifunctions: apply |
[production] |
15:10 |
<apine@deploy2002> |
helmfile [staging] DONE helmfile.d/services/wikifunctions: apply |
[production] |
15:09 |
<apine@deploy2002> |
helmfile [staging] START helmfile.d/services/wikifunctions: apply |
[production] |
15:09 |
<urandom> |
bootstrapping cassandra, restbase2037-{a,b,c} — T380236 |
[production] |
15:04 |
<btullis@cumin1002> |
END (PASS) - Cookbook sre.ceph.roll-restart-reboot-server (exit_code=0) rolling reboot on P{cephosd100[2-4].eqiad.wmnet} and (A:cephosd) |
[production] |
14:57 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.hosts.provision (exit_code=0) for host thanos-be1005.mgmt.eqiad.wmnet with chassis set policy FORCE_RESTART |
[production] |
14:53 |
<JennH> |
power cycling unresponsive mgmt switch in codfw: msw-c3-codfw |
[production] |
14:50 |
<btullis@cumin1002> |
END (FAIL) - Cookbook sre.hadoop.roll-restart-workers (exit_code=99) restart workers for Hadoop analytics cluster: Roll restart of jvm daemons for openjdk upgrade. |
[production] |
14:43 |
<elukey@cumin1002> |
START - Cookbook sre.hosts.provision for host thanos-be1005.mgmt.eqiad.wmnet with chassis set policy FORCE_RESTART |
[production] |
14:29 |
<cdanis> |
T380226 💙cdanis@mwmaint2002.codfw.wmnet ~ 🕤☕ mwscript sql.php --wiki=commonswiki --cluster=extension1 /srv/mediawiki/php-1.44.0-wmf.4/extensions/JsonConfig/sql/mysql/tables-generated.sql |
[production] |
14:25 |
<sukhe@puppetserver1001> |
conftool action : set/pooled=yes; selector: name=cp7007.magru.wmnet [reason: host reimaged] |
[production] |
14:24 |
<btullis@cumin1002> |
START - Cookbook sre.ceph.roll-restart-reboot-server rolling reboot on P{cephosd100[2-4].eqiad.wmnet} and (A:cephosd) |
[production] |
14:23 |
<jynus> |
starting resharding of commons backup files into new host backup1010 T376892 |
[production] |
14:23 |
<sukhe> |
running homer on asw*magru* |
[production] |