|
2023-07-10
§
|
| 13:27 |
<elukey@deploy1002> |
helmfile [eqiad] START helmfile.d/services/eventgate-main: sync |
[production] |
| 10:50 |
<elukey@deploy1002> |
helmfile [codfw] DONE helmfile.d/services/eventgate-main: sync |
[production] |
| 10:50 |
<elukey@deploy1002> |
helmfile [codfw] START helmfile.d/services/eventgate-main: sync |
[production] |
| 10:44 |
<elukey@deploy1002> |
helmfile [staging] DONE helmfile.d/services/eventgate-main: sync |
[production] |
| 10:44 |
<elukey@deploy1002> |
helmfile [staging] START helmfile.d/services/eventgate-main: sync |
[production] |
| 07:30 |
<elukey@deploy1002> |
helmfile [eqiad] DONE helmfile.d/services/changeprop-jobqueue: sync |
[production] |
| 07:29 |
<elukey@deploy1002> |
helmfile [eqiad] START helmfile.d/services/changeprop-jobqueue: sync |
[production] |
| 07:22 |
<elukey@deploy1002> |
helmfile [codfw] DONE helmfile.d/services/changeprop-jobqueue: sync |
[production] |
| 07:21 |
<elukey@deploy1002> |
helmfile [codfw] START helmfile.d/services/changeprop-jobqueue: sync |
[production] |
| 07:20 |
<elukey@deploy1002> |
helmfile [staging] DONE helmfile.d/services/changeprop-jobqueue: sync |
[production] |
| 07:20 |
<elukey@deploy1002> |
helmfile [staging] START helmfile.d/services/changeprop-jobqueue: sync |
[production] |
|
2023-07-06
§
|
| 15:54 |
<elukey> |
changeprop's kafka linger.ms set to 20s - T338357 (was 5ms, now changeprop waits a bit more to batch messages to send to kafka in one go) |
[production] |
| 15:53 |
<elukey@deploy1002> |
helmfile [eqiad] DONE helmfile.d/services/changeprop: sync |
[production] |
| 15:53 |
<elukey@deploy1002> |
helmfile [eqiad] START helmfile.d/services/changeprop: sync |
[production] |
| 15:45 |
<elukey@deploy1002> |
helmfile [codfw] DONE helmfile.d/services/changeprop: sync |
[production] |
| 15:45 |
<elukey@deploy1002> |
helmfile [codfw] START helmfile.d/services/changeprop: sync |
[production] |
| 15:36 |
<elukey@deploy1002> |
helmfile [staging] DONE helmfile.d/services/changeprop: sync |
[production] |
| 15:36 |
<elukey@deploy1002> |
helmfile [staging] START helmfile.d/services/changeprop: sync |
[production] |
| 13:33 |
<elukey@cumin1001> |
END (PASS) - Cookbook sre.hosts.reimage (exit_code=0) for host zookeeper-test1002.eqiad.wmnet with OS bookworm |
[production] |
| 12:58 |
<elukey@cumin1001> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 2:00:00 on zookeeper-test1002.eqiad.wmnet with reason: host reimage |
[production] |
| 12:56 |
<elukey@cumin1001> |
START - Cookbook sre.hosts.downtime for 2:00:00 on zookeeper-test1002.eqiad.wmnet with reason: host reimage |
[production] |
| 12:42 |
<elukey@cumin1001> |
START - Cookbook sre.hosts.reimage for host zookeeper-test1002.eqiad.wmnet with OS bookworm |
[production] |
| 12:15 |
<elukey@cumin1001> |
END (FAIL) - Cookbook sre.hosts.reimage (exit_code=99) for host zookeeper-test1002.eqiad.wmnet with OS bookworm |
[production] |
| 12:15 |
<elukey@cumin1001> |
START - Cookbook sre.hosts.reimage for host zookeeper-test1002.eqiad.wmnet with OS bookworm |
[production] |
| 09:11 |
<elukey> |
restart kube-apiserver on ml-serve-ctrl2* as attempt to fix LIST-related latency issues |
[production] |