2024-08-06
§
|
15:23 |
<elukey@cumin1002> |
START - Cookbook sre.hosts.provision for host wikikube-worker2035.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
15:20 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.hosts.provision (exit_code=0) for host sretest2002.mgmt.codfw.wmnet with reboot policy FORCED |
[production] |
15:12 |
<elukey@cumin1002> |
START - Cookbook sre.hosts.provision for host sretest2002.mgmt.codfw.wmnet with reboot policy FORCED |
[production] |
15:11 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.hosts.provision (exit_code=0) for host sretest1001.mgmt.eqiad.wmnet with reboot policy GRACEFUL |
[production] |
15:10 |
<elukey@cumin1002> |
START - Cookbook sre.hosts.provision for host sretest1001.mgmt.eqiad.wmnet with reboot policy GRACEFUL |
[production] |
14:23 |
<elukey> |
upgrade debmonitor-server on debmonitor[1,2]003 to version 0.5 - cp /var/cache/apt/archives/python3-debmonitor_0.4.0-3_all.deb . |
[production] |
12:39 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.cassandra.roll-restart (exit_code=0) for nodes matching A:ml-cache-codfw: Openjdk upgrade - elukey@cumin1002 |
[production] |
12:32 |
<elukey> |
apt-get purge debmonitor-server + run-puppet-agent to re-install the daemon on debmonitor2003 |
[production] |
12:31 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 0:30:00 on debmonitor2003.codfw.wmnet with reason: failover test |
[production] |
12:31 |
<elukey@cumin1002> |
START - Cookbook sre.hosts.downtime for 0:30:00 on debmonitor2003.codfw.wmnet with reason: failover test |
[production] |
12:21 |
<elukey@cumin1002> |
START - Cookbook sre.cassandra.roll-restart for nodes matching A:ml-cache-codfw: Openjdk upgrade - elukey@cumin1002 |
[production] |
12:16 |
<elukey> |
restart debmonitor-server on debmonitor1003 |
[production] |
12:13 |
<elukey> |
stop debmonitor-server on debmonitor1003 as temporary test |
[production] |
12:11 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 0:30:00 on debmonitor1003.eqiad.wmnet with reason: failover test |
[production] |
12:11 |
<elukey@cumin1002> |
START - Cookbook sre.hosts.downtime for 0:30:00 on debmonitor1003.eqiad.wmnet with reason: failover test |
[production] |
09:24 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.cassandra.roll-restart (exit_code=0) for nodes matching A:ml-cache-eqiad: Openjdk upgrade - elukey@cumin1002 |
[production] |
09:07 |
<elukey@cumin1002> |
START - Cookbook sre.cassandra.roll-restart for nodes matching A:ml-cache-eqiad: Openjdk upgrade - elukey@cumin1002 |
[production] |
09:02 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.cassandra.roll-restart (exit_code=0) for nodes matching A:cassandra-dev: Openjdk upgrade - elukey@cumin1002 |
[production] |
08:42 |
<elukey@cumin1002> |
START - Cookbook sre.cassandra.roll-restart for nodes matching A:cassandra-dev: Openjdk upgrade - elukey@cumin1002 |
[production] |
08:16 |
<elukey> |
powercycle wdqs1023, misbehaving and not responding to ssh anymore |
[production] |
08:12 |
<elukey@puppetserver1001> |
conftool action : set/pooled=no; selector: name=wdqs1023.eqiad.wmnet |
[production] |
07:34 |
<elukey> |
powercycle ml-serve2001 - host seems frozen, DIMM errors registered in `getsel` |
[production] |
2024-08-05
§
|
15:39 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.netbox.update-extras (exit_code=0) rolling restart_daemons on A:netbox |
[production] |
15:38 |
<elukey@cumin1002> |
START - Cookbook sre.netbox.update-extras rolling restart_daemons on A:netbox |
[production] |
15:27 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.netbox.update-extras (exit_code=0) rolling restart_daemons on A:netbox |
[production] |
15:26 |
<elukey@cumin1002> |
START - Cookbook sre.netbox.update-extras rolling restart_daemons on A:netbox |
[production] |
15:22 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.netbox.update-extras (exit_code=0) rolling restart_daemons on A:netbox |
[production] |
15:22 |
<elukey@cumin1002> |
START - Cookbook sre.netbox.update-extras rolling restart_daemons on A:netbox |
[production] |
15:07 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.hosts.provision (exit_code=0) for host db2239.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
15:03 |
<elukey@cumin1002> |
START - Cookbook sre.hosts.provision for host db2239.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
15:02 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.hosts.provision (exit_code=0) for host db2240.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
14:49 |
<elukey@cumin1002> |
START - Cookbook sre.hosts.provision for host db2240.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
14:43 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.hosts.provision (exit_code=0) for host db2238.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
14:35 |
<elukey@cumin1002> |
START - Cookbook sre.hosts.provision for host db2238.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
14:04 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.hosts.provision (exit_code=0) for host db2237.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
14:01 |
<elukey@cumin1002> |
START - Cookbook sre.hosts.provision for host db2237.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
14:01 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.hosts.provision (exit_code=0) for host db2236.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
13:57 |
<elukey@cumin1002> |
START - Cookbook sre.hosts.provision for host db2236.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
2024-08-02
§
|
15:10 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.hosts.provision (exit_code=0) for host db2235.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
15:05 |
<elukey@cumin1002> |
START - Cookbook sre.hosts.provision for host db2235.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
15:00 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.hosts.provision (exit_code=0) for host db2234.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
14:53 |
<elukey@cumin1002> |
START - Cookbook sre.hosts.provision for host db2234.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
14:52 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.hosts.provision (exit_code=0) for host db2233.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
14:49 |
<elukey@cumin1002> |
START - Cookbook sre.hosts.provision for host db2233.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
14:41 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.hosts.provision (exit_code=0) for host db2232.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
14:34 |
<elukey@cumin1002> |
START - Cookbook sre.hosts.provision for host db2232.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
14:34 |
<elukey@cumin1002> |
END (PASS) - Cookbook sre.hosts.provision (exit_code=0) for host db2231.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
14:27 |
<elukey@cumin1002> |
START - Cookbook sre.hosts.provision for host db2231.mgmt.codfw.wmnet with reboot policy GRACEFUL |
[production] |
10:18 |
<elukey> |
manually start dump_cloud_ip_ranges.service on puppetmaster1001 as test |
[production] |