2023-06-28
10:57 <hnowlan@deploy1002> helmfile [codfw] START helmfile.d/services/rest-gateway: apply [production]
10:57 <hnowlan@deploy1002> helmfile [eqiad] DONE helmfile.d/services/rest-gateway: apply [production]
10:57 <hnowlan@deploy1002> helmfile [eqiad] START helmfile.d/services/rest-gateway: apply [production]
10:55 <btullis@deploy1002> helmfile [staging] DONE helmfile.d/services/datahub: sync on main [production]
10:52 <fabfur@cumin1001> END (PASS) - Cookbook sre.cdn.roll-upgrade-haproxy (exit_code=0) rolling upgrade of HAProxy on A:cp-upload_codfw [production]
10:51 <claime> Migrating to rsync::quickdatacopy for deployment servers - T289857 [production]
10:51 <hnowlan@deploy1002> helmfile [codfw] DONE helmfile.d/services/rest-gateway: apply [production]
10:50 <hnowlan@deploy1002> helmfile [codfw] START helmfile.d/services/rest-gateway: apply [production]
10:50 <hnowlan@deploy1002> helmfile [eqiad] DONE helmfile.d/services/rest-gateway: apply [production]
10:50 <hnowlan@deploy1002> helmfile [eqiad] START helmfile.d/services/rest-gateway: apply [production]
10:47 <elukey@cumin1001> START - Cookbook sre.cassandra.roll-restart for nodes matching A:ml-cache-codfw: Roll restart to pick up Java 11 - elukey@cumin1001 [production]
10:47 <elukey@cumin1001> END (PASS) - Cookbook sre.cassandra.roll-restart (exit_code=0) for nodes matching A:ml-cache-eqiad: Roll restart to pick up Java 11 - elukey@cumin1001 [production]
10:44 <btullis@deploy1002> helmfile [staging] START helmfile.d/services/datahub: apply on main [production]
10:42 <elukey@deploy1002> helmfile [ml-serve-eqiad] 'sync' command on namespace 'ores-legacy' for release 'main'. [production]
10:42 <elukey@deploy1002> helmfile [ml-serve-codfw] 'sync' command on namespace 'ores-legacy' for release 'main'. [production]
10:42 <jmm@cumin2002> END (PASS) - Cookbook sre.ganeti.drain-node (exit_code=0) for draining ganeti node ganeti5007.eqsin.wmnet [production]
10:42 <jmm@cumin2002> END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host ganeti5007.eqsin.wmnet [production]
10:41 <elukey@deploy1002> helmfile [ml-staging-codfw] 'sync' command on namespace 'ores-legacy' for release 'main'. [production]
10:38 <fabfur@cumin1001> END (FAIL) - Cookbook sre.cdn.roll-upgrade-haproxy (exit_code=1) rolling upgrade of HAProxy on A:cp-text_codfw [production]
10:35 <fabfur@cumin1001> START - Cookbook sre.cdn.roll-upgrade-haproxy rolling upgrade of HAProxy on A:cp-text_codfw [production]
10:34 <jmm@cumin2002> START - Cookbook sre.hosts.reboot-single for host ganeti5007.eqsin.wmnet [production]
10:34 <fabfur@cumin1001> START - Cookbook sre.cdn.roll-upgrade-haproxy rolling upgrade of HAProxy on A:cp-upload_codfw [production]
10:31 <elukey@deploy1002> helmfile [ml-staging-codfw] 'sync' command on namespace 'ores-legacy' for release 'main'. [production]
10:29 <elukey@cumin1001> START - Cookbook sre.cassandra.roll-restart for nodes matching A:ml-cache-eqiad: Roll restart to pick up Java 11 - elukey@cumin1001 [production]
10:28 <jmm@cumin2002> START - Cookbook sre.ganeti.drain-node for draining ganeti node ganeti5007.eqsin.wmnet [production]
10:21 <hnowlan> disabling puppet on A:cp-text for testing 933508 [production]
10:20 <hnowlan@puppetmaster1001> conftool action : set/pooled=no; selector: service=ats-be,name=cp2037.codfw.wmnet [production]
10:11 <vgutierrez> repool cp4037 [production]
10:05 <elukey@deploy1002> helmfile [ml-serve-eqiad] DONE helmfile.d/admin 'sync'. [production]
10:02 <elukey@deploy1002> helmfile [ml-serve-eqiad] START helmfile.d/admin 'sync'. [production]
10:01 <elukey@deploy1002> helmfile [ml-serve-codfw] DONE helmfile.d/admin 'sync'. [production]
09:57 <elukey@deploy1002> helmfile [ml-serve-codfw] START helmfile.d/admin 'sync'. [production]
09:57 <elukey@deploy1002> helmfile [ml-staging-codfw] DONE helmfile.d/admin 'sync'. [production]
09:55 <elukey@deploy1002> helmfile [ml-staging-codfw] START helmfile.d/admin 'sync'. [production]
09:46 <btullis@cumin1001> END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 7 days, 0:00:00 on 8 hosts with reason: Decommissioning [production]
09:46 <btullis@cumin1001> START - Cookbook sre.hosts.downtime for 7 days, 0:00:00 on 8 hosts with reason: Decommissioning [production]
09:29 <jmm@cumin2002> END (PASS) - Cookbook sre.ganeti.drain-node (exit_code=0) for draining ganeti node ganeti4005.ulsfo.wmnet [production]
09:29 <jmm@cumin2002> END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host ganeti4005.ulsfo.wmnet [production]
09:19 <jmm@cumin2002> START - Cookbook sre.hosts.reboot-single for host ganeti4005.ulsfo.wmnet [production]
09:09 <jmm@cumin2002> START - Cookbook sre.ganeti.drain-node for draining ganeti node ganeti4005.ulsfo.wmnet [production]
09:09 <vgutierrez> depool cp4037 for some ATS tests [production]
09:08 <moritzm> failover ganeti master in ulsfo to ganeti4008 [production]
09:06 <jmm@cumin2002> END (PASS) - Cookbook sre.ganeti.drain-node (exit_code=0) for draining ganeti node ganeti4008.ulsfo.wmnet [production]
09:06 <jmm@cumin2002> END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host ganeti4008.ulsfo.wmnet [production]
08:59 <jmm@cumin2002> START - Cookbook sre.hosts.reboot-single for host ganeti4008.ulsfo.wmnet [production]
08:40 <btullis@deploy1002> helmfile [staging] DONE helmfile.d/services/datahub: sync on main [production]
08:28 <btullis@deploy1002> helmfile [staging] START helmfile.d/services/datahub: apply on main [production]
08:24 <jmm@cumin2002> START - Cookbook sre.ganeti.drain-node for draining ganeti node ganeti4008.ulsfo.wmnet [production]
08:23 <jmm@cumin2002> END (PASS) - Cookbook sre.ganeti.drain-node (exit_code=0) for draining ganeti node ganeti4007.ulsfo.wmnet [production]
08:23 <jmm@cumin2002> END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host ganeti4007.ulsfo.wmnet [production]