production SAL

7301-7350 of 10000 results (77ms)

2023-07-07 §
12:17	<hashar>	Re-enabled zuul-merger on contint2001 and removed the Icinga maintenance window	[production]
12:02	<aborrero@cumin1001>	END (PASS) - Cookbook sre.dns.netbox (exit_code=0)	[production]
12:02	<aborrero@cumin1001>	END (PASS) - Cookbook sre.puppet.sync-netbox-hiera (exit_code=0) generate netbox hiera data: "Triggered by cookbooks.sre.dns.netbox: wikimediacloud - aborrero@cumin1001"	[production]
12:01	<aborrero@cumin1001>	START - Cookbook sre.puppet.sync-netbox-hiera generate netbox hiera data: "Triggered by cookbooks.sre.dns.netbox: wikimediacloud - aborrero@cumin1001"	[production]
11:58	<aborrero@cumin1001>	START - Cookbook sre.dns.netbox	[production]
11:48	<aborrero@cumin1001>	END (PASS) - Cookbook sre.dns.netbox (exit_code=0)	[production]
11:48	<aborrero@cumin1001>	END (PASS) - Cookbook sre.puppet.sync-netbox-hiera (exit_code=0) generate netbox hiera data: "Triggered by cookbooks.sre.dns.netbox: wikimediacloud - aborrero@cumin1001"	[production]
11:47	<aborrero@cumin1001>	START - Cookbook sre.puppet.sync-netbox-hiera generate netbox hiera data: "Triggered by cookbooks.sre.dns.netbox: wikimediacloud - aborrero@cumin1001"	[production]
11:45	<aborrero@cumin1001>	START - Cookbook sre.dns.netbox	[production]
11:42	<hashar>	Enabled zuul-merger contint1002, disabled it on contint2001 and marked that host as under maintenance in Icinga for the next two hours	[production]
11:27	<hashar>	Stopped zuul-merger contint1002	[production]
11:17	<aborrero@cumin1001>	START - Cookbook sre.dns.netbox	[production]
11:05	<aborrero@cumin1001>	END (PASS) - Cookbook sre.dns.netbox (exit_code=0)	[production]
11:05	<aborrero@cumin1001>	END (PASS) - Cookbook sre.puppet.sync-netbox-hiera (exit_code=0) generate netbox hiera data: "Triggered by cookbooks.sre.dns.netbox: wikimediacloud - aborrero@cumin1001"	[production]
11:04	<aborrero@cumin1001>	START - Cookbook sre.puppet.sync-netbox-hiera generate netbox hiera data: "Triggered by cookbooks.sre.dns.netbox: wikimediacloud - aborrero@cumin1001"	[production]
11:02	<aborrero@cumin1001>	START - Cookbook sre.dns.netbox	[production]
10:13	<moritzm>	rebooting puppetdb1003	[production]
10:09	<moritzm>	rebooting puppetserver1001	[production]
10:06	<jmm@cumin2002>	END (FAIL) - Cookbook sre.hosts.reboot-single (exit_code=1) for host puppetdb2003.codfw.wmnet	[production]
10:05	<moritzm>	rebooting puppetserver2001	[production]
10:05	<jiji@deploy1002>	helmfile [staging] DONE helmfile.d/services/ipoid: apply	[production]
10:03	<jiji@deploy1002>	helmfile [staging] START helmfile.d/services/ipoid: apply	[production]
09:59	<jmm@cumin2002>	END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host netflow1002.eqiad.wmnet	[production]
09:55	<jmm@cumin2002>	START - Cookbook sre.hosts.reboot-single for host puppetdb2003.codfw.wmnet	[production]
09:55	<jmm@cumin2002>	START - Cookbook sre.hosts.reboot-single for host netflow1002.eqiad.wmnet	[production]
09:52	<jmm@cumin2002>	END (FAIL) - Cookbook sre.hosts.reboot-single (exit_code=1) for host debmonitor2003.codfw.wmnet	[production]
09:52	<jmm@cumin2002>	END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host netflow2003.codfw.wmnet	[production]
09:46	<jmm@cumin2002>	START - Cookbook sre.hosts.reboot-single for host netflow2003.codfw.wmnet	[production]
09:46	<jmm@cumin2002>	START - Cookbook sre.hosts.reboot-single for host debmonitor2003.codfw.wmnet	[production]
09:45	<stevemunene@cumin1001>	END (FAIL) - Cookbook sre.hadoop.roll-restart-masters (exit_code=99) restart masters for Hadoop analytics cluster: Restart of jvm daemons.	[production]
09:39	<jmm@cumin2002>	END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host debmonitor1003.eqiad.wmnet	[production]
09:37	<jmm@cumin2002>	END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host netflow6001.drmrs.wmnet	[production]
09:35	<jmm@cumin2002>	START - Cookbook sre.hosts.reboot-single for host debmonitor1003.eqiad.wmnet	[production]
09:34	<jmm@cumin2002>	END (ERROR) - Cookbook sre.hosts.reboot-single (exit_code=97) for host lists1003.wikimedia.org	[production]
09:33	<jmm@cumin2002>	START - Cookbook sre.hosts.reboot-single for host netflow6001.drmrs.wmnet	[production]
09:29	<jmm@cumin2002>	END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host netflow6001.drmrs.wmnet	[production]
09:29	<stevemunene@cumin1001>	START - Cookbook sre.hadoop.roll-restart-masters restart masters for Hadoop analytics cluster: Restart of jvm daemons.	[production]
09:26	<jmm@cumin2002>	START - Cookbook sre.hosts.reboot-single for host netflow6001.drmrs.wmnet	[production]
09:24	<jmm@cumin2002>	END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host netflow3002.esams.wmnet	[production]
09:24	<jmm@cumin2002>	START - Cookbook sre.hosts.reboot-single for host lists1003.wikimedia.org	[production]
09:20	<jmm@cumin2002>	END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host people1004.eqiad.wmnet	[production]
09:19	<jmm@cumin2002>	START - Cookbook sre.hosts.reboot-single for host people1004.eqiad.wmnet	[production]
09:19	<jmm@cumin2002>	START - Cookbook sre.hosts.reboot-single for host netflow3002.esams.wmnet	[production]
09:18	<jmm@cumin2002>	END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host netflow5002.eqsin.wmnet	[production]
09:17	<jmm@cumin2002>	END (PASS) - Cookbook sre.hosts.reboot-single (exit_code=0) for host people2003.codfw.wmnet	[production]
09:13	<jmm@cumin2002>	START - Cookbook sre.hosts.reboot-single for host people2003.codfw.wmnet	[production]
09:12	<jmm@cumin2002>	START - Cookbook sre.hosts.reboot-single for host netflow5002.eqsin.wmnet	[production]
08:53	<btullis@deploy1002>	helmfile [staging] DONE helmfile.d/services/datahub: sync on main	[production]
08:50	<btullis@deploy1002>	helmfile [staging] START helmfile.d/services/datahub: apply on main	[production]
08:48	<moritzm>	installing bookworm kernel updates	[production]