2021-05-25
§
|
18:16 |
<razzi> |
sudo systemctl start for all failed units listed by `systemctl list-units --state=failed` on an-launcher1002 |
[analytics] |
18:14 |
<razzi> |
sudo systemctl start eventlogging_to_druid_navigationtiming_hourly.service |
[analytics] |
18:01 |
<razzi> |
manually edit /etc/hadoop/conf/capacity-scheduler.xml to set the queues to the running state, then run sudo -u yarn kerberos-run-command yarn yarn rmadmin -refreshQueues |
[analytics] |
17:52 |
<razzi> |
sudo -u yarn kerberos-run-command yarn yarn rmadmin -refreshQueues on an-master1001 and an-master1002 |
[analytics] |
17:28 |
<razzi> |
sudo systemctl restart refine_eventlogging_legacy |
[analytics] |
17:28 |
<razzi> |
sudo -u yarn kerberos-run-command yarn yarn rmadmin -refreshQueues to enable submitting jobs once again |
[analytics] |
17:07 |
<razzi> |
re-enabled puppet on an-masters and an-launcher |
[analytics] |
17:04 |
<razzi> |
sudo -u hdfs kerberos-run-command hdfs hdfs dfsadmin -safemode leave |
[analytics] |
17:03 |
<razzi> |
sudo -u hdfs /usr/bin/hdfs haadmin -failover an-master1002-eqiad-wmnet an-master1001-eqiad-wmnet |
[analytics] |
16:43 |
<razzi> |
sudo systemctl restart hadoop-hdfs-namenode on an-master1001 |
[analytics] |
16:38 |
<razzi> |
sudo -u hdfs kerberos-run-command hdfs hdfs dfsadmin -saveNamespace |
[analytics] |
16:35 |
<razzi> |
sudo -u hdfs kerberos-run-command hdfs hdfs dfsadmin -safemode enter |
[analytics] |
16:28 |
<razzi> |
sudo -u hdfs /usr/bin/hdfs haadmin -failover an-master1002-eqiad-wmnet an-master1001-eqiad-wmnet |
[analytics] |
16:23 |
<razzi> |
sudo -u hdfs kerberos-run-command hdfs hdfs dfsadmin -safemode leave |
[analytics] |
16:06 |
<razzi> |
sudo systemctl restart hadoop-hdfs-namenode |
[analytics] |
15:52 |
<razzi> |
checkpoint hdfs with sudo -u hdfs kerberos-run-command hdfs hdfs dfsadmin -saveNamespace |
[analytics] |
15:51 |
<razzi> |
enable safe mode on an-master1001 with sudo -u hdfs kerberos-run-command hdfs hdfs dfsadmin -safemode enter |
[analytics] |
15:36 |
<razzi> |
disable puppet on an-master1001.eqiad.wmnet and an-master1002.eqiad.wmnet again |
[analytics] |
15:35 |
<razzi> |
re-enable puppet on an-masters, run puppet, and run sudo -u yarn kerberos-run-command yarn yarn rmadmin -refreshQueues |
[analytics] |
15:32 |
<razzi> |
disable puppet on an-master1001.eqiad.wmnet and an-master1002.eqiad.wmnet |
[analytics] |
14:39 |
<razzi> |
stop puppet on an-launcher and stop hadoop-related timers |
[analytics] |
01:09 |
<razzi> |
sudo -u hdfs /usr/bin/hdfs haadmin -failover an-master1002-eqiad-wmnet an-master1001-eqiad-wmnet |
[analytics] |
01:07 |
<razzi> |
sudo -u hdfs /usr/bin/hdfs haadmin -failover an-master1001-eqiad-wmnet an-master1002-eqiad-wmnet |
[analytics] |
00:34 |
<razzi> |
sudo -u hdfs /usr/bin/hdfs haadmin -failover an-master1001-eqiad-wmnet an-master1002-eqiad-wmnet |
[analytics] |