View Javadoc
1   package org.kuali.common.devops.jenkins.monitor;
2   
3   import static com.fasterxml.jackson.databind.DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES;
4   import static com.google.common.collect.Iterables.filter;
5   import static com.google.common.collect.Lists.newArrayList;
6   import static com.google.common.collect.Maps.newHashMap;
7   import static java.lang.String.format;
8   import static java.lang.System.currentTimeMillis;
9   import static java.lang.System.getProperty;
10  import static java.util.Arrays.asList;
11  import static java.util.Collections.sort;
12  import static org.apache.commons.io.FileUtils.forceDelete;
13  import static org.apache.commons.io.FileUtils.touch;
14  import static org.apache.commons.io.FileUtils.write;
15  import static org.apache.commons.lang.StringUtils.substringBetween;
16  import static org.apache.commons.lang3.StringUtils.isBlank;
17  import static org.apache.commons.lang3.StringUtils.replace;
18  import static org.kuali.common.core.json.jackson.JacksonContext.newDefaultObjectMapper;
19  import static org.kuali.common.util.Encodings.UTF8;
20  import static org.kuali.common.util.Encodings.encodeUTF8;
21  import static org.kuali.common.util.FormatUtils.getTime;
22  import static org.kuali.common.util.base.Exceptions.illegalState;
23  import static org.kuali.common.util.base.Precondition.checkNotBlank;
24  import static org.kuali.common.util.base.Precondition.checkNotNull;
25  import static org.kuali.common.util.encrypt.Encryption.getDefaultEncryptor;
26  import static org.kuali.common.util.file.Files.getCanonicalFile;
27  import static org.kuali.common.util.log.LoggerUtils.logTable;
28  import static org.kuali.common.util.log.Loggers.newLogger;
29  
30  import java.io.File;
31  import java.io.IOException;
32  import java.util.List;
33  import java.util.Map;
34  
35  import org.apache.commons.io.FileUtils;
36  import org.junit.Test;
37  import org.kuali.common.aws.ec2.api.EC2Service;
38  import org.kuali.common.aws.ec2.impl.DefaultEC2Service;
39  import org.kuali.common.aws.model.ImmutableAWSCredentials;
40  import org.kuali.common.core.cli.api.CommandLineService;
41  import org.kuali.common.core.cli.plexus.PlexusCLIService;
42  import org.kuali.common.core.json.api.JsonService;
43  import org.kuali.common.core.json.jackson.JacksonContext;
44  import org.kuali.common.core.json.jackson.JacksonJsonService;
45  import org.kuali.common.devops.jenkins.monitor.model.JenkinsCredentials;
46  import org.kuali.common.devops.jenkins.monitor.model.JenkinsMaster;
47  import org.kuali.common.devops.jenkins.monitor.model.JenkinsNode;
48  import org.kuali.common.devops.jenkins.monitor.model.JenkinsSlave;
49  import org.kuali.common.devops.jenkins.monitor.model.JenkinsSlaveComparator;
50  import org.kuali.common.devops.jenkins.monitor.ses.DefaultEmailService;
51  import org.kuali.common.devops.jenkins.monitor.ses.Email;
52  import org.kuali.common.devops.jenkins.monitor.ses.EmailService;
53  import org.kuali.common.util.LocationUtils;
54  import org.kuali.common.util.encrypt.Encryptor;
55  import org.slf4j.Logger;
56  
57  import com.amazonaws.auth.AWSCredentials;
58  import com.amazonaws.regions.Region;
59  import com.amazonaws.regions.Regions;
60  import com.amazonaws.services.ec2.model.Instance;
61  import com.fasterxml.jackson.databind.ObjectMapper;
62  import com.google.common.base.Predicate;
63  
64  public class NodeMonitorTest {
65  
66  	private static final Logger logger = newLogger();
67  
68  	private static final CommandLineService CLI = new PlexusCLIService();
69  
70  	@Test
71  	public void test() {
72  		boolean dryRun = Boolean.getBoolean("dryRun");
73  		long now = currentTimeMillis();
74  		String hostname = getHostname();
75  		JenkinsMaster master = getJenkinsMaster(hostname);
76  		List<JenkinsNode> slaves = newArrayList(filter(master.getNodes(), BuildSlavePredicate.INSTANCE));
77  		logger.info(format("total nodes --> %s", master.getNodes().size()));
78  		logger.info(format("total slaves -> %s", slaves.size()));
79  		List<JenkinsSlave> elements = getJenkinsSlaves(slaves);
80  		sort(elements, JenkinsSlaveComparator.INSTANCE);
81  		List<String> columns = asList("node", "jenkins", "amazon", "age");
82  		List<Object[]> rows = newArrayList();
83  		for (JenkinsSlave slave : elements) {
84  			String id = slave.getInstanceId();
85  			String j = slave.isJenkinsOnline() ? "online" : "offline";
86  			String a = slave.isAmazonOnline() ? "online" : "offline";
87  			Object[] row = { id, j, a, getTime(slave.getAge()) };
88  			rows.add(row);
89  		}
90  		logTable(columns, rows);
91  		List<JenkinsSlave> badSlaves = newArrayList(filter(elements, BadSlavePredicate.INSTANCE));
92  		logger.info(format("bad slaves ---> %s", badSlaves.size()));
93  		if (dryRun) {
94  			logger.info(format("dry run, skipping bad slave admin"));
95  		} else {
96  			for (JenkinsSlave badSlave : badSlaves) {
97  				stopJob(hostname, badSlave);
98  				connectNode(hostname, badSlave);
99  			}
100 			if (!badSlaves.isEmpty()) {
101 				runGroovy(hostname);
102 			}
103 		}
104 		logger.info(format("elapsed -> %s", getTime(currentTimeMillis() - now)));
105 		if (!dryRun && !badSlaves.isEmpty()) {
106 			EmailService service = getEmailService();
107 			Email email = getEmail(badSlaves);
108 			service.send(email);
109 		}
110 	}
111 
112 	private Email getEmail(List<JenkinsSlave> badSlaves) {
113 		Encryptor enc = getDefaultEncryptor();
114 		String from = enc.decrypt("U2FsdGVkX18ooea503uMsSTy2OMsXLCJHUp3vqu1ixxibe2yQM5AoyCYa7+KIGLH");
115 		List<String> to = asList(from);
116 		List<String> list = newArrayList();
117 		for (JenkinsSlave badSlave : badSlaves) {
118 			list.add(badSlave.getDisplayName());
119 		}
120 		String master = checkedProperty("JENKINS_MASTER");
121 		String subject = format("[%s] -> disconnected slaves", master);
122 		StringBuilder sb = new StringBuilder();
123 		sb.append("<table border=1 cellpadding=0 cellspacing=0>\n");
124 		String buildUrl = checkedProperty("BUILD_URL");
125 		sb.append(" <tr><td style='padding:3px;' align=right>build url:</td><td style='padding:3px;'>" + buildUrl + "</td></tr>\n");
126 		for (JenkinsSlave badSlave : badSlaves) {
127 			sb.append(" <tr><td style='padding:3px;'>reconnected:</td><td style='padding:3px;'>" + badSlave.getDisplayName() + "</td></tr>\n");
128 		}
129 		sb.append("</table>\n");
130 		String body = sb.toString();
131 		return Email.builder().withSubject(subject).withBody(body).withFrom(from).withTo(to).withHtml(true).build();
132 	}
133 
134 	private String checkedProperty(String key) {
135 		String sys = System.getProperty(key);
136 		if (!isBlank(sys)) {
137 			return sys;
138 		} else {
139 			return checkNotBlank(System.getenv(key), key);
140 		}
141 	}
142 
143 	private static void runGroovy(String hostname) {
144 		File cli = getCanonicalFile("/var/lib/tomcat7/webapps/ROOT/WEB-INF/jenkins-cli.jar");
145 		String groovy = LocationUtils.toString("classpath:org/kuali/common/kuali-devops/ubuntu/12.04/jenkins/scmTriggerThreadCleaner.groovy", UTF8);
146 		File groovyFile = getCanonicalFile("./target/jenkins/interrupt.groovy");
147 		try {
148 			write(groovyFile, groovy);
149 		} catch (IOException e) {
150 			throw illegalState(e);
151 		}
152 		String pem = "/home/tomcat7/.ssh/jcaddel.pem";
153 		List<String> args = asList("-jar", cli.getPath(), "-s", "https://" + hostname + "/", "-noCertificateCheck", "-i", pem, "groovy", groovyFile.getPath());
154 		logger.info(format("cleanup scm polling threads"));
155 		logger.info(format("executing groovy -> %s", groovyFile));
156 		CLI.execute("java", args);
157 		logger.info(format("groovy executed  -> %s", groovyFile));
158 	}
159 
160 	private static void connectNode(String hostname, JenkinsSlave slave) {
161 		logger.info(format("connect node -> %s", slave.getDisplayName()));
162 		List<String> args = asList("-p", "45358", format("jcaddel@%s", hostname), "connect-node", '"' + slave.getDisplayName() + '"', "-f");
163 		CLI.execute("ssh", args);
164 	}
165 
166 	private static void stopJob(String hostname, JenkinsSlave slave) {
167 		JenkinsCredentials creds = getJcaddel();
168 		String username = creds.getUsername();
169 		String password = creds.getApiToken();
170 		String url = format("https://%s:%s@%s/computer/%s/executors/0/stop", username, password, hostname, urlencode(slave.getDisplayName()));
171 		logger.info(format("stop job -> %s", slave.getDisplayName()));
172 		CLI.execute("curl", asList("--fail", "--silent", "--insecure", "--request", "POST", url));
173 	}
174 
175 	private List<JenkinsSlave> getJenkinsSlaves(List<JenkinsNode> slaves) {
176 		String region = getRegion();
177 		AWSCredentials creds = getFoundation();
178 		EC2Service ec2 = new DefaultEC2Service(creds, region);
179 		List<String> instanceIds = newArrayList();
180 		for (JenkinsNode slave : slaves) {
181 			String instanceId = substringBetween(slave.getDisplayName(), "(", ")");
182 			instanceIds.add(instanceId);
183 		}
184 		List<Instance> instances = ec2.getInstances(instanceIds);
185 		Map<String, Instance> map = newHashMap();
186 		for (Instance instance : instances) {
187 			map.put(instance.getInstanceId(), instance);
188 		}
189 		List<JenkinsSlave> list = newArrayList();
190 		long now = currentTimeMillis();
191 		for (JenkinsNode slave : slaves) {
192 			String instanceId = substringBetween(slave.getDisplayName(), "(", ")");
193 			Instance instance = checkNotNull(map.get(instanceId), "instanceId");
194 			boolean jenkinsOnline = !slave.isOffline();
195 			boolean amazonOnline = ec2.isOnline(instanceId);
196 			long age = now - instance.getLaunchTime().getTime();
197 			String displayName = slave.getDisplayName();
198 			JenkinsSlave element = JenkinsSlave.builder().withDisplayName(displayName).withAge(age).withAmazonOnline(amazonOnline).withJenkinsOnline(jenkinsOnline)
199 					.withInstanceId(instanceId).build();
200 			list.add(element);
201 		}
202 		return list;
203 	}
204 
205 	private EmailService getEmailService() {
206 		AWSCredentials creds = getRice();
207 		Region region = Region.getRegion(Regions.US_EAST_1);
208 		return DefaultEmailService.builder().withCredentials(creds).withRegion(region).build();
209 	}
210 
211 	private String getRegion() {
212 		String stack = getProperty("stack", "test");
213 		if (stack.equals("prod")) {
214 			return "us-east-1";
215 		} else {
216 			return "us-west-1";
217 		}
218 	}
219 
220 	private String getHostname() {
221 		String stack = getProperty("stack", "test");
222 		if (stack.equals("prod")) {
223 			return "ci.kuali.org";
224 		} else {
225 			return "testci.kuali.org";
226 		}
227 	}
228 
229 	private static JenkinsMaster getJenkinsMaster(String hostname) {
230 		File file = deleteCreate(getCanonicalFile("./target/jenkins/master.json"));
231 		String url = "https://" + hostname + "/computer/api/json";
232 		logger.info(format("query  -> %s", url));
233 		logger.info(format("create -> %s", file));
234 		CLI.execute("curl", asList("--silent", "--fail", "--insecure", "--output", file.getPath(), url));
235 		String json = readFileToString(file);
236 		ObjectMapper mapper = newDefaultObjectMapper();
237 		mapper.configure(FAIL_ON_UNKNOWN_PROPERTIES, false);
238 		JacksonContext context = JacksonContext.builder().withMapper(mapper).build();
239 		JsonService service = new JacksonJsonService(context);
240 		return service.readString(json, JenkinsMaster.class);
241 	}
242 
243 	private static String readFileToString(File file) {
244 		try {
245 			return FileUtils.readFileToString(file);
246 		} catch (IOException e) {
247 			throw illegalState(e);
248 		}
249 	}
250 
251 	private static File deleteCreate(File file) {
252 		try {
253 			if (file.exists()) {
254 				forceDelete(file);
255 			}
256 			touch(file);
257 			return file;
258 		} catch (IOException e) {
259 			throw illegalState(e);
260 		}
261 	}
262 
263 	private static JenkinsCredentials getJcaddel() {
264 		Encryptor enc = getDefaultEncryptor();
265 		String username = enc.decrypt("U2FsdGVkX182GPpXybAxPqEuhrNntHCXyb1MkleT3oU=");
266 		String apiToken = enc.decrypt("U2FsdGVkX1+p+/EH9W8R19URgB25+aCzFeycX+T31gBFeRXkOvtyDAMNbiM6QC5/tCqHbegwFriKuU57pc65mA==");
267 		return JenkinsCredentials.builder().withUsername(username).withApiToken(apiToken).build();
268 	}
269 
270 	private static AWSCredentials getFoundation() {
271 		Encryptor enc = getDefaultEncryptor();
272 		String accessKey = enc.decrypt("U2FsdGVkX19A2e6dN/ipVfb/9n0DROCPIrLK6H8PvvPmt0h6cBqccGaJW0NSoX3S");
273 		String secretKey = enc.decrypt("U2FsdGVkX19Y9SZ5GAU82/X5Z0xZdeQf7DFuVDW07R9lfyHK4VaOj5R7pviRBKmIyn7jrVT2lv8Edeu7098k1A==");
274 		return new ImmutableAWSCredentials(accessKey, secretKey);
275 	}
276 
277 	private static AWSCredentials getRice() {
278 		Encryptor enc = getDefaultEncryptor();
279 		String accessKey = enc.decrypt("U2FsdGVkX1+Y3FZqrwTo97GtYwtrr5F7dSJnIMiLfy6pW2AbaKduY8Qi+B7KcL6y");
280 		String secretKey = enc.decrypt("U2FsdGVkX18coWWxeff1GdsD4bWbCBuWICgftVFVZbCFH3lboRLz5fWe20alukalx94q51rATj83xEeNbVlKCg==");
281 		return new ImmutableAWSCredentials(accessKey, secretKey);
282 	}
283 
284 	private enum BuildSlavePredicate implements Predicate<JenkinsNode> {
285 		INSTANCE;
286 
287 		@Override
288 		public boolean apply(JenkinsNode node) {
289 			return !"master".equals(node.getDisplayName());
290 		}
291 
292 	}
293 
294 	/**
295 	 * Turned off the minimum age check since Jenkins always thinks the slave is online quicker than the isOnline() method from EC2Service does
296 	 */
297 	private enum BadSlavePredicate implements Predicate<JenkinsSlave> {
298 		INSTANCE;
299 
300 		@Override
301 		public boolean apply(JenkinsSlave slave) {
302 
303 			// Minimum age of 30 minutes is required before we'll act
304 			// long minAge = getMillis(getProperty("ec2.minAge", "30m"));
305 
306 			// If Amazon thinks the node is online, but Jenkins thinks the node is offline, we have issues
307 			boolean uhoh = slave.isAmazonOnline() && !slave.isJenkinsOnline();
308 
309 			// Make sure the slave was launched at least 30 minutes ago before we do anything to it
310 			// boolean oldEnough = slave.getAge() > minAge;
311 
312 			// Only return true for slaves that are at least 30 minutes old, and where Amazon thinks it is online, but Jenkins thinks it's offline
313 			return uhoh; // && oldEnough;
314 		}
315 
316 	}
317 
318 	// curl can't handle the plus symbol, it requires %20
319 	private static String urlencode(String s) {
320 		return replace(encodeUTF8(s), "+", "%20");
321 	}
322 
323 }