1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.kuali.rice.krad.uif.util;
17
18 import org.springframework.expression.spel.InternalParseException;
19 import org.springframework.expression.spel.SpelMessage;
20 import org.springframework.expression.spel.SpelParseException;
21 import org.springframework.util.Assert;
22
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.List;
26
27
28
29
30 public class Tokenizer {
31 String expressionString;
32 char[] toProcess;
33 int pos;
34 int max;
35 List<Token> tokens = new ArrayList<Token>();
36
37 protected Tokenizer(String inputdata) {
38 for (int ch = '0'; ch <= '9'; ch++) {
39 flags[ch] |= IS_DIGIT | IS_HEXDIGIT;
40 }
41 for (int ch = 'A'; ch <= 'F'; ch++) {
42 flags[ch] |= IS_HEXDIGIT;
43 }
44 for (int ch = 'a'; ch <= 'f'; ch++) {
45 flags[ch] |= IS_HEXDIGIT;
46 }
47 for (int ch = 'A'; ch <= 'Z'; ch++) {
48 flags[ch] |= IS_ALPHA;
49 }
50 for (int ch = 'a'; ch <= 'z'; ch++) {
51 flags[ch] |= IS_ALPHA;
52 }
53
54 this.expressionString = inputdata;
55 this.toProcess = (inputdata + "\0").toCharArray();
56 this.max = toProcess.length;
57 this.pos = 0;
58 process();
59 }
60
61 public void process() {
62 while (pos < max) {
63 char ch = toProcess[pos];
64 if (isAlphabetic(ch)) {
65 lexIdentifier();
66 } else {
67 switch (ch) {
68 case '+':
69 pushCharToken(TokenKind.PLUS);
70 break;
71 case '_':
72 lexIdentifier();
73 break;
74 case '-':
75 pushCharToken(TokenKind.MINUS);
76 break;
77 case ':':
78 pushCharToken(TokenKind.COLON);
79 break;
80 case '.':
81 pushCharToken(TokenKind.DOT);
82 break;
83 case ',':
84 pushCharToken(TokenKind.COMMA);
85 break;
86 case '*':
87 pushCharToken(TokenKind.STAR);
88 break;
89 case '/':
90 pushCharToken(TokenKind.DIV);
91 break;
92 case '%':
93 pushCharToken(TokenKind.MOD);
94 break;
95 case '(':
96 pushCharToken(TokenKind.LPAREN);
97 break;
98 case ')':
99 pushCharToken(TokenKind.RPAREN);
100 break;
101 case '[':
102 pushCharToken(TokenKind.LSQUARE);
103 break;
104 case '#':
105 pushCharToken(TokenKind.HASH);
106 break;
107 case ']':
108 pushCharToken(TokenKind.RSQUARE);
109 break;
110 case '{':
111 pushCharToken(TokenKind.LCURLY);
112 break;
113 case '}':
114 pushCharToken(TokenKind.RCURLY);
115 break;
116 case '@':
117 pushCharToken(TokenKind.BEAN_REF);
118 break;
119 case '^':
120 if (isTwoCharToken(TokenKind.SELECT_FIRST)) {
121 pushPairToken(TokenKind.SELECT_FIRST);
122 } else {
123 pushCharToken(TokenKind.POWER);
124 }
125 break;
126 case '!':
127 if (isTwoCharToken(TokenKind.NE)) {
128 pushPairToken(TokenKind.NE);
129 } else if (isTwoCharToken(TokenKind.PROJECT)) {
130 pushPairToken(TokenKind.PROJECT);
131 } else {
132 pushCharToken(TokenKind.NOT);
133 }
134 break;
135 case '=':
136 if (isTwoCharToken(TokenKind.EQ)) {
137 pushPairToken(TokenKind.EQ);
138 } else {
139 pushCharToken(TokenKind.ASSIGN);
140 }
141 break;
142 case '?':
143 if (isTwoCharToken(TokenKind.SELECT)) {
144 pushPairToken(TokenKind.SELECT);
145 } else if (isTwoCharToken(TokenKind.ELVIS)) {
146 pushPairToken(TokenKind.ELVIS);
147 } else if (isTwoCharToken(TokenKind.SAFE_NAVI)) {
148 pushPairToken(TokenKind.SAFE_NAVI);
149 } else {
150 pushCharToken(TokenKind.QMARK);
151 }
152 break;
153 case '$':
154 if (isTwoCharToken(TokenKind.SELECT_LAST)) {
155 pushPairToken(TokenKind.SELECT_LAST);
156 } else {
157 lexIdentifier();
158 }
159 break;
160 case '>':
161 if (isTwoCharToken(TokenKind.GE)) {
162 pushPairToken(TokenKind.GE);
163 } else {
164 pushCharToken(TokenKind.GT);
165 }
166 break;
167 case '<':
168 if (isTwoCharToken(TokenKind.LE)) {
169 pushPairToken(TokenKind.LE);
170 } else {
171 pushCharToken(TokenKind.LT);
172 }
173 break;
174 case '0':
175 case '1':
176 case '2':
177 case '3':
178 case '4':
179 case '5':
180 case '6':
181 case '7':
182 case '8':
183 case '9':
184 lexNumericLiteral(ch == '0');
185 break;
186 case ' ':
187 case '\t':
188 case '\r':
189 case '\n':
190
191 pos++;
192 break;
193 case '\'':
194 lexQuotedStringLiteral();
195 break;
196 case '"':
197 lexDoubleQuotedStringLiteral();
198 break;
199 case 0:
200
201 pos++;
202 break;
203 default:
204 throw new IllegalStateException("Cannot handle ("
205 + Integer.valueOf(ch)
206 + ") '"
207 + ch
208 + "', in expression: "
209 + expressionString);
210 }
211 }
212 }
213 }
214
215 public List<Token> getTokens() {
216 return tokens;
217 }
218
219
220 private void lexQuotedStringLiteral() {
221 int start = pos;
222 boolean terminated = false;
223 while (!terminated) {
224 pos++;
225 char ch = toProcess[pos];
226 if (ch == '\'') {
227
228 if (toProcess[pos + 1] == '\'') {
229 pos++;
230 } else {
231 terminated = true;
232 }
233 }
234 if (ch == 0) {
235 throw new InternalParseException(new SpelParseException(expressionString, start,
236 SpelMessage.NON_TERMINATING_QUOTED_STRING));
237 }
238 }
239 pos++;
240 tokens.add(new Token(TokenKind.LITERAL_STRING, subarray(start, pos), start, pos));
241 }
242
243
244 private void lexDoubleQuotedStringLiteral() {
245 int start = pos;
246 boolean terminated = false;
247 while (!terminated) {
248 pos++;
249 char ch = toProcess[pos];
250 if (ch == '"') {
251 terminated = true;
252 }
253 if (ch == 0) {
254 throw new InternalParseException(new SpelParseException(expressionString, start,
255 SpelMessage.NON_TERMINATING_DOUBLE_QUOTED_STRING));
256 }
257 }
258 pos++;
259 tokens.add(new Token(TokenKind.LITERAL_STRING, subarray(start, pos), start, pos));
260 }
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276 private void lexNumericLiteral(boolean firstCharIsZero) {
277 boolean isReal = false;
278 int start = pos;
279 char ch = toProcess[pos + 1];
280 boolean isHex = ch == 'x' || ch == 'X';
281
282
283 if (firstCharIsZero && isHex) {
284 pos = pos + 1;
285 do {
286 pos++;
287 } while (isHexadecimalDigit(toProcess[pos]));
288 if (isChar('L', 'l')) {
289 pushHexIntToken(subarray(start + 2, pos), true, start, pos);
290 pos++;
291 } else {
292 pushHexIntToken(subarray(start + 2, pos), false, start, pos);
293 }
294 return;
295 }
296
297
298
299
300 do {
301 pos++;
302 } while (isDigit(toProcess[pos]));
303
304
305 ch = toProcess[pos];
306 if (ch == '.') {
307 isReal = true;
308
309 do {
310 pos++;
311 } while (isDigit(toProcess[pos]));
312 }
313
314 int endOfNumber = pos;
315
316
317
318
319 if (isChar('L', 'l')) {
320 if (isReal) {
321 throw new InternalParseException(new SpelParseException(expressionString, start,
322 SpelMessage.REAL_CANNOT_BE_LONG));
323 }
324 pushIntToken(subarray(start, endOfNumber), true, start, endOfNumber);
325 pos++;
326 } else if (isExponentChar(toProcess[pos])) {
327 isReal = true;
328 pos++;
329 char possibleSign = toProcess[pos];
330 if (isSign(possibleSign)) {
331 pos++;
332 }
333
334
335 do {
336 pos++;
337 } while (isDigit(toProcess[pos]));
338 boolean isFloat = false;
339 if (isFloatSuffix(toProcess[pos])) {
340 isFloat = true;
341 endOfNumber = ++pos;
342 } else if (isDoubleSuffix(toProcess[pos])) {
343 endOfNumber = ++pos;
344 }
345 pushRealToken(subarray(start, pos), isFloat, start, pos);
346 } else {
347 ch = toProcess[pos];
348 boolean isFloat = false;
349 if (isFloatSuffix(ch)) {
350 isReal = true;
351 isFloat = true;
352 endOfNumber = ++pos;
353 } else if (isDoubleSuffix(ch)) {
354 isReal = true;
355 endOfNumber = ++pos;
356 }
357 if (isReal) {
358 pushRealToken(subarray(start, endOfNumber), isFloat, start, endOfNumber);
359 } else {
360 pushIntToken(subarray(start, endOfNumber), false, start, endOfNumber);
361 }
362 }
363 }
364
365
366 private String[] alternativeOperatorNames = {"DIV", "EQ", "GE", "GT", "LE", "LT", "MOD", "NE", "NOT"};
367
368 private void lexIdentifier() {
369 int start = pos;
370 do {
371 pos++;
372 } while (isIdentifier(toProcess[pos]));
373 char[] subarray = subarray(start, pos);
374
375
376 if ((pos - start) == 2 || (pos - start) == 3) {
377 String asString = new String(subarray).toUpperCase();
378 int idx = Arrays.binarySearch(alternativeOperatorNames, asString);
379 if (idx >= 0) {
380 pushOneCharOrTwoCharToken(TokenKind.valueOf(asString), start);
381 return;
382 }
383 }
384 tokens.add(new Token(TokenKind.IDENTIFIER, subarray, start, pos));
385 }
386
387 private void pushIntToken(char[] data, boolean isLong, int start, int end) {
388 if (isLong) {
389 tokens.add(new Token(TokenKind.LITERAL_LONG, data, start, end));
390 } else {
391 tokens.add(new Token(TokenKind.LITERAL_INT, data, start, end));
392 }
393 }
394
395 private void pushHexIntToken(char[] data, boolean isLong, int start, int end) {
396 if (data.length == 0) {
397 if (isLong) {
398 throw new InternalParseException(new SpelParseException(expressionString, start, SpelMessage.NOT_A_LONG,
399 expressionString.substring(start, end + 1)));
400 } else {
401 throw new InternalParseException(new SpelParseException(expressionString, start,
402 SpelMessage.NOT_AN_INTEGER, expressionString.substring(start, end)));
403 }
404 }
405 if (isLong) {
406 tokens.add(new Token(TokenKind.LITERAL_HEXLONG, data, start, end));
407 } else {
408 tokens.add(new Token(TokenKind.LITERAL_HEXINT, data, start, end));
409 }
410 }
411
412 private void pushRealToken(char[] data, boolean isFloat, int start, int end) {
413 if (isFloat) {
414 tokens.add(new Token(TokenKind.LITERAL_REAL_FLOAT, data, start, end));
415 } else {
416 tokens.add(new Token(TokenKind.LITERAL_REAL, data, start, end));
417 }
418 }
419
420 private char[] subarray(int start, int end) {
421 char[] result = new char[end - start];
422 System.arraycopy(toProcess, start, result, 0, end - start);
423 return result;
424 }
425
426
427
428
429 private boolean isTwoCharToken(TokenKind kind) {
430 Assert.isTrue(kind.tokenChars.length == 2);
431 Assert.isTrue(toProcess[pos] == kind.tokenChars[0]);
432 return toProcess[pos + 1] == kind.tokenChars[1];
433 }
434
435
436
437
438 private void pushCharToken(TokenKind kind) {
439 tokens.add(new Token(kind, pos, pos + 1));
440 pos++;
441 }
442
443
444
445
446 private void pushPairToken(TokenKind kind) {
447 tokens.add(new Token(kind, pos, pos + 2));
448 pos += 2;
449 }
450
451 private void pushOneCharOrTwoCharToken(TokenKind kind, int pos) {
452 tokens.add(new Token(kind, pos, pos + kind.getLength()));
453 }
454
455
456 private boolean isIdentifier(char ch) {
457 return isAlphabetic(ch) || isDigit(ch) || ch == '_' || ch == '$';
458 }
459
460 private boolean isChar(char a, char b) {
461 char ch = toProcess[pos];
462 return ch == a || ch == b;
463 }
464
465 private boolean isExponentChar(char ch) {
466 return ch == 'e' || ch == 'E';
467 }
468
469 private boolean isFloatSuffix(char ch) {
470 return ch == 'f' || ch == 'F';
471 }
472
473 private boolean isDoubleSuffix(char ch) {
474 return ch == 'd' || ch == 'D';
475 }
476
477 private boolean isSign(char ch) {
478 return ch == '+' || ch == '-';
479 }
480
481 private boolean isDigit(char ch) {
482 if (ch > 255) {
483 return false;
484 }
485 return (flags[ch] & IS_DIGIT) != 0;
486 }
487
488 private boolean isAlphabetic(char ch) {
489 if (ch > 255) {
490 return false;
491 }
492 return (flags[ch] & IS_ALPHA) != 0;
493 }
494
495 private boolean isHexadecimalDigit(char ch) {
496 if (ch > 255) {
497 return false;
498 }
499 return (flags[ch] & IS_HEXDIGIT) != 0;
500 }
501
502 private final byte flags[] = new byte[256];
503 private static final byte IS_DIGIT = 0x01;
504 private static final byte IS_HEXDIGIT = 0x02;
505 private static final byte IS_ALPHA = 0x04;
506
507 public class Token {
508 TokenKind kind;
509 String data;
510 int startpos;
511 int endpos;
512
513
514
515
516
517
518
519 public Token(TokenKind tokenKind, int startpos, int endpos) {
520 this.kind = tokenKind;
521 this.startpos = startpos;
522 this.endpos = endpos;
523 }
524
525 Token(TokenKind tokenKind, char[] tokenData, int pos, int endpos) {
526 this(tokenKind, pos, endpos);
527 this.data = new String(tokenData);
528 }
529
530 public TokenKind getKind() {
531 return kind;
532 }
533
534 public String toString() {
535 StringBuilder s = new StringBuilder();
536 s.append("[").append(kind.toString());
537 if (kind.hasPayload()) {
538 s.append(":").append(data);
539 }
540 s.append("]");
541 s.append("(").append(startpos).append(",").append(endpos).append(")");
542 return s.toString();
543 }
544
545 public boolean isIdentifier() {
546 return kind == TokenKind.IDENTIFIER;
547 }
548
549 public boolean isNumericRelationalOperator() {
550 return kind == TokenKind.GT
551 || kind == TokenKind.GE
552 || kind == TokenKind.LT
553 || kind == TokenKind.LE
554 || kind == TokenKind.EQ
555 || kind == TokenKind.NE;
556 }
557
558 public String stringValue() {
559 return data;
560 }
561
562 public Token asInstanceOfToken() {
563 return new Token(TokenKind.INSTANCEOF, startpos, endpos);
564 }
565
566 public Token asMatchesToken() {
567 return new Token(TokenKind.MATCHES, startpos, endpos);
568 }
569
570 public Token asBetweenToken() {
571 return new Token(TokenKind.BETWEEN, startpos, endpos);
572 }
573 }
574
/**
 * The kinds of token the tokenizer can produce.
 *
 * <p>A kind declared with a string has fixed text (for example {@code "=="} for
 * {@code EQ}). A kind declared without one has empty fixed text and is reported as having a
 * payload, meaning the text is carried by the individual token instead.</p>
 */
public enum TokenKind {

    // literals: text is carried by the token
    LITERAL_INT,
    LITERAL_LONG,
    LITERAL_HEXINT,
    LITERAL_HEXLONG,
    LITERAL_STRING,
    LITERAL_REAL,
    LITERAL_REAL_FLOAT,

    LPAREN("("),
    RPAREN(")"),
    COMMA(","),
    IDENTIFIER,

    COLON(":"),
    HASH("#"),
    RSQUARE("]"),
    LSQUARE("["),

    LCURLY("{"),
    RCURLY("}"),

    DOT("."),
    PLUS("+"),
    STAR("*"),
    DIV("/"),
    NOT("!"),
    MINUS("-"),
    SELECT_FIRST("^["),
    SELECT_LAST("$["),
    QMARK("?"),
    PROJECT("!["),

    GE(">="),
    GT(">"),
    LE("<="),
    LT("<"),
    EQ("=="),
    NE("!="),
    ASSIGN("="),
    INSTANCEOF("instanceof"),
    MATCHES("matches"),
    BETWEEN("between"),

    SELECT("?["),
    MOD("%"),
    POWER("^"),

    ELVIS("?:"),
    SAFE_NAVI("?."),
    BEAN_REF("@");

    /** The fixed text of this kind; empty for kinds whose text is carried by the token. */
    char[] tokenChars;

    /** {@code true} exactly when {@link #tokenChars} is empty. */
    private boolean hasPayload;

    /**
     * Declares a kind with fixed text.
     *
     * @param tokenString the fixed text, or the empty string for payload-carrying kinds
     */
    TokenKind(String tokenString) {
        this.tokenChars = tokenString.toCharArray();
        this.hasPayload = this.tokenChars.length == 0;
    }

    /**
     * Declares a payload-carrying kind, i.e. one with empty fixed text.
     */
    TokenKind() {
        this("");
    }

    /**
     * Renders the constant name, followed by the fixed text in parentheses when there is any.
     */
    @Override
    public String toString() {
        if (tokenChars.length == 0) {
            return name();
        }
        return name() + "(" + String.valueOf(tokenChars) + ")";
    }

    /**
     * @return {@code true} if this kind has no fixed text
     */
    public boolean hasPayload() {
        return this.hasPayload;
    }

    /**
     * @return the number of characters in this kind's fixed text (0 for payload kinds)
     */
    public int getLength() {
        return this.tokenChars.length;
    }
}
612 }