001    package org.apache.torque.engine.sql;
002    
003    /*
004     * Licensed to the Apache Software Foundation (ASF) under one
005     * or more contributor license agreements.  See the NOTICE file
006     * distributed with this work for additional information
007     * regarding copyright ownership.  The ASF licenses this file
008     * to you under the Apache License, Version 2.0 (the
009     * "License"); you may not use this file except in compliance
010     * with the License.  You may obtain a copy of the License at
011     *
012     *   http://www.apache.org/licenses/LICENSE-2.0
013     *
014     * Unless required by applicable law or agreed to in writing,
015     * software distributed under the License is distributed on an
016     * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017     * KIND, either express or implied.  See the License for the
018     * specific language governing permissions and limitations
019     * under the License.
020     */
021    
022    import java.io.IOException;
023    import java.io.Reader;
024    import java.util.List;
025    import java.util.ArrayList;
026    
027    /**
028     * A simple Scanner implementation that scans an
029     * sql file into usable tokens.  Used by SQLToAppData.
030     *
031     * @author <a href="mailto:leon@opticode.co.za">Leon Messerschmidt</a>
032     * @author <a href="mailto:jon@latchkey.com">Jon S. Stevens</a>
033     * @author <a href="mailto:andyhot@di.uoa.gr">Andreas Andreou</a>
034     * @version $Id: SQLScanner.java,v 1.1 2007-10-21 07:57:27 abyrne Exp $
035     */
036    public class SQLScanner
037    {
038        /** white spaces */
039        private static final String WHITE = "\f\r\t\n ";
040        /** alphabetic characters */
041        private static final String ALFA
042                = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
043        /** numbers */
044        private static final String NUMER = "0123456789";
045        /** alphanumeric */
046        private static final String ALFANUM = ALFA + NUMER;
047        /** special characters */
048        private static final String SPECIAL = ";(),'";
049        /** comment */
050        private static final char COMMENT_POUND = '#';
051        /** comment */
052        private static final char COMMENT_SLASH = '/';
053        /** comment */
054        private static final char COMMENT_STAR = '*';
055        /** comment */
056        private static final char COMMENT_DASH = '-';
057    
058        /** the input reader */
059        private Reader in;
060        /** character */
061        private int chr;
062        /** token */
063        private String token;
064        /** list of tokens */
065        private List tokens;
066        /** line */
067        private int line;
068        /** column */
069        private int col;
070    
071        /**
072         * Creates a new scanner with no Reader
073         */
074        public SQLScanner()
075        {
076            this(null);
077        }
078    
079        /**
080         * Creates a new scanner with an Input Reader
081         *
082         * @param input the input reader
083         */
084        public SQLScanner(Reader input)
085        {
086            setInput(input);
087        }
088    
089        /**
090         * Set the Input
091         *
092         * @param input the input reader
093         */
094        public void setInput(Reader input)
095        {
096            in = input;
097        }
098    
099    
100        /**
101         * Reads the next character and increments the line and column counters.
102         *
103         * @throws IOException If an I/O error occurs
104         */
105        private void readChar() throws IOException
106        {
107            boolean wasLine = (char) chr == '\r';
108            chr = in.read();
109            if ((char) chr == '\n' || (char) chr == '\r' || (char) chr == '\f')
110            {
111                col = 0;
112                if (!wasLine || (char) chr != '\n')
113                {
114                    line++;
115                }
116            }
117            else
118            {
119                col++;
120            }
121        }
122    
123        /**
124         * Scans an identifier.
125         *
126         * @throws IOException If an I/O error occurs
127         */
128        private void scanIdentifier () throws IOException
129        {
130            token = "";
131            char c = (char) chr;
132            while (chr != -1 && WHITE.indexOf(c) == -1 && SPECIAL.indexOf(c) == -1)
133            {
134                token = token + (char) chr;
135                readChar();
136                c = (char) chr;
137            }
138            int start = col - token.length();
139            tokens.add(new Token(token, line, start));
140        }
141    
142        /**
143         * Scans an identifier which had started with the negative sign.
144         *
145         * @throws IOException If an I/O error occurs
146         */
147        private void scanNegativeIdentifier () throws IOException
148        {
149            token = "-";
150            char c = (char) chr;
151            while (chr != -1 && WHITE.indexOf(c) == -1 && SPECIAL.indexOf(c) == -1)
152            {
153                token = token + (char) chr;
154                readChar();
155                c = (char) chr;
156            }
157            int start = col - token.length();
158            tokens.add(new Token(token, line, start));
159        }
160    
161        /**
162         * Scan the input Reader and returns a list of tokens.
163         *
164         * @return a list of tokens
165         * @throws IOException If an I/O error occurs
166         */
167        public List scan () throws IOException
168        {
169            line = 1;
170            col = 0;
171            boolean inComment = false;
172            boolean inCommentSlashStar = false;
173            boolean inCommentDash = false;
174    
175            boolean inNegative;
176    
177            tokens = new ArrayList();
178            readChar();
179            while (chr != -1)
180            {
181                char c = (char) chr;
182                inNegative = false;
183    
184                if (c == COMMENT_DASH)
185                {
186                    readChar();
187                    if ((char) chr == COMMENT_DASH)
188                    {
189                        inCommentDash = true;
190                    }
191                    else
192                    {
193                        inNegative = true;
194                        c = (char) chr;
195                    }
196                }
197    
198                if (inCommentDash)
199                {
200                    if (c == '\n' || c == '\r')
201                    {
202                        inCommentDash = false;
203                    }
204                    readChar();
205                }
206                else if (c == COMMENT_POUND)
207                {
208                    inComment = true;
209                    readChar();
210                }
211                else if (c == COMMENT_SLASH)
212                {
213                    readChar();
214                    if ((char) chr == COMMENT_STAR)
215                    {
216                        inCommentSlashStar = true;
217                    }
218                }
219                else if (inComment || inCommentSlashStar)
220                {
221                    if (c == '*')
222                    {
223                        readChar();
224                        if ((char) chr == COMMENT_SLASH)
225                        {
226                            inCommentSlashStar = false;
227                        }
228                    }
229                    else if (c == '\n' || c == '\r')
230                    {
231                        inComment = false;
232                    }
233                    readChar();
234                }
235                else if (ALFANUM.indexOf(c) >= 0)
236                {
237                    if (inNegative)
238                    {
239                        scanNegativeIdentifier();
240                    }
241                    else
242                    {
243                        scanIdentifier();
244                    }
245                }
246                else if (SPECIAL.indexOf(c) >= 0)
247                {
248                    tokens.add(new Token("" + c, line, col));
249                    readChar();
250                }
251                else
252                {
253                    readChar();
254                }
255            }
256            return tokens;
257        }
258    }