Skip to content

Basic SQL Parsing

This tutorial covers the essentials of parsing various SQL statements and extracting key information using General SQL Parser.

🎯 Objectives

  • Parse SELECT, INSERT, UPDATE, DELETE statements
  • Extract table names, column names, and conditions
  • Work with complex queries involving JOINs and subqueries
  • Handle different SQL statement types
  • Apply best practices for SQL parsing

📋 Prerequisites

⏱️ Time Required

Approximately 45 minutes

📝 Step-by-Step Guide

1. Parsing SELECT Statements

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
// com.example.gsptutorial.BasicParsing
import gudusoft.gsqlparser.TGSqlParser;
import gudusoft.gsqlparser.EDbVendor;
import gudusoft.gsqlparser.stmt.TSelectSqlStatement;
import gudusoft.gsqlparser.nodes.*;
import gudusoft.gsqlparser.ETableSource;

public class BasicSelectParsing {
    public static void main(String[] args) {
        TGSqlParser parser = new TGSqlParser(EDbVendor.dbvoracle);
        parser.setSqltext("SELECT e.name, d.department_name " +
                         "FROM employees e " +
                         "INNER JOIN departments d ON e.dept_id = d.id " +
                         "WHERE e.salary > 50000 AND d.location = 'New York'");

        if (parser.parse() == 0) {
            TSelectSqlStatement selectStmt = (TSelectSqlStatement) parser.getSqlstatements().get(0);

            // Extract columns from SELECT list
            System.out.println("=== SELECT Columns ===");
            TResultColumnList columns = selectStmt.getResultColumnList();
            for (int i = 0; i < columns.size(); i++) {
                TResultColumn column = columns.getResultColumn(i);
                System.out.println("  - " + column.getExpr().toString());

                // Show alias if present
                if (column.getAliasClause() != null) {
                    System.out.println("    AS " + column.getAliasClause().toString());
                }
            }

            // Extract tables from FROM clause
            System.out.println("\n=== FROM Clause Tables ===");
            TFromClause fromClause = selectStmt.getFromClause();
            if (fromClause != null) {
                for (TTable table : fromClause.getRelations()) {
                    System.out.println("  - " + table.getFullName());

                    // Show table alias
                    if (table.getAliasClause() != null) {
                        System.out.println("    Alias: " + table.getAliasName());
                    }

                    // Show table type
                    System.out.println("    Type: " + table.getTableType());

                    // Handle JOIN tables
                    if (table.getTableType() == ETableSource.join && table.getJoinExpr() != null) {
                        analyzeJoinExpression(table.getJoinExpr());
                    }
                }
            }

            // Extract WHERE clause conditions
            System.out.println("\n=== WHERE Clause ===");
            TWhereClause whereClause = selectStmt.getWhereClause();
            if (whereClause != null) {
                System.out.println("  " + whereClause.getCondition().toString());
            } else {
                System.out.println("  No WHERE clause");
            }
        } else {
            System.err.println("Parse failed: " + parser.getErrormessage());
        }
    }

    // Helper method to analyze JOIN expressions
    private static void analyzeJoinExpression(TJoinExpr joinExpr) {
        System.out.println("    JOIN Details:");
        System.out.println("      Type: " + joinExpr.getJointype());

        if (joinExpr.getLeftTable() != null) {
            System.out.println("      Left: " + joinExpr.getLeftTable().getFullName());
        }
        if (joinExpr.getRightTable() != null) {
            System.out.println("      Right: " + joinExpr.getRightTable().getFullName());
        }
        if (joinExpr.getOnCondition() != null) {
            System.out.println("      ON: " + joinExpr.getOnCondition().toString());
        }
        if (joinExpr.getUsingColumns() != null) {
            System.out.println("      USING: " + joinExpr.getUsingColumns().toString());
        }
    }
}

2. Advanced JOIN Analysis

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
// com.example.gsptutorial.JoinAnalysis
import gudusoft.gsqlparser.TGSqlParser;
import gudusoft.gsqlparser.EDbVendor;
import gudusoft.gsqlparser.stmt.TSelectSqlStatement;
import gudusoft.gsqlparser.nodes.*;
import gudusoft.gsqlparser.ETableSource;
import java.util.*;

public class JoinAnalysis {
    public static void main(String[] args) {
        TGSqlParser parser = new TGSqlParser(EDbVendor.dbvoracle);
        parser.setSqltext("SELECT u.name, o.total, p.product_name, a.address, c.country_name " +
                         "FROM users u " +
                         "LEFT JOIN (orders o INNER JOIN order_items oi ON o.id = oi.order_id) ON u.id = o.user_id " +
                         "INNER JOIN products p ON oi.product_id = p.id " +
                         "RIGHT JOIN (addresses a LEFT JOIN countries c ON a.country_code = c.code) ON u.id = a.user_id");

        if (parser.parse() == 0) {
            TSelectSqlStatement selectStmt = (TSelectSqlStatement) parser.getSqlstatements().get(0);

            System.out.println("=== Complex JOIN Analysis ===");
            analyzeFromClause(selectStmt.getFromClause());
        }
    }

    /**
     * JDK 1.8 compatible string repeat method.
     */
    private static String repeatString(String str, int times) {
        if (times <= 0) return "";
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < times; i++) {
            sb.append(str);
        }
        return sb.toString();
    }

    public static void analyzeFromClause(TFromClause fromClause) {
        if (fromClause == null) {
            System.out.println("No FROM clause");
            return;
        }

        System.out.println("Processing FROM clause relations:");
        for (TTable table : fromClause.getRelations()) {
            System.out.println("  Top-level relation: " + getTableName(table) + 
                              " (Type: " + table.getTableType() + ")");

            if (table.getTableType() == ETableSource.join && table.getJoinExpr() != null) {
                System.out.println("\n  --- Starting JOIN Analysis for a Root Join Expression ---");
                // Build the tree from the root join expression
                JoinInfo rootJoin = extractJoinDetailsRecursively(table.getJoinExpr(), 0);
                List<JoinInfo> rootJoinsList = new ArrayList<>();
                if (rootJoin != null) {
                    rootJoinsList.add(rootJoin);
                }
                printJoinSummary(rootJoinsList); // printJoinSummary now expects a list of root JoinInfo objects
                System.out.println("  --- End JOIN Analysis for this Root Join Expression ---\n");
            } else if (table.getTableType() == ETableSource.subquery && table.getSubquery() != null) {
                System.out.println("    Contains a subquery. Analyzing subquery's FROM clause:");
                analyzeFromClause(table.getSubquery().getFromClause()); // Recursive call for subqueries
            }
        }
    }

    /**
     * Recursively extracts and analyzes join information from a TJoinExpr.
     * This version attempts to build a tree structure.
     */
    private static JoinInfo extractJoinDetailsRecursively(TJoinExpr currentJoinExpr, int depth) {
        if (currentJoinExpr == null) {
            return null;
        }

        JoinInfo currentJoinInfo = new JoinInfo();
        currentJoinInfo.joinType = currentJoinExpr.getJointype().toString();
        currentJoinInfo.nestingLevel = depth;
        currentJoinInfo.condition = (currentJoinExpr.getOnCondition() != null) 
                                   ? currentJoinExpr.getOnCondition().toString() 
                                   : (currentJoinExpr.getUsingColumns() != null 
                                      ? "USING(" + currentJoinExpr.getUsingColumns().toString() + ")" 
                                      : "NATURAL");

        TTable leftTableNode = currentJoinExpr.getLeftTable();
        if (leftTableNode != null) {
            currentJoinInfo.leftTable = getJoinParticipantName(leftTableNode); // Use descriptive name
            if (leftTableNode.getTableType() == ETableSource.join && leftTableNode.getJoinExpr() != null) {
                // Overwrite leftTable with a placeholder if it's a join, the detail is in leftNestedJoin
                 currentJoinInfo.leftTable = "[Nested JOIN Structure]";
                currentJoinInfo.leftNestedJoin = extractJoinDetailsRecursively(leftTableNode.getJoinExpr(), depth + 1);
            }
        } else {
            currentJoinInfo.leftTable = "[ERROR: Left Table Missing]";
        }

        TTable rightTableNode = currentJoinExpr.getRightTable();
        if (rightTableNode != null) {
            currentJoinInfo.rightTable = getJoinParticipantName(rightTableNode); // Use descriptive name
            if (rightTableNode.getTableType() == ETableSource.join && rightTableNode.getJoinExpr() != null) {
                // Overwrite rightTable with a placeholder if it's a join
                 currentJoinInfo.rightTable = "[Nested JOIN Structure]";
                currentJoinInfo.rightNestedJoin = extractJoinDetailsRecursively(rightTableNode.getJoinExpr(), depth + 1);
            }
        } else {
            currentJoinInfo.rightTable = "[ERROR: Right Table Missing]";
        }
        return currentJoinInfo;
    }

    // Helper class to store join information
    static class JoinInfo {
        String joinType;
        String leftTable; // Description of the left table/join
        String rightTable; // Description of the right table/join
        String condition;
        int nestingLevel;

        JoinInfo leftNestedJoin;  // Holds the JoinInfo if the left operand is a join
        JoinInfo rightNestedJoin; // Holds the JoinInfo if the right operand is a join

        public JoinInfo() {
            // Fields will be set by the parser
        }
    }

    private static void printJoinSummary(List<JoinInfo> topLevelJoins) {
        System.out.println("    Detailed JOIN Operations (AST Structure):");
        if (topLevelJoins.isEmpty()) {
            System.out.println("      No explicit JOIN operations found in this branch.");
            return;
        }
        for (JoinInfo join : topLevelJoins) {
            // Assuming the list contains only root joins of distinct join trees
             printJoinTree(join, "", true); // Start with isLast = true for the root
        }

        // Optional: Analyze join type distribution
        Map<String, Integer> joinTypeCounts = new HashMap<>();
        List<JoinInfo> allJoinsForCounts = new ArrayList<>();
        collectAllJoins(topLevelJoins, allJoinsForCounts);

        for (JoinInfo join : allJoinsForCounts) {
            joinTypeCounts.put(join.joinType, joinTypeCounts.getOrDefault(join.joinType, 0) + 1);
        }

        System.out.println("\\n    Join type distribution for this branch:");
        for (Map.Entry<String, Integer> entry : joinTypeCounts.entrySet()) {
            System.out.println("      " + entry.getKey() + ": " + entry.getValue());
        }
    }

    // Helper to collect all joins for stats, traversing the new structure
    private static void collectAllJoins(List<JoinInfo> currentLevelJoins, List<JoinInfo> collectedJoins) {
        for (JoinInfo join : currentLevelJoins) {
            collectedJoins.add(join);
            if (join.leftNestedJoin != null) {
                collectAllJoins(Collections.singletonList(join.leftNestedJoin), collectedJoins);
            }
            if (join.rightNestedJoin != null) {
                collectAllJoins(Collections.singletonList(join.rightNestedJoin), collectedJoins);
            }
        }
    }

    private static String getJoinParticipantName(TTable table) {
        if (table == null) return "[NULL_TABLE]";
        if (table.getTableType() == ETableSource.subquery && table.getSubquery() != null) {
            return "(Subquery" + (table.getAliasClause() != null ? " AS " + table.getAliasClause().getAliasName() : "") + ")";
        }
        return getTableName(table); // Uses the existing getTableName for actual tables
    }

    private static void printJoinTree(JoinInfo currentJoin, String indent, boolean isLastChildOfParent) {
        // Top line of the TJoinExpr: "TJoinExpr (TYPE with right_table_name)"
        // The "with X" part in the example seems to refer to the rightmost table in the *entire* join expression,
        // or the right table of the *current* join type. For simplicity and consistency with typical ASTs,
        // let's use the immediate right table of the current TJoinExpr.
        String rightParticipantName = currentJoin.rightNestedJoin != null ? 
                                      "nested join structure" : 
                                      currentJoin.rightTable;
        // A slight modification to match the screenshot's "TJoinExpr (RIGHT JOIN with customers)"
        // Here, "customers" is the rightmost table of that specific TJoinExpr.
        System.out.println(indent + "TJoinExpr (" + currentJoin.joinType + " with " + currentJoin.rightTable.replaceAll("\\sAS\\s.*", "") + ")");

        String childIndent = indent + (isLastChildOfParent ? "   " : "| ");
        String connector = "|-";
        String lastConnector = "|_ ";

        // 1. jointype
        System.out.println(indent + connector + "jointype: " + currentJoin.joinType);

        // 2. leftOperand
        if (currentJoin.leftNestedJoin != null) {
            System.out.println(indent + connector + "leftOperand: TFromTable (nested join structure)");
            // The nested joinExpr should be indented further and connected from leftOperand
            printJoinTree(currentJoin.leftNestedJoin, childIndent + "  ", false); // Pass childIndent, isLast is false
        } else {
            System.out.println(indent + connector + "leftTable: TTable (" + currentJoin.leftTable + ")");
        }

        // 3. rightOperand
        // The AST shows rightTable (if not a join) or rightOperand (if it is a join)
        // then onCondition.
        // If the right side is a nested join, it's shown similar to the left.
        if (currentJoin.rightNestedJoin != null) {
             System.out.println(indent + connector + "rightOperand: TFromTable (nested join structure)");
             printJoinTree(currentJoin.rightNestedJoin, childIndent + "  ", true); // isLast for condition
        } else {
            System.out.println(indent + connector + "rightTable: TTable (" + currentJoin.rightTable + ")");
        }

        // 4. onCondition (always last for this TJoinExpr)
        System.out.println(indent + lastConnector + "onCondition: TExpression (" + currentJoin.condition + ")");
    }

    // Helper method to safely get table names, including aliases
    private static String getTableName(TTable table) {
        if (table == null) return "[NULL_TABLE_REFERENCE]";

        String name;
        if (table.getTableType() == ETableSource.subquery) {
            name = "(Subquery)";
        } else if (table.getTableType() == ETableSource.join) {
            // This case should ideally be handled by the recursive call structure
            // but if getTableName is called on a TTable that is a join source directly,
            // we indicate it's a join expression.
            name = "[JOIN EXPRESSION: " + table.getJoinExpr().getJointype() + "]";
        } else {
            name = table.getFullName();
        }

        if (table.getAliasClause() != null && table.getAliasClause().getAliasName() != null) {
            name += " AS " + table.getAliasClause().getAliasName().toString();
        }
        return name;
    }
}

3. Parsing INSERT Statements

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
// com.example.gsptutorial.InsertParsing
import gudusoft.gsqlparser.TGSqlParser;
import gudusoft.gsqlparser.EDbVendor;
import gudusoft.gsqlparser.stmt.TInsertSqlStatement;
import gudusoft.gsqlparser.nodes.*;
import gudusoft.gsqlparser.EInsertSource;

public class InsertParsing {
    public static void main(String[] args) {
        TGSqlParser parser = new TGSqlParser(EDbVendor.dbvoracle);
        parser.setSqltext("INSERT INTO customers (id, name, email, registration_date) " +
                         "VALUES (1, 'John Doe', 'john.doe@example.com', SYSDATE)");

        if (parser.parse() == 0) {
            TInsertSqlStatement insertStmt = (TInsertSqlStatement) parser.getSqlstatements().get(0);

            System.out.println("=== INSERT Statement Analysis ===");

            // Target table
            System.out.println("Target Table: " + insertStmt.getTargetTable().getFullName());

            // Column list
            System.out.println("\nColumns:");
            TObjectNameList columns = insertStmt.getColumnList();
            if (columns != null) {
                for (int i = 0; i < columns.size(); i++) {
                    System.out.println("  " + (i + 1) + ": " + columns.getObjectName(i).toString());
                }
            } else {
                System.out.println("  All columns (no explicit column list)");
            }

            // Values
            System.out.println("\nValues:");
            if (insertStmt.getInsertSource() == EInsertSource.values && insertStmt.getValues() != null) {
            TMultiTargetList values = insertStmt.getValues();
                for (int i = 0; i < values.size(); i++) {
                    TMultiTarget row = values.getMultiTarget(i);
                    System.out.println("  Row " + (i + 1) + ":");

                    if (row.getColumnList() != null) {
                        for (int j = 0; j < row.getColumnList().size(); j++) {
                            TResultColumn col = row.getColumnList().getResultColumn(j);
                            System.out.println("    " + (j + 1) + ": " + col.getExpr().toString());
                        }
                    }
                }
            } else if (insertStmt.getInsertSource() == EInsertSource.subquery) {
                System.out.println("  Data source: Subquery");
                System.out.println("  Query: " + insertStmt.getSubQuery().toString());
            }
        } else {
            System.err.println("Parse failed: " + parser.getErrormessage());
        }
    }
}

4. Parsing UPDATE Statements

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
// com.example.gsptutorial.UpdateParsing
import gudusoft.gsqlparser.TGSqlParser;
import gudusoft.gsqlparser.EDbVendor;
import gudusoft.gsqlparser.stmt.TUpdateSqlStatement;
import gudusoft.gsqlparser.nodes.*;

public class UpdateParsing {
    public static void main(String[] args) {
        TGSqlParser parser = new TGSqlParser(EDbVendor.dbvoracle);
        parser.setSqltext("UPDATE employees " +
                         "SET salary = salary * 1.1, last_updated = SYSDATE " +
                         "WHERE department = 'Engineering' AND hire_date < '2020-01-01'");

        if (parser.parse() == 0) {
            TUpdateSqlStatement updateStmt = (TUpdateSqlStatement) parser.getSqlstatements().get(0);

            System.out.println("=== UPDATE Statement Analysis ===");

            // Target table
            System.out.println("Target Table: " + updateStmt.getTargetTable().getFullName());

            // SET clause
            System.out.println("\nSET Assignments:");
            TResultColumnList setClause = updateStmt.getResultColumnList();
            if (setClause != null) {
                for (int i = 0; i < setClause.size(); i++) {
                    TResultColumn assignment = setClause.getResultColumn(i);
                    System.out.println("  " + assignment.toString());
                }
            }

            // WHERE clause
            System.out.println("\nWHERE Condition:");
            TWhereClause whereClause = updateStmt.getWhereClause();
            if (whereClause != null) {
                System.out.println("  " + whereClause.getCondition().toString());
            } else {
                System.out.println("  WARNING: No WHERE clause - all rows will be updated!");
            }

            // FROM clause (for multi-table updates)
            if (updateStmt.getJoins() != null && updateStmt.getJoins().size() > 0) {
                System.out.println("\nAdditional Tables (FROM clause):");
                // Note: This would need specific handling based on database vendor
                System.out.println("  Multi-table update detected");
            }
        } else {
            System.err.println("Parse failed: " + parser.getErrormessage());
        }
    }
}

5. Error Handling and Best Practices

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
// com.example.gsptutorial.ErrorHandling
import gudusoft.gsqlparser.TGSqlParser;
import gudusoft.gsqlparser.EDbVendor;
import gudusoft.gsqlparser.TSyntaxError;

public class ErrorHandling {
    public static void main(String[] args) {
        TGSqlParser parser = new TGSqlParser(EDbVendor.dbvoracle);

        // Intentionally malformed SQL for demonstration
        parser.setSqltext("SELECT name, email FROM users WHERE invalid syntax here");

        int parseResult = parser.parse();
        if (parseResult != 0) {
            System.out.println("=== Parse Errors ===");
            System.out.println("Total errors: " + parser.getErrorCount());

            for (TSyntaxError error : parser.getSyntaxErrors()) {
                System.out.printf("Line %d, Column %d: %s%n",
                    error.getLineNo(),
                    error.getColumnNo(),
                    error.getErrorMessage()
                );
            }
        } else {
            System.out.println("Parse successful!");
            // Process the parsed statements...
        }
    }

    // Best practice: Always handle parsing errors
    public static boolean safeParse(TGSqlParser parser, String sql) {
        parser.setSqltext(sql);

        try {
            if (parser.parse() == 0) {
                return true;
            } else {
                System.err.println("SQL Parse Error in: " + sql);
                for (TSyntaxError error : parser.getSyntaxErrors()) {
                    System.err.printf("  Line %d: %s%n", 
                        error.getLineNo(), 
                        error.getErrorMessage());
                }
                return false;
            }
        } catch (Exception e) {
            System.err.println("Unexpected error parsing SQL: " + e.getMessage());
            return false;
        }
    }
}

💡 Key Takeaways

API Best Practices

  1. Use proper accessor methods: Always use getFromClause().getRelations() instead of deprecated direct field access
  2. Handle different table types: Check table.getTableType() to handle various table sources correctly
  3. JOIN analysis: Use table.getJoinExpr() for tables with type ETableSource.join
  4. Error handling: Always check parse result and handle syntax errors gracefully
  5. Resource management: Use appropriate cleanup for large parsing operations

Common Patterns

  • FROM clause iteration: fromClause.getRelations() returns ArrayList<TTable>
  • JOIN detection: Check table.getTableType() == ETableSource.join
  • Alias handling: Use table.getAliasClause() and table.getAliasName()
  • Subquery extraction: Handle ETableSource.subquery type tables

✅ Summary

In this tutorial, you learned how to:

  • Parse various SQL statement types using the current GSP API
  • Extract table information from FROM clauses using proper accessor methods
  • Analyze complex JOIN structures with TJoinExpr
  • Handle different table types (base tables, subqueries, functions, joins)
  • Extract columns, conditions, and values from SQL statements
  • Implement proper error handling and best practices

🔗 What's Next?