package gudusoft.gsqlparser.nodes;


import java.util.ArrayList;

/**
 * Represents indirection elements in SQL expressions, supporting array access, qualified name
 * syntax and dot-notation function calls.
 * This class handles the distinct syntactic patterns found in PostgreSQL and other SQL dialects.
 *
 * <h3>Array Access Syntax (ARRAY_ACCESS)</h3>
 * Used for accessing array elements and slicing operations:
 * <ul>
 *   <li><code>array_column[5]</code> - Single element access</li>
 *   <li><code>array_column[2:4]</code> - Array slice from index 2 to 4</li>
 *   <li><code>array_column[:3]</code> - Array slice from start to index 3</li>
 *   <li><code>array_column[2:]</code> - Array slice from index 2 to end</li>
 *   <li><code>array_column[:]</code> - Full array slice</li>
 * </ul>
 *
 * <h3>Qualified Name Syntax (QUALIFIED_NAME)</h3>
 * Used for accessing object fields and properties:
 * <ul>
 *   <li><code>json_column.field_name</code> - Access specific field</li>
 *   <li><code>record_column.*</code> - Access all fields</li>
 *   <li><code>composite_type.attribute</code> - Access composite type attribute</li>
 * </ul>
 *
 * <h3>Function Call Syntax (FUNCTION_CALL)</h3>
 * Used for function calls written with dot notation:
 * <ul>
 *   <li><code>.func()</code> - Call without arguments</li>
 *   <li><code>.func(args)</code> - Call with arguments, see {@link #getFunctionArgs()}</li>
 * </ul>
 * This type is never inferred; it is only reported when it was set explicitly through
 * {@link #setIndicesType(IndicesType)}.
 *
 * <h3>Usage Examples</h3>
 * <pre>{@code
 * // Example SQL: SELECT users[1].name FROM user_array;
 * // This creates two TIndices:
 * // 1. TIndices for [1] with type ARRAY_ACCESS
 * // 2. TIndices for .name with type QUALIFIED_NAME
 *
 * TIndices arrayAccess = ...; // represents [1]
 * if (arrayAccess.isArrayAccess()) {
 *     TExpression index = arrayAccess.getLowerSubscript(); // gets "1"
 *     // Process array index...
 * }
 *
 * TIndices fieldAccess = ...; // represents .name
 * if (fieldAccess.isQualifiedName()) {
 *     TObjectName fieldName = fieldAccess.getAttributeName(); // gets "name"
 *     // Process field access...
 * }
 * }</pre>
 *
 * <h3>SQL Examples by Database</h3>
 * <table border="1">
 * <tr><th>Database</th><th>Array Access</th><th>Field Access</th></tr>
 * <tr><td>PostgreSQL</td><td><code>arr[1], arr[1:3]</code></td><td><code>json_col.field, record.*</code></td></tr>
 * <tr><td>Databricks</td><td><code>array_col[0]</code></td><td><code>struct_col.attr, json.key</code></td></tr>
 * <tr><td>Snowflake</td><td><code>array_col[0]</code></td><td><code>variant_col.field, object.*</code></td></tr>
 * </table>
 *
 * <h3>Type Safety and Backward Compatibility</h3>
 * This class maintains backward compatibility with existing code while providing type-safe access:
 * <ul>
 *   <li>{@link #isRealIndices()} - Legacy method, still works as before</li>
 *   <li>{@link #isArrayAccess()} - New type-safe method for array access</li>
 *   <li>{@link #isQualifiedName()} - New type-safe method for field access</li>
 *   <li>{@link #isFunctionCall()} - Type-safe method for dot-notation function calls</li>
 *   <li>{@link #getIndicesType()} - Get explicit type information</li>
 * </ul>
 *
 * <h3>Complete Usage Example</h3>
 * <pre>{@code
 * // Processing SQL: SELECT data[1:3].users.name FROM complex_table;
 * // This would create a chain of TIndices in TIndirection
 *
 * public void processIndirection(TIndirection indirection) {
 *     for (TIndices indices : indirection.getIndices()) {
 *         switch (indices.getIndicesType()) {
 *             case ARRAY_ACCESS:
 *                 System.out.println("Array access detected:");
 *                 TExpression lower = indices.getLowerSubscript();
 *                 TExpression upper = indices.getUpperSubscript();
 *
 *                 if (lower != null && upper != null) {
 *                     System.out.println("  Slice: [" + lower + ":" + upper + "]");
 *                 } else if (lower != null) {
 *                     System.out.println("  Single element: [" + lower + "]");
 *                 }
 *                 break;
 *
 *             case QUALIFIED_NAME:
 *                 System.out.println("Field access detected:");
 *                 TObjectName field = indices.getAttributeName();
 *                 System.out.println("  Field: ." + field.toString());
 *                 break;
 *
 *             case FUNCTION_CALL:
 *                 System.out.println("Function call detected:");
 *                 TExpressionList args = indices.getFunctionArgs(); // null unless set
 *                 break;
 *         }
 *     }
 * }
 *
 * // Legacy compatibility - existing code still works
 * if (indices.isRealIndices()) {
 *     // Handle as array access (legacy approach)
 * } else {
 *     // Handle as field access (legacy approach)
 * }
 * }</pre>
 *
 * @see TIndirection
 * @see TExpression
 * @see TObjectName
 * @since PostgreSQL parser support
 */

public class TIndices extends TParseTreeNode {

    /**
     * Enum to distinguish between array access syntax, qualified name syntax and
     * dot-notation function calls.
     *
     * <p><strong>ARRAY_ACCESS</strong> - Represents bracket notation for array/list operations:
     * <ul>
     *   <li><code>[5]</code> - Single element at index 5</li>
     *   <li><code>[2:4]</code> - Slice from index 2 to 4 (inclusive)</li>
     *   <li><code>[:3]</code> - Slice from start to index 3</li>
     *   <li><code>[2:]</code> - Slice from index 2 to end</li>
     *   <li><code>[:]</code> - Full array/list slice</li>
     * </ul>
     *
     * <p><strong>QUALIFIED_NAME</strong> - Represents dot notation for field/property access:
     * <ul>
     *   <li><code>.field_name</code> - Access specific field or property</li>
     *   <li><code>.*</code> - Access all fields (wildcard expansion)</li>
     * </ul>
     *
     * <p><strong>FUNCTION_CALL</strong> - Represents a function call via dot notation:
     * <ul>
     *   <li><code>.func()</code> - Call without arguments</li>
     *   <li><code>.func(args)</code> - Call with arguments</li>
     * </ul>
     *
     * @since 1.0
     */
    public enum IndicesType {
        /** Array/list access using bracket notation: [expr], [expr:expr], etc. */
        ARRAY_ACCESS,
        /** Field/property access using dot notation: .field, .* */
        QUALIFIED_NAME,
        /** Function call via dot notation: .func(), .func(args) */
        FUNCTION_CALL
    }

    /** Lower bound (or single index) of a bracket access; null when absent. */
    private TExpression lowerSubscript;

    /** Upper bound of a slice; null when absent. */
    private TExpression upperSubscript;

    /** Field name for dot notation; null for array access. */
    private TObjectName attributeName;

    /** Explicit type set via {@link #setIndicesType(IndicesType)}; null means "infer from legacy fields". */
    private IndicesType indicesType;

    /** Subscripts added through {@link #addSubscript(TExpression)} after the first one; created lazily. */
    private TExpressionList subscriptList;

    /** Argument list set through {@link #setFunctionArgs(TExpressionList)}; null unless set. */
    private TExpressionList functionArgs;

    /**
     * Flag indicating whether this array access represents a slice operation (contains colon syntax).
     *
     * <h3>Purpose and Rationale</h3>
     * <p>This property was introduced to solve a fundamental ambiguity problem in SQL text generation.
     * The core issue was distinguishing between two syntactically different but internally identical
     * array access patterns:
     *
     * <table border="1">
     * <tr><th>Original SQL</th><th>Internal Representation</th><th>Expected Output</th></tr>
     * <tr><td><code>array[5]</code></td><td>lower=5, upper=null</td><td><code>array[5]</code></td></tr>
     * <tr><td><code>array[5:]</code></td><td>lower=5, upper=null</td><td><code>array[5:]</code></td></tr>
     * </table>
     *
     * <p>Without this flag, both patterns would have identical field values (lowerSubscript=5, upperSubscript=null),
     * making it impossible for the script generator to determine whether to include the colon in the output.
     *
     * <h3>How It Works</h3>
     * <p>The parser sets this flag based on the presence of colon syntax in the original BNF rule:
     * <ul>
     *   <li><strong>Single Element Access</strong>: <code>[expr]</code> → isSlice = false</li>
     *   <li><strong>Slice Operations</strong>: <code>[expr:expr]</code>, <code>[:expr]</code>, <code>[expr:]</code>, <code>[:]</code> → isSlice = true</li>
     * </ul>
     *
     * <h3>Usage in Script Generation</h3>
     * <p>The {@link gudusoft.gsqlparser.scriptWriter.TScriptGeneratorVisitor#preVisit(TIndices)} method
     * uses this flag to determine whether to include a colon in the generated SQL:
     *
     * <pre>{@code
     * if (node.isSlice()) {
     *     // Output: [lower:upper] or [lower:] or [:upper] or [:]
     *     acceptSymbol(":");
     * } else {
     *     // Output: [lower] (no colon)
     * }
     * }</pre>
     *
     * <h3>Impact on Accuracy</h3>
     * <p>This property ensures that the generated SQL text exactly matches the original input:
     * <ul>
     *   <li><code>SELECT array[5] FROM table</code> → regenerates as <code>SELECT array[5] FROM table</code></li>
     *   <li><code>SELECT array[5:] FROM table</code> → regenerates as <code>SELECT array[5:] FROM table</code></li>
     * </ul>
     *
     * <p>This level of accuracy is crucial for SQL formatting tools, query analysis, and any application
     * that needs to preserve the exact semantic meaning of array access operations.
     *
     * @see #isSlice()
     * @see #setSlice(boolean)
     * @see gudusoft.gsqlparser.scriptWriter.TScriptGeneratorVisitor#preVisit(TIndices)
     * @since Introduction of enhanced array slice support
     */
    private boolean isSlice;

    /**
     * Gets the attribute name for qualified name access patterns.
     * This is only populated for QUALIFIED_NAME type indices (e.g., .field, .*).
     *
     * <p>Examples:
     * <ul>
     *   <li>For <code>json_col.user_name</code> → returns "user_name"</li>
     *   <li>For <code>record.*</code> → returns "*"</li>
     *   <li>For <code>array[5]</code> → returns null (not a qualified name)</li>
     * </ul>
     *
     * @return the attribute/field name for qualified access, or null for array access
     * @see #isQualifiedName()
     */
    public TObjectName getAttributeName() {
        return attributeName;
    }

    /**
     * Gets the lower bound expression for array access patterns.
     * This represents the starting index or single index in array access operations.
     *
     * <p>Examples:
     * <ul>
     *   <li>For <code>arr[5]</code> → returns expression "5"</li>
     *   <li>For <code>arr[2:8]</code> → returns expression "2"</li>
     *   <li>For <code>arr[:3]</code> → returns null (no lower bound)</li>
     *   <li>For <code>obj.field</code> → returns null (not array access)</li>
     * </ul>
     *
     * @return the lower bound expression, or null if not applicable
     * @see #getUpperSubscript()
     * @see #isArrayAccess()
     */
    public TExpression getLowerSubscript() {
        return lowerSubscript;
    }

    /**
     * Gets the upper bound expression for array slice patterns.
     * This represents the ending index in array slice operations (e.g., [start:end]).
     *
     * <p>Examples:
     * <ul>
     *   <li>For <code>arr[2:8]</code> → returns expression "8"</li>
     *   <li>For <code>arr[:3]</code> → returns expression "3"</li>
     *   <li>For <code>arr[5]</code> → returns null (single element, not slice)</li>
     *   <li>For <code>arr[2:]</code> → returns null (no upper bound)</li>
     * </ul>
     *
     * @return the upper bound expression for slices, or null if not a slice or no upper bound
     * @see #getLowerSubscript()
     * @see #isArrayAccess()
     */
    public TExpression getUpperSubscript() {
        return upperSubscript;
    }

    /**
     * Legacy method to check if this represents "real" array indices.
     *
     * <p><strong>Note:</strong> This method is maintained for backward compatibility.
     * For new code, prefer using {@link #isArrayAccess()} and {@link #isQualifiedName()}
     * for clearer semantic meaning.
     *
     * <p>Returns true for array access patterns and false for qualified name patterns:
     * <ul>
     *   <li><code>arr[5]</code> → true (real array indices)</li>
     *   <li><code>arr[2:4]</code> → true (real array slice)</li>
     *   <li><code>obj.field</code> → false (not real indices, just field access)</li>
     * </ul>
     *
     * <p>The result depends only on whether an attribute name is present; an explicit type
     * set through {@link #setIndicesType(IndicesType)} is not consulted.
     *
     * @return true if no attribute name is set (array access), false otherwise
     * @deprecated Use {@link #isArrayAccess()} for better code clarity
     */
    @Deprecated
    public boolean isRealIndices(){
        return (this.attributeName == null);
    }

    /**
     * Gets the explicit type of this indices element.
     *
     * <p>This method returns the semantic type of the indirection, helping distinguish
     * between array access operations, qualified name field access and dot-notation
     * function calls. The method includes fallback logic for backward compatibility
     * with legacy code.
     *
     * <p>Type determination logic:
     * <ol>
     *   <li>If explicit type was set via {@link #setIndicesType(IndicesType)}, return that</li>
     *   <li>Otherwise, infer from legacy fields: attributeName present → QUALIFIED_NAME, absent → ARRAY_ACCESS</li>
     * </ol>
     *
     * <p>{@link IndicesType#FUNCTION_CALL} is therefore only returned when it was set explicitly;
     * the fallback never infers it.
     *
     * @return the explicitly set type if any; otherwise {@link IndicesType#ARRAY_ACCESS} for bracket
     *         notation or {@link IndicesType#QUALIFIED_NAME} for dot notation. Never null.
     * @see #isArrayAccess()
     * @see #isQualifiedName()
     * @see #isFunctionCall()
     */
    public IndicesType getIndicesType() {
        if (indicesType != null) {
            return indicesType;
        }
        // Fallback to legacy logic for backward compatibility
        return (attributeName != null) ? IndicesType.QUALIFIED_NAME : IndicesType.ARRAY_ACCESS;
    }

    /**
     * Sets the explicit type of this indices element.
     *
     * <p>This method is typically called by the parser to explicitly mark whether
     * an indices element represents array access, qualified name access or a function call.
     * Passing null restores the legacy inference performed by {@link #getIndicesType()}.
     *
     * @param indicesType the semantic type to set - {@link IndicesType#ARRAY_ACCESS}
     *                    for bracket notation, {@link IndicesType#QUALIFIED_NAME} for dot notation,
     *                    {@link IndicesType#FUNCTION_CALL} for a dot-notation function call,
     *                    or null to fall back to inference
     * @see #getIndicesType()
     */
    public void setIndicesType(IndicesType indicesType) {
        this.indicesType = indicesType;
    }

    /**
     * Checks if this represents an array slice operation (contains colon syntax).
     *
     * <p>Returns true for slice patterns that contain a colon:
     * <ul>
     *   <li><code>arr[2:8]</code> - Slice with both bounds</li>
     *   <li><code>arr[:3]</code> - Slice with upper bound only</li>
     *   <li><code>arr[2:]</code> - Slice with lower bound only</li>
     *   <li><code>arr[:]</code> - Full slice</li>
     * </ul>
     *
     * <p>Returns false for single element access:
     * <ul>
     *   <li><code>arr[5]</code> - Single element access (no colon)</li>
     * </ul>
     *
     * @return true if this represents a slice operation with colon syntax
     * @see #setSlice(boolean)
     */
    public boolean isSlice() {
        return isSlice;
    }

    /**
     * Sets whether this represents an array slice operation.
     *
     * <p>This method is typically called by the parser to indicate whether
     * the original syntax contained a colon (slice) or not (single element).
     *
     * @param isSlice true if this represents slice syntax with colon, false for single element
     * @see #isSlice()
     */
    public void setSlice(boolean isSlice) {
        this.isSlice = isSlice;
    }

    /**
     * Checks if this represents array access syntax using bracket notation.
     *
     * <p>Returns true for all forms of array/list access patterns:
     * <ul>
     *   <li><code>arr[5]</code> - Single element access</li>
     *   <li><code>arr[2:8]</code> - Array slice with both bounds</li>
     *   <li><code>arr[:3]</code> - Array slice to index 3</li>
     *   <li><code>arr[2:]</code> - Array slice from index 2</li>
     *   <li><code>arr[:]</code> - Full array slice</li>
     * </ul>
     *
     * <p>This is the preferred method for type-safe checking of array access patterns.
     *
     * @return true if this represents array access using bracket notation
     * @see #isQualifiedName()
     * @see #getLowerSubscript()
     * @see #getUpperSubscript()
     */
    public boolean isArrayAccess() {
        return getIndicesType() == IndicesType.ARRAY_ACCESS;
    }

    /**
     * Checks if this represents qualified name syntax using dot notation.
     *
     * <p>Returns true for all forms of field/property access patterns:
     * <ul>
     *   <li><code>obj.field_name</code> - Access specific field</li>
     *   <li><code>json_col.key</code> - Access JSON object key</li>
     *   <li><code>record.*</code> - Access all fields (wildcard)</li>
     *   <li><code>composite_type.attribute</code> - Access composite type attribute</li>
     * </ul>
     *
     * <p>This is the preferred method for type-safe checking of qualified name patterns.
     *
     * @return true if this represents qualified name access using dot notation
     * @see #isArrayAccess()
     * @see #getAttributeName()
     */
    public boolean isQualifiedName() {
        return getIndicesType() == IndicesType.QUALIFIED_NAME;
    }

    /**
     * Checks if this represents a function call written with dot notation
     * (<code>.func()</code>, <code>.func(args)</code>).
     *
     * <p>Because the legacy fallback in {@link #getIndicesType()} only infers
     * ARRAY_ACCESS or QUALIFIED_NAME, this returns true only when the type was
     * explicitly set to {@link IndicesType#FUNCTION_CALL} via
     * {@link #setIndicesType(IndicesType)}. When it returns true, both
     * {@link #isArrayAccess()} and {@link #isQualifiedName()} return false.
     *
     * @return true if the type of this element is {@link IndicesType#FUNCTION_CALL}
     * @see #getFunctionArgs()
     * @see #getIndicesType()
     */
    public boolean isFunctionCall() {
        return getIndicesType() == IndicesType.FUNCTION_CALL;
    }

    /**
     * Initializes this TIndices with the specified components.
     *
     * <p>This method is typically called by the parser during AST construction.
     * The parameters determine the type and content of the indices element:
     *
     * <p><strong>For Array Access:</strong> arg1=null, arg2=lowerBound, arg3=upperBound
     * <p><strong>For Qualified Names:</strong> arg1=attributeName, arg2=null, arg3=null
     *
     * <p>The arguments are cast without checking, so a non-null argument of the wrong
     * type results in a {@link ClassCastException}. The explicit indices type and the
     * slice flag are not touched by this method.
     *
     * @param arg1 attribute name ({@link TObjectName}) for qualified access, or null for array access
     * @param arg2 lower bound expression ({@link TExpression}) for array access, or null for qualified names
     * @param arg3 upper bound expression ({@link TExpression}) for array slices, or null for single elements or qualified names
     */
    public void init(Object arg1, Object arg2, Object arg3){
        this.attributeName = (TObjectName)arg1;
        this.lowerSubscript = (TExpression)arg2;
        this.upperSubscript = (TExpression)arg3;
    }

    /**
     * Sets the lower bound expression for array access operations.
     *
     * <p>This represents the starting index in array access patterns:
     * <ul>
     *   <li>For <code>[5]</code> → set to expression "5"</li>
     *   <li>For <code>[2:8]</code> → set to expression "2"</li>
     *   <li>For <code>[:3]</code> → set to null (no lower bound)</li>
     * </ul>
     *
     * @param lowerSubscript the expression representing the lower bound, or null
     * @see #getLowerSubscript()
     */
    public void setLowerSubscript(TExpression lowerSubscript) {
        this.lowerSubscript = lowerSubscript;
    }

    /**
     * Sets the upper bound expression for array slice operations.
     *
     * <p>This represents the ending index in array slice patterns:
     * <ul>
     *   <li>For <code>[2:8]</code> → set to expression "8"</li>
     *   <li>For <code>[:3]</code> → set to expression "3"</li>
     *   <li>For <code>[5]</code> → set to null (single element, not a slice)</li>
     * </ul>
     *
     * @param upperSubscript the expression representing the upper bound, or null
     * @see #getUpperSubscript()
     */
    public void setUpperSubscript(TExpression upperSubscript) {
        this.upperSubscript = upperSubscript;
    }

    /**
     * Sets the attribute name for qualified name access operations.
     *
     * <p>This represents the field or property name in dot notation patterns:
     * <ul>
     *   <li>For <code>.field_name</code> → set to "field_name"</li>
     *   <li>For <code>.*</code> → set to "*"</li>
     *   <li>For array access → should be null</li>
     * </ul>
     *
     * @param attributeName the object name representing the field/property, or null for array access
     * @see #getAttributeName()
     */
    public void setAttributeName(TObjectName attributeName) {
        this.attributeName = attributeName;
    }

    /**
     * Gets the subscripts that were added through {@link #addSubscript(TExpression)}
     * after the lower subscript had already been filled.
     *
     * <p>The first subscript is not part of this list; it is available through
     * {@link #getLowerSubscript()}.
     *
     * @return the list of additional subscripts, or null if none were added
     * @see #addSubscript(TExpression)
     */
    public TExpressionList getSubscriptList() {
        return subscriptList;
    }

    /**
     * Gets the argument list stored on this element.
     *
     * <p>NOTE(review): presumably this holds the arguments of a
     * {@link IndicesType#FUNCTION_CALL} element such as <code>.func(args)</code> -
     * confirm against the parser actions that call {@link #setFunctionArgs(TExpressionList)}.
     *
     * @return the argument list, or null if none was set
     * @see #setFunctionArgs(TExpressionList)
     * @see #isFunctionCall()
     */
    public TExpressionList getFunctionArgs() {
        return functionArgs;
    }

    /**
     * Sets the argument list stored on this element.
     *
     * <p>This only stores the list; it does not change the indices type. Callers that
     * want {@link #isFunctionCall()} to return true must also call
     * {@link #setIndicesType(IndicesType)}.
     *
     * @param functionArgs the argument list, or null to clear it
     * @see #getFunctionArgs()
     */
    public void setFunctionArgs(TExpressionList functionArgs) {
        this.functionArgs = functionArgs;
    }

    /**
     * Adds a subscript expression to this element.
     *
     * <p>If no lower subscript is set yet, {@code expr} becomes the lower subscript.
     * Otherwise it is appended to the subscript list, which is created on first use.
     * The upper subscript and the slice flag are not modified.
     *
     * @param expr the subscript expression to add
     * @see #getLowerSubscript()
     * @see #getSubscriptList()
     */
    public void addSubscript(TExpression expr){
        if (lowerSubscript == null){
            lowerSubscript = expr;
        }else{
            if (subscriptList == null){
                subscriptList = new TExpressionList();
            }
            subscriptList.addExpression(expr);
        }
    }

    /**
     * Adds a subscript expression to the last element of the given list by
     * delegating to {@link #addSubscript(TExpression)}.
     *
     * <p>The list must be non-null and contain at least one element: a null list
     * results in a {@link NullPointerException} and an empty list in an
     * {@link IndexOutOfBoundsException}, because the last element is looked up
     * without a prior check.
     *
     * @param indicesArrayList the list whose last element receives the subscript
     * @param expr the subscript expression to add
     */
    public static void addSubscript(ArrayList<TIndices> indicesArrayList,TExpression expr){
        indicesArrayList.get(indicesArrayList.size()-1).addSubscript(expr);
    }

    /**
     * Accepts a visitor by calling {@code preVisit(this)} followed by {@code postVisit(this)}.
     *
     * <p>This method does not descend into the subscripts, the attribute name or the
     * function arguments; a visitor that needs them must read them from this node.
     *
     * @param v the visitor to notify
     */
    public void accept(TParseTreeVisitor v){
        v.preVisit(this);
        v.postVisit(this);
    }

    /**
     * Accepts a visitor in the same way as {@link #accept(TParseTreeVisitor)}:
     * {@code preVisit(this)} followed by {@code postVisit(this)}, without visiting
     * any child node.
     *
     * @param v the visitor to notify
     */
    public void acceptChildren(TParseTreeVisitor v){
        v.preVisit(this);
        v.postVisit(this);
    }

}