package gudusoft.gsqlparser.nodes;


import java.util.ArrayList;

/**
 * Represents indirection elements in SQL expressions, supporting both array access and qualified name syntax.
 * This class handles two distinct syntactic patterns found in PostgreSQL and other SQL dialects.
 *
 * <h3>Array Access Syntax (ARRAY_ACCESS)</h3>
 * Used for accessing array elements and slicing operations:
 * <ul>
 *   <li><code>array_column[5]</code> - Single element access</li>
 *   <li><code>array_column[2:4]</code> - Array slice from index 2 to 4</li>
 *   <li><code>array_column[:3]</code> - Array slice from start to index 3</li>
 *   <li><code>array_column[2:]</code> - Array slice from index 2 to end</li>
 *   <li><code>array_column[:]</code> - Full array slice</li>
 * </ul>
 *
 * <h3>Qualified Name Syntax (QUALIFIED_NAME)</h3>
 * Used for accessing object fields and properties:
 * <ul>
 *   <li><code>json_column.field_name</code> - Access specific field</li>
 *   <li><code>record_column.*</code> - Access all fields</li>
 *   <li><code>composite_type.attribute</code> - Access composite type attribute</li>
 * </ul>
 *
 * <h3>Usage Examples</h3>
 * <pre>{@code
 * // Example SQL: SELECT users[1].name FROM user_array;
 * // This creates two TIndices:
 * // 1. TIndices for [1] with type ARRAY_ACCESS
 * // 2. TIndices for .name with type QUALIFIED_NAME
 *
 * TIndices arrayAccess = ...; // represents [1]
 * if (arrayAccess.isArrayAccess()) {
 *     TExpression index = arrayAccess.getLowerSubscript(); // gets "1"
 *     // Process array index...
 * }
 *
 * TIndices fieldAccess = ...; // represents .name
 * if (fieldAccess.isQualifiedName()) {
 *     TObjectName fieldName = fieldAccess.getAttributeName(); // gets "name"
 *     // Process field access...
 * }
 * }</pre>
 *
 * <h3>SQL Examples by Database</h3>
 * <table border="1">
 * <tr><th>Database</th><th>Array Access</th><th>Field Access</th></tr>
 * <tr><td>PostgreSQL</td><td><code>arr[1], arr[1:3]</code></td><td><code>json_col.field, record.*</code></td></tr>
 * <tr><td>Databricks</td><td><code>array_col[0]</code></td><td><code>struct_col.attr, json.key</code></td></tr>
 * <tr><td>Snowflake</td><td><code>array_col[0]</code></td><td><code>variant_col.field, object.*</code></td></tr>
 * </table>
 *
 * <h3>Type Safety and Backward Compatibility</h3>
 * This class maintains backward compatibility with existing code while providing type-safe access:
 * <ul>
 *   <li>{@link #isRealIndices()} - Legacy method, still works as before</li>
 *   <li>{@link #isArrayAccess()} - New type-safe method for array access</li>
 *   <li>{@link #isQualifiedName()} - New type-safe method for field access</li>
 *   <li>{@link #getIndicesType()} - Get explicit type information</li>
 * </ul>
 *
 * <h3>Complete Usage Example</h3>
 * <pre>{@code
 * // Processing SQL: SELECT data[1:3].users.name FROM complex_table;
 * // This would create a chain of TIndices in TIndirection
 *
 * public void processIndirection(TIndirection indirection) {
 *     for (TIndices indices : indirection.getIndices()) {
 *         switch (indices.getIndicesType()) {
 *             case ARRAY_ACCESS:
 *                 System.out.println("Array access detected:");
 *                 TExpression lower = indices.getLowerSubscript();
 *                 TExpression upper = indices.getUpperSubscript();
 *
 *                 if (lower != null && upper != null) {
 *                     System.out.println("  Slice: [" + lower + ":" + upper + "]");
 *                 } else if (lower != null) {
 *                     System.out.println("  Single element: [" + lower + "]");
 *                 }
 *                 break;
 *
 *             case QUALIFIED_NAME:
 *                 System.out.println("Field access detected:");
 *                 TObjectName field = indices.getAttributeName();
 *                 System.out.println("  Field: ." + field.toString());
 *                 break;
 *         }
 *     }
 * }
 *
 * // Legacy compatibility - existing code still works
 * if (indices.isRealIndices()) {
 *     // Handle as array access (legacy approach)
 * } else {
 *     // Handle as field access (legacy approach)
 * }
 * }</pre>
 *
 * @see TIndirection
 * @see TExpression
 * @see TObjectName
 * @since PostgreSQL parser support
 */

public class TIndices extends TParseTreeNode {

    /**
     * Enum to distinguish between array access syntax and qualified name syntax.
     *
     * <p><strong>ARRAY_ACCESS</strong> - Represents bracket notation for array/list operations:
     * <ul>
     *   <li><code>[5]</code> - Single element at index 5</li>
     *   <li><code>[2:4]</code> - Slice from index 2 to 4 (inclusive)</li>
     *   <li><code>[:3]</code> - Slice from start to index 3</li>
     *   <li><code>[2:]</code> - Slice from index 2 to end</li>
     *   <li><code>[:]</code> - Full array/list slice</li>
     * </ul>
     *
     * <p><strong>QUALIFIED_NAME</strong> - Represents dot notation for field/property access:
     * <ul>
     *   <li><code>.field_name</code> - Access specific field or property</li>
     *   <li><code>.*</code> - Access all fields (wildcard expansion)</li>
     * </ul>
     *
     * @since 1.0
     */
    public enum IndicesType {
        /** Array/list access using bracket notation: [expr], [expr:expr], etc. */
        ARRAY_ACCESS,
        /** Field/property access using dot notation: .field, .* */
        QUALIFIED_NAME
    }

    /** Lower bound (or single index) of a bracket access; null when absent. */
    private TExpression lowerSubscript;

    /** Upper bound of a slice; null when absent. */
    private TExpression upperSubscript;

    /** Field name of a dot access; null for bracket access. */
    private TObjectName attributeName;

    /** Explicit type set via {@link #setIndicesType(IndicesType)}; null means "infer from attributeName". */
    private IndicesType indicesType;

    /**
     * Extra subscripts collected by {@link #addSubscript(TExpression)} after the lower
     * subscript has been filled. Created lazily, so it stays null until a second
     * subscript is added.
     */
    private TExpressionList subscriptList;

    /**
     * Flag indicating whether this array access represents a slice operation (contains colon syntax).
     *
     * <h3>Purpose and Rationale</h3>
     * <p>This property was introduced to solve a fundamental ambiguity problem in SQL text generation.
     * The core issue was distinguishing between two syntactically different but internally identical
     * array access patterns:
     *
     * <table border="1">
     * <tr><th>Original SQL</th><th>Internal Representation</th><th>Expected Output</th></tr>
     * <tr><td><code>array[5]</code></td><td>lower=5, upper=null</td><td><code>array[5]</code></td></tr>
     * <tr><td><code>array[5:]</code></td><td>lower=5, upper=null</td><td><code>array[5:]</code></td></tr>
     * </table>
     *
     * <p>Without this flag, both patterns would have identical field values (lowerSubscript=5, upperSubscript=null),
     * making it impossible for the script generator to determine whether to include the colon in the output.
     *
     * <h3>How It Works</h3>
     * <p>The parser sets this flag based on the presence of colon syntax in the original BNF rule:
     * <ul>
     *   <li><strong>Single Element Access</strong>: <code>[expr]</code> → isSlice = false</li>
     *   <li><strong>Slice Operations</strong>: <code>[expr:expr]</code>, <code>[:expr]</code>, <code>[expr:]</code>, <code>[:]</code> → isSlice = true</li>
     * </ul>
     *
     * <h3>Usage in Script Generation</h3>
     * <p>The {@link gudusoft.gsqlparser.scriptWriter.TScriptGeneratorVisitor#preVisit(TIndices)} method
     * uses this flag to determine whether to include a colon in the generated SQL:
     *
     * <pre>{@code
     * if (node.isSlice()) {
     *     // Output: [lower:upper] or [lower:] or [:upper] or [:]
     *     acceptSymbol(":");
     * } else {
     *     // Output: [lower] (no colon)
     * }
     * }</pre>
     *
     * <h3>Impact on Accuracy</h3>
     * <p>This property ensures that the generated SQL text exactly matches the original input:
     * <ul>
     *   <li><code>SELECT array[5] FROM table</code> → regenerates as <code>SELECT array[5] FROM table</code></li>
     *   <li><code>SELECT array[5:] FROM table</code> → regenerates as <code>SELECT array[5:] FROM table</code></li>
     * </ul>
     *
     * <p>This level of accuracy is crucial for SQL formatting tools, query analysis, and any application
     * that needs to preserve the exact semantic meaning of array access operations.
     *
     * @see #isSlice()
     * @see #setSlice(boolean)
     * @see gudusoft.gsqlparser.scriptWriter.TScriptGeneratorVisitor#preVisit(TIndices)
     * @since Introduction of enhanced array slice support
     */
    private boolean isSlice;

    /**
     * Gets the attribute name for qualified name access patterns.
     * This is only populated for QUALIFIED_NAME type indices (e.g., .field, .*).
     *
     * <p>Examples:
     * <ul>
     *   <li>For <code>json_col.user_name</code> → returns "user_name"</li>
     *   <li>For <code>record.*</code> → returns "*"</li>
     *   <li>For <code>array[5]</code> → returns null (not a qualified name)</li>
     * </ul>
     *
     * @return the attribute/field name for qualified access, or null for array access
     * @see #isQualifiedName()
     */
    public TObjectName getAttributeName() {
        return attributeName;
    }

    /**
     * Gets the lower bound expression for array access patterns.
     * This represents the starting index or single index in array access operations.
     *
     * <p>Examples:
     * <ul>
     *   <li>For <code>arr[5]</code> → returns expression "5"</li>
     *   <li>For <code>arr[2:8]</code> → returns expression "2"</li>
     *   <li>For <code>arr[:3]</code> → returns null (no lower bound)</li>
     *   <li>For <code>obj.field</code> → returns null (not array access)</li>
     * </ul>
     *
     * @return the lower bound expression, or null if not applicable
     * @see #getUpperSubscript()
     * @see #isArrayAccess()
     */
    public TExpression getLowerSubscript() {
        return lowerSubscript;
    }

    /**
     * Gets the upper bound expression for array slice patterns.
     * This represents the ending index in array slice operations (e.g., [start:end]).
     *
     * <p>Examples:
     * <ul>
     *   <li>For <code>arr[2:8]</code> → returns expression "8"</li>
     *   <li>For <code>arr[:3]</code> → returns expression "3"</li>
     *   <li>For <code>arr[5]</code> → returns null (single element, not slice)</li>
     *   <li>For <code>arr[2:]</code> → returns null (no upper bound)</li>
     * </ul>
     *
     * @return the upper bound expression for slices, or null if not a slice or no upper bound
     * @see #getLowerSubscript()
     * @see #isArrayAccess()
     */
    public TExpression getUpperSubscript() {
        return upperSubscript;
    }

    /**
     * Legacy method to check if this represents "real" array indices.
     *
     * <p><strong>Note:</strong> This method is maintained for backward compatibility.
     * For new code, prefer using {@link #isArrayAccess()} and {@link #isQualifiedName()}
     * for clearer semantic meaning.
     *
     * <p>Returns true for array access patterns and false for qualified name patterns:
     * <ul>
     *   <li><code>arr[5]</code> → true (real array indices)</li>
     *   <li><code>arr[2:4]</code> → true (real array slice)</li>
     *   <li><code>obj.field</code> → false (not real indices, just field access)</li>
     * </ul>
     *
     * <p><strong>Caveat:</strong> the result depends only on whether an attribute name is
     * present. A type set explicitly through {@link #setIndicesType(IndicesType)} is ignored
     * here, so this method and {@link #isArrayAccess()} can disagree when the explicit type
     * does not match the presence of an attribute name.
     *
     * @return true if no attribute name is set (array access), false otherwise
     * @deprecated Use {@link #isArrayAccess()} for better code clarity
     */
    @Deprecated
    public boolean isRealIndices() {
        return (this.attributeName == null);
    }

    /**
     * Gets the explicit type of this indices element.
     *
     * <p>This method returns the semantic type of the indirection, helping distinguish
     * between array access operations and qualified name field access. The method
     * includes fallback logic for backward compatibility with legacy code.
     *
     * <p>Type determination logic:
     * <ol>
     *   <li>If explicit type was set via {@link #setIndicesType(IndicesType)}, return that</li>
     *   <li>Otherwise, infer from legacy fields: attributeName present → QUALIFIED_NAME, absent → ARRAY_ACCESS</li>
     * </ol>
     *
     * @return {@link IndicesType#ARRAY_ACCESS} for bracket notation or {@link IndicesType#QUALIFIED_NAME} for dot notation
     * @see #isArrayAccess()
     * @see #isQualifiedName()
     */
    public IndicesType getIndicesType() {
        if (indicesType != null) {
            return indicesType;
        }
        // Fallback to legacy logic for backward compatibility
        return (attributeName != null) ? IndicesType.QUALIFIED_NAME : IndicesType.ARRAY_ACCESS;
    }

    /**
     * Sets the explicit type of this indices element.
     *
     * <p>This method is typically called by the parser to explicitly mark whether
     * an indices element represents array access or qualified name access.
     * Passing null re-enables the inference performed by {@link #getIndicesType()}.
     *
     * @param indicesType the semantic type to set - {@link IndicesType#ARRAY_ACCESS}
     *                    for bracket notation or {@link IndicesType#QUALIFIED_NAME} for dot notation
     * @see #getIndicesType()
     */
    public void setIndicesType(IndicesType indicesType) {
        this.indicesType = indicesType;
    }

    /**
     * Checks if this represents an array slice operation (contains colon syntax).
     *
     * <p>Returns true for slice patterns that contain a colon:
     * <ul>
     *   <li><code>arr[2:8]</code> - Slice with both bounds</li>
     *   <li><code>arr[:3]</code> - Slice with upper bound only</li>
     *   <li><code>arr[2:]</code> - Slice with lower bound only</li>
     *   <li><code>arr[:]</code> - Full slice</li>
     * </ul>
     *
     * <p>Returns false for single element access:
     * <ul>
     *   <li><code>arr[5]</code> - Single element access (no colon)</li>
     * </ul>
     *
     * @return true if this represents a slice operation with colon syntax
     * @see #setSlice(boolean)
     */
    public boolean isSlice() {
        return isSlice;
    }

    /**
     * Sets whether this represents an array slice operation.
     *
     * <p>This method is typically called by the parser to indicate whether
     * the original syntax contained a colon (slice) or not (single element).
     *
     * @param isSlice true if this represents slice syntax with colon, false for single element
     * @see #isSlice()
     */
    public void setSlice(boolean isSlice) {
        this.isSlice = isSlice;
    }

    /**
     * Checks if this represents array access syntax using bracket notation.
     *
     * <p>Returns true for all forms of array/list access patterns:
     * <ul>
     *   <li><code>arr[5]</code> - Single element access</li>
     *   <li><code>arr[2:8]</code> - Array slice with both bounds</li>
     *   <li><code>arr[:3]</code> - Array slice to index 3</li>
     *   <li><code>arr[2:]</code> - Array slice from index 2</li>
     *   <li><code>arr[:]</code> - Full array slice</li>
     * </ul>
     *
     * <p>This is the preferred method for type-safe checking of array access patterns.
     *
     * @return true if this represents array access using bracket notation
     * @see #isQualifiedName()
     * @see #getLowerSubscript()
     * @see #getUpperSubscript()
     */
    public boolean isArrayAccess() {
        return getIndicesType() == IndicesType.ARRAY_ACCESS;
    }

    /**
     * Checks if this represents qualified name syntax using dot notation.
     *
     * <p>Returns true for all forms of field/property access patterns:
     * <ul>
     *   <li><code>obj.field_name</code> - Access specific field</li>
     *   <li><code>json_col.key</code> - Access JSON object key</li>
     *   <li><code>record.*</code> - Access all fields (wildcard)</li>
     *   <li><code>composite_type.attribute</code> - Access composite type attribute</li>
     * </ul>
     *
     * <p>This is the preferred method for type-safe checking of qualified name patterns.
     *
     * @return true if this represents qualified name access using dot notation
     * @see #isArrayAccess()
     * @see #getAttributeName()
     */
    public boolean isQualifiedName() {
        return getIndicesType() == IndicesType.QUALIFIED_NAME;
    }

    /**
     * Initializes this TIndices with the specified components.
     *
     * <p>This method is typically called by the parser during AST construction.
     * The parameters determine the type and content of the indices element:
     *
     * <p><strong>For Array Access:</strong> arg1=null, arg2=lowerBound, arg3=upperBound
     * <p><strong>For Qualified Names:</strong> arg1=attributeName, arg2=null, arg3=null
     *
     * <p>Only the attribute name and the two subscripts are assigned. The explicit
     * indices type, the slice flag and the extra subscript list are left as they were.
     *
     * @param arg1 attribute name for qualified access, or null for array access
     * @param arg2 lower bound expression for array access, or null for qualified names
     * @param arg3 upper bound expression for array slices, or null for single elements or qualified names
     * @throws ClassCastException if arg1 is not a {@link TObjectName}, or arg2/arg3 is not a
     *                            {@link TExpression} (null is always accepted)
     */
    public void init(Object arg1, Object arg2, Object arg3) {
        this.attributeName = (TObjectName) arg1;
        this.lowerSubscript = (TExpression) arg2;
        this.upperSubscript = (TExpression) arg3;
    }

    /**
     * Sets the lower bound expression for array access operations.
     *
     * <p>This represents the starting index in array access patterns:
     * <ul>
     *   <li>For <code>[5]</code> → set to expression "5"</li>
     *   <li>For <code>[2:8]</code> → set to expression "2"</li>
     *   <li>For <code>[:3]</code> → set to null (no lower bound)</li>
     * </ul>
     *
     * @param lowerSubscript the expression representing the lower bound, or null
     * @see #getLowerSubscript()
     */
    public void setLowerSubscript(TExpression lowerSubscript) {
        this.lowerSubscript = lowerSubscript;
    }

    /**
     * Sets the upper bound expression for array slice operations.
     *
     * <p>This represents the ending index in array slice patterns:
     * <ul>
     *   <li>For <code>[2:8]</code> → set to expression "8"</li>
     *   <li>For <code>[:3]</code> → set to expression "3"</li>
     *   <li>For <code>[5]</code> → set to null (single element, not a slice)</li>
     * </ul>
     *
     * @param upperSubscript the expression representing the upper bound, or null
     * @see #getUpperSubscript()
     */
    public void setUpperSubscript(TExpression upperSubscript) {
        this.upperSubscript = upperSubscript;
    }

    /**
     * Sets the attribute name for qualified name access operations.
     *
     * <p>This represents the field or property name in dot notation patterns:
     * <ul>
     *   <li>For <code>.field_name</code> → set to "field_name"</li>
     *   <li>For <code>.*</code> → set to "*"</li>
     *   <li>For array access → should be null</li>
     * </ul>
     *
     * @param attributeName the object name representing the field/property, or null for array access
     * @see #getAttributeName()
     */
    public void setAttributeName(TObjectName attributeName) {
        this.attributeName = attributeName;
    }

    /**
     * Gets the additional subscripts collected by {@link #addSubscript(TExpression)}.
     *
     * <p>The first subscript added is stored as the lower subscript and is not part of
     * this list; only later ones are appended here.
     *
     * @return the list of extra subscripts, or null if none has been added
     * @see #addSubscript(TExpression)
     */
    public TExpressionList getSubscriptList() {
        return subscriptList;
    }

    /**
     * Adds a subscript expression to this indices element.
     *
     * <p>If no lower subscript is set yet, {@code expr} becomes the lower subscript.
     * Otherwise it is appended to the extra subscript list, which is created on first use.
     * Because the check is on the lower subscript only, an element whose lower subscript
     * is null (for example after <code>[:3]</code>) receives {@code expr} as its lower subscript.
     *
     * <p>NOTE(review): presumably used for multi-subscript forms such as
     * <code>arr[1,2]</code> - confirm against the grammar actions that call this.
     *
     * @param expr the subscript expression to add
     * @see #getSubscriptList()
     */
    public void addSubscript(TExpression expr) {
        if (lowerSubscript == null) {
            lowerSubscript = expr;
        } else {
            if (subscriptList == null) {
                subscriptList = new TExpressionList();
            }
            subscriptList.addExpression(expr);
        }
    }

    /**
     * Adds a subscript expression to the last element of the given list by delegating to
     * {@link #addSubscript(TExpression)} on that element.
     *
     * <p>The list must be non-null and non-empty. No guard is applied here, so an empty
     * list fails on the element lookup and a null list fails on the first dereference.
     *
     * @param indicesArrayList list whose last element receives the subscript
     * @param expr             the subscript expression to add
     */
    public static void addSubscript(ArrayList<TIndices> indicesArrayList, TExpression expr) {
        indicesArrayList.get(indicesArrayList.size() - 1).addSubscript(expr);
    }

    /**
     * Accepts a visitor by calling {@code preVisit(this)} followed by {@code postVisit(this)}.
     *
     * <p>The subscript expressions and the attribute name are not visited from here.
     *
     * @param v the visitor to notify
     */
    public void accept(TParseTreeVisitor v) {
        v.preVisit(this);
        v.postVisit(this);
    }

    /**
     * Accepts a visitor in the same way as {@link #accept(TParseTreeVisitor)}:
     * {@code preVisit(this)} followed by {@code postVisit(this)}.
     *
     * <p>Despite the name, this implementation does not descend into the subscript
     * expressions or the attribute name.
     *
     * @param v the visitor to notify
     */
    public void acceptChildren(TParseTreeVisitor v) {
        v.preVisit(this);
        v.postVisit(this);
    }

}