001package gudusoft.gsqlparser.ir.builder.oracle;
002
003import java.util.Collections;
004import java.util.HashSet;
005import java.util.Set;
006
/**
 * Classifies Oracle built-in functions, system packages, and other
 * external dependencies that should NOT create virtual nodes in the call graph.
 * <p>
 * Instead of creating virtual nodes for these well-known Oracle constructs,
 * they are marked as external dependencies ({@code ExternalCallEdge}),
 * keeping the call graph focused on user-defined code entities.
 * <p>
 * Classification types returned by {@link #classify(String)}, in the order
 * they are tested (the first match wins):
 * <ul>
 *   <li>{@code SYSTEM_PACKAGE} — DBMS_*, UTL_*, SYS.* and other known package calls</li>
 *   <li>{@code AGGREGATE_FUNCTION} — aggregate functions (SUM, COUNT, etc.)</li>
 *   <li>{@code ANALYTIC_FUNCTION} — analytic-only functions (LAG, LEAD, ROW_NUMBER, etc.)</li>
 *   <li>{@code CONVERSION_FUNCTION} — type conversion functions (TO_NUMBER, TO_CHAR, CAST, etc.)</li>
 *   <li>{@code SYSTEM_FUNCTION} — system utility functions (RAISE_APPLICATION_ERROR, etc.)</li>
 *   <li>{@code BUILTIN_FUNCTION} — everything else (NVL, SUBSTR, etc.); also the fallback</li>
 * </ul>
 * <p>
 * All lookups are case-insensitive. Names are upper-cased with
 * {@code Locale.ROOT} so the result does not depend on the JVM default locale.
 */
public final class OracleExternalDepClassifier {

    private OracleExternalDepClassifier() {}

    /** Result of {@link #classify(String)} for system package calls. */
    private static final String TYPE_SYSTEM_PACKAGE = "SYSTEM_PACKAGE";
    private static final String TYPE_AGGREGATE = "AGGREGATE_FUNCTION";
    private static final String TYPE_ANALYTIC = "ANALYTIC_FUNCTION";
    private static final String TYPE_CONVERSION = "CONVERSION_FUNCTION";
    private static final String TYPE_SYSTEM_FUNCTION = "SYSTEM_FUNCTION";
    /** Fallback classification, also used for {@code null} input. */
    private static final String TYPE_BUILTIN = "BUILTIN_FUNCTION";

    /** Schema qualifier that may precede a system package (e.g. SYS.UTL_FILE.FOPEN). */
    private static final String SYS_SCHEMA_QUALIFIER = "SYS.";

    // ---- Built-in function sets ----
    // Some names intentionally appear in more than one set (e.g. ROUND, TRUNC,
    // RANK); membership tests below are order-dependent where that matters.

    private static final Set<String> STRING_FUNCTIONS;
    private static final Set<String> NUMERIC_FUNCTIONS;
    private static final Set<String> DATE_FUNCTIONS;
    private static final Set<String> CONVERSION_FUNCTIONS;
    private static final Set<String> NULL_FUNCTIONS;
    private static final Set<String> AGGREGATE_FUNCTIONS;
    private static final Set<String> ANALYTIC_FUNCTIONS;
    private static final Set<String> SYSTEM_FUNCTIONS;
    private static final Set<String> LOB_FUNCTIONS;
    private static final Set<String> XML_FUNCTIONS;
    private static final Set<String> JSON_FUNCTIONS;
    private static final Set<String> COLLECTION_FUNCTIONS;
    private static final Set<String> MISC_FUNCTIONS;

    /** Known system package prefixes (uppercase). */
    private static final Set<String> SYSTEM_PACKAGE_PREFIXES;

    /** Security-sensitive external dependencies. */
    private static final Set<String> SECURITY_SENSITIVE;

    /** Combined set of all built-in function names (uppercase). */
    private static final Set<String> ALL_BUILTINS;

    static {
        STRING_FUNCTIONS = unmodifiableSet(
                "ASCII", "CHR", "CONCAT", "INITCAP", "INSTR", "INSTRB", "INSTRC",
                "LENGTH", "LENGTHB", "LENGTHC", "LOWER", "LPAD", "LTRIM",
                "NLS_INITCAP", "NLS_LOWER", "NLS_UPPER", "NLSSORT",
                "REGEXP_COUNT", "REGEXP_INSTR", "REGEXP_REPLACE", "REGEXP_SUBSTR",
                "REPLACE", "REVERSE", "RPAD", "RTRIM", "SOUNDEX",
                "SUBSTR", "SUBSTRB", "SUBSTRC", "TRANSLATE", "TRIM",
                "UPPER", "DUMP", "VSIZE"
        );

        NUMERIC_FUNCTIONS = unmodifiableSet(
                "ABS", "ACOS", "ASIN", "ATAN", "ATAN2", "BITAND",
                "CEIL", "COS", "COSH", "EXP", "FLOOR", "LN", "LOG",
                "MOD", "NANVL", "POWER", "REMAINDER", "ROUND",
                "SIGN", "SIN", "SINH", "SQRT", "TAN", "TANH",
                "TRUNC", "WIDTH_BUCKET"
        );

        DATE_FUNCTIONS = unmodifiableSet(
                "ADD_MONTHS", "CURRENT_DATE", "CURRENT_TIMESTAMP",
                "DBTIMEZONE", "EXTRACT", "FROM_TZ", "LAST_DAY",
                "LOCALTIMESTAMP", "MONTHS_BETWEEN", "NEW_TIME",
                "NEXT_DAY", "NUMTODSINTERVAL", "NUMTOYMINTERVAL",
                "ROUND", "SESSIONTIMEZONE", "SYSDATE", "SYSTIMESTAMP",
                "TO_DSINTERVAL", "TO_YMINTERVAL",
                "TRUNC", "TZ_OFFSET"
        );

        CONVERSION_FUNCTIONS = unmodifiableSet(
                "ASCIISTR", "BIN_TO_NUM", "CAST", "CHARTOROWID",
                "COMPOSE", "CONVERT", "DECOMPOSE",
                "HEXTORAW", "NUMTODSINTERVAL", "NUMTOYMINTERVAL",
                "RAWTOHEX", "RAWTONHEX", "ROWIDTOCHAR", "ROWIDTONCHAR",
                "SCN_TO_TIMESTAMP", "TIMESTAMP_TO_SCN",
                "TO_BINARY_DOUBLE", "TO_BINARY_FLOAT",
                "TO_BLOB", "TO_CHAR", "TO_CLOB", "TO_DATE",
                "TO_DSINTERVAL", "TO_LOB", "TO_MULTI_BYTE",
                "TO_NCHAR", "TO_NCLOB", "TO_NUMBER",
                "TO_SINGLE_BYTE", "TO_TIMESTAMP",
                "TO_TIMESTAMP_TZ", "TO_YMINTERVAL",
                "TREAT", "UNISTR", "VALIDATE_CONVERSION"
        );

        NULL_FUNCTIONS = unmodifiableSet(
                "COALESCE", "DECODE", "GREATEST", "LEAST",
                "LNNVL", "NULLIF", "NVL", "NVL2"
        );

        AGGREGATE_FUNCTIONS = unmodifiableSet(
                "AVG", "COLLECT", "CORR", "COUNT",
                "COVAR_POP", "COVAR_SAMP", "CUME_DIST",
                "DENSE_RANK", "FIRST", "GROUP_ID",
                "GROUPING", "GROUPING_ID", "LAST",
                "LISTAGG", "MAX", "MEDIAN", "MIN",
                "PERCENT_RANK", "PERCENTILE_CONT", "PERCENTILE_DISC",
                "RANK", "REGR_AVGX", "REGR_AVGY", "REGR_COUNT",
                "REGR_INTERCEPT", "REGR_R2", "REGR_SLOPE",
                "REGR_SXX", "REGR_SXY", "REGR_SYY",
                "STATS_BINOMIAL_TEST", "STATS_CROSSTAB",
                "STATS_F_TEST", "STATS_KS_TEST", "STATS_MODE",
                "STATS_MW_TEST", "STATS_ONE_WAY_ANOVA",
                "STATS_T_TEST_ONE", "STATS_T_TEST_PAIRED",
                "STATS_T_TEST_INDEP", "STATS_T_TEST_INDEPU",
                "STATS_WSR_TEST", "STDDEV", "STDDEV_POP",
                "STDDEV_SAMP", "SUM", "VAR_POP", "VAR_SAMP",
                "VARIANCE", "WM_CONCAT", "XMLAGG"
        );

        ANALYTIC_FUNCTIONS = unmodifiableSet(
                "CUME_DIST", "DENSE_RANK", "FIRST_VALUE",
                "LAG", "LAST_VALUE", "LEAD", "LISTAGG",
                "NTH_VALUE", "NTILE", "PERCENT_RANK",
                "RANK", "RATIO_TO_REPORT", "ROW_NUMBER"
        );

        SYSTEM_FUNCTIONS = unmodifiableSet(
                "RAISE_APPLICATION_ERROR",
                "SYS_CONTEXT", "SYS_GUID", "SYS_TYPEID",
                "UID", "USER", "USERENV",
                "ORA_HASH", "STANDARD_HASH",
                "SYS_EXTRACT_UTC", "SYS_XMLGEN", "SYS_XMLAGG",
                "DBMS_ASSERT", "SQLCODE", "SQLERRM"
        );

        LOB_FUNCTIONS = unmodifiableSet(
                "BFILENAME", "EMPTY_BLOB", "EMPTY_CLOB",
                "TO_BLOB", "TO_CLOB", "TO_NCLOB"
        );

        XML_FUNCTIONS = unmodifiableSet(
                "XMLELEMENT", "XMLFOREST", "XMLAGG", "XMLROOT",
                "XMLPARSE", "XMLPI", "XMLQUERY", "XMLSERIALIZE",
                "XMLTABLE", "XMLCAST", "XMLEXISTS",
                "XMLCOLATTVAL", "XMLCONCAT", "XMLSEQUENCE",
                "EXTRACTVALUE", "EXISTSNODE", "UPDATEXML",
                "DELETEXML", "INSERTCHILDXML", "INSERTXMLBEFORE",
                "APPENDCHILDXML"
        );

        JSON_FUNCTIONS = unmodifiableSet(
                "JSON_VALUE", "JSON_QUERY", "JSON_TABLE",
                "JSON_EXISTS", "JSON_OBJECT", "JSON_ARRAY",
                "JSON_OBJECTAGG", "JSON_ARRAYAGG",
                "JSON_SERIALIZE", "JSON_MERGEPATCH"
        );

        COLLECTION_FUNCTIONS = unmodifiableSet(
                "CARDINALITY", "COLLECT", "MULTISET",
                "POWERMULTISET", "POWERMULTISET_BY_CARDINALITY",
                "SET", "TABLE"
        );

        MISC_FUNCTIONS = unmodifiableSet(
                "CURSOR", "DEREF", "LNNVL", "MAKE_REF",
                "REF", "REFTOHEX", "VALUE",
                "APPROX_COUNT_DISTINCT", "APPROX_PERCENTILE",
                "ANY_VALUE", "FEATURE_COMPARE",
                "PREDICTION", "PREDICTION_COST",
                "CLUSTER_ID", "CLUSTER_PROBABILITY"
        );

        // System package prefixes — any call matching "PREFIX.xxx" is a system package call.
        // NOTE(review): "HTL_FILE" does not look like a standard Oracle package name
        // (possibly a typo); it is kept so existing behavior is unchanged — confirm.
        SYSTEM_PACKAGE_PREFIXES = unmodifiableSet(
                "DBMS_OUTPUT", "DBMS_SQL", "DBMS_LOB", "DBMS_UTILITY",
                "DBMS_METADATA", "DBMS_RANDOM", "DBMS_CRYPTO",
                "DBMS_XMLGEN", "DBMS_SESSION", "DBMS_LOCK",
                "DBMS_JOB", "DBMS_SCHEDULER", "DBMS_APPLICATION_INFO",
                "DBMS_PIPE", "DBMS_ALERT", "DBMS_AQ", "DBMS_AQADM",
                "DBMS_RESOURCE_MANAGER", "DBMS_STATS",
                "DBMS_ROWID", "DBMS_SPACE", "DBMS_MVIEW",
                "DBMS_PARALLEL_EXECUTE", "DBMS_PROFILER",
                "DBMS_REDEFINITION", "DBMS_REPAIR",
                "DBMS_RESUMABLE", "DBMS_SERVICE",
                "DBMS_SHARED_POOL", "DBMS_SPACE_ADMIN",
                "DBMS_SQLTUNE", "DBMS_TRANSACTION",
                "DBMS_TYPES", "DBMS_WARNING",
                "DBMS_XDB", "DBMS_XMLDOM", "DBMS_XMLPARSER",
                "DBMS_XMLQUERY", "DBMS_XMLSAVE",
                "DBMS_DEBUG", "DBMS_DESCRIBE",
                "DBMS_DDL", "DBMS_ERRLOG",
                "DBMS_FLASHBACK", "DBMS_FGA",
                "DBMS_HS_PASSTHROUGH", "DBMS_IOT",
                "DBMS_JAVA", "DBMS_LDAP",
                "DBMS_LIBCACHE", "DBMS_LOGMNR",
                "DBMS_LOGSTDBY", "DBMS_MONITOR",
                "DBMS_OBFUSCATION_TOOLKIT",
                "DBMS_OUTLN", "DBMS_PCLXUTIL",
                "DBMS_PICKLER", "DBMS_PREPROCESSOR",
                "DBMS_PRIVILEGE_CAPTURE",
                "DBMS_RESULT_CACHE", "DBMS_RLS",
                "DBMS_RULE", "DBMS_RULE_ADM",
                "DBMS_SYS_SQL", "DBMS_TRACE",
                "DBMS_TTS", "DBMS_WORKLOAD_REPOSITORY",
                "DBMS_XPLAN", "DBMS_ASSERT",
                "UTL_FILE", "UTL_HTTP", "UTL_RAW",
                "UTL_SMTP", "UTL_TCP", "UTL_URL",
                "UTL_COMPRESS", "UTL_ENCODE",
                "UTL_I18N", "UTL_INADDR", "UTL_LMS",
                "UTL_MATCH", "UTL_NLA", "UTL_RECOMP",
                "UTL_REF", "UTL_SPADV",
                "HTL_FILE", "HTP", "HTF", "OWA_UTIL",
                "CTX_DDL", "CTX_DOC", "CTX_QUERY",
                "CTX_THES", "CTX_REPORT",
                "SDO_GEOM", "SDO_UTIL", "SDO_CS",
                "APEX_UTIL", "APEX_ITEM", "APEX_APPLICATION",
                "STANDARD", "SYS"
        );

        SECURITY_SENSITIVE = unmodifiableSet(
                "DBMS_SYS_SQL", "UTL_FILE", "UTL_HTTP",
                "UTL_TCP", "UTL_SMTP", "DBMS_PIPE",
                "DBMS_HS_PASSTHROUGH", "DBMS_JAVA",
                "DBMS_SCHEDULER"
        );

        // Combine all scalar/function names into a single lookup set
        Set<String> all = new HashSet<String>();
        all.addAll(STRING_FUNCTIONS);
        all.addAll(NUMERIC_FUNCTIONS);
        all.addAll(DATE_FUNCTIONS);
        all.addAll(CONVERSION_FUNCTIONS);
        all.addAll(NULL_FUNCTIONS);
        all.addAll(AGGREGATE_FUNCTIONS);
        all.addAll(ANALYTIC_FUNCTIONS);
        all.addAll(SYSTEM_FUNCTIONS);
        all.addAll(LOB_FUNCTIONS);
        all.addAll(XML_FUNCTIONS);
        all.addAll(JSON_FUNCTIONS);
        all.addAll(COLLECTION_FUNCTIONS);
        all.addAll(MISC_FUNCTIONS);
        ALL_BUILTINS = Collections.unmodifiableSet(all);
    }

    /**
     * Returns true if the given call name is a known Oracle external dependency
     * (built-in function, system package call, etc.).
     * <p>
     * The name is trimmed and upper-cased, then split at its first dot:
     * <ul>
     *   <li>If the part before the dot is a known system package prefix, or starts
     *       with {@code DBMS_} / {@code UTL_} (e.g. "DBMS_OUTPUT.PUT_LINE"), the
     *       call is external.</li>
     *   <li>Otherwise everything after the first dot (or the whole name when there
     *       is no dot, e.g. "NVL") is looked up in the built-in function set. A
     *       singly-qualified built-in such as "HR.NVL" therefore matches, while
     *       "HR.PKG.NVL" does not.</li>
     * </ul>
     *
     * @param name call name, qualified or unqualified; may be {@code null}
     * @return {@code true} for a recognized external dependency; {@code false}
     *         otherwise, including for {@code null} or empty input
     */
    public static boolean isExternal(String name) {
        if (name == null || name.isEmpty()) {
            return false;
        }
        String upper = normalize(name);

        // Check for system package prefix (e.g., DBMS_OUTPUT.PUT_LINE, UTL_FILE.FOPEN)
        int dot = upper.indexOf('.');
        if (dot > 0 && isSystemPackagePrefix(upper.substring(0, dot))) {
            return true;
        }

        return ALL_BUILTINS.contains(afterFirstDot(upper, dot));
    }

    /**
     * Classifies the external dependency type.
     * <p>
     * The system-package check runs first; after that the function sets are
     * tested in the order aggregate, analytic, conversion, system. Names present
     * in several sets (e.g. RANK, LISTAGG) get the first matching type. Names
     * that match nothing — including names that are not external at all — fall
     * back to {@code "BUILTIN_FUNCTION"}, so callers should gate on
     * {@link #isExternal(String)} first.
     *
     * @param name call name, qualified or unqualified; may be {@code null}
     * @return classification string (e.g., "BUILTIN_FUNCTION", "SYSTEM_PACKAGE");
     *         {@code "BUILTIN_FUNCTION"} for {@code null}
     */
    public static String classify(String name) {
        if (name == null) {
            return TYPE_BUILTIN;
        }
        String upper = normalize(name);

        // Check for system package prefix first
        int dot = upper.indexOf('.');
        if (dot > 0 && isSystemPackagePrefix(upper.substring(0, dot))) {
            return TYPE_SYSTEM_PACKAGE;
        }

        String objectName = afterFirstDot(upper, dot);

        if (AGGREGATE_FUNCTIONS.contains(objectName)) {
            return TYPE_AGGREGATE;
        }
        if (ANALYTIC_FUNCTIONS.contains(objectName)) {
            return TYPE_ANALYTIC;
        }
        if (CONVERSION_FUNCTIONS.contains(objectName)) {
            return TYPE_CONVERSION;
        }
        if (SYSTEM_FUNCTIONS.contains(objectName)) {
            return TYPE_SYSTEM_FUNCTION;
        }
        return TYPE_BUILTIN;
    }

    /**
     * Returns true if the name is security-sensitive (audit-worthy).
     * These are typically Oracle packages that provide file I/O, network access,
     * or OS command execution capabilities.
     * <p>
     * A leading {@code SYS.} schema qualifier is ignored, so
     * "SYS.UTL_FILE.FOPEN" is flagged exactly like "UTL_FILE.FOPEN". After that,
     * the part before the first dot (or the whole name when unqualified) is
     * matched against the sensitive package list.
     *
     * @param name call or package name; may be {@code null}
     * @return {@code true} if the referenced package is security-sensitive;
     *         {@code false} otherwise, including for {@code null}
     */
    public static boolean isSecuritySensitive(String name) {
        if (name == null) {
            return false;
        }
        String upper = normalize(name);

        // Without this, a SYS-qualified call would be compared as "SYS" and
        // slip past the audit list even though isExternal() accepts it.
        if (upper.startsWith(SYS_SCHEMA_QUALIFIER)) {
            upper = upper.substring(SYS_SCHEMA_QUALIFIER.length());
        }

        int dot = upper.indexOf('.');
        String packageName = dot > 0 ? upper.substring(0, dot) : upper;
        return SECURITY_SENSITIVE.contains(packageName);
    }

    /**
     * Trims and upper-cases a name for lookup. Uses {@code Locale.ROOT} because
     * the lookup tables are plain ASCII; default-locale casing (e.g. Turkish
     * dotted İ) would make names containing 'i' miss.
     */
    private static String normalize(String name) {
        return name.trim().toUpperCase(java.util.Locale.ROOT);
    }

    /**
     * Returns true if {@code prefix} (already upper-cased) denotes a system
     * package: a listed prefix (which includes the SYS schema) or any
     * DBMS_* / UTL_* name.
     */
    private static boolean isSystemPackagePrefix(String prefix) {
        return SYSTEM_PACKAGE_PREFIXES.contains(prefix)
                || prefix.startsWith("DBMS_")
                || prefix.startsWith("UTL_");
    }

    /**
     * Returns the text following the first dot, or the whole string when
     * {@code dot} is negative (no dot present).
     */
    private static String afterFirstDot(String upper, int dot) {
        return dot >= 0 ? upper.substring(dot + 1) : upper;
    }

    /** Builds an unmodifiable set from the given items. */
    private static Set<String> unmodifiableSet(String... items) {
        Set<String> set = new HashSet<String>();
        Collections.addAll(set, items);
        return Collections.unmodifiableSet(set);
    }
}