001package gudusoft.gsqlparser.parser.powerquery;
002
/**
 * Decodes Power Query M-language string escape sequences.
 *
 * <p>M uses a mix of embedded-quote doubling and {@code #(...)} character
 * escapes.  The forms supported here:
 *
 * <pre>
 *   ""             -> literal "
 *   #(lf)          -> \n
 *   #(cr)          -> \r
 *   #(cr,lf)       -> \r\n   (any comma-separated list of the other forms)
 *   #(tab)         -> \t
 *   #(#)           -> literal #
 *   #(XXXX)        -> Unicode code point (exactly 4 hex digits)
 *   #(XXXXXXXX)    -> Unicode code point (exactly 8 hex digits, at most 0010FFFF)
 * </pre>
 *
 * <p>Matching is lenient about case ({@code #(LF)} is treated like
 * {@code #(lf)}) and about whitespace around each list element.
 *
 * <p>Any unrecognised or unterminated {@code #(...)} sequence is preserved
 * verbatim in the output -- the plan's "never silent wrong lineage" rule
 * applies: we'd rather leave a suspicious literal alone than silently
 * produce a wrong SQL string.  Callers can detect such cases via
 * {@link Result#hasWarnings()}.
 */
public final class PowerQueryEscapeDecoder {

    /** Number of hex digits in a short Unicode escape, e.g. {@code #(000A)}. */
    private static final int SHORT_UNICODE_DIGITS = 4;

    /** Number of hex digits in a long Unicode escape, e.g. {@code #(0001F600)}. */
    private static final int LONG_UNICODE_DIGITS = 8;

    private PowerQueryEscapeDecoder() {}

    /**
     * Outcome of a {@link #decode(String)} call: the decoded text plus an
     * optional description of the first problem encountered.
     */
    public static final class Result {
        private final String decoded;
        private final String warning;

        Result(String decoded, String warning) {
            this.decoded = decoded;
            this.warning = warning;
        }

        /** @return the decoded text, or {@code null} if the input was {@code null} */
        public String getDecoded()  { return decoded; }

        /** @return {@code true} if at least one escape could not be decoded */
        public boolean hasWarnings() { return warning != null; }

        /** @return a message for the first undecodable escape, or {@code null} if none */
        public String getWarning()  { return warning; }
    }

    /**
     * Decode the raw text <em>inside</em> the surrounding double quotes
     * of an M string literal.  Do not pass the surrounding quotes.
     *
     * <p>This method does not throw for malformed escapes: they are copied to
     * the output unchanged and the first one is reported through
     * {@link Result#getWarning()}.
     *
     * @param raw the literal's inner text; may be {@code null}
     * @return the decoding result; for {@code null} input, a result whose
     *         decoded text is {@code null} and which carries no warning
     */
    public static Result decode(String raw) {
        if (raw == null) {
            return new Result(null, null);
        }

        final int length = raw.length();
        StringBuilder out = new StringBuilder(length);
        String firstWarning = null;

        int i = 0;
        while (i < length) {
            char c = raw.charAt(i);
            boolean hasNext = i + 1 < length;

            // A doubled quote stands for one literal quote.
            if (c == '"' && hasNext && raw.charAt(i + 1) == '"') {
                out.append('"');
                i += 2;
                continue;
            }

            if (c == '#' && hasNext && raw.charAt(i + 1) == '(') {
                int close = raw.indexOf(')', i + 2);
                if (close < 0) {
                    // No closing parenthesis anywhere after this point: keep the
                    // '#' as-is, but flag it so the caller knows the literal is suspect.
                    if (firstWarning == null) {
                        firstWarning = "Unterminated M escape #(";
                    }
                    out.append(c);
                    i++;
                    continue;
                }

                String body = raw.substring(i + 2, close).trim();
                String expanded = expandEscapeBody(body);
                if (expanded != null) {
                    out.append(expanded);
                } else {
                    // Unknown escape: copy "#(...)" through untouched.
                    out.append(raw, i, close + 1);
                    if (firstWarning == null) {
                        firstWarning = "Unrecognised M escape #(" + body + ")";
                    }
                }
                i = close + 1;
                continue;
            }

            out.append(c);
            i++;
        }
        return new Result(out.toString(), firstWarning);
    }

    /**
     * Expands the text between {@code #(} and {@code )}.
     *
     * @param body the trimmed escape body, e.g. {@code "cr,lf"} or {@code "000A"}
     * @return the characters the escape stands for, or {@code null} if any
     *         element of the comma-separated list is not a supported escape
     */
    private static String expandEscapeBody(String body) {
        if (body == null || body.isEmpty()) {
            return null;
        }
        String lower = body.toLowerCase(java.util.Locale.ROOT);

        StringBuilder sb = new StringBuilder();
        // Limit -1 keeps trailing empty elements, so "cr," is rejected rather
        // than being silently read as "cr".
        for (String part : lower.split(",", -1)) {
            String token = part.trim();
            switch (token) {
                case "lf":  sb.append('\n'); break;
                case "cr":  sb.append('\r'); break;
                case "tab": sb.append('\t'); break;
                case "#":   sb.append('#');  break;
                default:
                    if (!isUnicodeEscape(token)) {
                        return null;
                    }
                    // At most 8 hex digits always fits in a long, so this cannot throw.
                    long codePoint = Long.parseLong(token, 16);
                    if (codePoint > Character.MAX_CODE_POINT) {
                        // appendCodePoint would throw IllegalArgumentException here.
                        return null;
                    }
                    sb.appendCodePoint((int) codePoint);
            }
        }
        return sb.toString();
    }

    /**
     * Tells whether {@code s} consists of exactly 4 or exactly 8 hex digits.
     * Expects an already lower-cased string; upper-case digits are rejected.
     */
    private static boolean isUnicodeEscape(String s) {
        int n = s.length();
        if (n != SHORT_UNICODE_DIGITS && n != LONG_UNICODE_DIGITS) {
            return false;
        }
        for (int i = 0; i < n; i++) {
            char c = s.charAt(i);
            boolean digit = c >= '0' && c <= '9';
            boolean letter = c >= 'a' && c <= 'f';
            if (!digit && !letter) {
                return false;
            }
        }
        return true;
    }
}