/*
    This file is part of Msc-generator.
    Copyright (C) 2008-2023 Zoltan Turanyi
    Distributed under GNU Affero General Public License.

    Msc-generator is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    Msc-generator is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with Msc-generator.  If not, see <http://www.gnu.org/licenses/>.
*/
/** @file msc_parse_tools.cpp Utilities for parsing.
 * @ingroup libmscgen_files
 */

//We do not compile csh, so define the below macro as a no-op
#include "cgen_attribute.h"  //for case insensitive compares
#include "msc_parse_tools.h"

using namespace msc;

/** Converts 'emphasis' (a deprecated style name) to 'box' and emits warning if doing so.
 * We return 'box' or 'emptybox' if the input style name was 'emphasis' or
 * 'emptyemphasis' and generate a warning. Else we return `style`.*/
std::string_view msc::ConvertEmphasisToBox(std::string_view style, const FileLineCol &loc, MscChart &msc)
{
    if (style != "emphasis" && style != "emptyemphasis")
        return style;
    const std::string_view newname = style == "emphasis" ? "box" : "emptybox";
    msc.Error.Warning(loc, StrCat("Stylename '", style, "' is deprecated, using ", newname, " instead."));
    return newname;
}

/** Beautifies an error message generated by yacc.
 * The TOK_XXX names are substituted for more understandable values
 * that make more sense to the user.
 * @returns the beautified error message and (optionally) an
 * explanatory 'once' message.
 * @param [in] msg The message we need to beautify.
 * @param [in] last_token The source text of the token causing the error. */
std::pair<std::string, std::string>
msc::BeautifySyntaxError(std::string msg, std::string_view last_token) {
    static const std::pair<std::string_view, std::string_view> tokens[] = {
    {"unexpected", "Unexpected"}, {"TOK_REL_SOLID_TO", "'->'"}, {"TOK_REL_SOLID_FROM", "'<-'"},
    {"TOK_REL_SOLID_BIDIR", "'<->'"}, {"TOK_REL_DOUBLE_TO", "'=>'"}, {"TOK_REL_DOUBLE_FROM", "'<='"},
    {"TOK_REL_DOUBLE_BIDIR", "'<=>'"}, {"TOK_REL_DASHED_TO", "'>>'"}, {"TOK_REL_DASHED_FROM", "'<<'"},
    {"TOK_REL_DASHED_BIDIR", "'<<>>'"}, {"TOK_REL_DOTTED_TO", "'>'"}, {"TOK_REL_DOTTED_FROM", "'<'"},
    {"TOK_REL_DOTTED_BIDIR", "'<>'"}, {"TOK_EMPH", "'..', '--', '=='"}, {"TOK_SPECIAL_ARC", "'...', '---'"},
    {"syntax error, ", ""}, {", expecting $end", ""}, {"$end", "end of input"},
    {"TOK_OCBRACKET", "'{'"}, {"TOK_CCBRACKET", "'}'"}, {"TOK_OSBRACKET", "'['"},
    {"TOK_CSBRACKET", "']'"}, {"TOK_EQUAL", "'='"}, {"TOK_DASH", "'-'"},
    {"TOK_PLUS", "'+'"}, {"TOK_COMMA", "','"}, {"TOK_SEMICOLON", "';'"},
    {"TOK_MSC", "'msc'"}, {"TOK_BOOLEAN", "'yes', 'no'"}, {"TOK_COMMAND_HEADING", "'heading'"},
    {"TOK_COMMAND_NUDGE", "'nudge'"}, {"TOK_COMMAND_NEWPAGE", "'newpage'"}, {"TOK_COMMAND_DEFCOLOR", "'defcolor'"},
    {"TOK_COMMAND_DEFSTYLE", "'defstyle'"}, {"TOK_COMMAND_DEFDESIGN", "'defdesign'"}, {"TOK_COMMAND_BIG", "'block'"},
    {"TOK_COMMAND_PIPE", "'pipe'"}, {"TOK_COMMAND_MARK", "'mark'"}, {"TOK_COMMAND_PARALLEL", "'parallel'"},
    {"TOK_VERTICAL", "'vertical'"}, {"TOK_AT", "'at'"}, {"TOK_AT_POS", "'left', 'right' or 'center'"},
    {"TOK_SHOW", "'show'"}, {"TOK_HIDE", "'hide'"}, {"TOK_BYE", "'bye'"},
    {"TOK_COMMAND_VSPACE", "'vspace'"}, {"TOK_COMMAND_HSPACE", "'hspace'"}, {"TOK_COMMAND_SYMBOL", "'symbol'"},
    {"TOK_COMMAND_NOTE", "'note'"}, {"TOK_COMMAND_COMMENT", "'comment'"}, {"TOK_COLON_STRING", "':'"},  //just say colon to the user
    {"TOK_COLON_QUOTED_STRING", "':'"},  //just say colon to the user
    {"TOK_NUMBER", "number"}, {"TOK_STRING", "string"}, {"TOK_EStyleType::NAME", "style name"},
    {"TOK_QSTRING", "quoted string"}};

    const size_t tokArrayLen = sizeof(tokens) / sizeof(std::pair<std::string_view, std::string_view>);
    string once_msg;

    //replace tokens in string. We assume
    //-each toke appears only once
    //-replaced strings will not be mistaken for a a token
    for (unsigned i=0; i<tokArrayLen; i++) {
        string::size_type pos = 0;
        //Search for the current token
        pos = msg.find(tokens[i].first, pos);
        //continue if not found
        if (pos == string::npos) continue;
        //if msg continues with an uppercase letter or _ we are not matching
        char next = msg.c_str()[pos+tokens[i].first.length()];
        if ((next<='Z' && next>='A') || next == '_') continue;

        //Ok, token found, create substitution
        string ins(tokens[i].second);
        //special comment for unexpected symbols
        //special handling for numbers and strings
        if (i>tokArrayLen-5) {
            string::size_type exppos = msg.find("expecting");
            //If we replace what was unexpected, use actual token text
            if (pos < exppos) {
                if (i==tokArrayLen-1)
                    ins.append(": ").append(last_token);
                else
                    ins.append(": '").append(last_token).push_back('\'');
                if (i==tokArrayLen-2) {
                    string hint(last_token);
                    string::size_type pos2 = hint.find_first_not_of("abcdefghijklmnopqrstuvwxyz");
                    hint.insert(pos2," ");
                    once_msg = "Try splitting it with a space: '"+hint+"'.";
                }
            } else {
                ins = "a <" + ins + ">";
            }
        }
        //replace token
        msg.replace(pos, tokens[i].first.length(), ins);
    }
    string::size_type pos = msg.rfind("', '");
    if (pos != string::npos) {
        msg.erase(pos+1, 1);
        msg.insert(pos+1, " or");
    }
    msg.append(".");
    return {std::move(msg), std::move(once_msg)};
};


/** Preprocess multiline quoted strings.
 * We do all the following:
 * - skip the heading and trailing quotation mark and the whitespace inside them;
 * - trim the leading and trailing spaces and tabs of each line;
 * - replace any internal CR, LF or CRLF to a soft newline escape
 *   ("\\" followed by ESCAPE_STRING_SOFT_NEWLINE). An empty first line
 *   produces no such escape; any other empty line becomes a single space.
 *   If the text so far ends in an odd number of backslashes, one more is
 *   added so that the backslash of the escape is not consumed by them;
 * - insert a location escape (what FileLineCol::Print() returns) before the
 *   content of each line, so that if we generate an error to any escapes
 *   thereafter those will have the right location in the input file.
 *   The column counts the whitespace we have skipped.
 *
 * Malformed input (e.g., a missing closing quotation mark or an empty `s`)
 * is never indexed out of bounds.
 * @param [in] s The text of the token. It is assumed to start with the opening
 *               and end with the closing quotation mark.
 * @param [in] loc The location of the opening quotation mark. It is taken by
 *                 value and only used as a running position: nothing is
 *                 reported back to the caller through it.
 * @returns the processed text in a newly created string.*/
std::string msc::process_multiline_qstring(std::string_view s, FileLineCol loc)
{
    std::string ret;
    const int len = (int)s.size();
    const size_t original_line = loc.line;
    int old_pos = 1; //actually s begins with the quotation mark, we skip that
    loc.col += 1;

    while (true) {
        //the current line begins at old_pos
        int start_line = old_pos;
        int end_line = old_pos;
        //find the end of the line (LF, CR or the end of the text)
        while (end_line < len && s[end_line] != 10 && s[end_line] != 13)
            end_line++;
        //store the ending char to later see how to proceed (0 at the end of the text)
        const char ending = end_line < len ? s[end_line] : 0;
        //skip over the heading whitespace at the beginning of the line,
        //but never past the end of the line
        while (start_line < end_line && (s[start_line]==' ' || s[start_line]=='\t'))
            start_line++;
        //find the last non-whitespace in the line
        int term_line = end_line-1;
        //if we are at the very end, ignore trailing quotation mark
        if (ending==0) term_line--;
        //term_line can be smaller than start_line here if line is empty
        while (term_line>=start_line && (s[term_line]==' ' || s[term_line]=='\t'))
            term_line--;
        //Generate a location escape and append
        //We have only stepped over whitespace, no UTF-8 characters: we can just add "start_line-old_pos"
        ret += FileLineCol(loc, loc.line, loc.col + (start_line-old_pos)).Print();
        //now append the line (without the whitespaces), if not empty
        if (term_line >= start_line)
            ret.append(s.substr(start_line, term_line+1-start_line));
        //if ending was a null we are done with processing all lines
        //(no need to advance 'loc': it is our private copy)
        if (!ending)
            break;
        //append the soft newline escape for msc-generator,
        //but only if not an empty first line
        if (start_line<=term_line || original_line != loc.line) {
            //add a space for empty lines
            if (start_line>term_line)
                ret += ' ';
            //count how many \s we have at the end of the text so far
            size_t backslashes = 0;
            while (backslashes < ret.length() && ret[ret.length()-1-backslashes]=='\\')
                backslashes++;
            //if odd, we insert an extra '\' to keep lines ending with \s
            if (backslashes%2==1) ret += '\\';
            ret += "\\" ESCAPE_STRING_SOFT_NEWLINE;
        }
        //Check for a two character CRLF, skip over the LF, too
        if (ending == 13 && end_line+1 < len && s[end_line+1] == 10) end_line++;
        old_pos = end_line+1;

        //Now advance loc
        loc.line++;
        loc.col = 1;
    }
    return ret;
}
