/* $Id: Suspect_rule.cpp 655340 2022-09-01 12:56:53Z fukanchi $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author:  J. Chen
 *
 * File Description:
 *   suspect product name check against rule
 *
 * Remark:
 *   This code was originally generated by application DATATOOL
 *   using the following specifications:
 *   'macro.asn'.
 */


#include <ncbi_pch.hpp>
#include <objects/macro/Suspect_rule.hpp>
#include <util/compile_time.hpp>

BEGIN_NCBI_SCOPE
BEGIN_objects_SCOPE // namespace ncbi::objects::


// Tells whether a string constraint carries nothing to check.
//
// A null constraint is empty.  A constraint that requests any of the
// all-caps / all-lower / all-punctuation tests is never empty.  Otherwise
// the constraint is empty exactly when it has no (or blank) match text.
static bool IsStringConstraintEmpty(const CString_constraint* constraint)
{
    if (constraint == nullptr) {
        return true;
    }

    const bool has_class_test = constraint->GetIs_all_caps()
                             || constraint->GetIs_all_lower()
                             || constraint->GetIs_all_punct();
    if (has_class_test) {
        return false;
    }

    return !constraint->CanGetMatch_text() || constraint->GetMatch_text().empty();
}


// Returns a copy of str with its leading "weasel" words removed.
//
// str is split on single spaces.  Words are dropped from the front for as
// long as they match one of the weasel words (case-insensitively, whole
// token).  The last word is never examined, so it is always kept.  The
// remaining words are re-joined with single spaces.
//
// An empty input yields an empty string; a single-word input is returned
// unchanged.
static const string SkipWeasel(const string& str)
{
    // Every entry needs its own trailing comma: adjacent string literals are
    // concatenated by the compiler, which would silently merge two words
    // into one and leave the last array slot empty.
    static constexpr std::array<string_view, 10> weasels = {
        "candidate",
        "hypothetical",
        "novel",
        "possible",
        "potential",
        "predicted",
        "probable",
        "putative",
        "uncharacterized",
        "unique",
    };

    if (str.empty()) {
        return kEmptyStr;
    }

    vector <string> arr;
    NStr::Split(str, " ", arr, 0);
    if (arr.size() <= 1) {
        // Nothing to strip (and guards the size()-1 arithmetic below).
        return str;
    }

    const size_t last = arr.size() - 1;

    // Advance past leading weasel words, never consuming the final word.
    size_t i = 0;
    for ( ; i < last; ++i) {
        const string& word = arr[i];
        bool is_weasel = false;
        for (const auto& weasel : weasels) {
            if (word.size() == weasel.size()
                && NStr::EqualNocase(word, 0, word.size(), weasel)) {
                is_weasel = true;
                break;
            }
        }
        if (!is_weasel) {
            break;
        }
    }

    // Re-assemble what is left.
    string ret_str;
    for ( ; i < last; ++i) {
        ret_str += arr[i];
        ret_str += ' ';
    }
    ret_str += arr[last];
    return ret_str;
}


bool IsAllCaps(const string& str)
{
    string up_str = str;
    //if (up_str.find_first_not_of(alpha_str) != string::npos) return false;
    up_str = NStr::ToUpper(up_str);
    if (up_str == str) return true;
    else return false;
}


bool IsAllLowerCase(const string& str)
{
    string low_str = str;
    //if (low_str.find_first_not_of(alpha_str) != string::npos) return false;
    low_str = NStr::ToLower(low_str);
    if (low_str == str) return true;
    else return false;
}


// True when every character of str is a punctuation character according to
// ispunct().  An empty string yields true.
bool IsAllPunctuation(const string& str)
{
    for (const char ch : str) {
        // <cctype> classifiers require a value representable as unsigned
        // char; passing a negative plain char is undefined behaviour.
        if (!ispunct(static_cast<unsigned char>(ch))) {
            return false;
        }
    }
    return true;
}


static bool CaseNCompareEqual(string str1, string str2, unsigned len1, bool case_sensitive)
{
    if (!len1) {
        return false;
    }
    string comp_str1, comp_str2;
    comp_str1 = str1.substr(0, len1);
    comp_str2 = str2.substr(0, len1);
    if (case_sensitive) {
        return (comp_str1 == comp_str2);
    }
    else {
        return (NStr::EqualNocase(comp_str1, 0, len1, comp_str2));
    }
};


// Matches str_match against the beginning of str under the rules of str_cons.
//
// Both strings are walked from position 0.  At each step the function
//   1. tries every "ignore word" of the constraint: when the word occurs at
//      the current position of str_match (and, if it has synonyms, one of
//      them occurs at the current position of str) the remainders are
//      compared by a recursive call;
//   2. otherwise compares one character (honouring the constraint's case
//      sensitivity), or skips a space / punctuation character on either side
//      when the constraint says to ignore them.
// After the walk, trailing ignorable characters are skipped and the match is
// rejected when str_match was not fully consumed, when the location is
// "ends"/"equals" and str has text left, or when a whole-word match is
// requested and the match is not delimited as a word.
//
// @param str        text being searched
// @param str_match  pattern text
// @param str_cons   constraint supplying the options; null means "matches"
// @param is_start   whether position 0 of the strings counts as a word start
// @param ini_target_match_len
//                   optional; on a successful match it is incremented by the
//                   number of characters of str consumed by this call
// @return true when the pattern matches
static bool AdvancedStringCompare(const string& str, const string& str_match, const CString_constraint* str_cons, bool is_start, unsigned* ini_target_match_len = 0)
{
    if (!str_cons) {
        return true;
    }

    size_t pos_match = 0, pos_str = 0;
    bool wd_case, whole_wd, word_start_m, word_start_s;
    bool match = true, recursive_match = false;
    unsigned len_m = str_match.size(), len_s = str.size(), target_match_len=0;
    string cp_m, cp_s;
    bool ig_space = str_cons->GetIgnore_space();
    bool ig_punct = str_cons->GetIgnore_punct();
    bool str_case = str_cons->GetCase_sensitive();
    EString_location loc = str_cons->GetMatch_location();
    unsigned len1, len2;
    char ch1, ch2;
    vector <string> word_word;
    bool has_word = !(str_cons->GetIgnore_words().Get().empty());
    string strtmp;
    // Cache the text of every ignore-word, in list order, so the main loop
    // can index it in step with the list iteration.
    ITERATE (list <CRef <CWord_substitution> >, it, str_cons->GetIgnore_words().Get()) {
        strtmp = ((*it)->CanGetWord()) ? (*it)->GetWord() : kEmptyStr;
        word_word.push_back(strtmp);
    }

    unsigned i;
    while (match && pos_match < len_m && pos_str < len_s && !recursive_match) {
        cp_m = str_match.substr(pos_match);
        cp_s = str.substr(pos_str);

        /* first, check to see if we're skipping synonyms */
        i=0;
        if (has_word) {
            ITERATE (list <CRef <CWord_substitution> >, it, str_cons->GetIgnore_words().Get()) {
                wd_case = (*it)->GetCase_sensitive();
                whole_wd = (*it)->GetWhole_word();
                len1 = word_word[i].size();
                //text match
                if (CaseNCompareEqual(word_word[i++], cp_m, len1, wd_case)) {
                    // At position 0 there is no preceding character to look
                    // at (indexing [pos - 1] would read out of bounds), so
                    // the caller-supplied is_start decides.
                    word_start_m = pos_match ? !isalpha(str_match[pos_match - 1]) : is_start;
                    ch1 = (cp_m.size() <= len1) ? ' ' : cp_m[len1];

                    // whole word match
                    if (!whole_wd || (!isalpha(ch1) && word_start_m)) {
                        if ( !(*it)->CanGetSynonyms() || (*it)->GetSynonyms().empty()) {
                            // No synonyms: the word is simply dropped from
                            // the pattern and the rest is compared.
                            if (AdvancedStringCompare(cp_s, cp_m.substr(len1), str_cons, word_start_m, &target_match_len)) {
                                recursive_match = true;
                                break;
                            }
                        }
                        else {
                            ITERATE (list <string>, sit, (*it)->GetSynonyms()) {
                                len2 = (*sit).size();

                                // text match
                                if (CaseNCompareEqual(*sit, cp_s, len2, wd_case)) {
                                    // Same position-0 rule as for word_start_m.
                                    word_start_s = pos_str ? !isalpha(str[pos_str - 1]) : is_start;
                                    ch2 = (cp_s.size() <= len2) ? ' ' : cp_s[len2];
                                    // whole word match
                                    if (!whole_wd || (!isalpha(ch2) && word_start_s)) {
                                        if (AdvancedStringCompare(cp_s.substr(len2), cp_m.substr(len1), str_cons, word_start_m && word_start_s, &target_match_len)) {
                                            recursive_match = true;
                                            break;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        if (!recursive_match) {
            if (CaseNCompareEqual(cp_m, cp_s, 1, str_case)) {
                pos_match++;
                pos_str++;
                target_match_len++;
            }
            else if ( ig_space && (isspace(cp_m[0]) || isspace(cp_s[0])) ) {
                if (isspace(cp_m[0])) {
                    pos_match++;
                }
                if (isspace(cp_s[0])) {
                    pos_str++;
                    target_match_len++;
                }
            }
            else if (ig_punct && ( ispunct(cp_m[0]) || ispunct(cp_s[0]) )) {
                if (ispunct(cp_m[0])) {
                    pos_match++;
                }
                if (ispunct(cp_s[0])) {
                    pos_str++;
                    target_match_len++;
                }
            }
            else {
                match = false;
            }
        }
    }

    if (match && !recursive_match) {
        // Skip ignorable characters left over at the end of either string.
        while (pos_str < str.size() && ((ig_space && isspace(str[pos_str])) || (ig_punct && ispunct(str[pos_str])))) {
            pos_str++;
            target_match_len++;
        }
        while (pos_match < str_match.size() && ((ig_space && isspace(str_match[pos_match])) || (ig_punct && ispunct(str_match[pos_match])))) {
            pos_match++;
        }

        if (pos_match < str_match.size()) {
            // pattern not fully consumed
            match = false;
        }
        else if ((loc == eString_location_ends || loc == eString_location_equals) && pos_str < len_s) {
            // "ends"/"equals" must consume all of str
            match = false;
        }
        else if (str_cons->GetWhole_word() && (!is_start || (pos_str < len_s && isalpha (str[pos_str])))) {
            // whole-word match must begin at a word start and must not be
            // followed by a letter
            match = false;
        }
    }
    if (match && ini_target_match_len) {
        *ini_target_match_len += target_match_len;
    }
    return match;
}


static bool AdvancedStringMatch(const string& str, const CString_constraint* str_cons)
{
    if (!str_cons) {
        return true;
    }
    bool rval = false;
    string match_text = str_cons->CanGetMatch_text() ? str_cons->GetMatch_text() : kEmptyStr;

    if (AdvancedStringCompare(str, match_text, str_cons, true)) {
        return true;
    }
    else if (str_cons->GetMatch_location() == eString_location_starts || str_cons->GetMatch_location() == eString_location_equals) {
        return false;
    }
    else {
        size_t pos = 1;
        unsigned len = str.size();
        while (!rval && pos < len) {
            if (str_cons->GetWhole_word()) {
                while (pos < len && isalpha (str[pos-1])) pos++;
            }
            if (pos < len) {
                if (AdvancedStringCompare(str.substr(pos), match_text, str_cons, true)) rval = true;
                else pos++;
            }
        }
    }
    return rval;
}


// Tells whether ch may NOT sit directly next to a whole-word match.
//
// Letters, digits, '_' and '-' are disallowed; '/' is disallowed as well
// when disallow_slash is set.  Every other character is allowed.
static bool DisallowCharacter(const char ch, bool disallow_slash)
{
    // <cctype> classifiers require a value representable as unsigned char;
    // passing a negative plain char is undefined behaviour.
    const unsigned char uch = static_cast<unsigned char>(ch);
    if (isalpha(uch) || isdigit(uch) || ch == '_' || ch == '-') {
        return true;
    }
    return disallow_slash && ch == '/';
}


// Returns a copy of str without the characters the caller wants ignored.
//
// @param str          input text
// @param strip_space  drop characters for which isspace() is true
// @param strip_punct  drop characters for which ispunct() is true
// @return the remaining characters in their original order (empty for an
//         empty input)
static string StripUnimportantCharacters(const string& str, bool strip_space, bool strip_punct)
{
    string result;
    result.reserve(str.size());
    for (const char ch : str) {
        // <cctype> classifiers require a value representable as unsigned
        // char; passing a negative plain char is undefined behaviour.
        const unsigned char uch = static_cast<unsigned char>(ch);
        const bool drop = (strip_space && isspace(uch)) || (strip_punct && ispunct(uch));
        if (!drop) {
            result += ch;
        }
    }
    return result;
}


static bool IsWholeWordMatch(const string& start, const size_t& found, const unsigned& match_len, bool disallow_slash = false)
{
    bool rval = true;
    unsigned after_idx;

    if (!match_len) {
        rval = true;
    }
    else if (start.empty() || found == string::npos) {
        rval = false;
    }
    else {
        if (found) {
            if (DisallowCharacter(start[found-1], disallow_slash)) {
                return false;
            }
        }
        after_idx = found + match_len;
        if (after_idx < start.size() && DisallowCharacter(start[after_idx], disallow_slash)) {
            rval = false;
        }
    }
    return rval;
}


// Extracts the two end points of a "first-second" span around the hyphen at
// index `hyphen` of str.
//
// @param str     text containing the span
// @param hyphen  index of the '-' character; 0 is rejected
// @param first   receives the trimmed text taken from before the hyphen
// @param second  receives the trimmed text taken from after the hyphen
// @return true when both end points are non-empty and each ends in a digit;
//         otherwise false, and both outputs are set to empty strings.
static bool GetSpanFromHyphenInString(const string& str, const size_t& hyphen, string& first, string& second)
{
    if (!hyphen) {
        return false;
    }

    /* find range start */
    // Looks only at str[0, hyphen-1), i.e. the character immediately before
    // the hyphen is not part of the searched text.
    size_t cp = str.substr(0, hyphen-1).find_last_not_of(' ');
    if (cp != string::npos) {
        // NOTE(review): this picks the last character that is NOT one of
        // " ,;" in the text before cp; a search for the last delimiter
        // (find_last_of) may have been intended -- confirm.
        cp = str.substr(0, cp).find_last_not_of(" ,;");
    }
    if (cp == string::npos) {
        cp = 0;
    }

    // The start piece runs from cp up to (not including) the hyphen.
    unsigned len = hyphen - cp;
    first = str.substr(cp, len);
    NStr::TruncateSpacesInPlace(first);

    /* find range end */
    cp = str.find_first_not_of(' ', hyphen+1);
    if (cp != string::npos) {
        // NOTE(review): no start offset is passed here, so this search begins
        // at index 0 of str rather than after the hyphen -- confirm intent.
        cp = str.find_first_not_of(" ,;");
    }
    if (cp == string::npos) {
        cp = str.size() -1;
    }

    // When cp lies before the hyphen this subtraction wraps around to a very
    // large unsigned value; substr() below then takes everything after the
    // hyphen.
    len = cp - hyphen;
    if (!isspace (str[cp])) {
        len--;
    }
    second = str.substr(hyphen+1, len);
    NStr::TruncateSpacesInPlace(second);

    bool rval = true;
    if (first.empty() || second.empty()) {
        rval = false;
    }
    else if (!isdigit (first[first.size() - 1]) || !isdigit (second[second.size() - 1])) {
        /* if this is a span, then neither end point can end with anything other than a number */
        rval = false;
    }
    if (!rval) {
        // Do not leave partial results behind on failure.
        first = second = kEmptyStr;
    }
    return rval;
}


// True when str contains no character outside digit_str.
// An empty string therefore also yields true.
static bool StringIsPositiveAllDigits(const string& str)
{
    return str.find_first_not_of(digit_str) == string::npos;
}


static bool IsStringInSpan(const string& str, const string& first, const string& second)
{
    string new_first, new_second, new_str;
    if (str.empty()) {
        return false;
    }
    else if (str == first || str == second) {
        return true;
    }
    else if (first.empty() || second.empty()) {
        return false;
    }

    int str_num, first_num, second_num;
    str_num = first_num = second_num = 0;
    bool rval = false;
    size_t prefix_len;
    string comp_str1, comp_str2;
    if (StringIsPositiveAllDigits(first)) {
        if (StringIsPositiveAllDigits (str) && StringIsPositiveAllDigits (second)) {
            str_num = NStr::StringToUInt (str);
            first_num = NStr::StringToUInt (first);
            second_num = NStr::StringToUInt (second);
            if ((str_num > first_num && str_num < second_num) || (str_num > second_num && str_num < first_num)) {
                rval = true;
            }
        }
    }
    else if (StringIsPositiveAllDigits(second)) {
        prefix_len = first.find_first_of(digit_str) + 1;

        new_str = str.substr(prefix_len - 1);
        new_first = first.substr(prefix_len - 1);
        comp_str1 = str.substr(0, prefix_len);
        comp_str2 = first.substr(0, prefix_len);
        if (comp_str1 == comp_str2 && StringIsPositiveAllDigits (new_str) && StringIsPositiveAllDigits (new_first)) {
            first_num = NStr::StringToUInt(new_first);
            second_num = NStr::StringToUInt (second);
            str_num = NStr::StringToUInt (str);
            if ((str_num > first_num && str_num < second_num) || (str_num > second_num && str_num < first_num)) {
                rval = true;
            }
        }
    }
    else {
        /* determine length of prefix */
        prefix_len = 0;
        while (prefix_len < first.size() && prefix_len < second.size() && first[prefix_len] == second[prefix_len]) {
            prefix_len ++;
        }
        prefix_len ++;

        comp_str1 = str.substr(0, prefix_len);
        comp_str2 = first.substr(0, prefix_len);
        if (prefix_len <= first.size() && prefix_len <= second.size() && isdigit (first[prefix_len-1]) && isdigit (second[prefix_len-1]) && comp_str1 == comp_str2) {
            new_first = first.substr(prefix_len);
            new_second = second.substr(prefix_len);
            new_str = str.substr(prefix_len);
            if (StringIsPositiveAllDigits (new_first) && StringIsPositiveAllDigits (new_second) && StringIsPositiveAllDigits (new_str)) {
                first_num = NStr::StringToUInt(new_first);
                second_num = NStr::StringToUInt (new_second);
                str_num = NStr::StringToUInt (new_str);
                if ((str_num > first_num && str_num < second_num) || (str_num > second_num && str_num < first_num)) {
                    rval = true;
                }
            }
            else {
                /* determine whether there is a suffix */
                size_t idx1, idx2, idx_str;
                string suf1, suf2, sub_str;
                idx1 = first.find_first_not_of(digit_str);
                suf1 = first.substr(prefix_len + idx1);
                idx2 = second.find_first_not_of(digit_str);
                suf2 = second.substr(prefix_len + idx2);
                idx_str = str.find_first_not_of(digit_str);
                sub_str = str.substr(prefix_len + idx_str);
                if (suf1 == suf2 && suf1 == sub_str) {
                    /* suffixes match */
                    first_num = NStr::StringToUInt(CTempString(first).substr(prefix_len, idx1));
                    second_num = NStr::StringToUInt(CTempString(second).substr(prefix_len, idx2));
                    str_num = NStr::StringToUInt(CTempString(str).substr(prefix_len, idx_str));
                    if ((str_num > first_num && str_num < second_num) || (str_num > second_num && str_num < first_num)) {
                        rval = true;
                    }
                }
            }
        }
    }
    return rval;
}


// Tells whether str falls inside any "first-second" span written in list.
//
// Every '-' in list is treated as a potential span separator: the end points
// around it are extracted with GetSpanFromHyphenInString() and str is tested
// against them with IsStringInSpan().  A hyphen at index 0 has nothing in
// front of it and is skipped.
//
// Returns false when either argument is empty or when str consists solely of
// characters from alpha_str.
//
// The scan always advances with an absolute index into list.  The previous
// code searched list.substr(1) for the hyphen following a leading one and
// used the substring-relative result as an index into list, which pointed at
// the wrong character and never terminated for a list starting with "--".
static bool IsStringInSpanInList(const string& str, const string& list)
{
    if (list.empty() || str.empty()) {
        return false;
    }
    if (str.find_first_not_of(alpha_str) == string::npos) {
        return false;
    }

    /* find ranges */
    string range_start, range_end;
    for (size_t hyphen = list.find('-');
         hyphen != string::npos;
         hyphen = list.find('-', hyphen + 1)) {
        if (hyphen == 0) {
            continue;
        }
        if (GetSpanFromHyphenInString(list, hyphen, range_start, range_end)
            && IsStringInSpan(str, range_start, range_end)) {
            return true;
        }
    }
    return false;
}


// Evaluates one string against one string constraint (the constraint's
// not-present flag is NOT applied here).
//
// Order of evaluation:
//   * a null constraint matches; an empty str does not;
//   * an empty constraint (see IsStringConstraintEmpty) matches;
//   * leading weasel words are removed from str when the constraint asks
//     for it;
//   * the all-caps / all-lower / all-punctuation tests are applied;
//   * with no match text the result is true;
//   * when the location is not "inlist" and ignore-words can be read, the
//     match is delegated to AdvancedStringMatch();
//   * otherwise a plain (optionally case-insensitive) search is done
//     according to the match location, after stripping spaces/punctuation
//     when the constraint ignores them (not for "inlist").
//
// Fixes relative to the previous implementation:
//   * "ends" with a pattern that became empty assigned `false` (i.e. 0) to
//     the search position, which never ended the loop; it now stops the
//     search;
//   * the "inlist" retry loop searched a substring and used the
//     substring-relative result as an absolute position (which could cycle
//     forever), and checked word boundaries with str.size() instead of the
//     length of the text actually searched for.
static bool DoesSingleStringMatchConstraint(const string& str, const CString_constraint* str_cons)
{
    bool rval = false;
    string tmp_match;
    CString_constraint tmp_cons;

    string this_str(str);
    if (!str_cons) {
        return true;
    }
    if (str.empty()) {
        return false;
    }
    if (IsStringConstraintEmpty(str_cons)) {
        rval = true;
    }
    else {
        if (str_cons->GetIgnore_weasel()) {
            this_str = SkipWeasel(str);
        }
        if (str_cons->GetIs_all_caps() && !IsAllCaps(this_str)) {
            rval = false;
        }
        else if (str_cons->GetIs_all_lower() && !IsAllLowerCase(this_str)) {
            rval = false;
        }
        else if (str_cons->GetIs_all_punct() && !IsAllPunctuation(this_str)) {
            rval = false;
        }
        else if (!str_cons->CanGetMatch_text() ||str_cons->GetMatch_text().empty()) {
            rval = true;
        }
        else {
            // Work on a private copy so the caller's constraint is untouched.
            tmp_cons.Assign(*str_cons);
            tmp_match = tmp_cons.CanGetMatch_text() ? tmp_cons.GetMatch_text() : kEmptyStr;
            if (str_cons->GetIgnore_weasel()) {
                tmp_cons.SetMatch_text(SkipWeasel(str_cons->GetMatch_text()));
            }
            if ((str_cons->GetMatch_location() != eString_location_inlist) && str_cons->CanGetIgnore_words()) {
                tmp_cons.SetMatch_text(tmp_match);
                rval = AdvancedStringMatch(str, &tmp_cons);
            }
            else {
                string search(this_str), pattern(tmp_cons.GetMatch_text());
                bool ig_space = str_cons->GetIgnore_space();
                bool ig_punct = str_cons->GetIgnore_punct();
                if ( (str_cons->GetMatch_location() != eString_location_inlist) && (ig_space || ig_punct)) {
                    search = StripUnimportantCharacters(search, ig_space, ig_punct);
                    pattern = StripUnimportantCharacters(pattern, ig_space, ig_punct);
                }

                size_t pFound = str_cons->GetCase_sensitive() ?  search.find(pattern) : NStr::FindNoCase(search, pattern);
                switch (str_cons->GetMatch_location()) {
                    case eString_location_contains:
                        if (string::npos == pFound) {
                            rval = false;
                        }
                        else if (str_cons->GetWhole_word()) {
                            // Try every occurrence until one is a whole word.
                            rval = IsWholeWordMatch (search, pFound, pattern.size());
                            while (!rval && pFound != string::npos) {
                                pFound = (str_cons->GetCase_sensitive()) ?
                                search.find(pattern, pFound+1):
                                NStr::FindNoCase(search, pattern, pFound+1);
                                rval = (pFound != string::npos)?
                                IsWholeWordMatch (search, pFound, pattern.size()):
                                false;
                            }
                        }
                        else  {
                            rval = true;
                        }
                        break;
                    case eString_location_starts:
                        if (!pFound) {
                            rval = (str_cons->GetWhole_word()) ? IsWholeWordMatch (search, pFound, pattern.size()) : true;
                        }
                        break;
                    case eString_location_ends:
                        while (pFound != string::npos && !rval) {
                            if ((pFound + pattern.size()) == search.size()) {
                                rval = str_cons->GetWhole_word() ?  IsWholeWordMatch (search, pFound, pattern.size()): true;
                                /* stop the search, we're at the end of the string */
                                pFound = string::npos;
                            }
                            else {
                                if (pattern.empty()) {
                                    /* nothing left to look for: stop the search */
                                    pFound = string::npos;
                                }
                                else {
                                    pFound = str_cons->GetCase_sensitive() ? search.find(pattern, pFound+1) : NStr::FindNoCase(search, pattern, pFound+1);
                                }
                            }
                        }
                        break;
                    case eString_location_equals:
                        if (str_cons->GetCase_sensitive() && search==pattern) {
                            rval= true;
                        }
                        else if (!str_cons->GetCase_sensitive() && NStr::EqualNocase(search, pattern)) {
                            rval = true;
                        }
                        break;
                    case eString_location_inlist:
                        // Here the roles are swapped: the string is looked up
                        // inside the pattern, which holds the list.
                        pFound = (str_cons->GetCase_sensitive())?
                        pattern.find(search) : NStr::FindNoCase(pattern, search);
                        if (pFound == string::npos) {
                            rval = false;
                        }
                        else {
                            rval = IsWholeWordMatch(pattern, pFound, search.size(), true);
                            while (!rval && pFound != string::npos) {
                                // Continue after the previous hit, keeping
                                // pFound an absolute index into pattern.
                                pFound = (str_cons->GetCase_sensitive()) ?
                                pattern.find(search, pFound + 1) :
                                NStr::FindNoCase(pattern, search, pFound + 1);
                                if (pFound != string::npos) {
                                    rval = IsWholeWordMatch(pattern, pFound, search.size(), true);
                                }
                            }
                        }
                        if (!rval) {
                            /* look for spans */
                            rval = IsStringInSpanInList (search, pattern);
                        }
                        break;
                    default: break;
                }
            }
        }
    }
    return rval;
}


bool CSuspect_rule::StringMatchesSuspectProductRule(const CMatchString& str) const
{
    // CSearch_func: only about string
    const CSearch_func& func = GetFind();
    if (func.Empty() || !func.Match(str)) {
        return false;
    }
    if (CanGetExcept()) {
        const CSearch_func& exc_func = GetExcept();
        if (!exc_func.Empty() && exc_func.Match(str)) {
            return false;
        }
    }
    if (CanGetFeat_constraint()) {
        const CConstraint_choice_set& conset = GetFeat_constraint();
        for (auto& it: conset.Get()) {
            if (it->Which() != CConstraint_choice::e_String) {
                cerr << "Bad suspect rule constraint!\n";
                continue;
            }
            const CString_constraint& constr = it->GetString();
            bool b = DoesSingleStringMatchConstraint (str, &constr);
            if (constr.GetNot_present()) {
                b = !b;
            }
            if (!b) {
                return false;
            }
        }
    }
    return true;
};


// Convenience overload: uses val both as the text to match and as the string
// that receives the replacement.  Returns whatever the two-argument overload
// returns.
bool CSuspect_rule::ApplyToString(string& val) const
{
    const CMatchString match_text(val);
    return ApplyToString(val, match_text);
}

/// Apply this rule's replacement to `result` when the rule has a replacement
/// and `str` matches the rule.
///
/// @param result  String handed to the replace rule for modification.
/// @param str     Text that is tested against the rule.
/// @return        false when there is no replacement or the rule does not
///                match; otherwise the value returned by the replace rule.
bool CSuspect_rule::ApplyToString(string& result, const CMatchString& str) const
{
    const bool applicable = IsSetReplace() && StringMatchesSuspectProductRule(str);
    if (!applicable) {
        return false;
    }

    // Pass the "find" string constraint along when there is one; otherwise
    // the replace rule receives a null reference.
    CConstRef<CString_constraint> find_constraint;
    if (IsSetFind()) {
        const CSearch_func& find_func = GetFind();
        if (find_func.IsString_constraint()) {
            find_constraint.Reset(&find_func.GetString_constraint());
        }
    }
    return GetReplace().ApplyToString(result, str, find_constraint);
}


/// Produce a human-readable description of a replace rule.
///
/// @param repl  The replace rule to describe.
/// @return      Text describing the replacement function (simple replace or
///              haem replace; "Unknown replacement function" for anything
///              else), followed by ", move original to note" when that flag
///              is set.
string CSuspect_rule::SummarizeReplaceRule(const CReplace_rule& repl) const
{
    const CReplace_func& func = repl.GetReplace_func();
    string summary;

    switch (func.Which()) {
        case CReplace_func::e_Simple_replace:
        {
            const CSimple_replace& simple = func.GetSimple_replace();
            summary = "replace ";
            if (simple.GetWhole_string()) {
                summary += "entire name with ";
            }
            else {
                summary += "with ";
            }
            // An unset replacement text is shown as an empty quoted string.
            if (simple.CanGetReplace()) {
                summary += "'" + simple.GetReplace() + "'";
            }
            else {
                summary += "''";
            }
            if (simple.GetWeasel_to_putative()) {
                summary += ", retain and normalize 'putative' synonym";
            }
            break;
        }
        case CReplace_func::e_Haem_replace:
            summary = "replace '" + func.GetHaem_replace() + "' with 'heme' if whole word, 'hem' otherwise";
            break;
        default:
            summary = "Unknown replacement function";
            break;
    }

    if (repl.GetMove_to_note()) {
        summary += ", move original to note";
    }
    return summary;
}


/// Produce a human-readable description of a string constraint.
///
/// The result has the form
///   <location phrase> '<match text>' [(<options>)][, all letters are ...]
/// where <options> is a comma-separated list of the enabled matching flags
/// followed by the word substitutions ("ignore words") of the constraint.
///
/// The word-substitution text used to be assembled and then discarded, so
/// summaries never mentioned the substitutions; it is now included at the end
/// of the parenthesised option list.
///
/// @param cons  The constraint to describe.
/// @return      The description, or an empty string when the constraint has
///              no match text available.
string CSuspect_rule::SummarizeStringConstraint(const CString_constraint& cons) const
{
    if (!cons.CanGetMatch_text()) {
        return kEmptyStr;
    }

    // Location phrase, negated when the constraint is marked not-present.
    const bool negated = cons.GetNot_present();
    string loc_word;
    switch (cons.GetMatch_location()) {
        case eString_location_contains:
            loc_word = negated ? "does not contain" : "contains";
            break;
        case eString_location_equals:
            loc_word = negated ? "does not equal" : "equals";
            break;
        case eString_location_starts:
            loc_word = negated ? "does not start with" : "starts with";
            break;
        case eString_location_ends:
            loc_word = negated ? "does not end with" : "ends with";
            break;
        case eString_location_inlist:
            loc_word = negated ? "is not one of" : "is one of";
            break;
        default:
            // Unrecognized location: leave the phrase empty.
            break;
    }

    // Describe each word substitution that actually has synonyms.
    string sub_words;
    if (cons.CanGetIgnore_words()) {
        for (const auto& subst : cons.GetIgnore_words().Get()) {
            if (!subst->CanGetSynonyms() || subst->GetSynonyms().empty()) {
                continue;
            }
            const CWord_substitution::TSynonyms& synonyms = subst->GetSynonyms();

            // Quote the synonyms and join them as "'a', 'b' and 'c'".
            string syns;
            for (CWord_substitution::TSynonyms::const_iterator sn = synonyms.begin();
                 sn != synonyms.end(); ++sn) {
                if (!syns.empty()) {
                    CWord_substitution::TSynonyms::const_iterator following = sn;
                    ++following;
                    syns += (following == synonyms.end()) ? " and " : ", ";
                }
                syns += "'" + *sn + "'";
            }

            if (!sub_words.empty()) {
                sub_words += ", ";
            }
            sub_words += "allow '";
            if (subst->CanGetWord()) {
                sub_words += subst->GetWord();
            }
            sub_words += "' to be replaced by " + syns;
            if (subst->GetCase_sensitive()) {
                sub_words += ", case-sensitive";
            }
            if (subst->GetWhole_word()) {
                sub_words += ", whole word";
            }
        }
    }

    // Comma-separated list of enabled options.
    string params;
    const auto add_param = [&params](const string& text) {
        if (!params.empty()) {
            params += ", ";
        }
        params += text;
    };
    if (cons.GetCase_sensitive()) {
        add_param("case-sensitive");
    }
    if (cons.GetWhole_word()) {
        add_param("whole word");
    }
    if (cons.GetIgnore_space()) {
        add_param("ignore spaces");
    }
    if (cons.GetIgnore_punct()) {
        add_param("ignore punctuation");
    }
    if (cons.GetIgnore_weasel()) {
        add_param("ignore 'putative' synonyms");
    }
    if (!sub_words.empty()) {
        add_param(sub_words);
    }

    string str = loc_word + " '" + cons.GetMatch_text() + "'";
    if (!params.empty()) {
        str += " (" + params + ")";
    }
    if (cons.GetIs_all_caps()) {
        str += ", all letters are uppercase";
    }
    if (cons.GetIs_all_lower()) {
        str += ", all letters are lowercase";
    }
    if (cons.GetIs_all_punct()) {
        str += ", all characters are punctuation";
    }
    return str;
}


/// Produce a human-readable description of a search function.
///
/// @param func  The search function to describe.
/// @return      A phrase for the selected variant, or
///              "Unknown search function" when the variant is not handled.
string CSuspect_rule::SummarizeSearchFunc(const CSearch_func& func) const
{
    string summary = "Unknown search function";

    switch (func.Which()) {
        case CSearch_func::e_String_constraint:
            // May be empty if the constraint has no match text.
            summary = SummarizeStringConstraint(func.GetString_constraint());
            break;
        case CSearch_func::e_Contains_plural:
            summary = "may contain plural";
            break;
        case CSearch_func::e_N_or_more_brackets_or_parentheses:
            summary = "contains " + NStr::IntToString(func.GetN_or_more_brackets_or_parentheses()) + " or more brackets or parentheses";
            break;
        case CSearch_func::e_Three_numbers:
            summary = "Three or more numbers together";
            break;
        case CSearch_func::e_Underscore:
            summary = "contains underscore";
            break;
        case CSearch_func::e_Prefix_and_numbers:
            summary = "is '" + func.GetPrefix_and_numbers() + "' followed by numbers";
            break;
        case CSearch_func::e_All_caps:
            summary = "is all capital letters";
            break;
        case CSearch_func::e_Unbalanced_paren:
            summary = "contains unbalanced brackets or parentheses";
            break;
        case CSearch_func::e_Too_long:
            summary = "is longer than " + NStr::IntToString(func.GetToo_long()) + " characters";
            break;
        case CSearch_func::e_Has_term:
            summary = "contains '" + func.GetHas_term() + "' at start or separated from other letters by numbers, spaces, or punctuation, but does not also contain 'domain'";
            break;
        default:
            break;
    }
    return summary;
}


/// Describe a distance-from-end position constraint.
///
/// @param pos  The position constraint.
/// @return     "exactly N", "no more than N" or "no less than N" depending on
///             the selected variant; an empty string for any other variant.
string CSuspect_rule::SummarizeEndDistance(const CLocation_pos_constraint& pos) const
{
    const auto variant = pos.Which();

    if (variant == CLocation_pos_constraint::e_Dist_from_end) {
        return "exactly " + NStr::IntToString(pos.GetDist_from_end());
    }
    if (variant == CLocation_pos_constraint::e_Max_dist_from_end) {
        return "no more than " + NStr::IntToString(pos.GetMax_dist_from_end());
    }
    if (variant == CLocation_pos_constraint::e_Min_dist_from_end) {
        return "no less than " + NStr::IntToString(pos.GetMin_dist_from_end());
    }
    return kEmptyStr;
}


/// Produce a human-readable description of a location constraint.
///
/// The description starts with "only objects" and is followed, in this order,
/// by the strand / sequence-type phrase, the partialness phrase, the interval
/// type phrase and the 5' and 3' end-distance phrases.
///
/// @param loc  The location constraint to describe.
/// @return     The description, or an empty string when none of the
///             individual phrases applies.
string CSuspect_rule::SummarizeLocationConstraint(const CLocation_constraint& loc) const
{
    // --- partialness of the two ends ---
    const EPartial_constraint end5 = loc.GetPartial5();
    const EPartial_constraint end3 = loc.GetPartial3();
    const bool any5 = (end5 == ePartial_constraint_either);
    const bool any3 = (end3 == ePartial_constraint_either);

    string partial;
    if (any5 != any3) {
        // Exactly one end is constrained.
        if (any5) {
            partial = (end3 == ePartial_constraint_partial) ? " that are 3' partial" : " that are 3' complete";
        }
        else {
            partial = (end5 == ePartial_constraint_partial) ? " that are 5' partial" : " that are 5' complete";
        }
    }
    else if (!any5) {
        // Both ends are constrained.
        const bool partial5  = (end5 == ePartial_constraint_partial);
        const bool partial3  = (end3 == ePartial_constraint_partial);
        const bool complete5 = (end5 == ePartial_constraint_complete);
        const bool complete3 = (end3 == ePartial_constraint_complete);
        if (partial5 && partial3) {
            partial = " that are partial on both ends";
        }
        else if (complete5 && complete3) {
            partial = " that are complete on both ends";
        }
        else if (complete5 && partial3) {
            partial = " that are 5' complete and 3' partial";
        }
        else if (partial5 && complete3) {
            partial = " that are 5' partial and 3' complete";
        }
    }

    // --- interval type ---
    string location_type;
    switch (loc.GetLocation_type()) {
        case eLocation_type_constraint_single_interval:
            location_type = " with single interval";
            break;
        case eLocation_type_constraint_joined:
            location_type = " with joined intervals";
            break;
        case eLocation_type_constraint_ordered:
            location_type = " with ordered intervals";
            break;
        default:
            break;
    }

    // --- distances from the ends ---
    string dist5;
    if (loc.CanGetEnd5()) {
        const string amount = SummarizeEndDistance(loc.GetEnd5());
        if (!amount.empty()) {
            dist5 = " with 5' end " + amount;
        }
    }
    string dist3;
    if (loc.CanGetEnd3()) {
        const string amount = SummarizeEndDistance(loc.GetEnd3());
        if (!amount.empty()) {
            dist3 = " with 3' end " + amount;
        }
    }

    // --- sequence type ---
    string seq_word;
    switch (loc.GetSeq_type()) {
        case eSeqtype_constraint_nuc:
            seq_word = "nucleotide sequences";
            break;
        case eSeqtype_constraint_prot:
            seq_word = "protein sequences";
            break;
        default:
            break;
    }

    // --- strand ---
    string strand;
    switch (loc.GetStrand()) {
        case eStrand_constraint_plus:
            strand = " on plus strands";
            break;
        case eStrand_constraint_minus:
            strand = " on minus strands";
            break;
        default:
            break;
    }

    const bool nothing_to_say = partial.empty() && location_type.empty()
                             && dist5.empty() && dist3.empty()
                             && seq_word.empty() && strand.empty();
    if (nothing_to_say) {
        return kEmptyStr;
    }

    string summary = "only objects";
    if (!strand.empty()) {
        summary += strand;
        if (!seq_word.empty()) {
            summary += " of " + seq_word;
        }
    }
    else if (!seq_word.empty()) {
        summary += " on " + seq_word;
    }
    summary += partial;
    summary += location_type;
    summary += dist5;
    summary += dist3;
    return summary;
}


/// Produce a label for a field type.
///
/// Only feature fields get a real label (the name of the feature type). The
/// other variants currently return the name of their enum tag as a
/// placeholder.
///
/// The feature-type lookup asks FindName() to tolerate unknown values, so an
/// unrecognized feature type yields "Unknown feature" rather than an
/// exception escaping from a summarizer.
///
/// @param vnp  The field type to describe.
/// @return     The label; "missing field" when nothing is selected;
///             "Invalid field type" when the variant is not handled.
string CSuspect_rule::SummarizeFieldType(const CField_type& vnp) const
{
    switch (vnp.Which()) {
        case CField_type::e_not_set:
            return "missing field";
        case CField_type::e_Source_qual:
            // TODO: placeholder; no source-qualifier summary is implemented.
            return "e_Source_qual";
        case CField_type::e_Feature_field:
        {
            const CFeature_field& ff = vnp.GetFeature_field();
            if (ff.GetField().Which() == CFeat_qual_choice::e_not_set) {
                return "missing field";
            }
            // allowBadValue = true: an unknown value gives an empty name.
            const string label = ENUM_METHOD_NAME(EMacro_feature_type)()->FindName(ff.GetType(), true);
            if (label.empty()) {
                return "Unknown feature";
            }
            return label;
        }
        case CField_type::e_Cds_gene_prot:
            // TODO: placeholder; no CDS-gene-prot field name lookup.
            return "e_Cds_gene_prot";
        case CField_type::e_Molinfo_field:
            // TODO: placeholder; no sequence-qualifier name lookup.
            return "e_Molinfo_field";
        case CField_type::e_Pub:
            // TODO: placeholder; no publication field label lookup.
            return "e_Pub";
        case CField_type::e_Rna_field:
            // TODO: placeholder; no RNA qualifier summary.
            return "e_Rna_field";
        case CField_type::e_Struc_comment_field:
            // TODO: placeholder; no structured-comment field summary.
            return "e_Struc_comment_field";
        case CField_type::e_Dblink:
            // TODO: placeholder; no DBLink name lookup.
            return "e_Dblink";
        case CField_type::e_Misc:
            // TODO: placeholder; no misc-field name lookup.
            return "e_Misc";
    }
    return "Invalid field type";
}


/// Describe a field constraint as "where <field label> <string constraint>".
///
/// @param field  The field constraint to describe.
/// @return       The description, or an empty string when either the field
///               label or the string-constraint summary is empty.
string CSuspect_rule::SummarizeFieldConstraint(const CField_constraint& field) const
{
    const string constraint_text = SummarizeStringConstraint(field.GetString_constraint());
    const string field_label = SummarizeFieldType(field.GetField());

    if (constraint_text.empty() || field_label.empty()) {
        return kEmptyStr;
    }
    return "where " + field_label + " " + constraint_text;
}


/// Describe a source constraint.
///
/// No summary is produced for source constraints: the argument is not
/// inspected and the result is always empty.
///
/// @param cons  The source constraint (unused).
/// @return      An empty string.
string CSuspect_rule::SummarizeSourceConstraint(const CSource_constraint& cons) const
{
    return kEmptyStr;
}


/// Describe a single constraint choice.
///
/// String and field constraints get a real description. Location and source
/// constraints get their summary followed by a bracketed tag. The remaining
/// handled variants return only a bracketed tag naming the constraint kind.
///
/// @param choice  The constraint to describe.
/// @return        The description; an empty string for an unhandled variant
///                or for a string constraint whose summary is empty.
string CSuspect_rule::SummarizeConstraint(const CConstraint_choice& choice) const
{
    switch (choice.Which()) {
        case CConstraint_choice::e_String:
        {
            const string text = SummarizeStringConstraint(choice.GetString());
            if (text.empty()) {
                return kEmptyStr;
            }
            return "where object text " + text;
        }
        case CConstraint_choice::e_Field:
            return SummarizeFieldConstraint(choice.GetField());

        // Summary plus a tag identifying the constraint kind.
        case CConstraint_choice::e_Location:
            return SummarizeLocationConstraint(choice.GetLocation()) + " [[LOCATION CONSTRAINT]]";
        case CConstraint_choice::e_Source:
            return SummarizeSourceConstraint(choice.GetSource()) + " [[SOURCE CONSTRAINT]]";

        // Tag only: these kinds have no summarizer in this class.
        case CConstraint_choice::e_Cdsgeneprot_qual:
            return "[[CDS Gene Prot QUAL CONSTRAINT]]";
        case CConstraint_choice::e_Cdsgeneprot_pseudo:
            return "[[CDS Gene Prot PSEUDO CONSTRAINT]]";
        case CConstraint_choice::e_Sequence:
            return "[[SEQUENCE CONSTRAINT]]";
        case CConstraint_choice::e_Pub:
            return "[[PUB CONSTRAINT]]";
        case CConstraint_choice::e_Molinfo:
            return "[[MOLINFO CONSTRAINT]]";
        case CConstraint_choice::e_Field_missing:
            return "[[FIELD MISSING CONSTRAINT]]";
        case CConstraint_choice::e_Translation:
            return "[[TRANSLATION CONSTRAINT]]";

        default:
            break;
    }
    return kEmptyStr;
}


/// Describe a set of constraints by joining the individual descriptions
/// with " and ". Constraints whose description is empty contribute nothing.
///
/// @param cons  The constraint set.
/// @return      The joined description (empty if no constraint produced text).
string CSuspect_rule::SummarizeConstraintSet(const CConstraint_choice_set& cons) const
{
    string joined;
    for (const auto& choice : cons.Get()) {
        const string piece = SummarizeConstraint(*choice);
        // Separator only between two non-empty parts.
        if (!piece.empty() && !joined.empty()) {
            joined += " and ";
        }
        joined += piece;
    }
    return joined;
}


/// Return the display name for this rule's type.
///
/// The numeric value of GetRule_type() is used as an index into a fixed
/// table of names. A value outside the table (negative or too large) yields
/// an empty string instead of reading past the end of the array.
///
/// @return  The rule type name, or an empty string for an out-of-range type.
string CSuspect_rule::GetRuleTypeName() const
{
    static const char* const rule_type[] = {
        "None",
        "Typo",
        "Putative Typo",
        "Quick fix",
        "Organelles not appropriate in prokaryote",
        "Suspicious phrase; should this be nonfunctional?",
        "May contain database identifier more appropriate in note; remove from product name",
        "Remove organism from product name",
        "Possible parsing error or incorrect formatting; remove inappropriate symbols",
        "Implies evolutionary relationship; change to -like protein",
        "Consider adding 'protein' to the end of the product name",
        "Correct the name or use 'hypothetical protein'",
        "Use American spelling",
        "Use short product name instead of descriptive phrase",
        "use protein instead of gene as appropriate"
    };
    const size_t count = sizeof(rule_type) / sizeof(rule_type[0]);

    // A negative value wraps to a large unsigned index and is rejected too.
    const size_t index = static_cast<size_t>(GetRule_type());
    if (index >= count) {
        return kEmptyStr;
    }
    return rule_type[index];
}


/// Produce a one-line human-readable summary of the whole rule.
///
/// Layout:
///   <find>[ but not <except>][, <feature constraints>][, <replace>]
///   [ (<rule type>)][ Description: <description>]
/// Each optional part is omitted when its text is empty.
///
/// @return  The summary string.
string CSuspect_rule::SummarizeRule() const
{
    // Gather the individual parts first.
    const string type_name = GetRuleTypeName();
    const string description = IsSetDescription() ? GetDescription() : kEmptyStr;
    const string find_text = SummarizeSearchFunc(GetFind());

    string except_text;
    if (CanGetExcept()) {
        except_text = SummarizeSearchFunc(GetExcept());
    }
    string replace_text;
    if (CanGetReplace()) {
        replace_text = SummarizeReplaceRule(GetReplace());
    }
    string constraint_text;
    if (CanGetFeat_constraint()) {
        constraint_text = SummarizeConstraintSet(GetFeat_constraint());
    }

    // Assemble, skipping empty parts.
    string summary = find_text;
    if (!except_text.empty()) {
        summary += " but not " + except_text;
    }
    if (!constraint_text.empty()) {
        summary += ", " + constraint_text;
    }
    if (!replace_text.empty()) {
        summary += ", " + replace_text;
    }
    if (!type_name.empty()) {
        summary += " (" + type_name + ")";
    }
    if (!description.empty()) {
        summary += " Description: " + description;
    }
    return summary;
}


END_objects_SCOPE // namespace ncbi::objects::
END_NCBI_SCOPE
