%%% This file is part of RefactorErl.
%%%
%%% RefactorErl is free software: you can redistribute it and/or modify
%%% it under the terms of the GNU Lesser General Public License as published
%%% by the Free Software Foundation, either version 3 of the License, or
%%% (at your option) any later version.
%%%
%%% RefactorErl is distributed in the hope that it will be useful,
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%%% GNU Lesser General Public License for more details.
%%%
%%% You should have received a copy of the GNU Lesser General Public License
%%% along with RefactorErl.  If not, see <http://plc.inf.elte.hu/erlang/>.
%%%
%%% The Original Code is RefactorErl.
%%%
%%% The Initial Developer of the Original Code is Eötvös Loránd University.
%%% Portions created  by Eötvös Loránd University and ELTE-Soft Ltd.
%%% are Copyright 2007-2025 Eötvös Loránd University, ELTE-Soft Ltd.
%%% and Ericsson Hungary. All Rights Reserved.


%%% @doc Generic lexical scanner module. Works with state transition tables
%%% that are generated by {@link refgen_scanc}.
%%%
%%% === Mapping style scanner ===
%%%
%%% This mode is similar to calling `lists:map' on the list of input tokens
%%% with a user-supplied callback function. The input is split into tokens,
%%% the callback function is called with a single argument which describes the
%%% token, and the return value of the scanner is the list of return values
%%% from the function calls.
%%%
%%% === Accumulator style scanner ===
%%%
%%% This mode is similar to calling `lists:foldl' on the list of input tokens.
%%% The user-supplied callback function has two arguments, the first describes
%%% the token, and the second is a user-supplied state data. Subsequent calls
%%% will get the return value of the previous call as state data, and finally,
%%% a separate function call is made which signals the end of the input. The
%%% return value of the scanner is the return value from the last function
%%% call.
%%%
%%% @author Laszlo Lovei <lovei@inf.elte.hu>

-module(refgen_scanner).
-vsn("$Rev: 17551 $").

-include("gen.hrl").

%% Client exports
-export([new/1, convert_dfa/1]).

%% =============================================================================
%% Implementation

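%% @type map_scanner() = (string()) -> {ok, [term()]} |
%%                                     {error, pos(), integer(), char()}.
%% Type signature of mapping style scanners. The error tuple holds the
%% line/column position and the character offset where scanning failed,
%% followed by the first character that could not be scanned.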
%%
%% @type map_callback() = (token()) -> term().
%% Type signature of callback functions for mapping style scanner.

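%% @type fold_scanner() = (string(), term()) -> {ok, term()} |
%%                                              {error, pos(), integer(),
%%                                               char()}.
%% Type signature of accumulator style scanners. The error tuple holds the
%% line/column position and the character offset where scanning failed,
%% followed by the first character that could not be scanned.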
%%
%% @type fold_callback() = (token(), term()) -> term().
%% Type signature of callback functions for accumulator style scanners.

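%% @type token() = {Type::atom(), Text::string(),
%%                  {Start::pos(), End::pos()},
%%                  {StartOffset::integer(), EndOffset::integer()}}.
%% Token information provided for the callback functions. `Start' is the
%% position of the first character of the token, `End' is the position right
%% after the last character of the token. `StartOffset' and `EndOffset' are
%% the same two places expressed as character counts from the beginning of
%% the input. The end of the input is signalled with `{eof, pos(), integer()}'.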

%% @type pos() = {Line::integer(), Column::integer()}. Position information
%% provided by the scanner. `Line' and `Column' values start from `0'.

%% @spec new([Opt]) -> map_scanner() | fold_scanner()
%% @doc Builds a scanner function from an option list. The recognised
%% options are:
%%
%% <dl>
%%
%% <dt>`{dfa, Table}'</dt> <dd>Use `Table' as the state transition table. It
%%   is passed to the scanner as is, without any conversion. When present,
%%   this option takes precedence over `file'.</dd>
%%
%% <dt>`{file, File}'</dt> <dd>Read the state transition table from `File'.
%%   This option is required whenever `dfa' is not given.</dd>
%%
%% <dt>`{callback, Fun}'</dt> <dd>`Fun' is invoked by the scanner for every
%%   token to compute the result. It defaults to the identity function. When
%%   `Fun' has the signature {@type map_callback()}, the returned scanner is
%%   a {@type map_scanner()}; when it has the signature
%%   {@type fold_callback()}, the returned scanner is a
%%   {@type fold_scanner()}.</dd>
%%
%% </dl>
%%
%% If the table cannot be loaded, the error reason is thrown.
new(Opts) ->
    Callback = proplists:get_value(callback, Opts, fun token_data/1),
    case get_table(Opts) of
        {error, Why} -> throw(Why);
        {ok, Table}  -> scanner(Table, Callback)
    end.

%% Determines the state transition table to use: an explicit `{dfa, Table}'
%% option wins, anything else falls back to reading the table from a file.
%% Returns `{ok, Table}' or whatever `read_table/1' returns.
get_table(Opts) ->
    Found = proplists:lookup(dfa, Opts),
    case Found of
        {dfa, Ready} -> {ok, Ready};
        _Other       -> read_table(Opts)
    end.

%% Reads the state transition table from the file named by the mandatory
%% `{file, File}' option (a missing option fails with `badmatch').
%%
%% The file must contain exactly one Erlang term, which is returned as
%% `{ok, Table}'. Any failure is reported as `{error, {File, Message}}', where
%% `Message' is a human readable string: either the text produced by
%% `file:format_error/1', or a note that the file held the wrong number of
%% terms (previously this case crashed with an uninformative `case_clause').
read_table(Opts) ->
    {file, File} = proplists:lookup(file, Opts),
    case file:consult(File) of
        {ok, [Table]} ->
            {ok, Table};
        {ok, Terms} ->
            Msg = "expected exactly one term, found "
                ++ integer_to_list(length(Terms)),
            {error, {File, Msg}};
        {error, Reason} ->
            {error, {File, file:format_error(Reason)}}
    end.


%% Default callback: hands the token description back unchanged.
token_data(Tok) -> Tok.

%% Wraps the transition table and the callback into the scanner closure.
%% The arity of `Callback' selects the flavour: a two-argument callback yields
%% an accumulator style scanner taking the input and the initial state, a
%% one-argument callback yields a mapping style scanner taking only the input.
scanner(DFA, Callback) ->
    case erlang:fun_info(Callback, arity) of
        {arity, 2} ->
            fun (Input, Acc0) -> string(Input, Callback, Acc0, DFA) end;
        {arity, 1} ->
            fun (Input) -> string(Input, Callback, DFA) end
    end.

%% @doc Converts a DFA description into the tuple based table that the
%% scanner indexes directly.
%%
%% `DFA' is a list of `{State, {Transitions, Accept}}' entries, where
%% `Transitions' is an orddict from code points to target states. The result
%% has one element per entry, in ascending `State' order; each element is a
%% tuple with one slot per member of `?CODE_POINTS', holding either
%% `{TargetState, TargetAccept}' or `undefined' when there is no transition.
%%
%% The accept list is looked up with `orddict:find/2', so it must be ordered
%% by state. It is therefore built from the sorted entries, just like the
%% transition rows; building it from the raw input made the lookup fail on
%% input that was not already sorted.
convert_dfa(DFA) ->
    Sorted = lists:sort(DFA),
    Acc = [{S, A} || {S, {_, A}} <- Sorted],
    Trans = [list_to_tuple([action(Ch, Tr, Acc) || Ch <- ?CODE_POINTS])
             || {_St, {Tr, _Acc}} <- Sorted],
    list_to_tuple(Trans).

%% Computes one slot of a transition row. `Tr' maps code points to target
%% states and `Acc' maps states to their accept value (both orddicts).
%% Returns `{Target, AcceptOfTarget}' when `Ch' has a transition, `undefined'
%% otherwise. A target state missing from `Acc' fails with `badmatch'.
action(Ch, Tr, Acc) ->
    Lookup = orddict:find(Ch, Tr),
    case Lookup of
        error ->
            undefined;
        {ok, Target} ->
            {ok, TargetAccept} = orddict:find(Target, Acc),
            {Target, TargetAccept}
    end.

%% Mapping style entry point: adapts the one-argument callback to the
%% accumulator interface and scans with an empty list as the initial state.
string(String, Callback, DFA) ->
    Collector = collect(Callback),
    string(String, Collector, [], DFA).

%% Turns a mapping style (one-argument) callback into an accumulator style
%% one. The returned fun conses `Fun(Token)' onto the accumulator for every
%% token, and on the end-of-input token returns the collected results in
%% input order.
%%
%% The scanner signals the end of input with `{eof, LinePos, Offset}'. The
%% former clause only matched `{eof, _}', so the end marker was passed to
%% `Fun' and the list was returned unreversed. The two-element form is still
%% accepted for compatibility.
collect(Fun) ->
    fun
        ({eof, _, _}, Lst) -> lists:reverse(Lst);
        ({eof, _}, Lst)    -> lists:reverse(Lst);
        (Token, Lst)       -> [Fun(Token) | Lst]
    end.

%% Accumulator style entry point: starts scanning at line 0, column 0 and
%% character offset 0 with `State' as the initial callback state.
string(String, Callback, State, DFA) ->
    StartPos = {0, 0},
    StartOffset = 0,
    string(String, StartPos, StartOffset, Callback, State, DFA).

%% Main scanning loop. Repeatedly cuts the next token off the input and feeds
%% it to `Callback' together with the current state.
%%
%% Each token is passed as `{Type, Text, {LStart, LEnd}, {SStart, SEnd}}',
%% where the `L' values are `{Line, Column}' positions and the `S' values are
%% character offsets. When the input is exhausted the callback gets
%% `{eof, LPos, SPos}' and its result is returned as `{ok, Result}'. If no
%% token can be recognised, `{error, LPos, SPos, Char}' is returned with the
%% first character of the remaining input.
string([], LPos, SPos, Callback, State, _DFA) ->
    Final = Callback({eof, LPos, SPos}, State),
    {ok, Final};
string(Input, LFrom, SFrom, Callback, State, DFA) ->
    case next_token(Input, LFrom, SFrom, 0, "", none, DFA) of
        none ->
            {error, LFrom, SFrom, hd(Input)};
        {Type, Text, LTo, STo, Remaining} ->
            Tok = {Type, Text, {LFrom, LTo}, {SFrom, STo}},
            Updated = Callback(Tok, State),
            string(Remaining, LTo, STo, Callback, Updated, DFA)
    end.

%% Recognises the longest token at the head of the input.
%%
%% Arguments: the remaining input, the current `{Line, Column}' position, the
%% current character offset, the current DFA state, the characters consumed
%% so far (in reverse order), the last accepting configuration seen (`none'
%% until there is one) and the transition table.
%%
%% The table is indexed as `element(Char+1, element(State+1, DFA))'. A slot
%% holds `{NextState, Acc}' or `undefined'; an `Acc' of `[]' is treated as a
%% non-accepting state. Scanning stops at the end of the input or at the first
%% missing transition, and the last accepting configuration is returned via
%% `accept/1' as `{Type, Text, LEnd, SEnd, Rest}', or `none' if there was none.
%%
%% If the table lookup itself fails (typically a code point beyond the table
%% size), a warning is logged and the character is scanned as if it were
%% 95 (`_'); the token text then contains 95 in its place.
next_token("", _, _, _, _, Accept, _) ->
    accept(Accept);
next_token([Head|Tail], LCPos, SPos, St, Token, Accept, DFA) ->
    Trans = element(St+1, DFA),
    %% Only the lookup is protected by the `try'; the `of' branches are not.
    try element(Head+1, Trans) of
        {NextSt, Acc} ->
            NewToken = [Head | Token],
            NextLCPos = nextLC(Head, LCPos),
            NextSPos = nextS(Head, SPos),
            NewAccept =
                case Acc of
                    [] -> Accept;
                    _  -> {Acc, NewToken, NextLCPos, NextSPos, Tail}
                end,
            next_token(Tail, NextLCPos, NextSPos, NextSt, NewToken, NewAccept,
                       DFA);
        undefined ->
            accept(Accept)
    catch
        %% The guard keeps the fallback from retrying forever: if looking up
        %% 95 fails as well (e.g. a malformed or too short table row), the
        %% exception propagates instead of looping and logging endlessly.
        _:_ when Head =/= 95 ->
            error_logger:warning_msg(
                referl_misc:format("Code point (~p) at position (~p) was not found in the scanner table. "
                    "To eliminate the scanning issue the tool replaced the code point (~p) with 95 (_). "
                    "Please increase the character table size with the -char_limit flag for proper handling of unicode characters!~n", [Head, LCPos, Head])),
            next_token([95|Tail], LCPos, SPos, St, Token, Accept, DFA)
    end.

%% Finalises the result of a token scan. An accepting configuration carries
%% the token text in reverse order, so the text is put back into input order;
%% anything else (i.e. `none') is returned untouched.
accept({Type, RevText, LEnd, SEnd, Rest}) ->
    Text = lists:reverse(RevText),
    {Type, Text, LEnd, SEnd, Rest};
accept(Other) ->
    Other.

%% Advances a `{Line, Column}' position over one character: a newline moves
%% to column 0 of the next line, any other character moves one column right.
nextLC(Ch, {Ln, Col}) ->
    case Ch of
        $\n -> {Ln + 1, 0};
        _   -> {Ln, Col + 1}
    end.

%% Advances the character offset by one; the character itself is ignored.
nextS(_Char, Offset) ->
    Offset + 1.
