1:- module(parser, [
    2                   parse/2      % +YAML, -PL
    3                  ]).

YAML parser base on https://github.com/openbohemians/diet-yaml.

Core part of YAML spec is implemented.

author
- Hongxin Liang
license
- Apache License Version 2.0 */
   12:- use_module(library(date)).   13
   14:- thread_local
   15    condition/1.
 parse(+YAML, -PL) is semidet
Parse given YAML atom or chars to Prolog term. No comment is allowed to appear in YAML. read_yaml/2 can be used to help read in YAML file.
   23parse(YAML, PL) :-
   24    atom(YAML), !,
   25    atom_chars(YAML, YAML0),
   26    yaml(PL, YAML0, []).
   27
   28parse(YAML, PL) :-
   29    yaml(PL, YAML, []).
   30
   31yaml(YAML) --> start, optional_whitespaces, data(YAML), optional_whitespaces, end.
   32
   33start --> newline, dash, dash, dash, !.
   34start --> [].
   35
   36end --> newline, ['.', '.', '.'], !.
   37end --> newline, dash, dash, dash, !.
   38end --> [].
   39
   40data(Data) --> sequence(Data), !.
   41data(Data) --> mapping(Data), !.
   42data([]) --> ['[', ']'], !.
   43data(_{}) --> ['{', '}'], !.
   44data(Data) --> optional_whitespaces, scalar(Data), !.
   45
   46scalar(Scalar) --> string(String),
   47    {
   48     (   atom(String)
   49     ->  (   atom_number(String, Scalar)
   50         ->  true
   51         ;   (   parse_time(String, iso_8601, T)
   52             ->  stamp_date_time(T, Scalar, 'UTC')
   53             ;   (   String = '~'
   54                 ->  Scalar = nil
   55                 ;   Scalar = String
   56                 )
   57             )
   58         )
   59     ;   Scalar = String
   60     )
   61    }.
   62
   63sequence([H|T], ['['|Chars], Rest) :-
   64    setup_call_cleanup(asserta(condition(sequence)),
   65                       (
   66                        optional_whitespaces(Chars, Rest1),
   67                        data(H, Rest1, Rest2),
   68                        comma_data(T, Rest2, Rest3),
   69                        optional_whitespaces(Rest3, [']'|Rest])
   70                       ),
   71                       retract(condition(sequence))), !.
   72
   73sequence([H|T], Chars, Rest) :-
   74    optional_spaces(Count, Chars, Rest1),
   75    dash(Rest1, Rest2),
   76    Count1 is Count + 1,
   77    pushback(Count1, Rest2, Rest3),
   78    Rest4 = ['\n'|Rest3],
   79    whitespaces_with_pushback(Rest4, Rest5),
   80    data(H, Rest5, Rest6),
   81    dash_data(Count, T, Rest6, Rest).
   82
   83mapping(Mapping, ['{'|Chars], Rest) :-
   84    setup_call_cleanup(asserta(condition(mapping)),
   85                       (
   86                        optional_whitespaces(Chars, Rest1),
   87                        key(Key, Rest1, Rest2),
   88                        optional_spaces(_, Rest2, Rest3),
   89                        colon(Rest3, Rest4),
   90                        at_least_one_whitespace(Rest4, Rest5),
   91                        data(Data, Rest5, Rest6),
   92                        colon_data(Dict, Rest6, Rest7),
   93                        optional_whitespaces(Rest7, ['}'|Rest]),
   94                        Mapping = Dict.put(Key, Data)
   95                       ),
   96                       retract(condition(mapping))), !.
   97
   98mapping(Mapping) -->
   99    optional_spaces(Count),
  100    key(Key),
  101    optional_spaces(_),
  102    colon,
  103    whitespaces_with_pushback,
  104    data(Data),
  105    tab_data(Count, Dict),
  106    {
  107     Mapping = Dict.put(Key, Data)
  108    }.
  109
  110optional_spaces(Count) --> space, optional_spaces(X), !,
  111    {
  112     Count is X + 1
  113    }.
  114optional_spaces(0) --> [].
  115
  116string(String) --> ['"'], chars_exclude_trailing_double_quote(Chars), ['"'], !,
  117    {
  118     atom_string(Chars, String)
  119    }.
  120
  121string(String) --> ['\''], chars_exclude_trailing_single_quote(Chars), ['\''], !,
  122    {
  123     atom_string(Chars, String)
  124    }.
  125
  126string(String, ['>'|Chars], Rest) :-
  127    optional_spaces(_, Chars, ['\n'|Rest1]),
  128    optional_spaces(Count, Rest1, Rest2),
  129    line(X, Rest2, Rest3),
  130    next_line(Y, Count, Rest3, Rest4), !,
  131    (   sub_atom(Y, 0, 1, _, '\n')
  132    ->  atom_concat(X, Y, String)
  133    ;   atomic_list_concat([X, ' ', Y], String)
  134    ),
  135    Rest = ['\n'|Rest4].
  136
  137string(String, ['|'|Chars], Rest) :-
  138    optional_spaces(_, Chars, ['\n'|Rest1]),
  139    optional_spaces(Count, Rest1, Rest2),
  140    line(X, Rest2, Rest3),
  141    next_line_preserving_newline(Y, Count, Rest3, Rest4), !,
  142    atomic_list_concat([X, '\n', Y], String),
  143    Rest = ['\n'|Rest4].
  144
  145string(String) --> valid_start(Char), chars(Chars),
  146    {
  147     atom_concat(Char, Chars, String)
  148    }.
  149
  150space --> [' '].
  151
  152key(Key) -->
  153    valid_key_char(X),
  154    key(Y), !,
  155    {
  156     atom_concat(X, Y, Key)
  157    }.
  158key(Key) --> valid_key_char(Key).
  159
  160dash --> ['-'].
  161colon --> [':'].
  162comma --> [','].
  163newline --> ['\n'].
  164
  165at_least_one_newline --> newline, at_least_one_newline, !.
  166at_least_one_newline --> newline.
  167
  168newlines(V) --> newline, newlines(Y), !,
  169    {
  170     atom_concat('\n', Y, V)
  171    }.
  172newlines('\n') --> newline.
  173
  174optional_whitespaces --> space, optional_whitespaces, !.
  175optional_whitespaces --> newline, optional_whitespaces, !.
  176optional_whitespaces --> [].
  177
  178at_least_one_whitespace --> space, optional_whitespaces, !.
  179at_least_one_whitespace --> newline, optional_whitespaces.
  180
  181whitespaces_with_pushback --> space, whitespaces_with_pushback, !.
  182whitespaces_with_pushback --> newline, whitespaces_with_pushback0(0), !.
  183whitespaces_with_pushback --> space, !.
  184whitespaces_with_pushback --> newline.
  185
  186whitespaces_with_pushback0(Leading) --> space, !,
  187    {
  188     Leading1 is Leading + 1
  189    },
  190    whitespaces_with_pushback0(Leading1).
  191whitespaces_with_pushback0(_) --> newline, !,
  192    whitespaces_with_pushback0(0).
  193whitespaces_with_pushback0(Leading, [H|T], Rest) :-
  194    H \= ' ',
  195    H \= '\n',
  196    pushback(Leading, [H|T], Rest).
  197
  198pushback(0, List, List) :- !.
  199pushback(Count, List0, List) :-
  200    List1 = [' '|List0],
  201    Count1 is Count - 1,
  202    pushback(Count1, List1, List).
  203
  204comma_data([H|T]) --> comma, optional_whitespaces, data(H), comma_data(T).
  205comma_data([]) --> [].
  206
  207dash_data(Leading, [H|T], Chars, Rest) :-
  208    at_least_one_newline(Chars, Rest0),
  209    optional_spaces(Leading, Rest0, Rest1),
  210    dash(Rest1, Rest2),
  211    Count is Leading + 1,
  212    pushback(Count, Rest2, Rest3),
  213    Rest4 = ['\n'|Rest3],
  214    whitespaces_with_pushback(Rest4, Rest5),
  215    data(H, Rest5, Rest6),
  216    dash_data(Leading, T, Rest6, Rest), !.
  217dash_data(_, []) --> [].
  218
  219colon_data(V) -->
  220    comma,
  221    optional_whitespaces,
  222    key(Key),
  223    optional_spaces(_),
  224    colon,
  225    at_least_one_whitespace,
  226    data(Data),
  227    colon_data(Dict), !,
  228    {
  229     V = Dict.put(Key, Data)
  230    }.
  231colon_data(_{}) --> [].
  232
  233tab_data(Leading, V) -->
  234    at_least_one_newline,
  235    optional_spaces(Leading),
  236    key(Key),
  237    optional_spaces(_),
  238    colon,
  239    whitespaces_with_pushback,
  240    data(Data),
  241    tab_data(Leading, Dict), !,
  242    {
  243     V = Dict.put(Key, Data)
  244    }.
  245tab_data(_, _{}) --> [].
  246
  247chars_exclude_trailing_double_quote(V, [H|T], Rest) :-
  248    lookahead(T, C),
  249    (   C = '"'
  250    ->  (   H = '\\'
  251        ->  chars_exclude_trailing_double_quote(X, T, Rest),
  252            atom_concat(H, X, V)
  253        ;   V = H,
  254            Rest = T
  255        )
  256    ;   chars_exclude_trailing_double_quote(X, T, Rest),
  257        atom_concat(H, X, V)
  258    ), !.
  259chars_exclude_trailing_double_quote('') --> [].
  260
  261chars_exclude_trailing_single_quote(V, [H|T], Rest) :-
  262    (   H = '\''
  263    ->  T = [C|T1],
  264        (   C = '\''
  265        ->  chars_exclude_trailing_single_quote(X, T1, Rest),
  266            atom_concat('\'', X, V)
  267        ;   V = '',
  268            Rest = [H|T]
  269        )
  270    ;   chars_exclude_trailing_single_quote(X, T, Rest),
  271        atom_concat(H, X, V)
  272    ), !.
  273chars_exclude_trailing_single_quote('') --> [].
  274
  275chars(V, [X|Chars], Rest) :-
  276    (   condition(sequence), !
  277    ->  X \= ']',
  278        X \= ','
  279    ;   (   condition(mapping), !
  280        ->  X \= '}',
  281            X \= ','
  282        ;   true
  283        )
  284    ),
  285    X \= '\n', !,
  286    chars(Y, Chars, Rest),
  287    atom_concat(X, Y, V).
  288chars('') --> [].
  289
  290line(V) -->
  291    [X],
  292    {
  293     X \= '\n'
  294    },
  295    line(Y),
  296    {
  297     atom_concat(X, Y, V)
  298    }.
  299line('') --> [].
  300
  301next_line(V, Leading) -->
  302    ['\n'],
  303    optional_spaces(Leading),
  304    line(X),
  305    next_line(Y, Leading), !,
  306    {
  307     (   sub_atom(Y, 0, 1, _, '\n')
  308     ->  atom_concat(X, Y, V)
  309     ;   atomic_list_concat([X, ' ', Y], V)
  310     )
  311    }.
  312next_line(V, Leading, ['\n'|Chars], Rest) :-
  313    newlines(X, Chars, Rest1),
  314    next_line(Y, Leading, ['\n'|Rest1], Rest), !,
  315    atom_concat(X, Y, V).
  316next_line('', _) --> [].
  317
  318next_line_preserving_newline(V, Leading) -->
  319    ['\n'],
  320    optional_spaces(Leading),
  321    line(X),
  322    next_line_preserving_newline(Y, Leading), !,
  323    {
  324     atomic_list_concat([X, '\n', Y], V)
  325    }.
  326next_line_preserving_newline(V, Leading, ['\n'|Chars], Rest) :-
  327    newlines(X, Chars, Rest1),
  328    next_line_preserving_newline(Y, Leading, ['\n'|Rest1], Rest), !,
  329    atom_concat(X, Y, V).
  330next_line_preserving_newline('', _) --> [].
  331
  332valid_start(V) --> [V], {V \= '-', V \= '[', V \= '{'}.
  333
  334valid_key_char(V) --> [V],
  335    {
  336     code_type(V, Type),
  337     (Type = csym; Type = period)
  338    }.
  339
  340lookahead([H|_], H)