View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2013-2025, VU University Amsterdam
    7                              SWI-Prolog Solutions b.v.
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(json_grammar,
   37          [ json_token//1
   38          ]).   39:- use_module(library(dcg/basics)).   40:- use_module(library(pure_input)).     % syntax_error//1
   41:- set_prolog_flag(double_quotes, codes).   42
   43/** <module> JavaScript grammar
   44
   45This file provides a tokenizer for   JavaScript  (EcmaScript). This code
   46supports  the  quasi  quotation   syntax    =javascript=,   defined   in
   47library(http/js_write).
   48
   49@see    http://tomcopeland.blogs.com/EcmaScript.html is used for the
   50        high-level syntax.
   51@see    http://www.ecma-international.org/ecma-262/5.1/ is used for
   52        implementing the tokenization code.
   53*/
   54
   55%!  json_token(-TokenType)//
   56%
   57%   Matches and classifies the next JavaScript token.
   58
   59json_token(Type) -->
   60    token(Type).
   61
   62%!  token(-Type) is semidet.
   63%
   64%   Get the next token from the   input. Fails when encountering the
   65%   end of the input.
   66%
   67%   @error syntax_error(Culprit)
   68
   69token(comment)        --> comment, !.
   70token(string)         --> string_literal, !.
   71token(number)         --> numeric_literal, !.
   72token(identifier(Id)) --> identifier_name(Id), !.
   73token(regex)          --> regex_literal, !.
   74token(ws)             --> blank, !, blanks.
   75token(punct(Char))    --> [Code], { char_code(Char, Code) }.
   76
   77%!  comment// is semidet.
   78
   79comment -->
   80    "/*",
   81    !,
   82    (   string(_), "*/"
   83    ->  []
   84    ;   syntax_error(eof_in_comment)
   85    ).
   86comment -->
   87    "//",
   88    !,
   89    (   string(_), eol
   90    ->  []
   91    ;   string(_), eof
   92    ->  []
   93    ).
   94
   95
   96%!  string_literal// is semidet.
   97%
   98%   Matches a string literal
   99
  100string_literal -->
  101    "\"",
  102    !,
  103    (   q_codes, "\""
  104    ->  []
  105    ;   syntax_error(eof_in_string)
  106    ).
  107string_literal -->
  108    "\'",
  109    !,
  110    (   q_codes, "\'"
  111    ->  []
  112    ;   syntax_error(eof_in_string)
  113    ).
  114
  115
  116%!  numeric_literal//
  117%
  118%   Matches JavaScript notion of a numeric constant
  119
  120numeric_literal -->
  121    (   decimal_literal
  122    ->  []
  123    ;   hex_integer
  124    ),
  125    (   (   decimal_digit
  126        ;   js_id_start(_)
  127        )
  128    ->  syntax_error(js(illegal_number))
  129    ;   []
  130    ).
  131
  132decimal_literal -->
  133    decimal_integer, ".", opt_decimal_digits, opt_exponent.
  134decimal_literal -->
  135    ".", decimal_digits, opt_exponent.
  136decimal_literal -->
  137    decimal_integer,
  138    opt_exponent.
  139
  140decimal_integer -->
  141    "0",
  142    !.
  143decimal_integer -->
  144    non_zero_digit, opt_decimal_digits.
  145
  146decimal_digits -->
  147    decimal_digit,
  148    !,
  149    opt_decimal_digits.
  150
  151opt_decimal_digits -->
  152    decimal_digit,
  153    !,
  154    opt_decimal_digits.
  155opt_decimal_digits -->
  156    [].
  157
  158decimal_digit --> [C], { code_type(C, digit) }.
  159non_zero_digit --> [C], { code_type(C, digit), C \== 0'0 }.
  160
  161opt_exponent -->
  162    exponent,
  163    !.
  164opt_exponent -->
  165    [].
  166
  167exponent -->
  168    exponent_indictor,
  169    signed_integer.
  170
  171exponent_indictor --> "e", !.
  172exponent_indictor --> "E".
  173
  174signed_integer --> "+", !, decimal_digits.
  175signed_integer --> "-", !, decimal_digits.
  176signed_integer -->         decimal_digits.
  177
  178hex_integer --> "0", x, hex_digit, hex_digits.
  179
  180x --> "x".
  181x --> "X".
  182
  183
  184%!  regex_literal// is semidet.
  185%
  186%   Matches regex expression /.../flags
  187
  188regex_literal -->
  189    "/", regex_body, "/", !, regex_flags.
  190
  191regex_body -->
  192    regex_first_char,
  193    regex_chars.
  194
  195regex_chars --> regex_char, !, regex_chars.
  196regex_chars --> [].
  197
  198regex_first_char -->
  199    regex_backslash_sequence.
  200regex_first_char -->
  201    regex_non_terminator(C),
  202    !,
  203    { \+ memberchk(C, "*\\/[") }.
  204regex_first_char -->
  205    regex_class.
  206
  207regex_char -->
  208    regex_backslash_sequence.
  209regex_char -->
  210    regex_non_terminator(C),
  211    !,
  212    { \+ memberchk(C, "\\/[") }.
  213regex_char -->
  214    regex_class.
  215
  216regex_backslash_sequence -->
  217    "\\", !, regex_non_terminator(_).
  218
  219regex_class -->
  220    "[", regex_class_chars, "]".
  221
  222regex_class_chars --> regex_class_char, !, regex_class_chars.
  223regex_class_chars --> "".
  224
  225regex_class_char -->
  226    regex_non_terminator(C),
  227    !,
  228    { \+ memberchk(C, "]\\") }.
  229
  230regex_non_terminator(_) -->
  231    eol, !, {fail}.
  232regex_non_terminator(C) -->
  233    source_char(C).
  234
  235regex_flags -->
  236    js_id_conts(_).
  237
  238source_char(C) -->
  239    [C].
  240
  241
  242%!  q_codes//
  243%
  244%   Shortest list of quoted characters.
  245
  246q_codes --> [] ; q_code, q_codes.
  247
  248q_code --> "\\", !, char_esc.
  249q_code --> eol, !, {fail}.
  250q_code --> [_].
  251
  252char_esc --> single_escape_char, !.
  253char_esc --> "x", !, hex_digit, hex_digit.
  254char_esc --> "u", !, hex_digit, hex_digit, hex_digit, hex_digit.
  255char_esc --> eol, !.
  256
  257hex_digits --> hex_digit, !, hex_digits.
  258hex_digits --> [].
  259
  260hex_digit --> [C], {code_type(C, xdigit(_))}.
  261
  262single_escape_char --> "'".
  263single_escape_char --> "\"".
  264single_escape_char --> "\\".
  265single_escape_char --> "b".
  266single_escape_char --> "f".
  267single_escape_char --> "n".
  268single_escape_char --> "r".
  269single_escape_char --> "t".
  270single_escape_char --> "v".
  271
  272eof -->
  273    \+ [_].
  274
  275
  276%       js_identifier classification. Now  based  on   Prolog.  This  is
  277%       pretty close, but I'm afraid there are corner cases.
  278
  279identifier_name(Id) -->
  280    js_id_start(C0),
  281    !,
  282    js_id_conts(Rest),
  283    { atom_codes(Id, [C0|Rest]),
  284      (   keyword(Id)
  285      ->  fail, syntax_error(reserved(Id))
  286      ;   true
  287      )
  288    }.
  289
  290
  291js_id_start(C) --> [C], {js_id_start(C)}.
  292
  293js_id_start(C) :- code_type(C, prolog_var_start), !.
  294js_id_start(C) :- code_type(C, prolog_atom_start), !.
  295js_id_start(0'$).
  296
  297js_id_conts([H|T]) --> js_id_cont(H), !, js_id_conts(T).
  298js_id_conts([]) --> [].
  299
  300js_id_cont(C) --> [C], {js_id_cont(C)}.
  301
  302js_id_cont(C) :- code_type(C, prolog_identifier_continue), !.
  303js_id_cont(0'$) :- !.
  304
  305
  306keyword(break).                         % standard keywords
  307keyword(do).
  308keyword(instanceof).
  309keyword(typeof).
  310keyword(case).
  311keyword(else).
  312keyword(new).
  313keyword(var).
  314keyword(catch).
  315keyword(finally).
  316keyword(return).
  317keyword(void).
  318keyword(continue).
  319keyword(for).
  320keyword(switch).
  321keyword(while).
  322keyword(debugger).
  323keyword(function).
  324keyword(this).
  325keyword(with).
  326keyword(default).
  327keyword(if).
  328keyword(throw).
  329keyword(delete).
  330keyword(in).
  331keyword(try).
  332
  333keyword(class).                         % reserved keywords
  334keyword(enum).
  335keyword(extends).
  336keyword(super).
  337keyword(const).
  338keyword(export).
  339keyword(import).
  340
  341keyword(implements).                    % future reserved keywords
  342keyword(let).
  343keyword(private).
  344keyword(public).
  345keyword(yield).
  346keyword(interface).
  347keyword(package).
  348keyword(protected).
  349keyword(static)