1/*
    2  This module is responsible for the validation of xml schema types.
    3  @see https://www.w3.org/TR/xmlschema11-2/ for more information.
    4*/
    5:- module(simpletype, [
    6  is_xsd_simpleType/1,
    7  xsd_simpleType_is_a/2,
    8  validate_xsd_simpleType/2,
    9  facet/3
   10]).   11
   12:- use_module(library(xsd/xsd_messages)).   13
   14% https://github.com/mndrix/regex
   15:- use_module(library(regex)).   16
   17is_xsd_simpleType(T) :-
   18  clause(validate_xsd_simpleType(T, _), _).
   19
   20/*
   21  TYPE HIERARCHY
   22*/
   23xsd_simpleType_inheritance('anySimpleType', 'anyType').
   24xsd_simpleType_inheritance('untyped', 'anyType').
   25xsd_simpleType_inheritance('anyAtomicType', 'anySimpleType').
   26xsd_simpleType_inheritance('IDREFS', 'anySimpleType').
   27xsd_simpleType_inheritance('NMTOKENS', 'anySimpleType').
   28xsd_simpleType_inheritance('ENTITIES', 'anySimpleType').
   29xsd_simpleType_inheritance('untypedAtomic', 'anyAtomicType').
   30xsd_simpleType_inheritance('dateTime', 'anyAtomicType').
   31xsd_simpleType_inheritance('date', 'anyAtomicType').
   32xsd_simpleType_inheritance('time', 'anyAtomicType').
   33xsd_simpleType_inheritance('duration', 'anyAtomicType').
   34xsd_simpleType_inheritance('float', 'anyAtomicType').
   35xsd_simpleType_inheritance('double', 'anyAtomicType').
   36xsd_simpleType_inheritance('decimal', 'anyAtomicType').
   37xsd_simpleType_inheritance('string', 'anyAtomicType').
   38xsd_simpleType_inheritance('gYearMonth', 'anyAtomicType').
   39xsd_simpleType_inheritance('gYear', 'anyAtomicType').
   40xsd_simpleType_inheritance('gMonthDay', 'anyAtomicType').
   41xsd_simpleType_inheritance('gDay', 'anyAtomicType').
   42xsd_simpleType_inheritance('gMonth', 'anyAtomicType').
   43xsd_simpleType_inheritance('boolean', 'anyAtomicType').
   44xsd_simpleType_inheritance('base64Binary', 'anyAtomicType').
   45xsd_simpleType_inheritance('hexBinary', 'anyAtomicType').
   46xsd_simpleType_inheritance('anyURI', 'anyAtomicType').
   47xsd_simpleType_inheritance('QName', 'anyAtomicType').
   48xsd_simpleType_inheritance('NOTATION', 'anyAtomicType').
   49xsd_simpleType_inheritance('yearMonthDuration', 'duration').
   50xsd_simpleType_inheritance('dayTimeDuration', 'duration').
   51xsd_simpleType_inheritance('integer', 'decimal').
   52xsd_simpleType_inheritance('nonPositiveInteger', 'integer').
   53xsd_simpleType_inheritance('long', 'integer').
   54xsd_simpleType_inheritance('nonNegativeInteger', 'integer').
   55xsd_simpleType_inheritance('negativeInteger', 'nonPositiveInteger').
   56xsd_simpleType_inheritance('int', 'long').
   57xsd_simpleType_inheritance('short', 'int').
   58xsd_simpleType_inheritance('byte', 'short').
   59xsd_simpleType_inheritance('unsignedLong', 'nonNegativeInteger').
   60xsd_simpleType_inheritance('positiveInteger', 'nonNegativeInteger').
   61xsd_simpleType_inheritance('unsignedInt', 'unsignedLong').
   62xsd_simpleType_inheritance('unsignedShort', 'unsignedInt').
   63xsd_simpleType_inheritance('unsignedByte', 'unsignedShort').
   64xsd_simpleType_inheritance('normalizedString', 'string').
   65xsd_simpleType_inheritance('token', 'normalizedString').
   66xsd_simpleType_inheritance('language', 'token').
   67xsd_simpleType_inheritance('NMTOKEN', 'token').
   68xsd_simpleType_inheritance('Name', 'token').
   69xsd_simpleType_inheritance('NCName', 'Name').
   70xsd_simpleType_inheritance('ID', 'NCName').
   71xsd_simpleType_inheritance('IDREF', 'NCName').
   72xsd_simpleType_inheritance('ENTITY', 'NCName').
   73
   74xsd_simpleType_is_a(Type, Type).
   75xsd_simpleType_is_a(SubType, SuperType) :-
   76  xsd_simpleType_inheritance(SubType, InterType),
   77  xsd_simpleType_is_a(InterType, SuperType).
   78
   79
   80/*
   81  TYPE VALIDATION
   82  validate_xsd_simpleType(Type, Value)
   83  --> validates `Value` against (XML-Schema) type `Type`
   84*/
   85
   86% top of hierarchy (semantically equivalent in our case, but required by specification)
   87validate_xsd_simpleType('anyType', V) :-
   88  nonvar(V).
   89validate_xsd_simpleType(T, V) :-
   90  T = 'anySimpleType',
   91  xsd_simpleType_inheritance(T, ST),
   92  validate_xsd_simpleType(ST, V).
   93validate_xsd_simpleType(T, V) :-
   94  T = 'untyped',
   95  xsd_simpleType_inheritance(T, ST),
   96  validate_xsd_simpleType(ST, V).
   97
   98% non atomic types
   99validate_xsd_simpleType('IDREFS', V) :-
  100  split_string(V, " ", "", List),
  101  length(List, Length),
  102  Length > 0,
  103  validate_xsd_simpleType_list('IDREF', List).
  104validate_xsd_simpleType('NMTOKENS', V) :-
  105  split_string(V, " ", "", List),
  106  length(List, Length),
  107  Length > 0,
  108  validate_xsd_simpleType_list('NMTOKEN', List).
  109validate_xsd_simpleType('ENTITIES', V) :-
  110  split_string(V, " ", "", List),
  111  length(List, Length),
  112  Length > 0,
  113  validate_xsd_simpleType_list('ENTITY', List).
  114
  115% atomic types
  116validate_xsd_simpleType(T, V) :-
  117  T = 'anyAtomicType',
  118  xsd_simpleType_inheritance(T, ST),
  119  validate_xsd_simpleType(ST, V).
  120validate_xsd_simpleType(T, V) :-
  121  T = 'untypedAtomic',
  122  xsd_simpleType_inheritance(T, ST),
  123  validate_xsd_simpleType(ST, V).
  124validate_xsd_simpleType(T, V) :-
  125  T = 'dateTime',
  126  xsd_simpleType_inheritance(T, ST),
  127  validate_xsd_simpleType(ST, V),
  128  V =~ '^-?([1-9][0-9]*)?[0-9]{4}-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])T(([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](\\.[0-9]+)?|24:00:00(\\.0+)?)((\\+|-)(14:00|1[0-3]:[0-5][0-9]|0[0-9]:[0-5][0-9])|Z)?$'.
  129validate_xsd_simpleType(T, V) :-
  130  T = 'date',
  131  xsd_simpleType_inheritance(T, ST),
  132  validate_xsd_simpleType(ST, V),
  133  V =~ '^-?([1-9][0-9]*)?[0-9]{4}-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])((\\+|-)(14:00|1[0-3]:[0-5][0-9]|0[0-9]:[0-5][0-9])|Z)?$'.
  134validate_xsd_simpleType(T, V) :-
  135  T = 'time',
  136  xsd_simpleType_inheritance(T, ST),
  137  validate_xsd_simpleType(ST, V),
  138  V =~ '^(([01][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9](\\.[0-9]+)?|24:00:00(\\.0+)?)((\\+|-)(14:00|1[0-3]:[0-5][0-9]|0[0-9]:[0-5][0-9])|Z)?$'.
  139validate_xsd_simpleType(T, V) :-
  140  T = 'float',
  141  xsd_simpleType_inheritance(T, ST),
  142  validate_xsd_simpleType(ST, V),
  143  % TODO: validate value range (32bit)
  144  V =~ '^((\\+|-)?([0-9]+(\\.[0-9]*)?|\\.[0-9]+)([Ee](\\+|-)?[0-9]+)?|(\\+|-)?INF|NaN)$'.
  145validate_xsd_simpleType(T, V) :-
  146  T = 'double',
  147  xsd_simpleType_inheritance(T, ST),
  148  validate_xsd_simpleType(ST, V),
  149  % TODO: validate value range (64bit)
  150  validate_xsd_simpleType('float', V).
  151validate_xsd_simpleType(T, V) :-
  152  T = 'gYearMonth',
  153  xsd_simpleType_inheritance(T, ST),
  154  validate_xsd_simpleType(ST, V),
  155  V =~ '^-?([1-9][0-9]{3,}|0[0-9]{3})-(0[1-9]|1[0-2])(Z|(\\+|-)((0[0-9]|1[0-3]):[0-5][0-9]|14:00))?$'.
  156validate_xsd_simpleType(T, V) :-
  157  T = 'gYear',
  158  xsd_simpleType_inheritance(T, ST),
  159  validate_xsd_simpleType(ST, V),
  160  V =~ '^-?([1-9][0-9]{3,}|0[0-9]{3})(Z|(\\+|-)((0[0-9]|1[0-3]):[0-5][0-9]|14:00))?$'.
  161validate_xsd_simpleType(T, V) :-
  162  T = 'gMonthDay',
  163  xsd_simpleType_inheritance(T, ST),
  164  validate_xsd_simpleType(ST, V),
  165  V =~ '^--((0[1-9]|1[0-2])-([01][1-9]|10|2[0-8]))|((0[13-9]|1[0-2])-(29|30))|((0[13578]|1[0-2])/31)(Z|(\\+|-)((0[0-9]|1[0-3]):[0-5][0-9]|14:00))?$'.
  166validate_xsd_simpleType(T, V) :-
  167  T = 'gDay',
  168  xsd_simpleType_inheritance(T, ST),
  169  validate_xsd_simpleType(ST, V),
  170  V =~ '^---(0[1-9]|[12][0-9]|3[01])(Z|(\\+|-)((0[0-9]|1[0-3]):[0-5][0-9]|14:00))?$'.
  171validate_xsd_simpleType(T, V) :-
  172  T = 'gMonth',
  173  xsd_simpleType_inheritance(T, ST),
  174  validate_xsd_simpleType(ST, V),
  175  V =~ '^--(0[1-9]|1[0-2])(Z|(\\+|-)((0[0-9]|1[0-3]):[0-5][0-9]|14:00))?$'.
  176validate_xsd_simpleType(T, V) :-
  177  T = 'boolean',
  178  xsd_simpleType_inheritance(T, ST),
  179  validate_xsd_simpleType(ST, V),
  180  facet(enumeration, ['true', 'false', '1', '0'], V).
  181validate_xsd_simpleType(T, V) :-
  182  T = 'base64Binary',
  183  xsd_simpleType_inheritance(T, ST),
  184  validate_xsd_simpleType(ST, V),
  185  V =~ '^((([A-Za-z0-9+/] ?){4})*(([A-Za-z0-9+/] ?){3}[A-Za-z0-9+/]|([A-Za-z0-9+/] ?){2}[AEIMQUYcgkosw048] ?=|[A-Za-z0-9+/] ?[AQgw] ?= ?=))?$'.
  186validate_xsd_simpleType(T, V) :-
  187  T = 'hexBinary',
  188  xsd_simpleType_inheritance(T, ST),
  189  validate_xsd_simpleType(ST, V),
  190  V =~ '^([0-9a-fA-F]{2})*$'.
  191validate_xsd_simpleType(T, V) :-
  192  T = 'anyURI',
  193  xsd_simpleType_inheritance(T, ST),
  194  validate_xsd_simpleType(ST, V),
  195  % whoever has to debug the following regex is a poor sod
  196  V =~ '^([a-zA-Z][a-zA-Z0-9+\\-.]*:(((//)?((([a-zA-Z0-9\\-._~!$&()*+,;=:]|(%[0-9a-fA-F][0-9a-fA-F]))*@)?((\\[(([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)|([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?:|([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?:([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)|([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?|([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?|([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?|([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?:)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?|([0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?):((:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?)|:((:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?(:[0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?[0-9a-fA-F]?)?|:))])|(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]).([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]).([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]).([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))|([a-zA-Z0-9\\-._~!$&()*+,;=]|(%[0-9a-fA-F][0-9a-fA-F]))*)(:[0-9]*)?))((/([a-zA-Z0-9\\-._~!$&()*+,;=:@]|(%[0-9a-fA-F][0-9a-fA-F]))*)*|/(([a-zA-Z0-9\\-._~!$&()*+,;=:@]|(%[0-9a-fA-F][0-9a-fA-F]))(/([a-zA-Z0-9\\-._~!$&()*+,;=:@]|(%[0-9a-fA-F][0-9a-fA-F]))*)*)?|([a-zA-Z0-9\\-._~!$&()*+,;=:@]|(%[0-9a-fA-F][0-9a-fA-F]))(/([a-zA-Z0-9\\-._~!$&()*+,;=:@]|(%[0-9a-fA-F][0-9a-fA-F]))*)*))(\\?([a-zA-Z0-9\\-._~!$&()*+,;=:@/?]|(%[0-9a-fA-F][0-9a-fA-F]))*)?(#([a-zA-Z0-9\\-._~!$&()*+,;=:@/?]|(%[0-9a-fA-F][0-9a-fA-F]))*)?)$'.
  197validate_xsd_simpleType(T, V) :-
  198  T = 'QName',
  199  xsd_simpleType_inheritance(T, ST),
  200  validate_xsd_simpleType(ST, V),
  201  atom_string(V, VS),
  202  split_string(VS, ":", "", VL), %[{"<Prefix>",} "<LocalPart>"]
  203  length(VL, VLL),
  204  (
  205    % value has no prefix
  206    (
  207      VLL =:= 1,
  208      VL = [LocalPart],
  209      !,
  210      validate_xsd_simpleType('NCName', LocalPart)
  211    );
  212
  213    % otherwise fraction digit length must be validated
  214    (
  215      VLL =:= 2,
  216      VL = [Prefix, LocalPart],
  217      !,
  218      validate_xsd_simpleType('NCName', Prefix),
  219      validate_xsd_simpleType('NCName', LocalPart)
  220    )
  221  ).
  222validate_xsd_simpleType(T, V) :-
  223  T = 'NOTATION',
  224  xsd_simpleType_inheritance(T, ST),
  225  % NOTATIONs share the same lexical space as QNames
  226  validate_xsd_simpleType(ST, V),
  227  validate_xsd_simpleType('QName', V).
  228
  229% durations
  230validate_xsd_simpleType(T, V) :-
  231  T = 'duration',
  232  xsd_simpleType_inheritance(T, ST),
  233  validate_xsd_simpleType(ST, V),
  234  V =~ '^-?P([0-9]+Y)?([0-9]+M)?([0-9]+D)?(T([0-9]+H)?([0-9]+M)?([0-9]+(\\.[0-9]+)?S)?)?$', % general regexp
  235  V =~ '^.*[YMDHS].*$', % at least one of the properties (year, month, day, hour, minute or second) must be specified
  236  V =~ '^.*[^T]$'. % if there is a 'T' (separator between day and time properties), it must be followed by a time property (hour, minute or second)
  237validate_xsd_simpleType(T, V) :-
  238  T = 'yearMonthDuration',
  239  xsd_simpleType_inheritance(T, ST),
  240  validate_xsd_simpleType(ST, V),
  241  V =~ '^[^DT]*$'. % only durations with year and/or month properties are allowed
  242validate_xsd_simpleType(T, V) :-
  243  T = 'dayTimeDuration',
  244  xsd_simpleType_inheritance(T, ST),
  245  validate_xsd_simpleType(ST, V),
  246  V =~ '^[^YM]*[DT].*$'. % only durations with day, hour, minute and/or second properties are allowed
  247
  248% decimals
  249validate_xsd_simpleType(T, V) :-
  250  T = 'decimal',
  251  xsd_simpleType_inheritance(T, ST),
  252  validate_xsd_simpleType(ST, V),
  253  V =~ '^((\\+|-)?([0-9]+(\\.[0-9]*)?|\\.[0-9]+))$'.
  254validate_xsd_simpleType(T, V) :-
  255  T = 'integer',
  256  xsd_simpleType_inheritance(T, ST),
  257  validate_xsd_simpleType(ST, V),
  258  V =~ '^(\\+|-)?[0-9]+$'.
  259% non positive integers
  260validate_xsd_simpleType(T, V) :-
  261  T = 'nonPositiveInteger',
  262  xsd_simpleType_inheritance(T, ST),
  263  validate_xsd_simpleType(ST, V),
  264  facet(maxInclusive, 0, V).
  265validate_xsd_simpleType(T, V) :-
  266  T = 'negativeInteger',
  267  xsd_simpleType_inheritance(T, ST),
  268  validate_xsd_simpleType(ST, V),
  269  facet(maxInclusive, -1, V).
  270% longs
  271validate_xsd_simpleType(T, V) :-
  272  T = 'long',
  273  xsd_simpleType_inheritance(T, ST),
  274  validate_xsd_simpleType(ST, V),
  275  facet(minInclusive, -9223372036854775808, V),
  276  facet(maxInclusive,  9223372036854775807, V).
  277validate_xsd_simpleType(T, V) :-
  278  T = 'int',
  279  xsd_simpleType_inheritance(T, ST),
  280  validate_xsd_simpleType(ST, V),
  281  facet(minInclusive, -2147483648, V),
  282  facet(maxInclusive,  2147483647, V).
  283validate_xsd_simpleType(T, V) :-
  284  T = 'short',
  285  xsd_simpleType_inheritance(T, ST),
  286  validate_xsd_simpleType(ST, V),
  287  facet(minInclusive, -32768, V),
  288  facet(maxInclusive,  32767, V).
  289validate_xsd_simpleType(T, V) :-
  290  T = 'byte',
  291  xsd_simpleType_inheritance(T, ST),
  292  validate_xsd_simpleType(ST, V),
  293  facet(minInclusive, -128, V),
  294  facet(maxInclusive,  127, V).
  295% non negative integers
  296validate_xsd_simpleType(T, V) :-
  297  T = 'nonNegativeInteger',
  298  xsd_simpleType_inheritance(T, ST),
  299  validate_xsd_simpleType(ST, V),
  300  facet(minInclusive, 0, V).
  301% unsigned longs
  302validate_xsd_simpleType(T, V) :-
  303  T = 'unsignedLong',
  304  xsd_simpleType_inheritance(T, ST),
  305  validate_xsd_simpleType(ST, V),
  306  facet(minInclusive, 0, V),
  307  facet(maxInclusive, 18446744073709551615, V).
  308validate_xsd_simpleType(T, V) :-
  309  T = 'unsignedInt',
  310  xsd_simpleType_inheritance(T, ST),
  311  validate_xsd_simpleType(ST, V),
  312  facet(minInclusive, 0, V),
  313  facet(maxInclusive, 4294967295, V).
  314validate_xsd_simpleType(T, V) :-
  315  T = 'unsignedShort',
  316  xsd_simpleType_inheritance(T, ST),
  317  validate_xsd_simpleType(ST, V),
  318  facet(minInclusive, 0, V),
  319  facet(maxInclusive, 65535, V).
  320validate_xsd_simpleType(T, V) :-
  321  T = 'unsignedByte',
  322  xsd_simpleType_inheritance(T, ST),
  323  validate_xsd_simpleType(ST, V),
  324  facet(minInclusive, 0, V),
  325  facet(maxInclusive, 255, V).
  326% positive integers
  327validate_xsd_simpleType(T, V) :-
  328  T = 'positiveInteger',
  329  xsd_simpleType_inheritance(T, ST),
  330  validate_xsd_simpleType(ST, V),
  331  facet(minInclusive, 1, V).
  332
  333% strings
  334validate_xsd_simpleType(T, V) :-
  335  T = 'string',
  336  xsd_simpleType_inheritance(T, ST),
  337  validate_xsd_simpleType(ST, V),
  338  (
  339    atom(V), atom_string(V, S)
  340  ;
  341    string(V), V = S
  342  ),
  343  string_codes(S, CL),
  344  forall(
  345    member(C, CL),
  346    validate_xsd_character(C)
  347  ).
  348validate_xsd_simpleType(T, V) :-
  349  T = 'normalizedString',
  350  xsd_simpleType_inheritance(T, ST),
  351  validate_xsd_simpleType(ST, V),
  352  V \~ '[\f|\r|\t]'.
  353validate_xsd_simpleType(T, V) :-
  354  T = 'token',
  355  xsd_simpleType_inheritance(T, ST),
  356  validate_xsd_simpleType(ST, V),
  357  V \~ '^[ ]',
  358  V \~ '[ ]$',
  359  V \~ '[ ]{2,}'.
  360validate_xsd_simpleType(T, V) :-
  361  T = 'language',
  362  xsd_simpleType_inheritance(T, ST),
  363  validate_xsd_simpleType(ST, V),
  364  V =~ '^[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*$'.
  365validate_xsd_simpleType(T, V) :-
  366  T = 'NMTOKEN',
  367  xsd_simpleType_inheritance(T, ST),
  368  validate_xsd_simpleType(ST, V),
  369  string_codes(V, CL),
  370  length(CL, CLL),
  371  CLL >= 1,
  372  forall(
  373    member(C, CL),
  374    validate_xsd_name_character(C)
  375  ).
  376validate_xsd_simpleType(T, V) :-
  377  T = 'Name',
  378  xsd_simpleType_inheritance(T, ST),
  379  validate_xsd_simpleType(ST, V),
  380  string_codes(V, CL),
  381  CL = [H|R],
  382  validate_xsd_name_start_character(H),
  383  forall(
  384    member(C, R),
  385    validate_xsd_name_character(C)
  386  ).
  387validate_xsd_simpleType(T, V) :-
  388  T = 'NCName',
  389  xsd_simpleType_inheritance(T, ST),
  390  validate_xsd_simpleType(ST, V),
  391  string_codes(V, CL),
  392  forall(
  393    member(C, CL),
  394    C =\= 58 % [ : ], as NCName is a not colonized Name
  395  ).
  396validate_xsd_simpleType('ID', V) :-
  397  % same value space as NCName plus the following restrictions, which are not validated here:
  398  %  - IDs must be unique within an XML instance
  399  %  - a complex type may not have more than one attribute with a from ID derived type
  400  %  - ID attributes cannot have a default or fixed value
  401  validate_xsd_simpleType('NCName', V).
  402validate_xsd_simpleType('IDREF', V) :-
  403  % same value space as NCName plus the following restriction, which is not validated here:
  404  %  - each IDREF must have a corresponding ID in the same XML instance
  405  validate_xsd_simpleType('NCName', V).
  406validate_xsd_simpleType('ENTITY', V) :-
  407  % same value space as NCName plus the following restriction, which is not validated here:
  408  %  - each ENTITY must match the name of an unparsed entity in a document type definition for the instance
  409  validate_xsd_simpleType('NCName', V).
  410
  411validate_xsd_simpleType(T, _) :-
  412  check_for_single(T).
  413
  414
  415/*
  416  validate_xsd_simpleType_list(Type, List)
  417  --> validates every item in `List` against (XML-Schema) type `Type`
  418*/
  419validate_xsd_simpleType_list(_, []).
  420validate_xsd_simpleType_list(Type, [H|T]) :-
  421  validate_xsd_simpleType(Type, H),
  422  validate_xsd_simpleType_list(Type, T).
  423
  424check_for_single(T) :-
  425  \+((clause(validate_xsd_simpleType(T,_), B), B \= check_for_single(_))),
  426  !,
  427  warning('Type ~w is not yet supported.', [T]),
  428  false.
  429
  430
  431/*
  432  FACETS
  433*/
  434facet(enumeration, List, V) :-
  435  !,
  436  member(V, List).
  437facet(maxInclusive, Max, V) :-
  438  !,
  439  number(Max, Max_),
  440  number(V, V_),
  441  V_ =< Max_.
  442facet(maxExclusive, Max, V) :-
  443  !,
  444  number(Max, Max_),
  445  number(V, V_),
  446  V_ < Max_.
  447facet(minInclusive, Min, V) :-
  448  !,
  449  number(Min, Min_),
  450  number(V, V_),
  451  V_ >= Min_.
  452facet(minExclusive, Min, V) :-
  453  !,
  454  number(Min, Min_),
  455  number(V, V_),
  456  V_ > Min_.
  457facet(pattern, Pattern, V) :-
  458  !,
  459  regex(Pattern, [], V, _).
  460facet(length, Length, V) :-
  461  !,
  462  number(Length, Length_),
  463  atom_length(V, Length_).
  464facet(minLength, Length, V) :-
  465  !,
  466  number(Length, Length_),
  467  atom_length(V, V_Length),
  468  V_Length >= Length_.
  469facet(maxLength, Length, V) :-
  470  !,
  471  number(Length, Length_),
  472  atom_length(V, V_Length),
  473  V_Length =< Length_.
  474facet(fractionDigits, MaxLength, Value) :-
  475  !,
  476  validate_xsd_simpleType(nonNegativeInteger, MaxLength),
  477  split_string(Value, ".eE", "", ValueParts), %["<integer_digits>", "<fraction_digits>", [...]]
  478  length(ValueParts, ValuePartsLength),
  479  (
  480    % value has no fraction digits, so restriction is fulfilled
  481    ValuePartsLength < 2
  482  ;
  483    % otherwise fraction digit length must be validated
  484    (
  485      number(MaxLength, MaxFractionDigitLength),
  486      ValueParts = [_, FractionDigits|_],
  487      digit_length_fraction_part(FractionDigits, FractionDigitLength),
  488      !,
  489      FractionDigitLength =< MaxFractionDigitLength
  490    )
  491  ).
  492facet(totalDigits, _, Value) :-
  493  Value =~ '^(\\+|-)?INF|NaN$'.
  494facet(totalDigits, MaxLength, Value) :-
  495  !,
  496  validate_xsd_simpleType(positiveInteger, MaxLength),
  497  number(MaxLength, MaxDigitLength),
  498  split_string(Value, ".eE", "", ValueParts), %["<integer_digits>", "<fraction_digits>", [...]]
  499  length(ValueParts, ValuePartsLength),
  500  (
  501    (
  502      % value has only integer digits
  503      ValuePartsLength =:= 1,
  504      ValueParts = [IntDigits|_],
  505      digit_length_integer_part(IntDigits, DigitLength)
  506    )
  507  ;
  508    (
  509      % value has both integer and fraction digits
  510      ValuePartsLength =:= 2,
  511      ValueParts = [IntDigits,FractionDigits|_],
  512      digit_length_integer_part(IntDigits, IntDigitsLength),
  513      digit_length_fraction_part(FractionDigits, FractionDigitsLength),
  514      DigitLength is IntDigitsLength + FractionDigitsLength
  515    )
  516  ),
  517  !,
  518  DigitLength =< MaxDigitLength.
  519
  520facet(Facet, _, _) :-
  521  !,
  522  warning('Facet ~w is not yet supported.', [Facet]),
  523  fail.
  524
  525
  526/*
  527  HELPER FUNCTIONS
  528*/
  529
  530number(In, In) :-
  531  number(In),
  532  !.
  533number(In, Out) :-
  534  atom_number(In, Out).
  535
  536% returns the length of significant integer digits
  537digit_length_integer_part(IntegerDigitString, IntegerDigitLength) :-
  538  % remove insignificant leading zeroes
  539  string_to_list(IntegerDigitString, IntegerDigitList),
  540  remove_leading_zeroes(IntegerDigitList, SanitizedIntegerDigitList),
  541  length(SanitizedIntegerDigitList, SanitizedIntegerDigitListLength),
  542
  543  % if we removed all digits, then we removed a significant zero
  544  ( SanitizedIntegerDigitListLength =:= 0 ->
  545    IntegerDigitLength = 1
  546  ;
  547    IntegerDigitLength = SanitizedIntegerDigitListLength
  548  ).
  549
  550% returns the length of significant fraction digits
  551digit_length_fraction_part(FractionDigitString, FractionDigitLength) :-
  552  % remove insignificant trailing zeroes
  553  string_to_list(FractionDigitString, FractionDigitList),
  554  reverse(FractionDigitList, ReversedFractionDigitList),
  555  remove_leading_zeroes(ReversedFractionDigitList, SanitizedReversedFractionDigitList),
  556  length(SanitizedReversedFractionDigitList, FractionDigitLength).
  557
  558% removes leading zeroes from a char code list
  559remove_leading_zeroes([], []).
  560remove_leading_zeroes([H|T], [H|T]) :-
  561  H =\= 48. % 48 ='0'
  562remove_leading_zeroes([48|T], T2) :-
  563  remove_leading_zeroes(T, T2).
  564
  565% validates whether a given character is a valid xml character
  566validate_xsd_character(C) :-
  567  % see xml spec for 'char'
  568  %  - unicode values have been translated to prolog character code values)
  569  %  - character values > 65535 are not supported in prolog
  570  (0 =< C, C < 9249) ; % [ #x0 to #x2420 ]
  571  (9249 < C, C < 57347) ; % [ #x2422 to #x20 ]
  572  (57352 < C, C < 57355) ; % [ #x9 to #xA ]
  573  (57356 < C, C < 57358) ; % [ #xD ]
  574  (57476 < C, C < 57478) ; % [ #x85 ]
  575  (57503 < C, C < 64976) ; % [ #xE09F to #xFDCF ]
  576  (64991 < C, C =< 65535). % [ #FDE0 to Prologs Limit ]
  577
  578% validates whether a given character is a valid xml name start character
  579validate_xsd_name_start_character(C) :-
  580  % see xml spec for 'NameStartChar'
  581  (57 < C, C < 59) ; % [ : ]
  582  (64 < C, C < 91) ; % [ A to Z ]
  583  (94 < C, C < 96) ; % [ _ ]
  584  (96 < C, C < 123) ; % [ a to z ]
  585  (192 < C, C < 215) ; % [ #xC0 to #xD6 ]
  586  (215 < C, C < 247) ; % [ #xD8 to #xF6 ]
  587  (247 < C, C < 768) ; % [ #xF8 to #x2FF ]
  588  (879 < C, C < 894) ; % [ #x370 to #x37D ]
  589  (894 < C, C < 8192) ; % [ #x37F to #x1FFF ]
  590  (8203 < C, C < 8206) ; % [ #x200C to #x200D ]
  591  (8303 < C, C < 8592) ; % [ #x2070 to #x218F ]
  592  (11263 < C, C < 12272) ; % [ #x2C00 to #x2FEF ]
  593  (12288 < C, C < 55296) ; % [ #x3001 to #xD7FF ]
  594  (63743 < C, C < 64976) ; % [ #xF900 to #xFDCF ]
  595  (65007 < C, C < 65534). % [ #xFDF0 to #xFFFD ]
  596
  597% validates whether a given character is a valid xml name character
  598validate_xsd_name_character(C) :-
  599  % see xml spec for 'NameChar'
  600  validate_xsd_name_start_character(C) ;
  601  (44 < C, C < 47) ; % [ - to . ]
  602  (47 < C, C < 58) ; % [ 0 to 9 ]
  603  (182 < C, C < 184) ; % [ #xB7 ]
  604  (767 < C, C < 880) ; % [ #x0300 to #x036F ]
  605  (8254 < C, C < 8257). % [ #x203F to #x2040 ]