1:- module(semantria, [ process_document/2
    2                     , queue_document/2
    3                     , request_document/2
    4                     , request/3
    5                     ]).    6
    7:- use_module(library(base64), [base64/2]).    8:- use_module(library(condition)).    9:- use_module(library(error), [must_be/2]).   10:- use_module(library(func)).   11:- use_module(library(http/http_header)).  % needed for POST requests
   12:- use_module(library(http/http_open), [http_open/3]).   13:- use_module(library(http/http_ssl_plugin)).   14:- use_module(library(http/json), [atom_json_term/3, json_read/2]).   15:- use_module(library(random), [random_between/3]).   16:- use_module(library(readutil), [read_stream_to_codes/2]).   17:- use_module(library(sha), [hash_atom/2, hmac_sha/4, sha_hash/3]).   18:- use_module(library(uri), [uri_encoded/3]).   19:- use_module(library(uri_qq)).
 consumer_key(Key:text)
Add a clause to this predicate to specify your Semantria API "consumer key".
   25:- multifile consumer_key/1.   26:- dynamic consumer_key/1.
 secret_key_md5(MD5:text)
Add a clause to this predicate to specify the MD5 hash of your Semantria API "secret key". The hash should use lower case hexadecimal encoding.

This hack is necessary because SWI Prolog doesn't seem to have a good MD5 implementation. Run `md5sum $secret_key` at a command prompt to get the value you need.

   37:- multifile secret_key_md5/1.   38:- dynamic secret_key_md5/1.   39
   40
   41% the base URL for all API requests to Semantria
   42api_base('https://api35.semantria.com/').
 process_document(+Document:string, -Response:dict)
Queue Document for processing and block until a Response is available. This predicate is a synchronous convenience on top of Semantria's asynchronous API. A document ID is generated based on the document's content. Calling process_document/2 on a document that has already been processed returns immediately (using results cached on Semantria's server).
   54process_document(Document, Response) :-
   55    document_id(Document, Id),
   56    process_document_(Document, Id, 10, Response).
   57
   58process_document_(_, Id, Tries, Response) :-
   59    handle( request_document(Id, Response0)
   60          , error(_,context(_,status(404,_)))
   61          , fail
   62          ),
   63    !,
   64    Status = Response0.status,
   65    ( Status == "PROCESSED" ->
   66        Response = Response0
   67    ; Status == "QUEUED" ->
   68        poll_document(Id, Tries, Response)
   69    ; Status == "FAILED" ->
   70        throw("Semantria document processing failed")
   71    ; % otherwise ->
   72        must_be(one_of(["PROCESSED","QUEUED","FAILED"]), Status)
   73    ).
   74process_document_(Document, Id, Tries, Response) :-
   75    queue_document(Document, Id),
   76    process_document_(Document, Id, Tries, Response).
   77
   78poll_document(Id, Tries0, Response) :-
   79    sleep(1),  % give Semantria a chance to finish its work
   80    Tries is Tries0 - 1,
   81    ( Tries =< 0 -> throw("Too many retries") ; true ),
   82    process_document_(_, Id, Tries, Response).
 queue_document(+Document:string, ?Id:string) is det
Add Document to Semantria's queue for processing. If Id is ground, use that as the document's Id; otherwise, bind Id to a unique identifier for Document. Generated identifiers are guaranteed to be stable across invocations.
   90queue_document(Document, Id) :-
   91    % prepare arguments
   92    must_be(string, Document),
   93    document_id(Document, Id),
   94
   95    % submit document to Semantria
   96    Details = _{ id: Id, text: Document },
   97    request(post(Details), document, _).
   98
   99document_id(_, Id) :-
  100    ground(Id),
  101    !.
  102document_id(Document, Id) :-
  103    sha_hash(Document,HashBytes,[]),
  104    hash_atom(HashBytes, IdLong),
  105    sub_atom(IdLong, 0, 32, _, IdShort),  % 32 char max per API docs
  106    atom_string(IdShort, Id).
 request_document(+Id:string, -Response:dict)
Request details about an already-queued document with Id. This predicate is usually called after calling queue_document/2.
  115request_document(Id, Response) :-
  116    request(get, document/Id, Response).
  117
  118
  119%% request(+Method, +Path, Response:dict)
  120%
  121%  Low-level predicate for making authenticated API calls. Method
  122%  specifies the HTTP method. Path indicates the path. It can be an
  123%  atom or a term (like `document/some_document_id`). For example,
  124%
  125%      ?- request(get, status, R).
  126%      R = _{api_version:"3.5", ...} .
  127%
  128%  For a POST request, make `Method=post(Dict)`. The Dict is converted
  129%  into a JSON object and included as the request body.
  130request(Method, Path, Response) :-
  131    sign_request('~w.json' $ Path, _{}, Url, Auth),
  132    debug(semantria, "request URL: ~s~n", [Url]),
  133    catch( request_open(Method, Url, Auth, Stream)
  134         , E
  135         , failable_exception(E)
  136         ),
  137    json_read(Stream, Json),
  138    json_to_dict(Json, Response).
  139
  140request_open(get, Url, Auth, Stream) :-
  141    http_open( Url
  142             , Stream
  143             , [ method(get)
  144               , request_header(authorization=Auth)
  145               , cert_verify_hook(ssl_verify)
  146               ]
  147             ),
  148    set_stream(Stream, encoding(utf8)).
  149request_open(post(Dict), Url, Auth, Stream) :-
  150    % convert Dict to JSON
  151    dict_pairs(Dict, json, Pairs0),
  152    maplist(eq_dash, Pairs, Pairs0),
  153    atom_json_term(Json, json(Pairs), [as(atom)]),
  154    debug(semantria, "request JSON body: ~s~n", [Json]),
  155
  156    % POST JSON to Semantria
  157    http_open( Url
  158             , Stream
  159             , [ post(atom(application/json, Json))
  160               , request_header(authorization=Auth)
  161               , status_code(202)
  162               , cert_verify_hook(ssl_verify)
  163               ]
  164             ).
  165
  166
  167eq_dash(K=V,K-V).
  168
  169
  170% accept all SSL certificates
  171ssl_verify( _SSL
  172          , _ProblemCertificate
  173          , _AllCertificates
  174          , _FirstCertificate
  175          , _Error
  176          ).
  177
  178
  179% convert an exception into a signal which can either fail or rethrow.
  180% this is convenient for converting predicates that throw exceptions
  181% into predicates that raise signals.
  182% maybe it'd be convenient to have call_signal/2 which is like call/1
  183% but automatically uses this predicate to convert exceptions into
  184% signals.
  185failable_exception(E) :-
  186    ( signal(E, Restart) ->
  187        ( Restart == fail ->
  188            fail
  189        ; % unexpected restart ->
  190            must_be(one_of([fail]), Restart)
  191        )
  192    ; % signal not handled ->
  193        throw(E)
  194    ).
  195
  196
  197
  198json_to_dict(json(EqPairs), Dict) :-
  199    !,
  200    maplist(json_pair, EqPairs, DashPairs),
  201    dict_pairs(Dict, _, DashPairs).
  202json_to_dict(Term, Term).
  203
  204json_pair(Key=Value0, Key-Value) :-
  205    ( atom(Value0) ->
  206        atom_string(Value0, Value)
  207    ; Value0=json(_) ->
  208        json_to_dict(Value0, Value)
  209    ; is_list(Value0) ->
  210        maplist(json_to_dict, Value0, Value)
  211    ; true ->
  212        Value = Value0
  213    ).
  214
  215
  216% generate the URL and Authorization header that's needed for making
  217% requests to Semantria. documentation for this process is available
  218% at https://semantria.com/developer The written docs are somewhat poor,
  219% so it's best to consult the various SDKs and their source code to
  220% resolve questions.
  221sign_request(Path, Params0, Url, Authorization) :-
  222    % preliminaries
  223    api_base(Base),
  224    nonce(Nonce),
  225    now(Now),
  226
  227    % build "Signature Base String"
  228    Extra = [ oauth_consumer_key=consumer_key(~)
  229            , oauth_nonce=Nonce
  230            , oauth_signature_method="HMAC-SHA1"
  231            , oauth_timestamp=Now
  232            , oauth_version=1.0
  233            ],
  234    put_dict(Extra, Params0, Params),
  235    Url = {|uri(Base)||$Path?$Params|},
  236
  237    % build HMAC-SHA1 signature
  238    secret_key_md5(Key),
  239    hmac_sha(Key, uri_encode $ Url, SignatureBytes, [algorithm(sha1)]),
  240    base64(atom_codes(~,SignatureBytes), Signature64),
  241    uri_encode(Signature64, Signature),
  242
  243    % build Authorization header
  244    authorization_header([ oauth_signature=Signature
  245                         | Extra
  246                         ]
  247                        , Authorization
  248                        ).
  249
  250
  251% comma-separated Authorization header
  252authorization_header(Values, Header) :-
  253    maplist(quote_headerval, Values, Parts),
  254    atomic_list_concat(Parts, ', ', PartialHeader),
  255    Header = 'OAuth, ~s' $ PartialHeader.
  256
  257quote_headerval(Key=Value,Auth) :-
  258    Auth = '~s="~w"' $ [Key,Value].
  259
  260
  261% generate a large random integer
  262nonce(Nonce) :-
  263    random_between(1,18_446_744_073_709_551_616,Nonce).
  264
  265
  266% current time in integer seconds since the epoch
  267now(T) :-
  268    get_time(Tfloat),
  269    T is round(Tfloat).
  270
  271
  272% encode URI values as Semantria expects.
  273% uri_encoded/3 doesn't encode :, / or ? characters.
  274uri_encode(Value, Encoded) :-
  275    uri_encoded(query_value, Value, E0),
  276    atom_codes(E0, E1),
  277    once(phrase(enc, E1, E2)),
  278    atom_codes(Encoded, E2).
  279
  280enc, "%3A" --> ":", enc.
  281enc, "%2F" --> "/", enc.
  282enc, "%3F" --> "?", enc.
  283enc, [C] --> [C], enc.
  284enc --> { true }