1:-module(pro2sql,[
2 pro2sql/2,
3 pro2sql/3,
4 nondot_str/2,
5 concat_to_atom/2,
6 concat_to_atom/3,
7 member_dif/2,
8 load_csv/3,
9 load_csv/4,
10 bq/4,
11 op(800,fy,distinct),
12 op(800,fy,all)
13]).
141:- use_module('./file_path_name_ext.pro'). 142 143:- module_transparent 144 head_sql/9, 145 headargs_sql/9, 146 clauses_sql/8, 147 clause_sql/8, 148 args_sql/4, 149 pro2sql/2, 150 pro2sql/3, 151 nondot_str/2, 152 concat_to_atom/2, 153 concat_to_atom/3, 154 member_dif/2, 155 load_csv/3, 156 load_csv/4, 157 bq/4. 158 159:- op(800,fy,distinct). 160:- op(800,fy,all). 161:- set_flag(regexp,'REGEXP_CONTAINS (~w, r\'~w\')').
table(tablename).
Example. Note: we're using assert here from the prolog prompt, but table_def/3 can
be asserted from a prolog file like any other predicate. Also, load_csv/2 will
automatically create a table_dev/3.
:- assert(table_def(person,[id,name,age],[prefix(tiny),table(people)])).
179:- dynamic table_def/3. 180:- multifile table_def/3.
Example
:- assert(table_def(person,[id,name,age],[prefix(tiny),table(people)])).
:- assert( (adults(Name,Age):- person(_,Name,Age), Age >=21) ).
:- pro2sql(adults(Name,Age), SQL).
Name = 'people.name',
Age = 'people.age',
SQL = 'SELECT people.name,people.age FROM tiny.people WHERE people.age >= 21'
:- assert( (teens(Name,Age):- person(_,Name,Age), between(16,21,Age), \+ member(Name,[john,jane])) ).
:- pro2sql(teens(Name,Age),SQL).
Name = 'people.name',
Age = 'people.age',
SQL = 'SELECT people.name,people.age FROM tiny.people WHERE people.age BETWEEN 16 AND 21 AND NOT ( people.name IN ("john","jane") )' .
203pro2sql(Head,SQL):- 204 context_module(CM), 205 pro2sql(CM,Head,SQL). 206pro2sql(CM,Head,SQL):- 207 Head =.. [Func|Args], 208 length(Args,N), 209 current_predicate(Func/N), 210 CM:clause(Head,Body), 211 head_sql(Head,S^S,G^G,H^H,R^R,S1^ST,Gro^[],Hav^[],Ord^[]), 212 clauses_sql(CM,Body,S1^ST,F^F,W^W,Sel^[],Fro^[],Whe^[]), 213 concat_to_atom(Sel,',',Select), 214 concat_to_atom(Fro,',',From), 215 ( Whe=[] -> Where=''; concat_to_atom([' WHERE'|Whe],' ',Where) ), 216 ( Gro=[] -> Group=''; concat_to_atom(Gro,',',Grou), format(atom(Group),' GROUP BY ~w',[Grou])), 217 ( Hav=[] -> Having=''; concat_to_atom(Hav,',',Havi), format(atom(Group),' HAVING ~w',[Havi])), 218 ( Ord=[] -> Order=''; concat_to_atom(Ord,',',Orde), format(atom(Order),' ORDER BY ~w',[Orde])), 219 format(atom(SQL),'SELECT ~w FROM ~w~w~w~w~w',[Select,From,Where,Group,Having,Order]).
Example
:- head_sql(name_age_query(Name,Age),S^S,G^G,R^R,Select^NST,Group^NGT,Order^NRT). S=Select, Select = [Name,Age|NST] :- head_sql(company_age(group(Company),avg(Age)),S^S,G^G,R^R,Select^NST,Group^NGT,Order^R). S=Select, Select = [Company,avg(Age)] Group = [Company]
239head_sql(Head,S^ST,G^GT,H^HT,R^RT,S^NST,G^NGT,H^NHT,R^NRT):-
240 Head =.. [_|Args],
241 headargs_sql(Args,S^ST,G^GT,H^HT,R^RT,S^NST,G^NGT,H^NHT,R^NRT).Argument types and translations:
order(Expr) - (Expr) added to the Select list and to the Order list.group(Var) - Var is added to the Select and Group listhaving(Expr) - (Expr) is added to the Having listmin(Expr) - translated as min(Expr)max(Expr) - translated as max(Expr)count(Expr) - translated as count(Expr)sum(Expr) - translated as sum(Expr)avg(Expr) - translated as avg(Expr):- headargs_sql([group(Company),avg(Age)],S^S,G^G,R^R,Select^NST,Group^NGT,Order^NRT). S=Select, Select=[Company,avg(Age)|NST] G=Group, Group=[Company|NGT] R=Order, Order=NRT
@arg Args List of arguments from the head of a query predicate @arg S^ST Initial difference list of arguments for the SELECT clause @arg Select^NST Resulting diffence list of arguments for the SELECT clause @arg G^GT Initial difference list of arguments for the GROUP clause @arg Group^NGT Resulting diffence list of arguments for the GROUP clause @arg H^HT Initial difference list of arguments for the HAVING clause @arg Having^NHT Resulting diffence list of arguments for the HAVING clause @arg R^RT Initial difference list of arguments for the ORDER clause @arg Order^NRT Resulting diffence list of arguments for the ORDER clause
276headargs_sql([],S^ST,G^GT,H^HT,R^RT,S^ST,G^GT,H^HT,R^RT). 277headargs_sql([A|Args],S^ST,G^GT,H^HT,R^RT,S^NST,G^NGT,H^NHT,R^NRT):- 278 var(A), 279 ST = [A|ST2],!, 280 headargs_sql(Args,S^ST2,G^GT,H^HT,R^RT,S^NST,G^NGT,H^NHT,R^NRT). 281headargs_sql([order(Expr)|Args],S^ST,G^GT,H^HT,R^RT,S^NST,G^NGT,H^NHT,R^NRT):- 282 (member_dif(Expr,S^ST) -> ST=ST2; ST = [Expr|ST2]), 283 RT = [Expr|RT2],!, 284 headargs_sql(Args,S^ST2,G^GT,H^HT,R^RT2,S^NST,G^NGT,H^NHT,R^NRT). 285headargs_sql([group(Expr)|Args],S^ST,G^GT,H^HT,R^RT,S^NST,G^NGT,H^NHT,R^NRT):- 286 (member_dif(Expr,S^ST) -> ST=ST2; ST = [Expr|ST2]), 287 GT = [Expr|GT2],!, 288 headargs_sql(Args,S^ST2,G^GT2,H^HT,R^RT,S^NST,G^NGT,H^NHT,R^NRT). 289headargs_sql([having(Expr)|Args],S^ST,G^GT,H^HT,R^RT,S^NST,G^NGT,H^NHT,R^NRT):- 290 HT = [Expr|HT2],!, 291 headargs_sql(Args,S^ST,G^GT,H^HT2,R^RT,S^NST,G^NGT,H^NHT,R^NRT). 292headargs_sql([F|Args],S^ST,G^GT,H^HT,R^RT,S^NST,G^NGT,H^NHT,R^NRT):- 293 compound(F), 294 F =.. [Func,_Expr], 295 ( member(Func,[min,max,count,sum,avg]) 296 -> ST = [F|ST2] 297 ; ST = [(F)|ST2] 298 ),!, 299 headargs_sql(Args,S^ST2,G^GT,H^HT,R^RT,S^NST,G^NGT,H^NHT,R^NRT). 300headargs_sql([A|Args],S^ST,G^GT,H^HT,R^RT,S^NST,G^NGT,H^NHT,R^NRT):- 301 ST = [A|ST2],!, 302 headargs_sql(Args,S^ST2,G^GT,H^HT,R^RT,S^NST,G^NGT,H^NHT,R^NRT).
Example
:- assert(table_def(person,[id,name,age],[prefix(tiny),table(people)])). :- assert(table_def(employee,[name,company],[prefix(tiny)])). :- clauses_sql((person(_,Name,Age),employee(Name,Company)),[Name,Age,Company|ST]^ST,F^F,W^W, Select^NST,From^NFT,Where^NWT). :- clauses_sql((person(_,Name,Age),Age >= 16),[Name,Age|ST]^ST,F^F,W^W,Select^NST,From^NFT,Where^NWT). Name = 'people.name', Age = 'people.age', ST = NST, F = From, From = ['tiny.people'|NFT], W = Where, Where = ['people.age'>=16|NWT], Select = ['people.name', 'people.age'|NST] .
334% Conjunctive body clauses 335clauses_sql(CM,(Clause,Cs),S^ST,F^FT,W^WT,Select^NST,From^NFT,Where^NWT):- 336 clauses_sql(CM,Clause,S^ST,F^FT,W^WT,S2^ST2,F2^FT2,W2^WT2), 337 ( WT == WT2 -> WT2a=WT2; WT2=['AND'|WT2a]), 338 clauses_sql(CM,Cs,S2^ST2,F2^FT2,W2^WT2a,Select^NST,From^NFT,Where^NWT). 339 340% Disjunctive body clauses 341clauses_sql(CM,(Clause;Cs),S^ST,F^FT,W^WT,Select^NST,From^NFT,Where^NWT):- 342 clauses_sql(CM,Clause,S^ST,F^FT,W^WT,S2^ST2,F2^FT2,W2^WT2), 343 ( WT == WT2 -> WT2a=WT2; WT2=['OR'|WT2a]), 344 clauses_sql(CM,Cs,S2^ST2,F2^FT2,W2^WT2a,Select^NST,From^NFT,Where^NWT). 345 346% Negated body clause 347clauses_sql(CM,(\+ Clause),S^ST,F^FT,W^WT,Select^NST,From^NFT,Where^NWT):- 348 WT = ['NOT ('|WT2], 349 clauses_sql(CM,Clause,S^ST,F^FT,W^WT2,Select^NST,From^NFT,Where^WT2a), 350 WT2a = [')'|NWT]. 351 352% Last clause 353clauses_sql(CM,Clause,S^ST,F^FT,W^WT,Select^NST,From^NFT,Where^NWT):- 354 Clause \= (_,_), Clause \= (_;_), 355 clause_sql(CM,Clause,S^ST,F^FT,W^WT,Select^NST,From^NFT,Where^NWT).
Example
:- assert(table_def(person,[id,name,age],[prefix(tiny),table(people)])). :- clause_sql(person(_,Name,Age),[Name,Age|ST]^ST,F^F,W^W,Select^NST,From^NFT,Where^NWT). NST,From^NFT,Where^NWT). Name = 'people.name', Age = 'people.age', ST = NST, F = From, From = ['tiny.people'|NFT], W = Where, Where = NWT, NWT = [], Select = ['people.name', 'people.age'|NST] .
384% A simple predicate clause, where the predicate functor is defined as a table 385clause_sql(CM,Clause,Select^ST,From^FT,Where^WT,Select^ST,From^NFT,Where^NWT):- 386 Clause =.. [Func|Args], 387 CM:table_def(Func,Fields,Options), 388 ( member(prefix(Pref),Options) -> true; Pref='' ), 389 ( member(table(Tab),Options) -> true; Tab=Func), 390 ( Pref = '' -> Table=Tab; concat_to_atom([Pref,'.',Tab],Table)), 391 FT = [Table|NFT], 392 args_sql(Tab,Fields,Args,Wh), 393 ( Wh=[] 394 -> ( Where=Where, NWT=WT ) 395 ; append(Wh,NWT,WT) 396 ). 397 398% membership constraint clause. ('IN' for SQL) 399clause_sql(_,Clause,S^ST,F^FT,Where^WT,S^ST,F^FT,Where^NWT):- 400 Clause =.. [member,Arg1,Arg2], 401 maplist(nondot_str,Arg2,Arg2s), 402 concat_to_atom(Arg2s,',',A2), 403 format(atom(NewClause),'~w IN (~w)',[Arg1,A2]), 404 WT = [NewClause|NWT]. 405 406% between constraint clause. 407clause_sql(_,Clause,S^ST,F^FT,Where^WT,S^ST,F^FT,Where^NWT):- 408 Clause =.. [between,Low,High,Value], 409 format(atom(NewClause),'~w BETWEEN ~w AND ~w',[Value,Low,High]), 410 WT = [NewClause|NWT]. 411 412% re_match constraint clause. 413clause_sql(_,Clause,S^ST,F^FT,Where^WT,S^ST,F^FT,Where^NWT):- 414 Clause =.. [re_match,Arg1,Arg2], 415 get_flag(regexp,RegexpTemplate), 416 format(atom(NewClause),RegexpTemplate,[Arg2,Arg1]), 417 WT = [NewClause|NWT]. 418 419% A comparative constraint clause. 420clause_sql(_,Clause,S^ST,F^FT,Where^WT,S^ST,F^FT,Where^NWT):- 421 Clause =.. [Op,Arg1,Arg2], 422 member(Op-Sop,['<'-'<','>'-'>','='-'=','=='-'=','>='-'>=','=<'-'<=','\\='-'<>']), 423 nondot_str(Arg2,A2), 424 format(atom(NewClause),'~w ~w ~k',[Arg1,Sop,A2]), 425 WT = [NewClause|NWT].
Examples
:- args_sql(person,[id,name,age],[_,Name,Age],Where). Name = 'person.name' Age = 'person.age' Where = [] :- args_sql(person,[id,name,age],[_,Name,16],Where). Name = 'person.name' Where = ['person.age = 16']
448args_sql(_,[],[],[]). 449args_sql(Table,[Field|Fs],[Arg|As],Where):- 450 var(Arg), 451 atomic_list_concat([Table,'.',Field],Arg),!, 452 args_sql(Table,Fs,As,Where). 453args_sql(Table,[F|Fs],[Arg|As],[Field|Where]):- 454 nonvar(Arg), 455 atomic_list_concat([Table,'.',F],Fld), 456 nondot_str(Arg,A), 457 (atom(A)->T='~w = ~w';T='~w = ~k'), 458 format(atom(Field),T,[Fld,A]),!, 459 args_sql(Table,Fs,As,Where).
Example
472nondot_str(Atom,Convert):-
473 ( (atom(Atom),(Atom\=null,Atom\='NULL'),atomic_list_concat(L,'.',Atom),L=[Atom])
474 -> atom_string(Atom,Convert)
475 ; Convert=Atom
476 ).Example
:- concat_to_atom([hello,"John"],' ',Atom). Atom = 'hello "John"'
492concat_to_atom(List,Atom):- concat_to_atom(List,'',Atom). 493concat_to_atom([A|As],Sep,Result):- 494 (string(A)->T='~k';T='~w'), 495 format(atom(First),T,[A]), 496 concat_to_atom(As,Sep,First,Result). 497concat_to_atom([],_,Result,Result). 498concat_to_atom([A|As],Sep,Current,Result):- 499 (string(A)->T='~w~w~k';T='~w~w~w'), 500 format(atom(Temp),T,[Current,Sep,A]), 501 concat_to_atom(As,Sep,Temp,Result).
Example
:- member(c,[a,b,c,d|T]^T). true.
515member_dif(_,Xs^T):-Xs==T,!,fail. 516member_dif(Item,[X|Xs]^T):- 517 (Item=X -> true; member_dif(Item,Xs^T)).
Options:
functor(F) - F is an atom that will be used as the predicate name for the rows read into the Prolog databasetable(T) - 'T' is the table name (as used for SQL). This option is passed on to table_def/3.prefix(P) - 'P' is a atom that will be used as a prefix for the table (as used in the SQL FROM clause). This option is passed on to table_def/3.headings(true)- true (default) means the csv has a headings (i.e. column names) row
Example (assumes existence of a file people.csv)
:- load_csv('people.csv',3,[functor(person),prefix(tiny),table(people)]).
:- table_def(person,Fields,Options).
Fields = [id,name,age]
Options = [prefix(tiny),table(people)] .
:- person(ID,Name,Age).
ID = 1
Name = john
Age = 13 ;...
@arg File The csv file to load @arg Fields List of atom field names. If Fields is an Integer, it specifies the number of csv columns and the field names may be read from first row of csv. @arg Options Options that control the fact predicates loaded into Prolog
549load_csv(File,Fs,Options):- 550 context_module(CM), 551 load_csv(CM,File,Fs,Options). 552load_csv(CM,File,Fs,Options):- 553 file_path_name_ext(File,_,FName,_), 554 (member(functor(Func),Options) -> true; Func=FName), 555 (integer(Fs) -> (N=Fs,Fields=[]); (Fields=Fs,length(Fields,N))), 556 (member(table(Table),Options) -> D1=[table(Table)]; D1=[]), 557 (member(prefix(Prefix),Options) -> DefOptions=[prefix(Prefix)|D1]; DefOptions=D1), 558 (member(nodef,Options) 559 -> true 560 ; ( CM:retractall(table_def(Func,_,_)), 561 CM:assert(table_def(Func,Fields,DefOptions)) 562 ) 563 ), 564 CM:abolish(Func/N), 565 CSVops=[functor(Func),convert(true)], 566 load_csv_rows(CM,File,CSVops),!, 567 ( member(headings(false),Options) 568 -> true 569 ; ( functor(H,Func,N), 570 H =.. [_|Headers], 571 once(CM:retract(H)), 572 ( Fields=[] 573 -> ( CM:retractall(table_def(Func,_,_)), 574 CM:assert(table_def(Func,Headers,DefOptions)) 575 ) 576 ; true 577 ) 578 ) 579 ).
585load_csv_rows(CM,Input,CSVOps):- 586 csv_read_file_row(Input,Row,CSVOps), 587 (Row=end_of_file -> true; CM:assertz(Row)), 588 fail. 589load_csv_rows(_,_,_):-!.
Func/N from Opts functor(Func). If no functor is given in Opts
the `<pred>_result convention is used, where <pred>` is the functor
of the QueryPred.
Notes: at the moment this is coded with the assumption that
BigQuery's CLI utility is installed (i.e. bq). At some point
we'll replace that with direct calls to the BQ REST API.
Options:
table(T) - 'T' is the table name (as used for SQL). This option is passed on to table_def/3.prefix(P) - 'P' is a atom that will be used as a prefix for the table (as used in the SQL FROM clause).
This option is passed on to table_def/3.headings(true)- true (default) means the csv has a headings (i.e. column names) row. This option is passed to load_csv/3
Example
:- [test_queries]. :- bq(adults(_,_),[name,age],Status,[]). Status = 0 . :- adults_result(Name,Age). Name = jane, Age = 22 ; ...
@arg QueryPred The prolog query predicate @arg Status 0=successful, 1=error (error text in the results file Func.csv) @arg Fields Integer indicating number of result fields (columns), or a list of column headers @arg Opts Options
624bq(Pred,Fields,Status,Opts):-
625 context_module(CM),
626 pro2sql(CM,Pred,SQL),
627 ( member(functor(Func),Opts) -> true; (Pred=..[F|_], atom_concat(F,'_result',Func)) ),
628 format(atom(File),'~w.csv',[Func]),
629 format(atom(Command),'bq query --format=csv "~w" > ~w',[SQL,File]),
630 shell(Command,Status),
631 ( Status==0
632 -> load_csv(CM,File,Fields,Opts)
633 ; true
634 ),!
Lightweight Prolog to SQL compiler
pro2sql
A lightweight Prolog to SQL SELECT translator.
Introduction
This module is a minimal compiler of prolog "query predicates" to sql SELECT statements. I'm inspired by Draxler's work [1][], but I couldn't access his original paper, and it was too much work trying to use Mungall's implementation without much documentation. It seemed to me that it would be less work (and more fun) to implement a minimalist translator:
set_flag(regexp,Template).The aim is that a "query predicate" (a prolog predicate intended to retrieve results from fact predicates) works the same in prolog as in SQL -- except that SQL returns a table, where prolog returns variable result alternatives.
The use case is where you want to extract from a database through SQL and then further query the results in Prolog.
Installation
In SWI-Prolog,
package_install(pro2sql).Otherwise, clone the git repository (https://github.com/RdR1024/pro2sql), and copy the files from `...pro2sql/src/prolog/`
Usage
Let's assume that you have an SQL table called
personwith fieldsid,name,age. In Prolog, assert the following table definition predicate:Let's also assume that you have also have a predicate that would list all people with age >= 21.
This query predicate is stock-standard Prolog. And if you had fact predicates for person/3 then the query predicate would simply give you all the names and ages of people over 21. However, let's say that the data is not in Prolog, but in the SQL database. You can translate that predicate into an SQL query like this:
And you can run this query on Google's BigQuery like this (assuming you have an account and that the
persontable is available in themydatasetdataset):By default, the results become available in Prolog as
adult_result(Name,Age)(same predicate name as the query, but with _result suffix). The results also exist in a downloaded `.csv` table, with the column headings given in the second argument of bq/4.For more examples, see the code comments and also
test_queries.proin the test folder of the repository. The bq/4 predicate currently uses BigQuery commandline utility (bq). I intend to write an BiqQuery API interface for Prolog at some time in the future. In the meantime, I have also left some notes on BigQuery bq in the `.../doc` folder.Status
This code is currently
alpha.All requirements for now are implemented, and it seems to be working with a small set of tests. However, I want to test it more comprehensively before calling it
beta.Notes
'mytable.person' = jones) should be translated as a string (e.g."jones"). This method is not perfect -- there could be atoms that contain full-stops (periods) that should be translated as a string. Therefore, the recommendation is to store string values as strings in Prolog.company_age(group(Company), avg(Age)). These aggregation functions will translate to SQL and work fine. But if you use the predicate directly in Prolog, the aggregation function won't do anything. Rather, to get the same results as SQL, but in Prolog, you would have to run the query predicate through an aggregate predicate (like the aggregate/3 fromlibrary(aggregate).For example, assume that you have a query predicate
tonnage(sum(Weight)):- package(_,_,Weight). In Prolog, you could get the same results as SQL, by applying aggregate/3 -- that is,aggregate(sum(Weight),tonnage(sum(Weight)),Total).Code Notes
I use a lot of difference lists. My preferred notation is
List^Tail, whereList = [x1,x2,x3,...|Tail]. In other words, the `^Tail` suffix simply provides another copy of the tail variable. For example I useSelect^STas the difference list for the SELECT clauses. Typically, I usexTfor the initial tail variabe (e.g. ST,FT,WT for the SELECT,FROM and WHERE tails respectively) and NxT for the "new" (after processing) tails.The initial (empty) value when calling a predicate with difference lists is something like
S^S. With difference lists, adding a new value happens like this:ST = [NewValue|NST]. Here, the new value is made into a list with the new tail and then that list is unified with the old tail. Alternative, if we have a list of new values,append(NewValues,NST,ST)would do the same.References
[1] "Draxler C (1992) Prolog to SQL Compiler, version 1.0. Technical report, CIS Centre for Information and Speech, University of Munich"
License
BSD 3-Clause License
Copyright (c) 2023, Richard de Rozario
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**/