34
35:- module(r_data,
36 [ r_data_frame/3, 37 r_data_frame_from_rows/2, 38 r_data_frame_from_dicts/2, 39
40 r_data_frame_to_dicts/2, 41 r_data_frame_to_rows/3, 42
43 r_data_frame_colnames/2, 44 r_data_frame_rownames/2 45 ]). 46:- use_module(r_call). 47:- use_module(library(apply)). 48:- use_module(library(error)). 49:- use_module(library(pairs)). 50:- use_module(library(lists)). 51
52:- meta_predicate
53 r_data_frame(+, +, 0). 54
55/** <module> R data frame handling
56
57This library provides predicates for creating and fetching R data
58frames. R data frames are typically 2-dimensional arrays where the data
59is organised in _columns_. In Prolog, data is typically organised in
60_rows_ (or _records_).
61*/
62
63%% r_data_frame(+Rvar, +Columns, :Goal) is det.
64%
65% Create an R data.frame from the solutions of Goal. The resulting
66% data frame is bound to the R variable Rvar. For example:
67%
68% ```
69% ?- r_data_frame(movieyear,
70% [movie=Name, year=Year],
71% movie(Name, Year)).
72% ```
73%
74% @arg Rvar is the name of the R output variable
75% @arg Columns is a list Name=Var
76
77r_data_frame(RVar, ColSpec, Goal) :-
78 must_be(atom, RVar),
79 maplist(arg(1), ColSpec, Names),
80 maplist(arg(2), ColSpec, Vars),
81 Templ =.. [v|Vars],
82 findall(Templ, Goal, Rows),
83 r_data_frame_from_rows(RVar, Rows),
84 colnames(RVar) <- Names.
85
86%% r_data_frame_to_dicts(+DataFrame, -Dicts) is det.
87%
88% Translate a DataFrame into a list of dicts, where each dict
89% represents a _row_. The keys of the dicts are fetched from
90% `colnames(DataFrame)`. For example:
91%
92% ```
93% ?- r_data_frame_to_dicts(mtcars, Dicts).
94% Dicts = [ row{am:1, carb:4, cyl:6, disp:160.0, drat:3.9,
95% gear:4, hp:110, mpg:21.0, qsec:16.46, vs:0,
96% wt:2.62},
97% ...
98% ]
99% ```
100
101r_data_frame_to_dicts(DataFrame, Dicts) :-
102 Cols <- DataFrame,
103 ColNameStrings <- colnames(DataFrame),
104 maplist(atom_string, ColNames, ColNameStrings),
105 pairs_keys_values(Pairs, ColNames, _),
106 dict_pairs(Templ, _, Pairs),
107 maplist(dict_cols(Templ, Dicts), ColNames, Cols).
108
109dict_cols(Templ, Dicts, Name, Col) :-
110 maplist(fill_col(Templ, Name), Col, Dicts).
111
112fill_col(_, Name, Value, Dict) :-
113 nonvar(Dict), !,
114 get_dict(Name, Dict, Value).
115fill_col(Templ, Name, Value, Dict) :-
116 copy_term(Templ, Dict),
117 get_dict(Name, Dict, Value).
118
119%% r_data_frame_to_rows(+DataFrame, +Functor, -Rows) is det.
120%
121% Translate a 2-dimensional R dataframe into a list of compound
122% terms, each representing a row. The functor of each row is
123% Functor. For example:
124%
125% ```
126% ?- r_data_frame_to_rows(mtcars, car, Rows).
127% Rows = [ car(21.0, 6, 160.0, 110, 3.9, 2.62, 16.46, 0, 1, 4, 4),
128% ...
129% ].
130% ```
131
132r_data_frame_to_rows(DataFrame, Functor, Rows) :-
133 Cols <- DataFrame,
134 length(Cols, Arity),
135 term_cols(Cols, 1, Arity, Functor, Rows).
136
137term_cols([], _, _, _, _).
138term_cols([Col|Cols], I, Arity, Functor, Rows) :-
139 maplist(term_col(I, Arity, Functor), Col, Rows),
140 I2 is I+1,
141 term_cols(Cols, I2, Arity, Functor, Rows).
142
143term_col(1, Arity, Functor, Value, Term) :- !,
144 functor(Term, Functor, Arity),
145 arg(1, Term, Value).
146term_col(I, _, _, Value, Term) :-
147 arg(I, Term, Value).
148
149%% r_data_frame_from_dicts(+DataFrame, +Rows) is det.
150%
151% Assign the R variable DataFrame the content of Rows. Rows is a
152% list of dicts that must all have the same set of keys. The keys
153% are used as column names.
154%
155% @see dicts_to_same_keys/3 to align the set of keys for each dict
156
157r_data_frame_from_dicts(DataFrame, Rows) :-
158 must_be(atom, DataFrame),
159 must_be(list, Rows),
160 Rows = [Row1|_],
161 dict_keys(Row1, Keys),
162 dict_col_data(Keys, Rows, ColData),
163 compound_name_arguments(Term, 'data.frame', ColData),
164 DataFrame <- Term,
165 colnames(DataFrame) <- Keys.
166
167dict_col_data([], _, []).
168dict_col_data([K|Keys], Rows, [ColI|ColR]) :-
169 maplist(get_dict(K), Rows, ColI),
170 dict_col_data(Keys, Rows, ColR).
171
172%% r_data_frame_from_rows(+DataFrame, +Rows) is det.
173%
174% Assign the R variable DataFrame the content of Rows. Rows is a
175% list of compound terms.
176
177r_data_frame_from_rows(DataFrame, Rows) :-
178 must_be(atom, DataFrame),
179 must_be(list, Rows),
180 Rows = [Row1|_],
181 functor(Row1, _, NCols),
182 col_data(1, NCols, Rows, ColData),
183 append(ColData, [stringsAsFactors = 'FALSE'], ColDataOpts),
184 compound_name_arguments(Term, 'data.frame', ColDataOpts),
185 DataFrame <- Term.
186
187col_data(I, NCols, Rows, [ColI|ColR]) :-
188 I =< NCols, !,
189 maplist(arg(I), Rows, ColI),
190 I2 is I + 1,
191 col_data(I2, NCols, Rows, ColR).
192col_data(_, _, _, []).
193
194%% r_data_frame_colnames(+DataFrame, -ColNames:list(atom)) is det.
195%
196% ColNames are the column names for DataFrame as a list of atoms.
197
198r_data_frame_colnames(DataFrame, ColNames) :-
199 ColNameStrings <- colnames(DataFrame),
200 maplist(atom_string, ColNames, ColNameStrings).
201
202%% r_data_frame_rownames(+DataFrame, -RowNames:list(atom)) is det.
203%
204% RowNames are the row names for DataFrame as a list of atoms.
205
206r_data_frame_rownames(DataFrame, RowNames) :-
207 RowNameStrings