35
36:- module(rdf_http_plugin, []). 37:- use_module(library(semweb/rdf_db), []). 38
39:- autoload(library(date),[parse_time/2]). 40:- autoload(library(error),[domain_error/2]). 41:- autoload(library(lists),[append/2]). 42:- autoload(library(option),[option/3]). 43:- autoload(library(http/http_header),[http_timestamp/2]). 44:- autoload(library(http/http_open),[http_open/3]). 45
60
% Hook predicates that this plugin contributes clauses to.  Every
% predicate is declared multifile separately; this is equivalent to a
% single directive carrying the comma-separated list.
:- multifile rdf_db:rdf_open_hook/8.
:- multifile rdf_db:url_protocol/1.
:- multifile rdf_db:rdf_storage_encoding/2.
:- multifile rdf_db:rdf_file_type/2.
:- multifile rdf_content_type/3.

% URL schemes registered with rdf_db by this module.
rdf_db:url_protocol(http).
rdf_db:url_protocol(https).
70
71
:- multifile error:has_type/2.

% Extend library(error) with the type `rdf_format`: a value has this
% type when it is one of the serialization names listed below.
error:has_type(rdf_format, Value) :-
    Formats = [nquads,ntriples,rdfa,trig,turtle,xml],
    error:has_type(oneof(Formats), Value).
76
86
%!  rdf_extra_headers(-RequestHeaders, +Options)
%
%   RequestHeaders is the list of additional http_open/3 options used
%   when fetching RDF: the certificate verification hook ssl_verify and
%   an `Accept` request header.  The header value is computed by
%   rdf_accept_header_value/2 from the format(Format) option.  If
%   Options holds no format(_) term, Format is left unbound.

rdf_extra_headers([ cert_verify_hook(ssl_verify),
                    request_header('Accept'=AcceptValue)
                  ], Options) :-
    % The default is a fresh variable, so a missing format(_) option
    % simply leaves Format unbound.
    option(format(Format), Options, _NoFormat),
    rdf_accept_header_value(Format, AcceptValue).
92
93
104
%!  rdf_db:rdf_open_hook(+Scheme, +SourceURL, +HaveModified, -Stream,
%!                       -Cleanup, -Modified, -Format, +Options)
%
%   Open SourceURL over HTTP(S).  The `https` clause delegates to the
%   `http` clause.  The `http` clause calls http_open/3 with the extra
%   headers from rdf_extra_headers/2, an optional `If-modified-since`
%   request header and the caller's Options, and asks for the status
%   code and the Content-Type and Last-Modified reply headers.
%
%     - Status 200: the stream is passed through open_envelope/5.  On
%       success Cleanup is close(Stream) and Modified is
%       last_modified(Stamp) if the Last-Modified header can be parsed
%       by parse_time/2, or `unknown` otherwise.  If open_envelope/5
%       fails, the stream is closed and a domain_error on content_type
%       is raised.
%     - Status 304: Modified is `not_modified` and Cleanup is `true`.
%       Stream is left unbound.
%     - Any other status: existence_error(url, SourceURL) is thrown
%       with the status code in the context.
%     - An exception raised by http_open/3 is re-thrown.
%
%   In the 304 and "other status" cases the connection is closed here,
%   because the caller gets no handle through which it could do so.

rdf_db:rdf_open_hook(https, SourceURL, HaveModified, Stream, Cleanup,
                     Modified, Format, Options) :-
    rdf_db:rdf_open_hook(http, SourceURL, HaveModified, Stream, Cleanup,
                         Modified, Format, Options).
rdf_db:rdf_open_hook(http, SourceURL, HaveModified, Stream, Cleanup,
                     Modified, Format, Options) :-
    modified_since_header(HaveModified, Header),
    TypeHdr = [ header(content_type, ContentType),
                header(last_modified, ModifiedText)
              ],
    rdf_extra_headers(Extra, Options),
    append([Extra, TypeHdr, Header, Options], OpenOptions),
    % Capture an exception instead of propagating it, so that the
    % status code can be inspected first.  Code stays unbound if
    % http_open/3 raised.
    catch(http_open(SourceURL, Stream0,
                    [ status_code(Code)
                    | OpenOptions
                    ]), E, true),
    (   Code == 200
    ->  (   open_envelope(ContentType, SourceURL,
                          Stream0, Stream, Format)
        ->  Cleanup = close(Stream),
            (   nonvar(ModifiedText),
                parse_time(ModifiedText, ModifiedStamp)
            ->  Modified = last_modified(ModifiedStamp)
            ;   Modified = unknown
            )
        ;   close(Stream0),
            domain_error(content_type, ContentType)
        )
    ;   Code == 304
    ->  % Nothing to read: release the connection right away.
        close(Stream0, [force(true)]),
        Modified = not_modified,
        Cleanup = true
    ;   var(E)
    ->  % http_open/3 succeeded with an unexpected status.  Close the
        % stream before reporting; force(true) keeps a close error from
        % masking the existence error.
        close(Stream0, [force(true)]),
        throw(error(existence_error(url, SourceURL),
                    context(_, status(Code,_))))
    ;   throw(E)
    ).
141
:- public ssl_verify/5.

%!  ssl_verify(+SSL, +ProblemCert, +AllCerts, +FirstCert, +Error)
%
%   Certificate verification hook passed to http_open/3 through the
%   cert_verify_hook option.  It succeeds unconditionally, whatever the
%   arguments are.
%
%   NOTE(review): succeeding here presumably means every certificate is
%   accepted -- confirm this is the intended policy.

ssl_verify(_, _, _, _, _).
151
156
%!  modified_since_header(+HaveModified, -Options)
%
%   Options is the list of http_open/3 options needed for a conditional
%   request.  It is the empty list if HaveModified is unbound.
%   Otherwise it holds a single `If-modified-since` request header
%   whose value is HaveModified converted by http_timestamp/2.

modified_since_header(HaveModified, []) :-
    var(HaveModified),
    !.
modified_since_header(HaveModified,
                      [ request_header('If-modified-since' = Modified)
                      ]) :-
    http_timestamp(HaveModified, Modified).
165
170
%!  open_envelope(+ContentType, +SourceURL, +Stream0, -Stream, ?Format)
%
%   Turn the raw HTTP stream Stream0 into the stream to parse and
%   determine Format when it is still unbound.
%
%     1. Gzipped content (see gzipped_content/2) is committed to.  It
%        requires a gzip entry in rdf_db:rdf_storage_encoding/2 and is
%        unwrapped with rdf_zlib_plugin:zopen/3.  An unbound Format is
%        derived from the extension that precedes the last extension
%        of SourceURL.
%     2. If Format is already bound, the stream is passed on unchanged.
%     3. Otherwise Format comes from rdf_content_type/3 using the media
%        type without parameters, or, for `text/plain`, from the
%        extension of SourceURL.
%
%   Fails if no format can be determined.

open_envelope(ContentType, SourceURL, Stream0, Stream, Format) :-
    gzipped_content(ContentType, SourceURL),
    !,
    rdf_db:rdf_storage_encoding(_, gzip),
    !,
    (   var(Format)
    ->  % Drop the outer extension, then look at the one underneath.
        file_name_extension(Inner, _OuterExt, SourceURL),
        file_name_extension(_, InnerExt, Inner),
        rdf_db:rdf_file_type(InnerExt, Format)
    ;   true
    ),
    stream_pair(Stream0, In, _),
    rdf_zlib_plugin:zopen(In, Stream, []).
open_envelope(_, _, Stream, Stream, Format) :-
    nonvar(Format),
    !.
open_envelope(ContentType, SourceURL, Stream, Stream, Format) :-
    major_content_type(ContentType, Major),
    (   rdf_content_type(Major, _, Format)
    ->  true
    ;   Major == 'text/plain'
    ->  file_name_extension(_, Ext, SourceURL),
        rdf_db:rdf_file_type(Ext, Format)
    ).

% True if the reply is gzip data: either the content type says so, or
% it is a generic octet stream and the URL has the extension `gz`.
gzipped_content(ContentType, _) :-
    ContentType == 'application/x-gzip'.
gzipped_content(ContentType, SourceURL) :-
    ContentType == 'application/octet-stream',
    file_name_extension(_, gz, SourceURL).
198
%!  major_content_type(+ContentType, -Major)
%
%   Major is the media type of ContentType without parameters: the text
%   before the first `;`, or all of ContentType if there is no `;`.
%   The result has surrounding white space removed and is converted to
%   lower case, so that a value such as `Text/Turtle ; charset=UTF-8`
%   still matches the lower-case media types this module compares
%   against.

major_content_type(ContentType, Major) :-
    (   sub_atom(ContentType, Pre, _, _, (;))
    ->  sub_atom(ContentType, 0, Pre, _, Raw)
    ;   Raw = ContentType
    ),
    % Servers may put white space before the `;`, and media types are
    % case-insensitive.
    normalize_space(atom(Trimmed), Raw),
    downcase_atom(Trimmed, Major).
204
205
207
%!  rdf_accept_header_value(?Format, -AcceptValue)
%
%   AcceptValue is the value for the HTTP `Accept` request header.  It
%   is the comma-separated concatenation of `*/*;q=0.001` followed by
%   every solution of accept_value/2 for Format, in solution order.

rdf_accept_header_value(Format, AcceptValue) :-
    findall(Value, accept_value(Format, Value), Values),
    atomic_list_concat(['*/*;q=0.001'|Values], ',', AcceptValue).
211
%!  accept_value(?Format, -AcceptValue) is nondet.
%
%   Enumerate one `Accept` header element per rdf_content_type/3 fact,
%   written as `MediaType;q=QValue` with QValue printed in three
%   decimals.

accept_value(Format, AcceptValue) :-
    rdf_content_type(MediaType, DefaultQ, Candidate),
    accept_qvalue(Format, Candidate, DefaultQ, Q),
    format(atom(AcceptValue), '~a;q=~3f', [MediaType,Q]).

% Q is 1.0 if the requested Format is identical (==) to the format of
% the table entry, and the entry's own q-value otherwise.
accept_qvalue(Format, Candidate, _, 1.0) :-
    Format == Candidate,
    !.
accept_qvalue(_, _, DefaultQ, DefaultQ).
219
220
254
%!  rdf_content_type(?MediaType, ?QValue, ?Format) is nondet.
%
%   Table relating a media type to the q-value used for it in the
%   `Accept` header and to the RDF serialization format it denotes.
%   The clause order is significant for callers that enumerate the
%   table or commit to the first match.

rdf_content_type('application/n-quads',    0.99, nquads  ).
rdf_content_type('application/n-triples',  0.99, ntriples).
rdf_content_type('application/rdf',        0.76, xml     ).
rdf_content_type('application/rdf+turtle', 0.76, turtle  ).
rdf_content_type('application/rdf+xml',    0.76, xml     ).
rdf_content_type('application/rss+xml',    0.66, xml     ).
rdf_content_type('application/trig',       0.99, trig    ).
rdf_content_type('application/turtle',     0.76, turtle  ).
rdf_content_type('application/x-trig',     0.76, trig    ).
rdf_content_type('application/x-turtle',   0.76, turtle  ).
rdf_content_type('application/xhtml+xml',  0.66, rdfa    ).
rdf_content_type('application/xml',        0.66, xml     ).
rdf_content_type('text/html',              0.66, rdfa    ).
rdf_content_type('text/n3',                0.56, turtle  ).
rdf_content_type('text/rdf',               0.76, xml     ).
rdf_content_type('text/rdf+n3',            0.33, turtle  ).
rdf_content_type('text/rdf+xml',           0.76, xml     ).
rdf_content_type('text/turtle',            0.99, turtle  ).
rdf_content_type('text/xml',               0.66, xml     ).
rdf_content_type('application/x-gzip',     0.23, gzip    ).