```    1/*
2Hidden Markov model for part-of-speech tagging.
3The states represent parts-of-speech, and the symbols emitted by the states are words. The assumption is that a word depends probabilistically on just its own part-of-speech (i.e. its tag) which in turn depends on the part-of-speech of the preceding word (or on the start state in case there is no preceding word).
4From
5http://www.ling.gu.se/~lager/Spaghetti/spaghetti.html
6Original program by Torbjorn Lager, adapted to MCINTYRE by Fabrizio Riguzzi
7*/
8:- use_module(library(mcintyre)).    9
10:- if(current_predicate(use_rendering/1)).   11:- use_rendering(c3).   12:- use_rendering(graphviz).   13:- endif.   14
15:- mc.   16
19% hmm(O): O is the output sequence
20% hmm(S,O): O is the output sequence and S is the sequence of states
21% hmm(Q,S0,S,O):  from state Q and previous state S0, generates output O and
22% sequence of states S
23
24hmm(O):-hmm(_,O).
25% O is an output sequence if there is a state seuqnece S such that hmm(S,O)
26% holds
27
28hmm(S,O):-trans(start,Q0,[]),hmm(Q0,[],S0,O),reverse(S0,S).
29% O is an output sequence and S a state sequence if the chain stats at state
30% start and ends generating state sequence S and output sequence O
31
32hmm(Q,S0,S,[L|O]):-
33	trans(Q,Q1,S0),
34	out(L,Q,S0),
35	hmm(Q1,[Q|S0],S,O).
36% an HMM in state Q goes in state Q1, emits the word L
37% and continues the chain
38
39hmm(_,S,S,[]).
40% an HMM in sny state terminates the sequence without emitting any symbol
41
42
43trans(start,det,_):0.30;
44trans(start,aux,_):0.20;
45trans(start,v,_):0.10;
46trans(start,n,_):0.10;
47trans(start,pron,_):0.30.
48
49trans(det,det,_):0.20;
50trans(det,aux,_):0.01;
51trans(det,v,_):0.01;
52trans(det,n,_):0.77;
53trans(det,pron,_):0.01.
54
55trans(aux,det,_):0.18;
56trans(aux,aux,_):0.10;
57trans(aux,v,_):0.50;
58trans(aux,n,_):0.01;
59trans(aux,pron,_):0.21.
60
61trans(v,det,_):0.36;
62trans(v,aux,_):0.01;
63trans(v,v,_):0.01;
64trans(v,n,_):0.26;
65trans(v,pron,_):0.36.
66
67trans(n,det,_):0.01;
68trans(n,aux,_):0.25;
69trans(n,v,_):0.39;
70trans(n,n,_):0.34;
71trans(n,pron,_):0.01.
72
73trans(pron,det,_):0.01;
74trans(pron,aux,_):0.45;
75trans(pron,v,_):0.52;
76trans(pron,n,_):0.01;
77trans(pron,pron,_):0.01.
78
79/*
80out(a,det,_):0.300.
81out(can,aux,_):0.010.
82out(can,v,_):0.005.
83out(can,n,_):0.001.
84out(he,pron,_):0.070.
85*/
86out(a,det,_).
87out(can,aux,_).
88out(can,v,_).
89out(can,n,_).
90out(he,pron,_).
91
92
95state_diagram(digraph(G)):-
96    setof(A,(B,S,Body)^
97          clause(trans(A,B,S),Body),Nodes),
98    maplist(nodelab,Nodes,NodesLab),
99    findall(edge(A -> B,[label=P]),
100      (clause(trans(A,B,_),
?- `mc_sample_arg(hmm(S,[he,can,can,a,can]),20,S,O)`. % sample the state sequence corresonding to the phrase "he can can a can" % the most frequent state sequence is an approximate POS tagging for the % sentence. It corresponds to the Viterbi path of the HMM. % expected result: the most frequent tagging should be [pron, aux, v, det, n] ?- `mc_sample_arg(hmm(S,[he,can,can,a,can]),20,S,O)`,`argbar(O,C)`.
?- `state_diagram(G)`. % show the state diagram */