22 #include <qdragobject.h>
29 void HtmlDataFetcher::initMap()
31 entityMap.insert(QString(
"AElig"),QChar(0x00c6));
32 entityMap.insert(QString(
"Aacute"), QChar(0x00c1));
33 entityMap.insert(QString(
"Acirc"), QChar(0x00c2));
34 entityMap.insert(QString(
"Agrave"), QChar(0x00c0));
35 entityMap.insert(QString(
"Alpha"), QChar(0x0391));
36 entityMap.insert(QString(
"AMP"), QChar(38));
37 entityMap.insert(QString(
"Aring"), QChar(0x00c5));
38 entityMap.insert(QString(
"Atilde"), QChar(0x00c3));
39 entityMap.insert(QString(
"Auml"), QChar(0x00c4));
40 entityMap.insert(QString(
"Beta"), QChar(0x0392));
41 entityMap.insert(QString(
"Ccedil"), QChar(0x00c7));
42 entityMap.insert(QString(
"Chi"), QChar(0x03a7));
43 entityMap.insert(QString(
"Dagger"), QChar(0x2021));
44 entityMap.insert(QString(
"Delta"), QChar(0x0394));
45 entityMap.insert(QString(
"ETH"), QChar(0x00d0));
46 entityMap.insert(QString(
"Eacute"), QChar(0x00c9));
47 entityMap.insert(QString(
"Ecirc"), QChar(0x00ca));
48 entityMap.insert(QString(
"Egrave"), QChar(0x00c8));
49 entityMap.insert(QString(
"Epsilon"), QChar(0x0395));
50 entityMap.insert(QString(
"Eta"), QChar(0x0397));
51 entityMap.insert(QString(
"Euml"), QChar(0x00cb));
52 entityMap.insert(QString(
"Gamma"), QChar(0x0393));
53 entityMap.insert(QString(
"GT"), QChar(62));
54 entityMap.insert(QString(
"Iacute"), QChar(0x00cd));
55 entityMap.insert(QString(
"Icirc"), QChar(0x00ce));
56 entityMap.insert(QString(
"Igrave"), QChar(0x00cc));
57 entityMap.insert(QString(
"Iota"), QChar(0x0399));
58 entityMap.insert(QString(
"Iuml"), QChar(0x00cf));
59 entityMap.insert(QString(
"Kappa"), QChar(0x039a));
60 entityMap.insert(QString(
"Lambda"), QChar(0x039b));
61 entityMap.insert(QString(
"LT"), QChar(60));
62 entityMap.insert(QString(
"Mu"), QChar(0x039c));
63 entityMap.insert(QString(
"Ntilde"), QChar(0x00d1));
64 entityMap.insert(QString(
"Nu"), QChar(0x039d));
65 entityMap.insert(QString(
"OElig"), QChar(0x0152));
66 entityMap.insert(QString(
"Oacute"), QChar(0x00d3));
67 entityMap.insert(QString(
"Ocirc"), QChar(0x00d4));
68 entityMap.insert(QString(
"Ograve"), QChar(0x00d2));
69 entityMap.insert(QString(
"Omega"), QChar(0x03a9));
70 entityMap.insert(QString(
"Omicron"), QChar(0x039f));
71 entityMap.insert(QString(
"Oslash"), QChar(0x00d8));
72 entityMap.insert(QString(
"Otilde"), QChar(0x00d5));
73 entityMap.insert(QString(
"Ouml"), QChar(0x00d6));
74 entityMap.insert(QString(
"Phi"), QChar(0x03a6));
75 entityMap.insert(QString(
"Pi"), QChar(0x03a0));
76 entityMap.insert(QString(
"Prime"), QChar(0x2033));
77 entityMap.insert(QString(
"Psi"), QChar(0x03a8));
78 entityMap.insert(QString(
"QUOT"), QChar(34));
79 entityMap.insert(QString(
"Rho"), QChar(0x03a1));
80 entityMap.insert(QString(
"Scaron"), QChar(0x0160));
81 entityMap.insert(QString(
"Sigma"), QChar(0x03a3));
82 entityMap.insert(QString(
"THORN"), QChar(0x00de));
83 entityMap.insert(QString(
"Tau"), QChar(0x03a4));
84 entityMap.insert(QString(
"Theta"), QChar(0x0398));
85 entityMap.insert(QString(
"Uacute"), QChar(0x00da));
86 entityMap.insert(QString(
"Ucirc"), QChar(0x00db));
87 entityMap.insert(QString(
"Ugrave"), QChar(0x00d9));
88 entityMap.insert(QString(
"Upsilon"), QChar(0x03a5));
89 entityMap.insert(QString(
"Uuml"), QChar(0x00dc));
90 entityMap.insert(QString(
"Xi"), QChar(0x039e));
91 entityMap.insert(QString(
"Yacute"), QChar(0x00dd));
92 entityMap.insert(QString(
"Yuml"), QChar(0x0178));
93 entityMap.insert(QString(
"Zeta"), QChar(0x0396));
94 entityMap.insert(QString(
"aacute"), QChar(0x00e1));
95 entityMap.insert(QString(
"acirc"), QChar(0x00e2));
96 entityMap.insert(QString(
"acute"), QChar(0x00b4));
97 entityMap.insert(QString(
"aelig"), QChar(0x00e6));
98 entityMap.insert(QString(
"agrave"), QChar(0x00e0));
99 entityMap.insert(QString(
"alefsym"), QChar(0x2135));
100 entityMap.insert(QString(
"alpha"), QChar(0x03b1));
101 entityMap.insert(QString(
"amp"), QChar(38));
102 entityMap.insert(QString(
"and"), QChar(0x22a5));
103 entityMap.insert(QString(
"ang"), QChar(0x2220));
104 entityMap.insert(QString(
"apos"), QChar(0x0027));
105 entityMap.insert(QString(
"aring"), QChar(0x00e5));
106 entityMap.insert(QString(
"asymp"), QChar(0x2248));
107 entityMap.insert(QString(
"atilde"), QChar(0x00e3));
108 entityMap.insert(QString(
"auml"), QChar(0x00e4));
109 entityMap.insert(QString(
"bdquo"), QChar(0x201e));
110 entityMap.insert(QString(
"beta"), QChar(0x03b2));
111 entityMap.insert(QString(
"brvbar"), QChar(0x00a6));
112 entityMap.insert(QString(
"bull"), QChar(0x2022));
113 entityMap.insert(QString(
"cap"), QChar(0x2229));
114 entityMap.insert(QString(
"ccedil"), QChar(0x00e7));
115 entityMap.insert(QString(
"cedil"), QChar(0x00b8));
116 entityMap.insert(QString(
"cent"), QChar(0x00a2));
117 entityMap.insert(QString(
"chi"), QChar(0x03c7));
118 entityMap.insert(QString(
"circ"), QChar(0x02c6));
119 entityMap.insert(QString(
"clubs"), QChar(0x2663));
120 entityMap.insert(QString(
"cong"), QChar(0x2245));
121 entityMap.insert(QString(
"copy"), QChar(0x00a9));
122 entityMap.insert(QString(
"crarr"), QChar(0x21b5));
123 entityMap.insert(QString(
"cup"), QChar(0x222a));
124 entityMap.insert(QString(
"curren"), QChar(0x00a4));
125 entityMap.insert(QString(
"dArr"), QChar(0x21d3));
126 entityMap.insert(QString(
"dagger"), QChar(0x2020));
127 entityMap.insert(QString(
"darr"), QChar(0x2193));
128 entityMap.insert(QString(
"deg"), QChar(0x00b0));
129 entityMap.insert(QString(
"delta"), QChar(0x03b4));
130 entityMap.insert(QString(
"diams"), QChar(0x2666));
131 entityMap.insert(QString(
"divide"), QChar(0x00f7));
132 entityMap.insert(QString(
"eacute"), QChar(0x00e9));
133 entityMap.insert(QString(
"ecirc"), QChar(0x00ea));
134 entityMap.insert(QString(
"egrave"), QChar(0x00e8));
135 entityMap.insert(QString(
"empty"), QChar(0x2205));
136 entityMap.insert(QString(
"emsp"), QChar(0x2003));
137 entityMap.insert(QString(
"ensp"), QChar(0x2002));
138 entityMap.insert(QString(
"epsilon"), QChar(0x03b5));
139 entityMap.insert(QString(
"equiv"), QChar(0x2261));
140 entityMap.insert(QString(
"eta"), QChar(0x03b7));
141 entityMap.insert(QString(
"eth"), QChar(0x00f0));
142 entityMap.insert(QString(
"euml"), QChar(0x00eb));
143 entityMap.insert(QString(
"euro"), QChar(0x20ac));
144 entityMap.insert(QString(
"exist"), QChar(0x2203));
145 entityMap.insert(QString(
"fnof"), QChar(0x0192));
146 entityMap.insert(QString(
"forall"), QChar(0x2200));
147 entityMap.insert(QString(
"frac12"), QChar(0x00bd));
148 entityMap.insert(QString(
"frac14"), QChar(0x00bc));
149 entityMap.insert(QString(
"frac34"), QChar(0x00be));
150 entityMap.insert(QString(
"frasl"), QChar(0x2044));
151 entityMap.insert(QString(
"gamma"), QChar(0x03b3));
152 entityMap.insert(QString(
"ge"), QChar(0x2265));
153 entityMap.insert(QString(
"gt"), QChar(62));
154 entityMap.insert(QString(
"hArr"), QChar(0x21d4));
155 entityMap.insert(QString(
"harr"), QChar(0x2194));
156 entityMap.insert(QString(
"hearts"), QChar(0x2665));
157 entityMap.insert(QString(
"hellip"), QChar(0x2026));
158 entityMap.insert(QString(
"iacute"), QChar(0x00ed));
159 entityMap.insert(QString(
"icirc"), QChar(0x00ee));
160 entityMap.insert(QString(
"iexcl"), QChar(0x00a1));
161 entityMap.insert(QString(
"igrave"), QChar(0x00ec));
162 entityMap.insert(QString(
"image"), QChar(0x2111));
163 entityMap.insert(QString(
"infin"), QChar(0x221e));
164 entityMap.insert(QString(
"int"), QChar(0x222b));
165 entityMap.insert(QString(
"iota"), QChar(0x03b9));
166 entityMap.insert(QString(
"iquest"), QChar(0x00bf));
167 entityMap.insert(QString(
"isin"), QChar(0x2208));
168 entityMap.insert(QString(
"iuml"), QChar(0x00ef));
169 entityMap.insert(QString(
"kappa"), QChar(0x03ba));
170 entityMap.insert(QString(
"lArr"), QChar(0x21d0));
171 entityMap.insert(QString(
"lambda"), QChar(0x03bb));
172 entityMap.insert(QString(
"lang"), QChar(0x2329));
173 entityMap.insert(QString(
"laquo"), QChar(0x00ab));
174 entityMap.insert(QString(
"larr"), QChar(0x2190));
175 entityMap.insert(QString(
"lceil"), QChar(0x2308));
176 entityMap.insert(QString(
"ldquo"), QChar(0x201c));
177 entityMap.insert(QString(
"le"), QChar(0x2264));
178 entityMap.insert(QString(
"lfloor"), QChar(0x230a));
179 entityMap.insert(QString(
"lowast"), QChar(0x2217));
180 entityMap.insert(QString(
"loz"), QChar(0x25ca));
181 entityMap.insert(QString(
"lrm"), QChar(0x200e));
182 entityMap.insert(QString(
"lsaquo"), QChar(0x2039));
183 entityMap.insert(QString(
"lsquo"), QChar(0x2018));
184 entityMap.insert(QString(
"lt"), QChar(60));
185 entityMap.insert(QString(
"macr"), QChar(0x00af));
186 entityMap.insert(QString(
"mdash"), QChar(0x2014));
187 entityMap.insert(QString(
"micro"), QChar(0x00b5));
188 entityMap.insert(QString(
"middot"), QChar(0x00b7));
189 entityMap.insert(QString(
"minus"), QChar(0x2212));
190 entityMap.insert(QString(
"mu"), QChar(0x03bc));
191 entityMap.insert(QString(
"nabla"), QChar(0x2207));
192 entityMap.insert(QString(
"nbsp"), QChar(0x00a0));
193 entityMap.insert(QString(
"ndash"), QChar(0x2013));
194 entityMap.insert(QString(
"ne"), QChar(0x2260));
195 entityMap.insert(QString(
"ni"), QChar(0x220b));
196 entityMap.insert(QString(
"not"), QChar(0x00ac));
197 entityMap.insert(QString(
"notin"), QChar(0x2209));
198 entityMap.insert(QString(
"nsub"), QChar(0x2284));
199 entityMap.insert(QString(
"ntilde"), QChar(0x00f1));
200 entityMap.insert(QString(
"nu"), QChar(0x03bd));
201 entityMap.insert(QString(
"oacute"), QChar(0x00f3));
202 entityMap.insert(QString(
"ocirc"), QChar(0x00f4));
203 entityMap.insert(QString(
"oelig"), QChar(0x0153));
204 entityMap.insert(QString(
"ograve"), QChar(0x00f2));
205 entityMap.insert(QString(
"oline"), QChar(0x203e));
206 entityMap.insert(QString(
"omega"), QChar(0x03c9));
207 entityMap.insert(QString(
"omicron"), QChar(0x03bf));
208 entityMap.insert(QString(
"oplus"), QChar(0x2295));
209 entityMap.insert(QString(
"or"), QChar(0x22a6));
210 entityMap.insert(QString(
"ordf"), QChar(0x00aa));
211 entityMap.insert(QString(
"ordm"), QChar(0x00ba));
212 entityMap.insert(QString(
"oslash"), QChar(0x00f8));
213 entityMap.insert(QString(
"otilde"), QChar(0x00f5));
214 entityMap.insert(QString(
"otimes"), QChar(0x2297));
215 entityMap.insert(QString(
"ouml"), QChar(0x00f6));
216 entityMap.insert(QString(
"para"), QChar(0x00b6));
217 entityMap.insert(QString(
"part"), QChar(0x2202));
218 entityMap.insert(QString(
"percnt"), QChar(0x0025));
219 entityMap.insert(QString(
"permil"), QChar(0x2030));
220 entityMap.insert(QString(
"perp"), QChar(0x22a5));
221 entityMap.insert(QString(
"phi"), QChar(0x03c6));
222 entityMap.insert(QString(
"pi"), QChar(0x03c0));
223 entityMap.insert(QString(
"piv"), QChar(0x03d6));
224 entityMap.insert(QString(
"plusmn"), QChar(0x00b1));
225 entityMap.insert(QString(
"pound"), QChar(0x00a3));
226 entityMap.insert(QString(
"prime"), QChar(0x2032));
227 entityMap.insert(QString(
"prod"), QChar(0x220f));
228 entityMap.insert(QString(
"prop"), QChar(0x221d));
229 entityMap.insert(QString(
"psi"), QChar(0x03c8));
230 entityMap.insert(QString(
"quot"), QChar(34));
231 entityMap.insert(QString(
"rArr"), QChar(0x21d2));
232 entityMap.insert(QString(
"radic"), QChar(0x221a));
233 entityMap.insert(QString(
"rang"), QChar(0x232a));
234 entityMap.insert(QString(
"raquo"), QChar(0x00bb));
235 entityMap.insert(QString(
"rarr"), QChar(0x2192));
236 entityMap.insert(QString(
"rceil"), QChar(0x2309));
237 entityMap.insert(QString(
"rdquo"), QChar(0x201d));
238 entityMap.insert(QString(
"real"), QChar(0x211c));
239 entityMap.insert(QString(
"reg"), QChar(0x00ae));
240 entityMap.insert(QString(
"rfloor"), QChar(0x230b));
241 entityMap.insert(QString(
"rho"), QChar(0x03c1));
242 entityMap.insert(QString(
"rlm"), QChar(0x200f));
243 entityMap.insert(QString(
"rsaquo"), QChar(0x203a));
244 entityMap.insert(QString(
"rsquo"), QChar(0x2019));
245 entityMap.insert(QString(
"sbquo"), QChar(0x201a));
246 entityMap.insert(QString(
"scaron"), QChar(0x0161));
247 entityMap.insert(QString(
"sdot"), QChar(0x22c5));
248 entityMap.insert(QString(
"sect"), QChar(0x00a7));
249 entityMap.insert(QString(
"shy"), QChar(0x00ad));
250 entityMap.insert(QString(
"sigma"), QChar(0x03c3));
251 entityMap.insert(QString(
"sigmaf"), QChar(0x03c2));
252 entityMap.insert(QString(
"sim"), QChar(0x223c));
253 entityMap.insert(QString(
"spades"), QChar(0x2660));
254 entityMap.insert(QString(
"sub"), QChar(0x2282));
255 entityMap.insert(QString(
"sube"), QChar(0x2286));
256 entityMap.insert(QString(
"sum"), QChar(0x2211));
257 entityMap.insert(QString(
"sup1"), QChar(0x00b9));
258 entityMap.insert(QString(
"sup2"), QChar(0x00b2));
259 entityMap.insert(QString(
"sup3"), QChar(0x00b3));
260 entityMap.insert(QString(
"sup"), QChar(0x2283));
261 entityMap.insert(QString(
"supe"), QChar(0x2287));
262 entityMap.insert(QString(
"szlig"), QChar(0x00df));
263 entityMap.insert(QString(
"tau"), QChar(0x03c4));
264 entityMap.insert(QString(
"there4"), QChar(0x2234));
265 entityMap.insert(QString(
"theta"), QChar(0x03b8));
266 entityMap.insert(QString(
"thetasym"), QChar(0x03d1));
267 entityMap.insert(QString(
"thinsp"), QChar(0x2009));
268 entityMap.insert(QString(
"thorn"), QChar(0x00fe));
269 entityMap.insert(QString(
"tilde"), QChar(0x02dc));
270 entityMap.insert(QString(
"times"), QChar(0x00d7));
271 entityMap.insert(QString(
"trade"), QChar(0x2122));
272 entityMap.insert(QString(
"uArr"), QChar(0x21d1));
273 entityMap.insert(QString(
"uacute"), QChar(0x00fa));
274 entityMap.insert(QString(
"uarr"), QChar(0x2191));
275 entityMap.insert(QString(
"ucirc"), QChar(0x00fb));
276 entityMap.insert(QString(
"ugrave"), QChar(0x00f9));
277 entityMap.insert(QString(
"uml"), QChar(0x00a8));
278 entityMap.insert(QString(
"upsih"), QChar(0x03d2));
279 entityMap.insert(QString(
"upsilon"), QChar(0x03c5));
280 entityMap.insert(QString(
"uuml"), QChar(0x00fc));
281 entityMap.insert(QString(
"weierp"), QChar(0x2118));
282 entityMap.insert(QString(
"xi"), QChar(0x03be));
283 entityMap.insert(QString(
"yacute"), QChar(0x00fd));
284 entityMap.insert(QString(
"yen"), QChar(0x00a5));
285 entityMap.insert(QString(
"yuml"), QChar(0x00ff));
286 entityMap.insert(QString(
"zeta"), QChar(0x03b6));
287 entityMap.insert(QString(
"zwj"), QChar(0x200d));
288 entityMap.insert(QString(
"zwnj"), QChar(0x200c));
// Converts one HTML character reference of the form "&...;" into a QChar.
// placeHolder is taken by value, so the edits below only touch a local copy.
// A body starting with '#' is treated as a numeric reference; anything else
// is looked up by name in entityMap.
// NOTE(review): several lines of this function are not visible in this
// listing (including what is returned on a lookup hit or miss) - confirm
// against the full source before relying on this description.
292 QChar HtmlDataFetcher::unicodeFor(QString placeHolder)
// Remove ' ' and '\t' characters from the reference text.
294 placeHolder.remove(
' ');
295 placeHolder.remove(
'\t');
// The remaining text is expected to start with '&' and end with ';'.
// NOTE(review): these are assertions only; an empty or malformed string is
// not rejected by any code visible here.
296 Q_ASSERT(placeHolder[0] ==
'&');
297 Q_ASSERT(placeHolder[(
int)placeHolder.length() - 1] ==
';');
// Strip the trailing ';' first, then the leading '&'.
298 placeHolder.remove(placeHolder.length() - 1,1);
299 placeHolder.remove(0,1);
// Numeric reference: drop the '#' and convert the rest with toInt().
// NOTE(review): toInt() is called without a base or an ok flag, so presumably
// only decimal references are handled and a failed parse is not detected
// (hexadecimal forms such as "&#x41;" would likely not work) - confirm.
300 if(placeHolder[0] ==
'#')
302 placeHolder.remove(0,1);
303 return QChar(placeHolder.toInt());
// Named reference: look the name up in the table held in entityMap.
305 QMap<QString, QChar>::Iterator it = entityMap.find(placeHolder);
306 if ( it != entityMap.end() )
// Warning emitted for names that are not in entityMap (presumably reached
// only when the lookup above fails - the intervening line is not visible).
308 qWarning(
"HtmlDataFetcher::unicodeFor() : Unknown html entity or html special character");
// NOTE(review): the header of the enclosing function and many of its lines
// are not visible in this listing. Judging by the warning strings below this
// is the body of HtmlDataFetcher::fetchChapterTexts() - confirm.
// It scans the text of indexFile line by line and extracts the text that
// follows an href="..." anchor up to "</a>", passing it to formatAndReplace().
316 QFile file(indexFile);
// Warn when the index file cannot be opened read-only.
317 if(!file.open(IO_ReadOnly))
319 qWarning(
"HtmlDataFetcher::fetchChapterTexts() : Can't open file %s",indexFile.latin1());
// Fetch indexFile through the default mime source factory and decode it into
// sourceText with QTextDrag.
// NOTE(review): no use of 'status' is visible in this listing - confirm that
// a failed decode is handled.
323 const QMimeSource *source = QMimeSourceFactory::defaultFactory()->data(indexFile);
326 bool status = QTextDrag::decode(source,sourceText);
// Read the decoded text one line at a time.
329 QTextStream str(&sourceText,IO_ReadOnly);
332 while ( !str.atEnd() )
334 QString line = str.readLine();
// Lines without href=" and lines containing "http" or "mailto" are singled
// out here (the branch body is not visible; presumably they are skipped).
338 int index = line.find(
"href=\"");
339 if(index == -1 || line.contains(
"http") || line.contains(
"mailto"))
// Move index to the '>' found at or after the href position.
342 index = line.find(
'>',index);
345 qWarning(
"HtmlDataFetcher::fetchChapterTexts() : Parse error");
// Search for the closing "</a>" on the same line, starting at index.
349 int end = line.find(
"</a>",index);
// Take the text from index up to "</a>" and replace HTML entities in it.
352 txt = line.mid(index,end-index);
353 formatAndReplace(txt);
// Append the rest of the line starting at index to txt.
// NOTE(review): presumably the case where "</a>" is not on this line - confirm.
358 txt += line.mid(index);
// Look for "</a>" from the start of the line, append everything before it to
// txt, then replace HTML entities in the accumulated text.
365 int end = line.find(
"</a>");
370 txt += line.left(end);
372 formatAndReplace(txt);
// NOTE(review): the header of the enclosing function and many of its lines
// are not visible in this listing. Judging by the warning strings below this
// is the body of HtmlDataFetcher::fetchLinksToFiles() - confirm.
// It reads indexFile line by line and extracts href="..." targets.
// "index.html" is appended to the result list before the file is read.
383 retVal << QString(
"index.html");
384 QFile file(indexFile);
// If the file cannot be opened read-only, a warning is logged and an empty
// list is returned.
385 if(!file.open(IO_ReadOnly))
387 qWarning(
"HtmlDataFetcher::fetchLinksToFiles() : File open error");
388 return QStringList();
390 QTextStream str(&file);
// Process the file one line at a time.
394 while ( !str.atEnd() )
396 line = str.readLine();
// Lines mentioning "http" or "mailto" are singled out here (the branch body
// is not visible; presumably they are skipped).
397 if(line.contains(
"http") || line.contains(
"mailto"))
// Locate the start of an href attribute on this line.
399 index = line.find(
"href=\"");
// Find a '"' at or after index.
// NOTE(review): presumably index has been advanced past href=" on a line not
// visible here, so that this finds the closing quote - confirm.
403 end = line.find(
'"',index);
// A warning is logged and an empty list returned (presumably when the search
// above fails - the condition is not visible in this listing).
406 qWarning(
"HtmlDataFetcher::fetchLinksToFiles() : Can't find end quote. May be HTML error");
407 return QStringList();
// The link is the text between index and the '"' found above.
409 link = line.mid(index,end-index);
416 void HtmlDataFetcher::formatAndReplace(QString &txt)
418 if(!txt.contains(
'&'))
427 end = txt.find(
';',st);
430 qWarning(
"HtmlDataFetcher::formatAndReplace() : Can't find ';'");
433 QChar ch = unicodeFor(txt.mid(st,end-st+1));
434 txt.replace(st,end-st+1,ch);