kmail

encodingdetector.h
1/*
2 This file was taken from the KDE 4.x libraries and backported to TQt 3.
3
4 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
5 Copyright (C) 2007 Nick Shaforostoff (shafff@ukr.net)
6
7 This library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public
9 License as published by the Free Software Foundation; either
10 version 2 of the License, or (at your option) any later version.
11
12 This library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Library General Public License for more details.
16
17 You should have received a copy of the GNU Library General Public License
18 along with this library; see the file COPYING.LIB. If not, write to
19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA.
21
22*/
23#ifndef ENCODINGDETECTOR_H
24#define ENCODINGDETECTOR_H
25
26#include <tqstring.h>
27
28class TQTextCodec;
29class TQTextDecoder;
30class EncodingDetectorPrivate;
31
58{
59public:
// Identifies where an encoding choice came from. A value of this type is
// passed to the constructor and to setEncoding(), and one is returned by
// encodingChoiceSource().
// NOTE(review): the meaning of each value is inferred from its name only
// (e.g. BOM presumably means "byte order mark"); whether the declaration
// order implies any priority between sources is not visible here — confirm
// against the implementation.
60 enum EncodingChoiceSource
61 {
62 DefaultEncoding,
63 AutoDetectedEncoding,
64 BOM,
65 EncodingFromXMLHeader,
66 EncodingFromMetaTag,
67 EncodingFromHTTPHeader,
68 UserChosenEncoding
69 };
70
// Script / language-group selector used for auto-detection. It is accepted
// by the constructor (where it defaults to None) and by
// setAutoDetectLanguage(), returned by autoDetectLanguage(), and produced or
// consumed by the static helpers scriptForName(), nameForScript(),
// scriptForLanguageCode() and hasAutoDetectionForScript().
// NOTE(review): how None and SemiautomaticDetection differ at runtime is not
// visible in this header — confirm against the implementation.
71 enum AutoDetectScript
72 {
73 None,
74 SemiautomaticDetection,
75 Arabic,
76 Baltic,
77 CentralEuropean,
78 ChineseSimplified,
79 ChineseTraditional,
80 Cyrillic,
81 Greek,
82 Hebrew,
83 Japanese,
84 Korean,
85 NorthernSaami,
86 SouthEasternEurope,
87 Thai,
88 Turkish,
89 Unicode,
90 WesternEuropean
91 };
92
97
// Constructs a detector from an explicit codec, the source of that encoding
// choice, and an optional script selector (defaults to None).
// NOTE(review): ownership of `codec` is not visible in this header — confirm.
101 EncodingDetector(TQTextCodec* codec, EncodingChoiceSource source, AutoDetectScript script=None);
103
104 //const TQTextCodec* codec() const;
105
// Sets the encoding by name, together with the source of that choice.
// NOTE(review): presumably returns false when the encoding is rejected or
// unknown — confirm against the implementation.
109 bool setEncoding(const char *encoding, EncodingChoiceSource type);
110
// Convenience method.
// NOTE(review): presumably returns the name of the current encoding; the
// lifetime of the returned C string is not visible here — confirm.
115 const char* encoding() const;
116
// NOTE(review): presumably reports whether the text is stored in visual
// (rather than logical) order — inferred from the name only; confirm.
117 bool visuallyOrdered() const;
118
119// void setAutoDetectLanguage( const TQString& );
120// const TQString& autoDetectLanguage() const;
121
// Setter and getter for the script selector used for auto-detection.
122 void setAutoDetectLanguage( AutoDetectScript );
123 AutoDetectScript autoDetectLanguage() const;
124
// Returns an EncodingChoiceSource value.
// NOTE(review): presumably the source of the currently selected encoding.
125 EncodingChoiceSource encodingChoiceSource() const;
126
// Analyze text data given as a raw buffer of `len` bytes.
// NOTE(review): the meaning of the bool result is not visible here — confirm.
131 bool analyze( const char *data, int len );
132
// Overload of analyze() taking the data as a TQByteArray.
137 bool analyze( const TQByteArray &data );
138
// Static helpers converting to and from AutoDetectScript values.
// scriptForName() takes the language name after it was i18n()'ed.
// NOTE(review): the exact input formats accepted by scriptForLanguageCode()
// are not visible here — confirm.
142 static AutoDetectScript scriptForName(const TQString& lang);
143 static TQString nameForScript(AutoDetectScript);
144 static AutoDetectScript scriptForLanguageCode(const TQString &lang);
145 static bool hasAutoDetectionForScript(AutoDetectScript);
146
147protected:
// Check if we are really utf8.
// NOTE(review): judging by the name, a true result presumably means decoding
// errors were found (i.e. the data is NOT valid UTF-8) — confirm.
155 bool errorsIfUtf8 (const char* data, int length);
156
// Returns a TQTextDecoder pointer.
// NOTE(review): ownership and lifetime of the returned decoder are not
// visible in this header — confirm.
160 TQTextDecoder* decoder();
161
162private:
// Pointer to the forward-declared EncodingDetectorPrivate; the pointer
// itself is const, so it cannot be reseated after construction.
163 EncodingDetectorPrivate* const d;
164};
165
166#endif
Provides encoding detection capabilities.
bool errorsIfUtf8(const char *data, int length)
Check if we are really utf8.
static AutoDetectScript scriptForName(const TQString &lang)
Takes the language name after it was i18n()'ed.
EncodingDetector()
Default codec is latin1 (as html spec says), EncodingChoiceSource is default, AutoDetectScript=SemiautomaticDetection.
const char * encoding() const
Convenience method.
bool setEncoding(const char *encoding, EncodingChoiceSource type)
TQTextDecoder * decoder()
bool analyze(const char *data, int len)
Analyze text data.