kmail

encodingdetector.h
1 /*
2  This file was taken from the KDE 4.x libraries and backported to TQt 3.
3 
4  Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
5  Copyright (C) 2007 Nick Shaforostoff (shafff@ukr.net)
6 
7  This library is free software; you can redistribute it and/or
8  modify it under the terms of the GNU Library General Public
9  License as published by the Free Software Foundation; either
10  version 2 of the License, or (at your option) any later version.
11 
12  This library is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  Library General Public License for more details.
16 
17  You should have received a copy of the GNU Library General Public License
18  along with this library; see the file COPYING.LIB. If not, write to
19  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20  Boston, MA 02110-1301, USA.
21 
22 */
23 #ifndef ENCODINGDETECTOR_H
24 #define ENCODINGDETECTOR_H
25 
26 #include <tqstring.h>
27 
28 class TQTextCodec;
29 class TQTextDecoder;
30 class EncodingDetectorPrivate;
31 
58 {
59 public:
/// Identifies where a choice of encoding came from.
/// Values of this type are passed to the codec-taking constructor and to
/// setEncoding(), and are reported back by encodingChoiceSource().
60  enum EncodingChoiceSource
61  {
62  DefaultEncoding,
63  AutoDetectedEncoding,
64  BOM,
65  EncodingFromXMLHeader,
66  EncodingFromMetaTag,
67  EncodingFromHTTPHeader,
68  UserChosenEncoding
69  };
70 
/// Script selector used for automatic detection.
/// Accepted by the codec-taking constructor (default: None) and by
/// setAutoDetectLanguage(); returned by autoDetectLanguage(),
/// scriptForName() and scriptForLanguageCode().
/// NOTE(review): presumably narrows the set of candidate encodings that
/// auto-detection considers - confirm in the implementation.
71  enum AutoDetectScript
72  {
73  None,
74  SemiautomaticDetection,
75  Arabic,
76  Baltic,
77  CentralEuropean,
78  ChineseSimplified,
79  ChineseTraditional,
80  Cyrillic,
81  Greek,
82  Hebrew,
83  Japanese,
84  Korean,
85  NorthernSaami,
86  SouthEasternEurope,
87  Thai,
88  Turkish,
89  Unicode,
90  WesternEuropean
91  };
92 
97 
/// Constructs a detector from an explicit codec and the source of that choice.
/// @param codec  initial text codec (ownership is not visible from this header - TODO confirm)
/// @param source where the choice of @p codec came from
/// @param script script to use for auto-detection; defaults to None
101  EncodingDetector(TQTextCodec* codec, EncodingChoiceSource source, AutoDetectScript script=None);
/// Destructor (non-virtual).
/// NOTE(review): presumably releases the private data held in d - confirm in the .cpp.
102  ~EncodingDetector();
103 
104  //const TQTextCodec* codec() const;
105 
/// Requests the encoding named by @p encoding.
/// @param encoding encoding name as a C string
/// @param type     where this encoding choice came from
/// @return bool; NOTE(review): presumably true when the encoding was accepted - confirm
109  bool setEncoding(const char *encoding, EncodingChoiceSource type);
110 
/// Convenience method.
/// @return the encoding as a C string; NOTE(review): presumably the name of
///         the codec currently in use - confirm
115  const char* encoding() const;
116 
/// Const query returning a bool.
/// NOTE(review): presumably tells whether the current encoding stores text in
/// visual (rather than logical) order - confirm in the .cpp.
117  bool visuallyOrdered() const;
118 
119 // void setAutoDetectLanguage( const TQString& );
120 // const TQString& autoDetectLanguage() const;
121 
/// Sets the script used for auto-detection; the argument is an
/// AutoDetectScript value. Counterpart of autoDetectLanguage().
122  void setAutoDetectLanguage( AutoDetectScript );
/// Returns the auto-detection script as an AutoDetectScript value.
/// Counterpart of setAutoDetectLanguage().
123  AutoDetectScript autoDetectLanguage() const;
124 
/// Returns an EncodingChoiceSource value.
/// NOTE(review): presumably the source of the encoding currently in effect - confirm.
125  EncodingChoiceSource encodingChoiceSource() const;
126 
/// Analyze text data.
/// @param data pointer to the raw bytes to analyze
/// @param len  length of @p data (presumably in bytes - confirm)
/// @return bool; its meaning is not visible from this header - TODO confirm
131  bool analyze( const char *data, int len );
132 
/// Overload of analyze() that takes the data as a TQByteArray.
/// @return bool; its meaning is not visible from this header - TODO confirm
137  bool analyze( const TQByteArray &data );
138 
/// Maps a script name to its AutoDetectScript value.
/// Takes the name after it has been i18n()'ed, i.e. the translated name.
142  static AutoDetectScript scriptForName(const TQString& lang);
/// Returns a TQString name for the given script.
/// NOTE(review): presumably the inverse of scriptForName() - confirm.
143  static TQString nameForScript(AutoDetectScript);
/// Maps a language code to an AutoDetectScript value.
/// NOTE(review): the expected code format is not visible from this header - confirm.
144  static AutoDetectScript scriptForLanguageCode(const TQString &lang);
/// Static query taking an AutoDetectScript value and returning a bool.
/// NOTE(review): presumably true when auto-detection is available for that script - confirm.
145  static bool hasAutoDetectionForScript(AutoDetectScript);
146 
147 protected:
/// Check if we are really utf8.
/// @param data   pointer to the bytes to check
/// @param length length of @p data (presumably in bytes - confirm)
/// @return bool; NOTE(review): going by the name, presumably true when the
///         data is NOT valid UTF-8 - confirm in the .cpp
155  bool errorsIfUtf8 (const char* data, int length);
156 
/// Returns a pointer to a TQTextDecoder.
/// NOTE(review): ownership and lifetime of the returned decoder are not
/// visible from this header - confirm before deleting or caching it.
160  TQTextDecoder* decoder();
161 
162 private:
/// Pointer to the private implementation data. EncodingDetectorPrivate is
/// only forward-declared in this header; the pointer itself is const and
/// cannot be reseated after construction.
163  EncodingDetectorPrivate* const d;
164 };
165 
166 #endif
Provides encoding detection capabilities.
bool errorsIfUtf8(const char *data, int length)
Check if we are really utf8.
static AutoDetectScript scriptForName(const TQString &lang)
Takes the language name after it has been i18n()'ed.
EncodingDetector()
Default codec is latin1 (as the HTML spec says), EncodingChoiceSource is default, AutoDetectScript=SemiautomaticDetection.
const char * encoding() const
Convenience method.
bool setEncoding(const char *encoding, EncodingChoiceSource type)
TQTextDecoder * decoder()
bool analyze(const char *data, int len)
Analyze text data.