Aber irgendwie verhunzt mir MySQL immer meine Umlaute. Aus ä ö ü Ä Ö Ü ß è é wird ä ö ü Ä Ö Ü ß è é.
äöüÄÖÜß -> äöüÄÖÜß
http://www.python-forum.de/topic-3529.html?highlight=use_unicode
#!/usr/bin/python
# -*- coding: UTF-8 -*-

# Print a sample string with German umlauts, first unchanged and then run
# through the "String_Escape" codec.
TestString = "ä ö ü ß Ä Ö Ü"
# A single parenthesised argument prints the same text whether `print` is
# parsed as a statement or called as a function.
print(TestString)
# NOTE: the original listing lacked the closing parenthesis on the next
# statement; the page reported "ERROR: EOF in multi-line statement" for it.
print(TestString.encode("String_Escape"))
1 def testcodec( txt, destination="utf_8" ):
2 "Testet blind alle Codecs mit encode und decode"
3
4 codecs = ['ascii', 'big5', 'big5hkscs', 'cp037', 'cp424', 'cp437', 'cp500',
5 'cp737', 'cp775', 'cp850', 'cp852', 'cp855', 'cp856', 'cp857', 'cp860',
6 'cp861', 'cp862', 'cp863', 'cp864', 'cp865', 'cp866', 'cp869', 'cp874',
7 'cp875', 'cp932', 'cp949', 'cp950', 'cp1006', 'cp1026', 'cp1140', 'cp1250',
8 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', 'cp1256', 'cp1257', 'cp1258',
9 'euc_jp', 'euc_jis_2004', 'euc_jisx0213', 'euc_kr', 'gb2312', 'gbk', 'gb18030',
10 'hz', 'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2', 'iso2022_jp_2004',
11 'iso2022_jp_3', 'iso2022_jp_ext', 'iso2022_kr', 'latin_1', 'iso8859_2',
12 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6', 'iso8859_7', 'iso8859_8',
13 'iso8859_9', 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15', 'johab',
14 'koi8_r', 'koi8_u', 'mac_cyrillic', 'mac_greek', 'mac_iceland', 'mac_latin2',
15 'mac_roman', 'mac_turkish', 'ptcp154', 'shift_jis', 'shift_jis_2004',
16 'shift_jisx0213', 'utf_16', 'utf_16_be', 'utf_16_le', 'utf_7', 'utf_8',
17 'idna', 'mbcs', 'palmos',
18 'raw_unicode_escape', 'rot_13', 'string_escape',
19 'undefined', 'unicode_escape', 'unicode_internal'
20 ]
21 #~ codecs += [
22 #~ 'base64_codec',
23 #~ 'bz2_codec',
24 #~ 'hex_codec',
25 #~ 'punycode',
26 #~ 'quopri_codec',
27 #~ 'zlib_codec',
28 #~ 'uu_codec'
29 #~ ]
30 for codec in codecs:
31 try:
32 print txt.decode( codec ).encode( destination ), " - codec:", codec
33 except:
34 pass
35 print "-"*80
36
# Run the codec test on three samples: a literal umlaut as stored in this
# source file, and the escaped byte sequences labelled latin-1 and UTF8.
for sample in (
    "Ein ue...: ü",
    "latin-1..: \xfc",
    "UTF8.....: \xc3\xbc",
):
    testcodec(sample)
http://de.wikipedia.org/wiki/Unicode_Transformation_Format
Thema: htmlspecialchars - Problematik bei Sonderzeichen, regexps,
http://wiki.python.de/Von_Umlauten%2C_Unicode_und_Encodings
$sql = "SELECT * FROM board WHERE trashcan = '0' AND content REGEXP '^[space]*(\[.*\])*[space]*(m).*$' ";
- 42 testcodec("Ein ue...: ü") 43 testcodec("latin-1..: \xfc") 44 testcodec("UTF8.....: \xc3\xbc")
# Replacement pairs (search text, replacement): a space becomes an
# underscore, and each German umlaut / sharp s becomes its ASCII spelling.
rules = [
    (" ", "_"),
    ("ä", "ae"),
    ("ö", "oe"),
    ("ü", "ue"),
    ("Ä", "Ae"),
    ("Ö", "Oe"),
    ("Ü", "Ue"),
    ("ß", "ss"),
]
#-- DetlevLengsfeld 2006-05-30 01:48:08
Linux/MySql/Sonderzeichen (last modified 2008-11-04 07:00:11)