Stripping 'charset=' from po files [the sequel]
From: Erik Huelsmann <e.huelsmann_at_gmx.net>
Date: 2004-05-13 20:13:52 CEST
In order to prevent charset conversion by 'smart' gettext implementations
The Windows (python based) build system does not provide sed. To work
There are several questions to be answered before proceeding:
1) We don't want to use the same script for the Makefile build (adding a new
2)
3) Do you have any comments on either script? (the strip charset script has
bye,
Erik.
start of the parser ===============
def recv_domain(self, domain):
def recv_simple_msg(self, pre_comment, msgid, msgstr):
TOKEN_CHUNK_SIZE = 100 * 1024 # 100kiB
class PoTokens:
def get(self):
# skip initial whitespace
while self.idx < len(self.buf) and \
if self.idx == len(self.buf):
if not self.buf[self.idx] in string.whitespace:
start = self.idx
# string "token"
end = self.buf.find('"', start+1)
if end == -1:
self.buf = self.inp.read(TOKEN_CHUNK_SIZE)
end = self.buf.find('"')
self.idx = end
# comment "token"
while 1:
if end == -1:
self.buf = self.inp.read(TOKEN_CHUNK_SIZE)
start = 0
self.idx = end
# msgstr "[INDEX]" "token"
while 1:
if self.idx == len(self.buf):
if not self.buf:
self.idx = start = 0
break
while 1:
if self.idx == len(self.buf):
if not self.buf:
self.idx = start = 0
token += self.buf[start:self.idx]
while 1:
if self.idx == len(self.buf):
if not self.buf:
self.idx = start = 0
if self.buf[self.idx] == ']':
# character series token
while 1:
if self.idx == len(self.buf):
self.buf = self.inp.read(TOKEN_CHUNK_SIZE)
if not self.buf:
self.idx = start = 0
return token + self.buf[start:self.idx]
# unknown token starting character
def unget(self, token):
return ungot
self.get = reget
def get_msg_argument(arg_to):
inp.unget(token)
if len(rv) == 0:
return rv
comment = []
if not token: # EOF
if token[0] == '#':
continue
if token.lower() == 'domain':
if token[0] in string.letters + string.digits + '_':
else:
continue
if token.lower() == 'msgid':
token = inp.get()
if msgid_plural:
if not token[0] == '[':
msgstr_indices += [ token[1:-1] ]
token = inp.get()
if len(msgstr_indices) == 0:
inp.unget(token)
sink.recv_plural_msg(comment, msgid, msgid_plural,
else: # not msgid_plural
sink.recv_simple_msg(comment, msgid, get_msg_argument('msgstr'))
comment = []
raise "Unknown token (%s)" % token
end of the parser ===============
start of the strip script ===============
import sys, poparse
class CharsetStrippingSink(poparse.PoSink):
def recv_simple_msg(self, pre_comment, msgid, msgstr):
for l in pre_comment:
msg = "msgid "
msg = "msgstr "
def finish_parse(self):
def strip_it(infile, outfile):
def main():
opts, args = getopt.getopt(sys.argv[1:], '', [])
if len(args) < 1:
infile = None
outfile = None
strip_it(infile, outfile)
if __name__ == '__main__':
--
NEU : GMX Internet.FreeDSL Ab sofort DSL-Tarif ohne Grundgebühr: http://www.gmx.net/dsl
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org
Received on Thu May 13 20:14:23 2004
This is an archived mail posted to the Subversion Dev mailing list.
This site is subject to the Apache Privacy Policy and the Apache Public Forum Archive Policy.