You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

1370 lines
35 KiB

  1. #!/usr/bin/perl -w
  2. #
  3. # The Intltool Message Merger
  4. #
  5. # Copyright (C) 2000, 2003 Free Software Foundation.
  6. # Copyright (C) 2000, 2001 Eazel, Inc
  7. #
  8. # Intltool is free software; you can redistribute it and/or
  9. # modify it under the terms of the GNU General Public License
  10. # version 2 published by the Free Software Foundation.
  11. #
  12. # Intltool is distributed in the hope that it will be useful,
  13. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. # General Public License for more details.
  16. #
  17. # You should have received a copy of the GNU General Public License
  18. # along with this program; if not, write to the Free Software
  19. # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20. #
  21. # As a special exception to the GNU General Public License, if you
  22. # distribute this file as part of a program that contains a
  23. # configuration script generated by Autoconf, you may include it under
  24. # the same distribution terms that you use for the rest of that program.
  25. #
  26. # Authors: Maciej Stachowiak <mjs@noisehavoc.org>
  27. # Kenneth Christiansen <kenneth@gnu.org>
  28. # Darin Adler <darin@bentspoon.com>
  29. #
  30. # Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
  31. #
  32. ## Release information
  33. my $PROGRAM = "intltool-merge";
  34. my $PACKAGE = "intltool";
  35. my $VERSION = "0.30";
  36. ## Loaded modules
  37. use strict;
  38. use Getopt::Long;
  39. use Text::Wrap;
  40. use File::Basename;
  41. my $must_end_tag = -1;
  42. my $last_depth = -1;
  43. my $translation_depth = -1;
  44. my @tag_stack = ();
  45. my @entered_tag = ();
  46. my @translation_strings = ();
  47. my $leading_space = "";
  48. ## Scalars used by the option stuff
  49. my $HELP_ARG = 0;
  50. my $VERSION_ARG = 0;
  51. my $BA_STYLE_ARG = 0;
  52. my $XML_STYLE_ARG = 0;
  53. my $KEYS_STYLE_ARG = 0;
  54. my $DESKTOP_STYLE_ARG = 0;
  55. my $SCHEMAS_STYLE_ARG = 0;
  56. my $RFC822DEB_STYLE_ARG = 0;
  57. my $QUIET_ARG = 0;
  58. my $PASS_THROUGH_ARG = 0;
  59. my $UTF8_ARG = 0;
  60. my $MULTIPLE_OUTPUT = 0;
  61. my $cache_file;
  62. ## Handle options
  63. GetOptions
  64. (
  65. "help" => \$HELP_ARG,
  66. "version" => \$VERSION_ARG,
  67. "quiet|q" => \$QUIET_ARG,
  68. "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
  69. "ba-style|b" => \$BA_STYLE_ARG,
  70. "xml-style|x" => \$XML_STYLE_ARG,
  71. "keys-style|k" => \$KEYS_STYLE_ARG,
  72. "desktop-style|d" => \$DESKTOP_STYLE_ARG,
  73. "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
  74. "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
  75. "pass-through|p" => \$PASS_THROUGH_ARG,
  76. "utf8|u" => \$UTF8_ARG,
  77. "multiple-output|m" => \$MULTIPLE_OUTPUT,
  78. "cache|c=s" => \$cache_file
  79. ) or &error;
  80. my $PO_DIR;
  81. my $FILE;
  82. my $OUTFILE;
  83. my %po_files_by_lang = ();
  84. my %translations = ();
  85. my $iconv = $ENV{"INTLTOOL_ICONV"} || "iconv";
  86. # Use this instead of \w for XML files to handle more possible characters.
  87. my $w = "[-A-Za-z0-9._:]";
  88. # XML quoted string contents
  89. my $q = "[^\\\"]*";
  90. ## Check for options.
  91. if ($VERSION_ARG)
  92. {
  93. &print_version;
  94. }
  95. elsif ($HELP_ARG)
  96. {
  97. &print_help;
  98. }
  99. elsif ($BA_STYLE_ARG && @ARGV > 2)
  100. {
  101. &preparation;
  102. &print_message;
  103. &ba_merge_translations;
  104. &finalize;
  105. }
  106. elsif ($XML_STYLE_ARG && @ARGV > 2)
  107. {
  108. &utf8_sanity_check;
  109. &preparation;
  110. &print_message;
  111. &xml_merge_output;
  112. &finalize;
  113. }
  114. elsif ($KEYS_STYLE_ARG && @ARGV > 2)
  115. {
  116. &utf8_sanity_check;
  117. &preparation;
  118. &print_message;
  119. &keys_merge_translations;
  120. &finalize;
  121. }
  122. elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
  123. {
  124. &preparation;
  125. &print_message;
  126. &desktop_merge_translations;
  127. &finalize;
  128. }
  129. elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
  130. {
  131. &preparation;
  132. &print_message;
  133. &schemas_merge_translations;
  134. &finalize;
  135. }
  136. elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
  137. {
  138. &preparation;
  139. &print_message;
  140. &rfc822deb_merge_translations;
  141. &finalize;
  142. }
  143. else
  144. {
  145. &print_help;
  146. }
  147. exit;
  148. ## Sub for printing release information
  149. sub print_version
  150. {
  151. print <<_EOF_;
  152. ${PROGRAM} (${PACKAGE}) ${VERSION}
  153. Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.
  154. Copyright (C) 2000-2003 Free Software Foundation, Inc.
  155. Copyright (C) 2000-2001 Eazel, Inc.
  156. This is free software; see the source for copying conditions. There is NO
  157. warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  158. _EOF_
  159. exit;
  160. }
  161. ## Sub for printing usage information
  162. sub print_help
  163. {
  164. print <<_EOF_;
  165. Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
  166. Generates an output file that includes some localized attributes from an
  167. untranslated source file.
  168. Mandatory options: (exactly one must be specified)
  169. -b, --ba-style includes translations in the bonobo-activation style
  170. -d, --desktop-style includes translations in the desktop style
  171. -k, --keys-style includes translations in the keys style
  172. -s, --schemas-style includes translations in the schemas style
  173. -r, --rfc822deb-style includes translations in the RFC822 style
  174. -x, --xml-style includes translations in the standard xml style
  175. Other options:
  176. -u, --utf8 convert all strings to UTF-8 before merging
  177. -p, --pass-through use strings as found in .po files, without
  178. conversion (STRONGLY unrecommended with -x)
  179. -m, --multiple-output output one localized file per locale, instead of
  180. a single file containing all localized elements
  181. -c, --cache=FILE specify cache file name
  182. (usually \$top_builddir/po/.intltool-merge-cache)
  183. -q, --quiet suppress most messages
  184. --help display this help and exit
  185. --version output version information and exit
  186. Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
  187. or send email to <xml-i18n-tools\@gnome.org>.
  188. _EOF_
  189. exit;
  190. }
  191. ## Sub for printing error messages
  192. sub print_error
  193. {
  194. print STDERR "Try `${PROGRAM} --help' for more information.\n";
  195. exit;
  196. }
  197. sub print_message
  198. {
  199. print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
  200. }
  201. sub preparation
  202. {
  203. $PO_DIR = $ARGV[0];
  204. $FILE = $ARGV[1];
  205. $OUTFILE = $ARGV[2];
  206. &gather_po_files;
  207. &get_translation_database;
  208. }
  209. # General-purpose code for looking up translations in .po files
  210. sub po_file2lang
  211. {
  212. my ($tmp) = @_;
  213. $tmp =~ s/^.*\/(.*)\.po$/$1/;
  214. return $tmp;
  215. }
  216. sub gather_po_files
  217. {
  218. for my $po_file (glob "$PO_DIR/*.po") {
  219. $po_files_by_lang{po_file2lang($po_file)} = $po_file;
  220. }
  221. }
  222. sub get_local_charset
  223. {
  224. my ($encoding) = @_;
  225. my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/usr/local/lib/charset.alias";
  226. # seek character encoding aliases in charset.alias (glib)
  227. if (open CHARSET_ALIAS, $alias_file)
  228. {
  229. while (<CHARSET_ALIAS>)
  230. {
  231. next if /^\#/;
  232. return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i)
  233. }
  234. close CHARSET_ALIAS;
  235. }
  236. # if not found, return input string
  237. return $encoding;
  238. }
  239. sub get_po_encoding
  240. {
  241. my ($in_po_file) = @_;
  242. my $encoding = "";
  243. open IN_PO_FILE, $in_po_file or die;
  244. while (<IN_PO_FILE>)
  245. {
  246. ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
  247. if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
  248. {
  249. $encoding = $1;
  250. last;
  251. }
  252. }
  253. close IN_PO_FILE;
  254. if (!$encoding)
  255. {
  256. print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
  257. $encoding = "ISO-8859-1";
  258. }
  259. system ("$iconv -f $encoding -t UTF-8 </dev/null 2>/dev/null");
  260. if ($?) {
  261. $encoding = get_local_charset($encoding);
  262. }
  263. return $encoding
  264. }
  265. sub utf8_sanity_check
  266. {
  267. if (!$UTF8_ARG)
  268. {
  269. if (!$PASS_THROUGH_ARG)
  270. {
  271. $PASS_THROUGH_ARG="1";
  272. }
  273. }
  274. }
  275. sub get_translation_database
  276. {
  277. if ($cache_file) {
  278. &get_cached_translation_database;
  279. } else {
  280. &create_translation_database;
  281. }
  282. }
  283. sub get_newest_po_age
  284. {
  285. my $newest_age;
  286. foreach my $file (values %po_files_by_lang)
  287. {
  288. my $file_age = -M $file;
  289. $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
  290. }
  291. $newest_age = 0 if !$newest_age;
  292. return $newest_age;
  293. }
  294. sub create_cache
  295. {
  296. print "Generating and caching the translation database\n" unless $QUIET_ARG;
  297. &create_translation_database;
  298. open CACHE, ">$cache_file" || die;
  299. print CACHE join "\x01", %translations;
  300. close CACHE;
  301. }
  302. sub load_cache
  303. {
  304. print "Found cached translation database\n" unless $QUIET_ARG;
  305. my $contents;
  306. open CACHE, "<$cache_file" || die;
  307. {
  308. local $/;
  309. $contents = <CACHE>;
  310. }
  311. close CACHE;
  312. %translations = split "\x01", $contents;
  313. }
  314. sub get_cached_translation_database
  315. {
  316. my $cache_file_age = -M $cache_file;
  317. if (defined $cache_file_age)
  318. {
  319. if ($cache_file_age <= &get_newest_po_age)
  320. {
  321. &load_cache;
  322. return;
  323. }
  324. print "Found too-old cached translation database\n" unless $QUIET_ARG;
  325. }
  326. &create_cache;
  327. }
  328. sub create_translation_database
  329. {
  330. for my $lang (keys %po_files_by_lang)
  331. {
  332. my $po_file = $po_files_by_lang{$lang};
  333. if ($UTF8_ARG)
  334. {
  335. my $encoding = get_po_encoding ($po_file);
  336. if (lc $encoding eq "utf-8")
  337. {
  338. open PO_FILE, "<$po_file";
  339. }
  340. else
  341. {
  342. print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
  343. open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
  344. }
  345. }
  346. else
  347. {
  348. open PO_FILE, "<$po_file";
  349. }
  350. my $nextfuzzy = 0;
  351. my $inmsgid = 0;
  352. my $inmsgstr = 0;
  353. my $msgid = "";
  354. my $msgstr = "";
  355. while (<PO_FILE>)
  356. {
  357. $nextfuzzy = 1 if /^#, fuzzy/;
  358. if (/^msgid "((\\.|[^\\])*)"/ )
  359. {
  360. $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
  361. $msgid = "";
  362. $msgstr = "";
  363. if ($nextfuzzy) {
  364. $inmsgid = 0;
  365. } else {
  366. $msgid = unescape_po_string($1);
  367. $inmsgid = 1;
  368. }
  369. $inmsgstr = 0;
  370. $nextfuzzy = 0;
  371. }
  372. if (/^msgstr "((\\.|[^\\])*)"/)
  373. {
  374. $msgstr = unescape_po_string($1);
  375. $inmsgstr = 1;
  376. $inmsgid = 0;
  377. }
  378. if (/^"((\\.|[^\\])*)"/)
  379. {
  380. $msgid .= unescape_po_string($1) if $inmsgid;
  381. $msgstr .= unescape_po_string($1) if $inmsgstr;
  382. }
  383. }
  384. $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
  385. }
  386. }
  387. sub finalize
  388. {
  389. }
  390. sub unescape_one_sequence
  391. {
  392. my ($sequence) = @_;
  393. return "\\" if $sequence eq "\\\\";
  394. return "\"" if $sequence eq "\\\"";
  395. return "\n" if $sequence eq "\\n";
  396. # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
  397. # \xXX (hex) and has a comment saying they want to handle \u and \U.
  398. return $sequence;
  399. }
  400. sub unescape_po_string
  401. {
  402. my ($string) = @_;
  403. $string =~ s/(\\.)/unescape_one_sequence($1)/eg;
  404. return $string;
  405. }
  406. ## NOTE: deal with < - &lt; but not > - &gt; because it seems its ok to have
  407. ## > in the entity. For further info please look at #84738.
  408. sub entity_decode
  409. {
  410. local ($_) = @_;
  411. s/&apos;/'/g; # '
  412. s/&quot;/"/g; # "
  413. s/&amp;/&/g;
  414. s/&lt;/</g;
  415. return $_;
  416. }
  417. # entity_encode: (string)
  418. #
  419. # Encode the given string to XML format (encode '<' etc). It also
  420. # encodes high bit if not in UTF-8 mode.
  421. sub entity_encode
  422. {
  423. my ($pre_encoded) = @_;
  424. my @list_of_chars = unpack ('C*', $pre_encoded);
  425. if ($PASS_THROUGH_ARG)
  426. {
  427. return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
  428. }
  429. else
  430. {
  431. # with UTF-8 we only encode minimalistic
  432. return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
  433. }
  434. }
  435. sub entity_encode_int_minimalist
  436. {
  437. return "&quot;" if $_ == 34;
  438. return "&amp;" if $_ == 38;
  439. return "&apos;" if $_ == 39;
  440. return "&lt;" if $_ == 60;
  441. return chr $_;
  442. }
  443. sub entity_encode_int_even_high_bit
  444. {
  445. if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39 || $_ == 60)
  446. {
  447. # the ($_ > 127) should probably be removed
  448. return "&#" . $_ . ";";
  449. }
  450. else
  451. {
  452. return chr $_;
  453. }
  454. }
  455. sub entity_encoded_translation
  456. {
  457. my ($lang, $string) = @_;
  458. my $translation = $translations{$lang, $string};
  459. return $string if !$translation;
  460. return entity_encode ($translation);
  461. }
  462. ## XML (bonobo-activation specific) merge code
  463. sub ba_merge_translations
  464. {
  465. my $source;
  466. {
  467. local $/; # slurp mode
  468. open INPUT, "<$FILE" or die "can't open $FILE: $!";
  469. $source = <INPUT>;
  470. close INPUT;
  471. }
  472. open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
  473. while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
  474. {
  475. print OUTPUT $1;
  476. my $node = $2 . "\n";
  477. my @strings = ();
  478. $_ = $node;
  479. while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
  480. push @strings, entity_decode($3);
  481. }
  482. print OUTPUT;
  483. my %langs;
  484. for my $string (@strings)
  485. {
  486. for my $lang (keys %po_files_by_lang)
  487. {
  488. $langs{$lang} = 1 if $translations{$lang, $string};
  489. }
  490. }
  491. for my $lang (sort keys %langs)
  492. {
  493. $_ = $node;
  494. s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
  495. s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
  496. print OUTPUT;
  497. }
  498. }
  499. print OUTPUT $source;
  500. close OUTPUT;
  501. }
  502. ## XML (non-bonobo-activation) merge code
  503. sub parseTree
  504. {
  505. my $fh = shift;
  506. my $ref = shift;
  507. my $depth = shift || 0;
  508. my $language = shift || "";
  509. my $not_cdata = 0;
  510. my $has_children;
  511. $entered_tag[$depth + 1] = 0;
  512. # Check to see if this is not a CDATA element.
  513. #
  514. foreach my $sub (@{ $ref }) {
  515. if (ref $sub eq 'ARRAY') {
  516. $not_cdata = 1;
  517. }
  518. }
  519. foreach my $sub (@{ $ref }) {
  520. # Handle empty nodes.
  521. #
  522. if (! $sub) {
  523. next;
  524. }
  525. if (ref $sub eq 'ARRAY') {
  526. # Process subnodes
  527. #
  528. $has_children = 0;
  529. # Check to see if current tag has any elements that need to be translated.
  530. #
  531. if ($translation_depth == -1) {
  532. foreach my $subsub (@{ $sub } ) {
  533. if (ref $subsub eq 'HASH') {
  534. foreach my $e (reverse(keys %{ $subsub })) {
  535. if ($e =~ /^_/) {
  536. $translation_depth = $depth;
  537. }
  538. }
  539. } elsif (ref $subsub eq 'ARRAY') {
  540. $has_children = 1;
  541. }
  542. }
  543. }
  544. my $current_tag = pop(@tag_stack);
  545. push @tag_stack, $current_tag;
  546. @translation_strings = ();
  547. $must_end_tag = $depth;
  548. print $fh "<", $current_tag;
  549. parseTree($fh, $sub, $depth + 1, $language);
  550. # Close any open tags
  551. #
  552. if ($must_end_tag != -1) {
  553. if ($must_end_tag < $depth) {
  554. print $fh ">";
  555. } else {
  556. print $fh " />";
  557. pop(@tag_stack);
  558. if ($depth == $translation_depth) {
  559. $translation_depth = -1;
  560. }
  561. }
  562. $must_end_tag = -1;
  563. }
  564. # Add ending tag(s), if needed
  565. #
  566. if ($entered_tag[$depth + 1] == 1) {
  567. while ($last_depth > $depth) {
  568. $last_depth--;
  569. print $fh "</", pop(@tag_stack), ">";
  570. }
  571. }
  572. $last_depth = $depth;
  573. # If beginning a translation block, then process for each language.
  574. #
  575. if ($translation_depth == $depth) {
  576. my $do_language;
  577. # Skip languages that do not have translations. Since we only
  578. # do this check when $translation_depth == $depth, it will never
  579. # happen for nodes inside a node with a translated element.
  580. #
  581. for my $lang (sort keys %po_files_by_lang) {
  582. if ($has_children == 1) {
  583. $do_language = 1;
  584. } else {
  585. # Skip this language if there is no translation
  586. #
  587. $do_language = 0;
  588. foreach my $string (@translation_strings) {
  589. my $decode_string = entity_decode($string);
  590. my $translation = $translations{$lang, $decode_string};
  591. if ($translation) {
  592. $do_language = 1;
  593. last;
  594. }
  595. }
  596. }
  597. if ($do_language == 0) {
  598. next;
  599. }
  600. print $fh "\n";
  601. $leading_space =~ s/.*\n//g;
  602. print $fh $leading_space;
  603. if ($MULTIPLE_OUTPUT && $lang ne "$language") {
  604. next;
  605. }
  606. print $fh "<", $current_tag, " xml:lang=\"", $lang, "\"";
  607. $must_end_tag = $depth;
  608. parseTree($fh, $sub, $depth + 1, $lang);
  609. # Close any open tags
  610. #
  611. if ($must_end_tag != -1) {
  612. if ($must_end_tag < $depth) {
  613. print $fh ">";
  614. } else {
  615. print $fh " />";
  616. pop(@tag_stack);
  617. if ($depth == $translation_depth) {
  618. $translation_depth = -1;
  619. }
  620. }
  621. $must_end_tag = -1;
  622. }
  623. # Add ending tag(s), if needed
  624. #
  625. if ($entered_tag[$depth + 1] == 1) {
  626. while ($last_depth > $depth + 1) {
  627. $last_depth--;
  628. print $fh "</", pop(@tag_stack), ">";
  629. }
  630. print $fh "</", $current_tag, ">";
  631. }
  632. }
  633. $translation_depth = -1;
  634. $last_depth = $depth;
  635. }
  636. $leading_space = "";
  637. } elsif (ref $sub eq 'HASH') {
  638. # Process tag elements
  639. #
  640. foreach my $e (reverse(keys %{ $sub })) {
  641. my $key = $e;
  642. my $string = $sub->{$e};
  643. my $quote = '"';
  644. $string =~ s/^[\s]+//;
  645. $string =~ s/[\s]+$//;
  646. if ($string =~ /^'.*'$/)
  647. {
  648. $quote = "'";
  649. }
  650. $string =~ s/^['"]//g;
  651. $string =~ s/['"]$//g;
  652. if ($key =~ /^_/) {
  653. $key =~ s|^_||g;
  654. if ($language) {
  655. # Handle translation
  656. #
  657. my $decode_string = entity_decode($string);
  658. my $translation = $translations{$language, $decode_string};
  659. if ($translation) {
  660. $translation = entity_encode($translation);
  661. $string = $translation;
  662. }
  663. }
  664. }
  665. print $fh " $key=$quote$string$quote";
  666. }
  667. } else {
  668. # Handle tags and CDATA values
  669. # Mark parent tag as having been entered.
  670. #
  671. $entered_tag[$depth] = 1;
  672. # The last_depth flag allows us to determine if this tag should be
  673. # closed with "/>" or ">"
  674. #
  675. $last_depth = $depth;
  676. # Close any open tags
  677. #
  678. if ($must_end_tag != -1) {
  679. if ($must_end_tag < $depth) {
  680. print $fh ">";
  681. } else {
  682. print $fh " />";
  683. pop(@tag_stack);
  684. }
  685. $must_end_tag = -1;
  686. }
  687. if ($sub =~ /^[\s]*$/) {
  688. $leading_space .= $sub;
  689. print $fh $sub;
  690. } elsif ($not_cdata) {
  691. # Handle tags
  692. #
  693. my $temp_tag = $sub;
  694. # Display key
  695. #
  696. if ($sub =~ /^_/) {
  697. $temp_tag =~ s|^_||g;
  698. if ($translation_depth == -1) {
  699. $translation_depth = $depth;
  700. }
  701. }
  702. # Push the tag on the stack, it will get handled when the ARRAY
  703. # for this tag is processed.
  704. #
  705. push(@tag_stack, $temp_tag);
  706. } else {
  707. # Handle CDATA
  708. #
  709. my $string = $sub;
  710. $string =~ s/^[\s]+//;
  711. $string =~ s/[\s]+$//;
  712. push(@translation_strings, $string);
  713. # Display CDATA
  714. #
  715. if ($language) {
  716. # Handle translation
  717. #
  718. my $decode_string = entity_decode($string);
  719. my $translation = $translations{$language, $decode_string};
  720. if ($translation) {
  721. $translation = entity_encode($translation);
  722. $string = $translation;
  723. }
  724. }
  725. print $fh $string;
  726. }
  727. }
  728. }
  729. }
  730. sub intltool_tree_char
  731. {
  732. my $expat = shift;
  733. my $text = shift;
  734. my $clist = $expat->{Curlist};
  735. my $pos = $#$clist;
  736. # Use original_string so that we retain escaped entities
  737. # in CDATA sections.
  738. #
  739. if ($pos > 0 and $clist->[$pos - 1] eq '0') {
  740. $clist->[$pos] .= $expat->original_string();
  741. } else {
  742. push @$clist, 0 => $expat->original_string();
  743. }
  744. }
  745. sub intltool_tree_start
  746. {
  747. my $expat = shift;
  748. my $tag = shift;
  749. my @origlist = ();
  750. # Use original_string so that we retain escaped entities
  751. # in attribute values. We must convert the string to an
  752. # @origlist array to conform to the structure of the Tree
  753. # Style.
  754. #
  755. my @original_array = split /\x/, $expat->original_string();
  756. my $source = $expat->original_string();
  757. # Remove leading tag.
  758. #
  759. $source =~ s|^\s*<\s*(\S+)||s;
  760. # Grab attribute key/value pairs and push onto @origlist array.
  761. #
  762. while ($source)
  763. {
  764. if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
  765. {
  766. $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
  767. push @origlist, $1;
  768. push @origlist, '"' . $2 . '"';
  769. }
  770. elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
  771. {
  772. $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
  773. push @origlist, $1;
  774. push @origlist, "'" . $2 . "'";
  775. }
  776. else
  777. {
  778. last;
  779. }
  780. }
  781. my $ol = [ { @origlist } ];
  782. push @{ $expat->{Lists} }, $expat->{Curlist};
  783. push @{ $expat->{Curlist} }, $tag => $ol;
  784. $expat->{Curlist} = $ol;
  785. }
  786. sub readXml
  787. {
  788. my $filename = shift || return;
  789. if(!-f $filename) {
  790. die "ERROR Cannot find filename: $filename\n";
  791. }
  792. my $ret = eval 'require XML::Parser';
  793. if(!$ret) {
  794. die "You must have XML::Parser installed to run $0\n\n";
  795. }
  796. my $xp = new XML::Parser(Style => 'Tree');
  797. $xp->setHandlers(Char => \&intltool_tree_char);
  798. $xp->setHandlers(Start => \&intltool_tree_start);
  799. my $tree = $xp->parsefile($filename);
  800. # <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
  801. # would be:
  802. # [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], bar, [{},
  803. # 0, "Howdy", ref, [{}]], 0, "do" ] ]
  804. return $tree;
  805. }
  806. sub print_header
  807. {
  808. my $infile = shift;
  809. my $fh = shift;
  810. my $source;
  811. if(!-f $infile) {
  812. die "ERROR Cannot find filename: $infile\n";
  813. }
  814. print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
  815. {
  816. local $/;
  817. open DOCINPUT, "<${FILE}" or die;
  818. $source = <DOCINPUT>;
  819. close DOCINPUT;
  820. }
  821. if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
  822. {
  823. print $fh "$1\n";
  824. }
  825. elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
  826. {
  827. print $fh "$1\n";
  828. }
  829. }
  830. sub xml_merge_output
  831. {
  832. my $source;
  833. eval 'require XML::Parser';
  834. die "You must have XML::Parser installed to run $0\n\n"
  835. if($@);
  836. ## Add dir to own perl modules
  837. my $dir = dirname($0);
  838. push @INC, "NONE/share/intltool";
  839. push @INC, "$dir/modules";
  840. push @INC, "$dir/intltool-modules";
  841. eval 'require XML::Parser::Style::OrigTree';
  842. die "The OrigTree module doesn't seem to be properly installed $0\n\n"
  843. if($@);
  844. if ($MULTIPLE_OUTPUT) {
  845. for my $lang (sort keys %po_files_by_lang) {
  846. if ( ! -e $lang ) {
  847. mkdir $lang or die "Cannot create subdirectory $lang: $!\n";
  848. }
  849. open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
  850. my $tree = readXml($FILE);
  851. print_header($FILE, \*OUTPUT);
  852. parseTree(\*OUTPUT, $tree, 0, $lang);
  853. close OUTPUT;
  854. print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
  855. }
  856. }
  857. open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n";
  858. my $tree = readXml($FILE);
  859. print_header($FILE, \*OUTPUT);
  860. parseTree(\*OUTPUT, $tree, 0);
  861. close OUTPUT;
  862. print "CREATED $OUTFILE\n" unless $QUIET_ARG;
  863. }
  864. sub keys_merge_translations
  865. {
  866. open INPUT, "<${FILE}" or die;
  867. open OUTPUT, ">${OUTFILE}" or die;
  868. while (<INPUT>)
  869. {
  870. if (s/^(\s*)_(\w+=(.*))/$1$2/)
  871. {
  872. my $string = $3;
  873. print OUTPUT;
  874. my $non_translated_line = $_;
  875. for my $lang (sort keys %po_files_by_lang)
  876. {
  877. my $translation = $translations{$lang, $string};
  878. next if !$translation;
  879. $_ = $non_translated_line;
  880. s/(\w+)=.*/[$lang]$1=$translation/;
  881. print OUTPUT;
  882. }
  883. }
  884. else
  885. {
  886. print OUTPUT;
  887. }
  888. }
  889. close OUTPUT;
  890. close INPUT;
  891. }
  892. sub desktop_merge_translations
  893. {
  894. open INPUT, "<${FILE}" or die;
  895. open OUTPUT, ">${OUTFILE}" or die;
  896. while (<INPUT>)
  897. {
  898. if (s/^(\s*)_(\w+=(.*))/$1$2/)
  899. {
  900. my $string = $3;
  901. print OUTPUT;
  902. my $non_translated_line = $_;
  903. for my $lang (sort keys %po_files_by_lang)
  904. {
  905. my $translation = $translations{$lang, $string};
  906. next if !$translation;
  907. $_ = $non_translated_line;
  908. s/(\w+)=.*/${1}[$lang]=$translation/;
  909. print OUTPUT;
  910. }
  911. }
  912. else
  913. {
  914. print OUTPUT;
  915. }
  916. }
  917. close OUTPUT;
  918. close INPUT;
  919. }
  920. sub schemas_merge_translations
  921. {
  922. my $source;
  923. {
  924. local $/; # slurp mode
  925. open INPUT, "<$FILE" or die "can't open $FILE: $!";
  926. $source = <INPUT>;
  927. close INPUT;
  928. }
  929. open OUTPUT, ">$OUTFILE" or die;
  930. # FIXME: support attribute translations
  931. # Empty nodes never need translation, so unmark all of them.
  932. # For example, <_foo/> is just replaced by <foo/>.
  933. $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
  934. while ($source =~ s/
  935. (.*?)
  936. (\s+)(<locale\ name="C">(\s*)
  937. (<default>\s*(.*?)\s*<\/default>)?(\s*)
  938. (<short>\s*(.*?)\s*<\/short>)?(\s*)
  939. (<long>\s*(.*?)\s*<\/long>)?(\s*)
  940. <\/locale>)
  941. //sx)
  942. {
  943. print OUTPUT $1;
  944. my $locale_start_spaces = $2 ? $2 : '';
  945. my $default_spaces = $4 ? $4 : '';
  946. my $short_spaces = $7 ? $7 : '';
  947. my $long_spaces = $10 ? $10 : '';
  948. my $locale_end_spaces = $13 ? $13 : '';
  949. my $c_default_block = $3 ? $3 : '';
  950. my $default_string = $6 ? $6 : '';
  951. my $short_string = $9 ? $9 : '';
  952. my $long_string = $12 ? $12 : '';
  953. print OUTPUT "$locale_start_spaces$c_default_block";
  954. $default_string =~ s/\s+/ /g;
  955. $default_string = entity_decode($default_string);
  956. $short_string =~ s/\s+/ /g;
  957. $short_string = entity_decode($short_string);
  958. $long_string =~ s/\s+/ /g;
  959. $long_string = entity_decode($long_string);
  960. for my $lang (sort keys %po_files_by_lang)
  961. {
  962. my $default_translation = $translations{$lang, $default_string};
  963. my $short_translation = $translations{$lang, $short_string};
  964. my $long_translation = $translations{$lang, $long_string};
  965. next if (!$default_translation && !$short_translation &&
  966. !$long_translation);
  967. print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
  968. print OUTPUT "$default_spaces";
  969. if ($default_translation)
  970. {
  971. $default_translation = entity_encode($default_translation);
  972. print OUTPUT "<default>$default_translation</default>";
  973. }
  974. print OUTPUT "$short_spaces";
  975. if ($short_translation)
  976. {
  977. $short_translation = entity_encode($short_translation);
  978. print OUTPUT "<short>$short_translation</short>";
  979. }
  980. print OUTPUT "$long_spaces";
  981. if ($long_translation)
  982. {
  983. $long_translation = entity_encode($long_translation);
  984. print OUTPUT "<long>$long_translation</long>";
  985. }
  986. print OUTPUT "$locale_end_spaces</locale>";
  987. }
  988. }
  989. print OUTPUT $source;
  990. close OUTPUT;
  991. }
  992. sub rfc822deb_merge_translations
  993. {
  994. my %encodings = ();
  995. for my $lang (keys %po_files_by_lang) {
  996. $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
  997. }
  998. my $source;
  999. $Text::Wrap::huge = 'overflow';
  1000. $Text::Wrap::break = qr/\n|\s(?=\S)/;
  1001. {
  1002. local $/; # slurp mode
  1003. open INPUT, "<$FILE" or die "can't open $FILE: $!";
  1004. $source = <INPUT>;
  1005. close INPUT;
  1006. }
  1007. open OUTPUT, ">${OUTFILE}" or die;
  1008. # Remove comments
  1009. $source =~ s/^#[^\n]*(\n|$)//s;
  1010. $source =~ s/\n#[^\n]*//g;
  1011. while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
  1012. {
  1013. my $sep = $1;
  1014. my $non_translated_line = $3.$4;
  1015. my $string = $5;
  1016. my $underscore = length($2);
  1017. # Remove [] dummy strings
  1018. my $stripped = $string;
  1019. $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2;
  1020. $stripped =~ s/\[\s[^\[\]]*\]$//;
  1021. $non_translated_line .= $stripped;
  1022. print OUTPUT $sep.$non_translated_line;
  1023. if ($underscore)
  1024. {
  1025. my @str_list = rfc822deb_split($underscore, $string);
  1026. for my $lang (sort keys %po_files_by_lang)
  1027. {
  1028. my $is_translated = 1;
  1029. my $str_translated = '';
  1030. my $first = 1;
  1031. for my $str (@str_list)
  1032. {
  1033. my $translation = $translations{$lang, $str};
  1034. if (!$translation)
  1035. {
  1036. # $is_translated = 0;
  1037. # last;
  1038. $translation = $str;
  1039. }
  1040. # $translation may also contain [] dummy
  1041. # strings, mostly to indicate an empty string
  1042. $translation =~ s/\[\s[^\[\]]*\]$//;
  1043. # Escape commas
  1044. $translation =~ s/,/\\,/g if $underscore eq 2;
  1045. if ($first)
  1046. {
  1047. if ($underscore eq 2)
  1048. {
  1049. $str_translated .= $translation;
  1050. }
  1051. else
  1052. {
  1053. $str_translated .=
  1054. Text::Tabs::expand($translation) .
  1055. "\n";
  1056. }
  1057. }
  1058. else
  1059. {
  1060. if ($underscore eq 2)
  1061. {
  1062. $str_translated .= ', ' . $translation;
  1063. }
  1064. else
  1065. {
  1066. $str_translated .= Text::Tabs::expand(
  1067. Text::Wrap::wrap(' ', ' ', $translation)) .
  1068. "\n .\n";
  1069. }
  1070. }
  1071. $first = 0;
  1072. # To fix some problems with Text::Wrap::wrap
  1073. $str_translated =~ s/(\n )+\n/\n .\n/g;
  1074. $str_translated =~ s/(^|\n)( +)\n( \S{$Text::Wrap::columns})/$1$2$3/g;
  1075. }
  1076. next unless $is_translated;
  1077. $str_translated =~ s/\n \.\n$//;
  1078. $str_translated =~ s/\s+$//;
  1079. $_ = $non_translated_line;
  1080. s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
  1081. print OUTPUT;
  1082. }
  1083. }
  1084. }
  1085. print OUTPUT "\n";
  1086. close OUTPUT;
  1087. close INPUT;
  1088. }
  1089. sub rfc822deb_split
  1090. {
  1091. # Debian defines a special way to deal with rfc822-style files:
  1092. # when a value contain newlines, it consists of
  1093. # 1. a short form (first line)
  1094. # 2. a long description, all lines begin with a space,
  1095. # and paragraphs are separated by a single dot on a line
  1096. # This routine returns an array of all paragraphs, and reformat
  1097. # them.
  1098. # When first argument is 2, the string is a comma separated list of
  1099. # values.
  1100. my $type = shift;
  1101. my $text = shift;
  1102. $text =~ s/^[ \t]//mg;
  1103. if ($type ne 1)
  1104. {
  1105. my @values = ();
  1106. for my $value (split(/(?<!\\), */, $text, 0))
  1107. {
  1108. $value =~ s/\\,/,/g;
  1109. push @values, $value;
  1110. }
  1111. return @values;
  1112. }
  1113. return ($text) if $text !~ /\n/;
  1114. $text =~ s/([^\n]*)\n//;
  1115. my @list = ($1);
  1116. my $str = '';
  1117. for my $line (split (/\n/, $text))
  1118. {
  1119. chomp $line;
  1120. if ($line =~ /^\.\s*$/)
  1121. {
  1122. # New paragraph
  1123. $str =~ s/\s*$//;
  1124. push(@list, $str);
  1125. $str = '';
  1126. }
  1127. elsif ($line =~ /^\s/)
  1128. {
  1129. # Line which must not be reformatted
  1130. $str .= "\n" if length ($str) && $str !~ /\n$/;
  1131. $line =~ s/\s+$//;
  1132. $str .= $line."\n";
  1133. }
  1134. else
  1135. {
  1136. # Continuation line, remove newline
  1137. $str .= " " if length ($str) && $str !~ /\n$/;
  1138. $str .= $line;
  1139. }
  1140. }
  1141. $str =~ s/\s*$//;
  1142. push(@list, $str) if length ($str);
  1143. return @list;
  1144. }