Utilisateur:FtiercelBot/getall.pl
getall.pl
#!/usr/bin/perl
# Encoding : UTF8
# Is used with RAW XML !!!
#
# For every language code in @ttlang, fetch the Wiktionary
# "all-titles-in-ns0" dump with wget, decompress it with gunzip, and append
# each title to a single result file named "<year>-<month>-<day>.txt" in the
# current directory. Each output line has the form "<lang>:<title>", with the
# underscores of the title replaced by spaces.
#
# Requires the external commands `wget` and `gunzip` to be on the PATH.
# A language whose dump cannot be downloaded, decompressed or opened is
# reported on STDERR and skipped; the remaining languages are still processed.

use strict;
use warnings;

# Only day, month and year are needed to build the result file name.
my ( $mday, $mon, $year ) = ( localtime(time) )[ 3, 4, 5 ];
$year += 1900;    # localtime() counts years from 1900
$mon  += 1;       # localtime() months are 0-based

my @ttlang = qw(
    af als an ang am ar as ast ay az
    bg bm bn bo br bs
    ca co chr cs cy
    da de dog
    el en eo es et eu
    fa fi fr fy
    ga gd gl gn gu
    he hi hr hu hy
    ia id ie io is it
    ja jv
    ka ko kn ku kw
    la lb li ln lo lt lv
    mg mk ml mn mr ms mt
    nah nds ne nl nn no
    oc or
    pa pi pl prv pt
    qu
    ro ru rw
    sa scn sh simple sk sl sq sr su sv
    ta te tg th tl tr tt
    ug uk ur
    vi vo
    yi yo
    zh zu
);

# URL pieces are the same for every language, so they are set once here.
# Full URL: <base><lang>wiktionary/latest/<lang>wiktionary-latest-all-titles-in-ns0.gz
my $base_url    = "http://download.wikimedia.org/";
my $project_dir = "wiktionary/latest/";
my $dump_suffix = "wiktionary-latest-all-titles-in-ns0";

# NOTE: month and day are deliberately not zero-padded, to keep the
# historical output file name format (e.g. "2024-3-5.txt").
my $today    = "$year-$mon-$mday";
my $res_name = "$today.txt";

open( my $res_fh, '>', $res_name )
    or die "Cannot open $res_name for writing: $!\n";

LANG:
foreach my $lang_cur (@ttlang) {
    my $dump_file = "$lang_cur$dump_suffix";
    my $archive   = "$dump_file.gz";
    my $url       = "$base_url$lang_cur$project_dir$archive";

    # List-form system() bypasses the shell. -O forces the local file name so
    # that a stale archive from an earlier run cannot make wget write to
    # "<archive>.1" and leave the old data to be decompressed instead.
    if ( system( 'wget', '-O', $archive, $url ) != 0 ) {
        warn "Skipping $lang_cur: download of $url failed\n";
        unlink $archive;    # wget -O may leave an empty file behind
        next LANG;
    }

    # -f overwrites a leftover decompressed file instead of prompting.
    if ( system( 'gunzip', '-f', $archive ) != 0 ) {
        warn "Skipping $lang_cur: could not decompress $archive\n";
        unlink $archive, $dump_file;
        next LANG;
    }

    my $titles_fh;
    if ( !open( $titles_fh, '<', $dump_file ) ) {
        warn "Skipping $lang_cur: cannot open $dump_file: $!\n";
        next LANG;
    }

    # We don't take the first line into account
    # (presumably a column header -- confirm against the dump format).
    my $first_line = <$titles_fh>;

    while ( my $line = <$titles_fh> ) {
        $line =~ tr/_/ /;    # titles use "_" where the page name has a space
        print {$res_fh} "$lang_cur:$line";
    }

    close $titles_fh;

    # gunzip already removed the .gz; only the decompressed dump is left.
    unlink $dump_file
        or warn "Could not remove $dump_file: $!\n";
}

# Buffered write errors only surface at close, so check it.
close $res_fh
    or die "Error while closing $res_name: $!\n";