使用者工具

網站工具


perl:yahoo_dictionary

yahoo 字典

Perl 版本

#!/usr/bin/perl
# Yen-Ming Lee <leeym@leeym.com>
# 2007/01/09
use Encode qw(encode decode from_to);
use Term::ANSIColor qw(:constants);
use Data::Dumper;
use LWP::Simple;
use strict;

# Script entry point: look up the first command-line argument.
# ydict() itself dies when no word was supplied.
ydict(shift @ARGV);

# Look up a word on the Yahoo! Taiwan dictionary and print a coloured
# summary of the result page to STDOUT.
#
# Arguments:
#   $p    - the word or phrase to look up (required, non-empty).
#   $hops - optional; how many spelling suggestions have already been
#           followed.  Callers normally omit it.
#
# Returns nothing meaningful.  Dies when no word is given.
sub ydict
{
  my ($p, $hops) = @_;
  die "ydict: no word given\n" unless defined $p and length $p;
  $hops ||= 0;

  # Percent-encode the term so that spaces, '&', '#' and non-ASCII bytes
  # cannot corrupt the request URL.  A string carrying Perl's UTF-8 flag is
  # turned into bytes first so that ord() never exceeds 255.
  my $query = $p;
  utf8::encode($query) if utf8::is_utf8($query);
  $query =~ s/([^A-Za-z0-9\-_.~])/sprintf('%%%02X', ord($1))/ge;

  my $html = get("http://tw.dictionary.yahoo.com/search?p=$query");
  if (!defined $html)
  {
    # get() yields undef when the request fails; say so instead of
    # printing an empty result.
    print BOLD . YELLOW . "ERROR: cannot fetch results for $p\n" . RESET;
    return;
  }

  # LC_CTYPE may be unset, so fall back to an empty string before matching.
  from_to($html, "utf-8", "big5") if ($ENV{'LC_CTYPE'} || '') =~ /Big5/;

  # Join the page into one line so that the patterns below can span what
  # used to be line breaks.
  $html =~ tr/\r\n//d;

  if ($html =~ m{<em class="warning">})
  {
    # Follow the suggestion found inside the warning, but only a bounded
    # number of times and never back to the same term, so the recursion
    # always terminates.
    if (    $hops < 3
        and $html =~ m{<em class="warning">.*?>(\S+)<.*?</em>}
        and $1 ne $p)
    {
      my $q = $1;
      print BOLD . YELLOW . "\nERROR: $p -> $q\n" . RESET;
      return ydict($q, $hops + 1);
    }
    print BOLD . YELLOW . "ERROR: $p\n" . RESET;
    return;
  }

  print BOLD . YELLOW . "\n$p\n" . RESET;

  my $i = 0;    # running number of the 'explain' entries, reset at each 'cixin'

  # m//g walks through the <div class=p...> elements one by one, which
  # avoids copying the remainder of the page through $' on every iteration.
  while ($html =~ m{<div class=p(\w+)>(.*?)</div>}gi)
  {
    my ($type, $line) = ($1, $2);
    my $color = '';
    my $bold  = BOLD;
    my $reset = RESET;
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;

    if ($type eq 'cixin')
    {
      $i = 0;
      $color = BOLD . RED;
    }
    elsif ($type eq 'chi' or $type eq 'eng')
    {
      $color = CYAN;
      # After a bold span, fall back to the line colour, not to plain text.
      $reset = RESET . $color;
      $line = "\t$line";
    }
    elsif ($type eq 'explain')
    {
      $i++;
      $line = "$i $line";
    }
    else
    {
      # Every other p* class is not shown.
      next;
    }

    # Map <b>...</b> to terminal attributes, then drop all remaining tags.
    $line =~ s,<b>,$bold,g;
    $line =~ s,</b>,$reset,g;
    $line =~ s/<[^>]+>//g;
    print $color . "$line\n" . RESET;
  }
  print "\n";
}

FourDollars 進化版

  • 只支援 UTF-8
  • 一次可查詢多個單字（以多個參數傳入）
  • 不帶參數執行時，提供類似 Shell 的互動查詢模式
#!/usr/bin/perl
# Shih-Yuan Lee <fourdollars@gmail.com>
# http://fd.idv.tw

use Term::ANSIColor qw(:constants);
use LWP::Simple;
use strict;
use warnings;

my $debug = 0;		# when true, parser() and parsermore() print trace markers
my $trigger = 0;	# set to 1 once the first command-line word has been handled

if (@ARGV) {
	# Batch mode: look up every word given on the command line and
	# separate consecutive results with a blank line.
	while (@ARGV) {
		print "\n" if $trigger;
		$trigger = 1;
		ydict(shift @ARGV);
	}
} else {
	# Interactive mode: prompt, read one word per line until end of input,
	# and silently re-prompt on empty lines.
	my $prompt = "<Yahoo!奇摩字典> ";
	print $prompt;
	while (defined(my $word = <>)) {
		chomp $word;
		ydict($word) unless $word eq '';
		print $prompt;
	}
}

# Look up a word on the Yahoo! Taiwan dictionary (UTF-8 interface) and print
# the result to STDOUT, one parser() call per section of the page.
#
# Arguments:
#   $p    - the word or phrase to look up (required, non-empty).
#   $hops - optional; how many spelling suggestions have already been
#           followed.  Callers normally omit it.
#
# Returns nothing meaningful.  Dies when no word is given.
sub ydict
{
	my ($p, $hops) = @_;
	die "ydict: no word given\n" unless defined $p and length $p;
	$hops ||= 0;

	# Percent-encode the term so that spaces, '&', '#' and non-ASCII bytes
	# cannot corrupt the request URL.  A string carrying Perl's UTF-8 flag
	# is turned into bytes first so that ord() never exceeds 255.
	my $query = $p;
	utf8::encode($query) if utf8::is_utf8($query);
	$query =~ s/([^A-Za-z0-9\-_.~])/sprintf('%%%02X', ord($1))/ge;

	my $html = get("http://tw.dictionary.yahoo.com/search?ei=UTF-8&p=$query");
	unless (defined $html) {
		# get() yields undef when the request fails; report it instead of
		# running the patterns below against an undefined value.
		print BOLD . YELLOW . "無法取得查詢結果: $p\n" . RESET;
		return;
	}

	# Join the page into one line so that the patterns can span what used
	# to be line breaks.
	$html =~ tr/\r\n//d;

	if ($html =~ m{<em class="warning">}) {
		# Follow the suggestion found inside the warning, but only a bounded
		# number of times and never back to the same term, so the recursion
		# always terminates.
		if (    $hops < 3
		    and $html =~ m{<em class="warning">.*?>(\S+)<.*?</em>}
		    and $1 ne $p) {
			my $q = $1;
			print BOLD . YELLOW . "拼字檢查: $p -> $q" . RESET . "\n\n";
			return ydict($q, $hops + 1);
		}
		print BOLD . YELLOW . "查無此字: $p\n" . RESET;
		return;
	}

	print BOLD . YELLOW . "$p 的查詢結果:" . RESET . "\n\n";

	# The page is divided by <div class="break"></div>; hand every piece,
	# including the part after the last divider, to parser().  split()
	# replaces the former $` / $' bookkeeping; the -1 limit keeps a
	# trailing empty piece.
	parser($_) for split m{<div class="break"></div>}, $html, -1;
}

# Print one section of the result page: its <h2> headline (if any) in bold,
# followed by whatever parsermore() extracts from the same section.
#
# Arguments: the HTML text of the section.
sub parser
{
	my ($block) = @_;

	if (my ($title) = $block =~ m{<h2>(.*?)</h2>}) {
		for ($title) {
			# Show superscripts as [..], drop all other tags, then trim.
			s,<sup>,[,g;
			s,</sup>,],g;
			s/<[^>]+>//g;
			s/^\s+//;
			s/\s+$//;
		}
		print "<h2>" if $debug;
		print BOLD."$title".RESET."\n";
	}

	parsermore($block);
}

# Print the entries of one section of the result page.
#
# Every <div class=pXXX> or <div class=chinese> element is handled by class:
#   pcixin     - printed bold red; restarts the numbering (skipped if empty)
#   pchi/peng  - printed green / cyan, indented by eight spaces
#   chinese    - printed indented by four spaces, <br> becomes a line break
#   pexplain   - numbered while no ptitle has been seen since the last
#                pcixin, otherwise only indented
#   ptitle     - printed bold blue, unless the line mentions both KK and DJ
#   others     - ignored
#
# Arguments: the HTML text of the section.
sub parsermore
{
	my $html = shift;
	my $i = 0;	# running number for pexplain entries
	my $flag = 0;	# 1 after a ptitle, back to 0 at the next pcixin

	# m//g walks through the elements one by one, which avoids copying the
	# remainder of the section through $' on every iteration.
	while ($html =~ m{<div class=(p\w+|chinese)>(.*?)</div>}gi)
	{
		my ($type, $line) = ($1, $2);
		my $color = RESET;
		my $bold = BOLD;
		my $reset = RESET;
		$line =~ s/^\s+//;
		$line =~ s/\s+$//;
		print "type=$type\nline=$line\n" if $debug;
		if ($type eq 'pcixin')
		{
			$flag = 0;
			$i = 0;
			$color = BOLD . RED;
			next if not ($line);
			print "<pcixin>\n" if $debug;
		}
		elsif ($type eq 'pchi')
		{
			$color = GREEN;
			# After a bold span, fall back to the line colour.
			$reset = RESET.$color;
			$line = "        $line";
			print "<pchi>\n" if $debug;
		}
		elsif ($type eq 'peng')
		{
			$color = CYAN;
			$reset = RESET.$color;
			$line = "        $line";
			print "<peng>\n" if $debug;
		}
		elsif ($type eq 'chinese')
		{
			$line =~ s/<br>/\n    /g;
			$line = "    $line";
			print "<chinese>\n" if $debug;
		}
		elsif ($type eq 'pexplain')
		{
			if ($flag == 0) {
				$i++;
				if ($line =~ m{<li>}) {
					# Number the list items consecutively, starting at the
					# current counter, then restart the counter.
					$line =~ s/<li>/"    " . $i++ . ". "/ge;
					$line =~ s/<br>/\n/g;
					$i = 0;
				} else {
					$line = "    $i. $line";
					print "<pexplain 1>\n" if $debug;
				}
			} else {
				$line = "    $line" if ($line);
				print "<pexplain 2>\n" if $debug;
			}
		}
		elsif ($type eq 'ptitle')
		{
			$flag = 1;
			$color = BOLD . BLUE;
			next if ($line =~ m{KK} && $line =~ m{DJ});
			print "<ptitle>\n" if $debug;
		}
		else
		{
			print "<others>\n" if $debug;
			next;
		}

		# Map <b> and <sup> to terminal attributes and brackets, then drop
		# every remaining tag.
		$line =~ s,<b>,$bold,g;
		$line =~ s,</b>,$reset,g;
		$line =~ s,<sup>,[,g;
		$line =~ s,</sup>,],g;
		$line =~ s/<[^>]+>//g;
		print $color."$line".RESET."\n";
	}
}

awk + sed 簡單版

#!/bin/sh
# Look up the word given as $1 on the Yahoo! Taiwan dictionary.
# curl posts the query, awk keeps the lines that mention the
# pexplain / peng / pchi classes (adding blank lines after each pchi line),
# and sed removes the HTML tags, pulling in the next line while a '<'
# is still left over.
curl -sd "p=$1" http://tw.dictionary.yahoo.com/search |
         awk '/pexplain/ { print }
              /peng/     { print }
              /pchi/     { print; print "\n" }' |
         sed -e :a -e 's/<[^>]*>//g;/</N;//ba'

加上可執行權限

 $ chmod +x filename 

執行:

 $ ./filename [單字] 
perl/yahoo_dictionary.txt · 上一次變更: 2007/04/15 02:41 由 wenpei