#!/usr/bin/perl

# Copyright (C) 2006, 2007 王亮

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public Licence as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.

# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public Licence for more details.

use strict;
use warnings;

use HTML::TableExtract;
use Config::General;
use LWP::UserAgent;
use Getopt::Long;
use Encode;
use HTML::Template;
use Pod::Usage;
use DateTime;
use DateTime::Format::W3CDTF;
use XML::Atom::Feed;
use File::Basename;
use File::Spec::Functions;
use File::Path;

# DB::Fundcalc is loaded from the directory that contains this script,
# so the search path must be extended before the module is used.
use FindBin qw($Bin);
use lib "$Bin";
use DB::Fundcalc;

# Storage for the command-line options.  Everything except $urlpath stays
# undefined until the corresponding option is given.
my $verbose;
my $help;
my $refresh;
my $first_site_index;
my $output;
my $discard_data;
my $domain;
my $urlpath = "fundcalc";
my $output_directory;
my $config_directory;
my $username;

# Priority of output option is higher than output_directory.
# Parse the command line.  A parse failure is treated like --help.
my $result = GetOptions(
    "refresh|r"            => \$refresh,
    "firsturl=i"           => \$first_site_index,
    "output|o=s"           => \$output,
    "discard-data|n"       => \$discard_data,
    "help|h"               => \$help,
    "domain=s"             => \$domain,
    "urlpath=s"            => \$urlpath,
    "username|u=s"         => \$username,
    "output-directory|d=s" => \$output_directory,
    "config-directory|c=s" => \$config_directory,
);

$help = 1 unless $result;

# Asking for a particular first site implies fetching from the web.
$refresh = 1 if defined $first_site_index;

pod2usage(-verbose => 2) if defined $help;

# Default configuration directory: ~/.funds, or ./.funds without HOME.
unless (defined $config_directory) {
    $config_directory = (exists $ENV{HOME}) ? "$ENV{HOME}/.funds" : ".funds";
}
create_directory($config_directory, 0700);

# information of open-end funds you invest
my $user_investment;
if (defined $username) {
    # Each user has a private portfolio file and a private URL path.
    $user_investment = "$config_directory/$username.config";
    $urlpath .= "/$username";
}
else {
    $user_investment = "$config_directory/config";
}

unless (-e $user_investment and -f _) {
    generate_example_conf();

    # Name the file that was actually looked for; with --username it is
    # NAME.config rather than config.
    my $message = "没有找到中国开放式基金投资组合数据。\n请参照文件$config_directory/example.conf里的格式,将投资组合保存在文件${user_investment}里。";
    pod2usage($message);
}

# get user's investment data.
my $conf = Config::General->new($user_investment);
my %user_funds = $conf->getall;
my $user_funds_num = scalar keys %{ $user_funds{fund} };

# This hash will hold information about open-end funds.
my %funds_info;

# The report is dated with the previous day in Asia/Shanghai time.
# DateTime's subtract() modifies its invocant, so work on a clone to keep
# $now meaning "now".
my $now       = DateTime->now(time_zone => 'Asia/Shanghai');
my $yesterday = $now->clone->subtract(days => 1);
my $year      = $yesterday->year;
my $month     = sprintf '%02d', $yesterday->month;
my $day       = sprintf '%02d', $yesterday->day;
my $date      = join '-', $year, $month, $day;

# This array hold structure information of web page from which to
# extract funds info.
# Each entry describes one web page: which table to extract (depth/count as
# understood by HTML::TableExtract), how many leading rows to skip, and the
# zero-based column index of every field.  A date_column of -1 means the
# page has no per-row date column.  date_row is not read by this script.
my @site_config = (
    {
        url          => "http://www.caibangzi.com/fund",
        table_depth  => 0,
        table_count  => 1,
        skip_rows    => 0,
        date_row     => -1,
        date_column  => -1,
        code_column  => 1,
        name_column  => 2,
        net_column   => 3,
        total_column => 4,
        diff_column  => 5
    },
    {
        url          => "http://fund.zhicheng.com/",
        table_depth  => 0,
        table_count  => 4,
        skip_rows    => 1,
        date_row     => -1,
        date_column  => 11,
        code_column  => 1,
        name_column  => 2,
        net_column   => 4,
        total_column => 5,
        diff_column  => 8,
    },
);

# raw information of all open-end funds in China
my $info_file = "$config_directory/funds.info";
my $read_funds_info_from_file = 0;

# Information of open-end funds in China is updated once a day.  We
# retrieve twice at most to make it easy to program and to not get
# out-of-date data.
if (! defined $refresh and -e $info_file and -f _ and (-M _) < 0.5) {
    $read_funds_info_from_file = 1;
}

# This is money you earn or lose
my $total = 0;
# profit of each fund
my @user_fund_item;
# URL of the site whose data was finally accepted (undef when using the cache)
my $url;
# set when the web had no usable data and the local cache is used instead
my $show_warning;

if (not $read_funds_info_from_file) {
    my @visit_order = (0 .. $#site_config);
    if (defined $first_site_index) {
        # Only swap when the index addresses an existing site; anything
        # else used to inject undefined entries into the visit order.
        if ($first_site_index < @site_config
            and $first_site_index >= -@site_config) {
            @visit_order[0, $first_site_index] = ($first_site_index, 0);
        }
        else {
            print STDERR "警告:--firsturl=${first_site_index}超出范围,按缺省顺序查询\n";
        }
    }

    for my $site_index (@visit_order) {
        my $site = $site_config[$site_index];
        my $avail_funds_num = get_funds_info_from_web($site);

        # A site is accepted only when it prices every fund of the user.
        if ($avail_funds_num == $user_funds_num) {
            $url = $site->{url};
            last;
        }

        # Rejected: forget everything this site contributed, including the
        # partial profit sum.
        %funds_info     = ();
        @user_fund_item = ();
        $total          = 0;
    }

    # no new data
    if (!%funds_info) {
        exit 0 if defined $refresh or defined $domain;
        $show_warning = 1;
        $read_funds_info_from_file = 1;
    }
}
get_funds_info_from_file() if $read_funds_info_from_file;

# Decide where the HTML report goes.  An explicit --output always wins and
# is always rewritten; the default YEAR/MONTH/DAY.html is written only once.
my $overwrite_html = 1;
if (defined $output) {
    $output_directory = dirname($output);
    create_directory($output_directory);
}
else {
    $output_directory = $config_directory unless defined $output_directory;
    my $dir = catdir($output_directory, $year, $month);
    create_directory($dir);
    $output = catfile($dir, $day . ".html");
    $overwrite_html = 0 if -e $output and -f _;
}

# create_directory($dir, $permission)
#
# Creates $dir, including missing parents, unless it already exists as a
# directory.  $permission is the mode handed to mkpath and defaults to 0755.
# Dies with a localized message when mkpath reports failure.
sub create_directory {
    my $dir = shift;
    my $permission = shift || 0755;

    unless (-e $dir and -d _) {
        mkpath([ $dir ], 0, $permission)
            or die can_not_open_file_or_create_dir("创建", $dir, $!);
    }
    return;
}

if ($overwrite_html) {
    my $template = HTML::Template->new(filename => "$config_directory/funds.tmpl");
    my $w3c = DateTime::Format::W3CDTF->new;
    my $updated_time
        = $w3c->format_datetime(DateTime->now(time_zone => "Asia/Shanghai"));

    # atom feed
    if (defined $domain) {
        my $atom_file = "$output_directory/atom.xml";
        $XML::Atom::DefaultVersion = "1.0";

        # load existing entries, keeping room for today's so that the feed
        # never grows beyond 30 entries
        my @entries;
        if (-e $atom_file) {
            my $old_feed = XML::Atom::Feed->new($atom_file);
            @entries = $old_feed->entries;
            splice @entries, 29 if @entries > 29;
        }

        # create new feed
        my $feed = XML::Atom::Feed->new;
        $feed->title('开放式基金投资简报');
        $feed->id("tag:$domain,2007:$urlpath/atom.xml");
        $feed->language('zh-cn');
        $feed->updated($updated_time);
        $feed->generator('XML::Atom');

        my $author = XML::Atom::Person->new;
        $author->name('王亮');
        $feed->author($author);

        my $atom_link = XML::Atom::Link->new;
        $atom_link->type('application/atom+xml');
        $atom_link->rel('self');
        $atom_link->href("http://$domain/$urlpath/atom.xml");
        $feed->add_link($atom_link);

        my $feed_link = XML::Atom::Link->new;
        $feed_link->type('text/html');
        $feed_link->rel('alternate');
        $feed_link->href("http://$domain/$urlpath/");
        $feed->add_link($feed_link);

        # today's entry
        my $output_filename = join('/', $year, $month, $day) . ".html";
        my $link = XML::Atom::Link->new;
        $link->type('text/html');
        $link->rel('alternate');
        $link->href("http://$domain/$urlpath/$output_filename");

        my $entry = XML::Atom::Entry->new;
        # Clear the UTF-8 flag before mixing these values with the byte
        # string literal below (presumably to avoid double encoding in the
        # title; verify before changing).
        Encode::_utf8_off($year);
        Encode::_utf8_off($month);
        Encode::_utf8_off($day);
        $entry->title("$year年$month月$day日投资简报");
        $entry->add_link($link);
        $entry->published($updated_time);
        $entry->updated($updated_time);
        $entry->id("tag:$domain,2007:$urlpath/$output_filename");
        $entry->summary("总收益为$total元。");

        # newest entry first, then the ones carried over
        $feed->add_entry($entry);
        $feed->add_entry($_) foreach @entries;

        open my $atom_fh, '>', $atom_file
            or die can_not_open_file_or_create_dir("打开", $atom_file, $!);
        print {$atom_fh} $feed->as_xml_utf8;
        # Buffered write errors only surface at close time.
        close $atom_fh
            or die can_not_open_file_or_create_dir("写入", $atom_file, $!);

        $template->param(ATOMLINK => "http://$domain/$urlpath/atom.xml");
    }

    $template->param(YEAR => $year);
    $template->param(MONTH => $month);
    $template->param(DAY => $day);
    $template->param(ATOM => defined $domain);
    $template->param(INVESTMENT_INFO => \@user_fund_item);
    $template->param(TOTAL => $total);
    $template->param(TOTAL_POSITIVE => $total > 0);

    # TOTAL2 is the profit plus all invested capital.
    my $total2 = $total;
    $total2 += $_->{INVESTMENT} for @user_fund_item;
    $template->param(TOTAL2 => $total2);

    $template->param(SHOW_WARNING => $show_warning);
    $template->param(URL => (defined $url) ? $url : $site_config[0]->{url});
    $template->param(UPDATE_DATE => $updated_time);

    open my $html_fh, '>', $output
        or die can_not_open_file_or_create_dir("打开", $output, $!);
    print {$html_fh} $template->output;
    close $html_fh
        or die can_not_open_file_or_create_dir("写入", $output, $!);

    # index.html always points at the latest report.  -l also catches a
    # dangling link, which -e would report as missing.
    my $index = "$output_directory/index.html";
    unlink $index if -l $index or -f $index;
    symlink($output, $index);
}

# Show the report locally unless it is being published to a web site.
unless (defined $domain) {
    # The command goes through the shell (for the redirection and the
    # trailing &), so the path has to be single-quoted safely.
    (my $quoted_output = $output) =~ s/'/'\\''/g;
    system("firefox '$quoted_output' >/dev/null 2>&1 &");
}

# Archive freshly downloaded data as CONFIG/YEAR/MONTH/DAY.funds.info and
# point funds.info at it.
my $info_dir = catdir($config_directory, $year, $month);
create_directory($info_dir);
my $info_file_ymd = catfile($info_dir, $day . "." . basename($info_file));

if (! $read_funds_info_from_file
    and ! defined $discard_data
    and (! -e $info_file_ymd or $refresh)) {

    # save into database, but only when the database file already exists
    my $db_file = catfile($config_directory, "openfund.sqlite");
    if (-e $db_file) {
        eval {
            my $schema      = DB::Fundcalc->connect("dbi:SQLite:$db_file");
            my $fund_table  = $schema->resultset('Fund');
            my $price_table = $schema->resultset('Price');

            for my $key (keys %funds_info) {
                next if $key eq "date";

                my $fi   = $funds_info{$key};
                my $code = $fi->{code};
                my $name = $fi->{name};

                my $fund_row = $fund_table->find_or_new({ code => $code });
                unless ($fund_row->in_storage) {
                    $fund_row->code($code);
                    $fund_row->name($name);
                    $fund_row->insert;
                }

                # A per-fund date overrides the report date.
                my $f_id   = $fund_row->id;
                my $f_date = (exists $fi->{date}) ? $fi->{date} : $date;

                my $price_row = $price_table->find_or_new(
                    { fund_id => $f_id, date => $f_date });
                unless ($price_row->in_storage) {
                    $price_row->fund_id($f_id);
                    $price_row->net($fi->{net});
                    $price_row->total($fi->{total});
                    $price_row->diff($fi->{diff});
                    $price_row->date($f_date);
                    $price_row->insert;
                }
            }
        };
        # A database failure is reported but does not stop the file save.
        if ($@) {
            print STDERR "警告:数据未能保存到数据库中。$@\n";
        }
    }

    # save into file
    for my $key (keys %funds_info) {
        next if $key eq "date";
        my $name = $funds_info{$key}->{name};
        $funds_info{$key}->{name} = encode("utf8", $name);
    }
    my $info = Config::General->new(\%funds_info);
    $info->save_file($info_file_ymd);

    unlink $info_file if -l $info_file or -f $info_file;
    symlink($info_file_ymd, $info_file);
}

# can_not_open_file_or_create_dir($operation, $name, $message)
#
# Returns the localized error text "cannot OPERATION NAME: MESSAGE",
# terminated by a newline so that die does not append file and line.
sub can_not_open_file_or_create_dir {
    my ($operation, $name, $message) = @_;
    return "无法$operation$name:$message\n";
}

# get_funds_info_from_file()
#
# Loads the cached fund data from $info_file into %funds_info, adopts the
# date stored in it and recomputes the user's profit.  Prints a notice when
# the cache is only used because the web had no usable data.
sub get_funds_info_from_file {
    my $info = Config::General->new($info_file);
    %funds_info = $info->getall;
    $date = $funds_info{date};
    calculate_investment();
    print STDERR "警告:使用本地数据\n" if defined $show_warning;
    return;
}

# get_funds_info_from_web($site)
#
# Downloads the page described by $site (one element of @site_config),
# fills %funds_info from the first matching table and recomputes the
# user's profit.
#
# Returns the number of user funds that could be priced, or 0 when the
# download failed, no table matched, or a row was too short.
sub get_funds_info_from_web {
    my ($site) = @_;

    print STDERR "正在从$site->{url}提取基金净值数据……\n";
    my $browser = LWP::UserAgent->new;
    $browser->env_proxy;
    my $response = $browser->get($site->{url});
    unless ($response->is_success()) {
        print STDERR "错误:无法从$site->{url}提取基金净值数据。原因:",
            $response->status_line(), "\n";
        return 0;
    }

    $funds_info{date} = $date;

    # get funds info from HTML Table
    my $te = HTML::TableExtract->new(
        depth => $site->{table_depth},
        count => $site->{table_count},
    );
    $te->parse($response->decoded_content);

    # should only match one
    foreach my $table ($te->tables) {
        my $funds_num = 0;
        my $row_num = 0;

        foreach my $row ($table->rows) {
            ++$row_num;
            # skip table header
            next if $row_num <= $site->{skip_rows};

            # bad table.  Index diff_column needs diff_column + 1 cells.
            if (scalar @$row <= $site->{diff_column}) {
                print STDERR "网页数据残缺,略过\n";
                return 0;
            }
            ++$funds_num;

            my %fund_info = (
                code  => $row->[$site->{code_column}],
                name  => $row->[$site->{name_column}],
                net   => $row->[$site->{net_column}],
                total => $row->[$site->{total_column}],
                diff  => $row->[$site->{diff_column}],
            );
            # Empty cells may come back undefined; treat them as empty text.
            for my $value (values %fund_info) {
                $value = '' unless defined $value;
            }

            # Only rows with numeric net and total values are fund rows.
            next unless $fund_info{net} =~ /\d/ and $fund_info{total} =~ /\d/;

            # A "date" key marks a quote that is not for the report date;
            # calculate_investment treats such a fund as out of date.
            if ($fund_info{net} =~ /(\d\d)-(\d\d)$/) {
                # The net value carries a trailing MM-DD.
                my ($fund_month, $fund_day) = ($1, $2);
                $fund_info{date} = "$year-$fund_month-$fund_day"
                    unless $fund_month eq $month and $fund_day eq $day;
            }
            elsif ($site->{date_column} >= 0) {
                my $row_date = $row->[$site->{date_column}];
                $row_date = '' unless defined $row_date;
                $fund_info{date} = $row_date
                    if $row_date ne "$year$month$day";
            }

            # Keep only the number in front of the percent sign and strip
            # the MM-DD suffixes.
            $fund_info{diff} =~ s/^(.*)%.*$/$1/ if $fund_info{diff} =~ /%/;
            $fund_info{net} =~ s/\d\d-\d\d$//;
            $fund_info{total} =~ s/\d\d-\d\d$//;

            for my $key (keys %fund_info) {
                $fund_info{$key} =~ s/^\s+//;
                $fund_info{$key} =~ s/\s+$//;
            }

            $funds_info{$fund_info{code}} = \%fund_info;
        }

        print STDERR "提取基金净值数据$funds_num项\n";
        calculate_investment();
        return scalar @user_fund_item;
    }

    # No table matched the configured depth/count.
    return 0;
}

# calculate_investment()
#
# Rebuilds @user_fund_item and $total from %funds_info and the user's
# portfolio.  Stops at the first fund whose quote carries its own date,
# leaving the item list short so that callers can tell the data is
# incomplete.
sub calculate_investment {
    # Always start from zero: this sub may run once per site plus once for
    # the cache, and earlier partial sums must not leak into the result.
    $total = 0;
    @user_fund_item = ();

    for my $fund (keys %{ $user_funds{fund} }) {
        unless (exists $funds_info{$fund}) {
            print STDERR "没有找到代码为$fund的基金净值\n";
            next;
        }

        my $quote   = $funds_info{$fund};
        my $holding = $user_funds{fund}->{$fund};

        if (exists $quote->{date}) {
            print STDERR $quote->{code}, "日期为", $quote->{date}, "\n";
            return;
        }

        my $income = sprintf("%.2f",
            $quote->{net} * $holding->{number} - $holding->{investment});
        $total += $income;

        # Using `encode' to remove warning during printing.  Names read
        # back from the cache file were already encoded when saved.
        my $name = $quote->{name};
        $name = encode("utf8", $quote->{name})
            unless $read_funds_info_from_file;

        my $diff = $quote->{diff};
        my %item = (
            CODE            => $fund,
            NAME            => $name,
            NET             => $quote->{net},
            BUY             => $holding->{net},
            DIFF            => $diff,
            NUMBER          => $holding->{number},
            INCOME          => $income,
            INCOME_POSITIVE => $income > 0,
            DIFF_POSITIVE   => $diff !~ m/-/,
            INVESTMENT      => $holding->{investment},
        );
        push @user_fund_item, \%item;
    }
    return;
}

# generate_example_conf()
#
# Writes $config_directory/example.conf, a sample portfolio holding one
# fund block.  Dies when the file cannot be opened or written.
sub generate_example_conf {
    my $example_file = "$config_directory/example.conf";

    open my $example_fh, '>', $example_file
        or die can_not_open_file_or_create_dir("打开", $example_file, $!);
    print {$example_fh} <<'CONF';
# example for config
<fund 000001>
    # 买入净值
    net = 1.00
    # 持有数量
    number = 1000
    # 投入资本
    investment = 1002
</fund>
CONF
    close $example_fh
        or die can_not_open_file_or_create_dir("写入", $example_file, $!);
    return;
}

=encoding utf8

=head1 名称

funds.pl

=head1 摘要

本程序从专业网站得到所有基金净值,然后根据用户提供的信息列出所有用户投资的基金信息,并统计出每只基金的收益,以及总收益。结果输出为网页,并用firefox打开。

=head1 版本

1.3.4

Subversion地址为http://www.wanglianghome.org/svn/fundcalc/

项目介绍参见http://wanglianghome.org/cgi-bin/wiki.cgi/中国开放式基金投资跟踪工具

=head1 选项

=over 8

=item B<--help>或B<-h>

显示帮助信息

=item B<--refresh>或B<-r>

忽略本地缓存的基金净值信息,重新从网上获取

=item B<--firsturl=NUM>

选择最先查询的网站。缺省查询顺序是先(0)http://www.caibangzi.com/fund再(1)http://fund.zhicheng.com/。使用选项--firsturl=1可以先查询后者

=item B<--output>或B<-o>

指定输出文件位置

=item B<--discard-data>或B<-n>

从网站上取回的基金净值数据将不会保存到本地

=item B<--domain>

网站域名,投资结果发布到该网站

=item B<--urlpath>

域名之后的路径

=item B<--username>或B<-u>

多用户支持,并用来查找配置文件及确定发布路径。如用户名为me,则相应的配置文件为me.config,发布路径为fundcalc/me/

=item B<--config-directory>或B<-c>

指定配置文件所在目录

=item B<--output-directory>或B<-d>

输出目录,当使用--output选项时,使用该选项指定的路径无效,而采用指定文件所在的路径

=back

=head1 使用

下载funds.pl和funds.tmpl到本地,第一次运行时会提示用户并没有提供任何基金投资组合信息,建议用户按照例子编写,并存在.funds目录里。

    $ perl funds.pl
    没有找到中国开放式基金投资组合数据
    请参照文件/path/example.conf里的格式,将投资组合保存在文件/path/config里

example.conf的内容如下:

    # example for config
    <fund 000001>
        # 买入净值
        net = 1.00
        # 持有数量
        number = 1000
        # 投入资本
        investment = 1002
    </fund>


其中“000001”是基金代码,而“买入净值”目前没有任何用处,但会显示在结果网页上。config文件可以含有多种基金投资信息,但同一种基金只能有一项,因此无法记录用户多次投资的信息,但可以将多次投资同一只基金的信息综合到一项里面,计算结果是一样的。后续版本将提供处理同种基金多项信息的功能。

如果用户系统定义了HOME环境变量,那么.funds目录会建在用户的home目录下,否则建在当前目录下。修改好config文件后将funds.tmpl也放在同一个目录下,缺省情况下,统计结果也会存在该目录下。因为基金信息每天更新一次,为了节省带宽,提高响应速度,该工具会在当天第一次运行时将所有基金信息存在.funds目录下,以后无需每次从网上提取,当然,无论何时运行,一天之内的结果总是一样的,即前一天投资的收益情况。

填好基金投资信息后,以后只需每日运行:

    $ perl funds.pl

即可。

=head1 cron job

该程序可以作为cron job定期运行,并将结果发布到网站上,同时提供Atom Feed。方法如下:

在您的web服务器上运行

    $ crontab -e

然后编辑文件如下:

    0 8 * * * perl /path/to/script/funds.pl -c /path/to/config/ -d /path/to/output --domain=www.yourdomain.com 2>&1 >/path/to/log

缺省的urlpath值为fundcalc,所以Atom Feed的网址为http://www.yourdomain.com/fundcalc/atom.xml,订阅它就可以及时了解投资状况,无需手工每天运行。man 5 crontab查看crontab文件的编写帮助。

为防止一些意想不到的问题,可以多添加几项,然而由于基金信息一天公布一次,所以太多了也没用。建议添加两项,注意:后面的cron job最好使用--refresh选项。

=head1 作者

王亮

=head1 版权

本工具以GPL协议(第二版本,或以上)发布,对该程序的运行结果作者不做任何担保,任何结果由使用该程序的用户自己承担。