[程式] 自動依照 regex pattern 標記文章
鑑於廣告文章氾濫, 有些發信者的 news server 又不管,弟寫了個小程式
來標記特定 pattern 的文章,這裡不多做說明,直接給大家使用範例:
****註**** 資料損壞恕不負責,使用前請多做測試~~~
***感謝*** autrijus 幫忙解答 unpack/pack encoding 的疑慮
*強烈建議* 在 perl5.8 之後的版本跑,在 perl 5.6/5.005 不在保固內哦~~
*修改(customize)要點* 請依照您的 bbs 版本所定義的 .DIR struct 格式做修改
$packstring 以及 $packsize 還有要 tag 的 constant
這隻程式可以在 maple2/sob 系列下直接執行,不過最好是先備份 .DIR 做測試
將分割線後的文字剪下另存為 tag_post.pl
執行範例:
看板名稱: InstallBBS
1. 將文章作者字串格式符合 ^av\w+av\.$ pattern 的標記為 'D'
^av\w+av\.$ 的意義請查 perlre
% ./tag_post.pl InstallBBS -a '^av\w+av\.$'
2. 將文章標題符合 手機 或是 圖鈴 的標記 'D'
% ./tag_post.pl InstallBBS -t '手機|圖鈴'
3. 將文章標題最後是連續的數字的 (這是最近最討厭的文章 -_-) 標記
% ./tag_post.pl InstallBBS -t '\d+\s*$'
有問題請寄信聯絡
clsung <在> dragon2 <點> net
===============================分割線==================================
#!/usr/bin/perl -w
# Target: To tag posts which matched a given pattern
# Version: 0.8
# Author: clsung@dragon2.net
# Create: Wed Feb 4 23:20:06 CST 2004
use strict;
use encoding 'big5',STDIN => 'big5',STDOUT => 'big5',STDERR => 'big5' ;
use Fcntl qw(:DEFAULT :flock :seek);
use Encode qw(decode);
# struct fileheader
# {
# char filename[FNLEN]; /* M.9876543210.A */
# char savemode; /* file save mode */
# char owner[IDLEN + 2]; /* uid[.] */
# char date[6]; /* [02/02] or space(5) */
# char title[TTLEN + 1];
# uschar filemode; /* must be last field @ boards.c */
# };
# typedef struct fileheader fileheader;
# FNLEN = 33
# IDLEN = 12
# TTLEN = 72
#define FILE_LOCAL 0x1 /* local saved */
#define FILE_READ 0x1 /* already read : mail only */
#define FILE_LOCAL 0x1 /* local saved */
#define FILE_READ 0x1 /* already read : mail only */
#define FILE_MARKED 0x2 /* opus: 0x8 */
#define FILE_DIGEST 0x4 /* digest */
#define FILE_TAGED 0x8 /* taged */
#define FILE_REPLYOK 0x10 /* replyok*/
#define FILE_REFUSE 0x20 /* refuse*/
my $packstring = "Z33ZZ14Z6Z73C";
my $packsize = "128";
my @packlist = qw/filename savemode owner date title filemode/;
my $debug = 0;
die "usage: $0 [board] [-a|-t] [pattern]\n" if $#ARGV < 2;
@ARGV = map decode(big5 => $_), @ARGV;
my $target_board = $ARGV[0];
my $author_title = $ARGV[1];
my $target_pattern = $ARGV[2];
my $target_file = "/home/bbs/boards/$target_board/.DIR";
die "board DIR is not exists!!!\n" unless -e $target_file;
$author_title =~ s/^-*//g;
print "Matching Target: Author\n" if ($author_title =~ /^[Aa]/);
print "Matching Target: Title\n" if ($author_title !~ /^[Aa]/);
my %matched_fhdr;
getmatched_data($target_file);
print "number of matched: ".(keys %matched_fhdr)."\n";
tag_record();
sub tag_record {
foreach my $key (keys %matched_fhdr) {
my $id = getindex($target_file,$key);
post_tag($id,$matched_fhdr{$key},$target_file);
}
}
sub substitute_record {
my ($fpath, $rptr, $id) = @_;
#print $rptr." size is ".length($rptr)."\n" if $debug;
$rptr = _pack($packstring,@{$rptr}{@packlist});
my $size = length($rptr);
sysopen (FH,"$fpath", O_WRONLY | O_CREAT, 0644) or return -1;
flock(FH, LOCK_EX);
sysseek(FH, $size * ($id - 1), SEEK_SET);
syswrite (FH, $rptr, $size) or die $!;
flock(FH, LOCK_UN);
close FH;
#print $rptr." size is ".length($rptr)."\n" if $debug;
return 0;
}
sub post_tag {
my ($ent, $fhdr, $direct) = @_;
$fhdr->{filemode} |= 0x8 if ($fhdr->{filemode} & 0x2) == 0;
#print $ent." ".$fhdr->{title}." is tagged\n" if ($fhdr->{filemode} & 0x8);
return substitute_record($direct, $fhdr, $ent);
};
sub _unpack {
return map Encode::decode('big5' => $_), CORE::unpack($_[0], $_[1]);
}
sub _pack {
return CORE::pack($_[0], map Encode::encode('big5' => $_), @_[1..$#_]);
}
sub getmatched_data {
my ($fpath) = @_;
open (FH,"$fpath") or die $!;
my $fhdr;
my $size = $packsize;
my $now = 0;
while ((sysread(FH, $fhdr, $size) == $size)) {
$now++;
my $DIR;
@{$DIR}{@packlist} = _unpack($packstring, $fhdr);
if ($author_title =~ /^[Aa]/) {
next unless $DIR->{owner} =~ /$target_pattern/;
} else { # $author_title =~ /^Tt/) {
next unless $DIR->{title} =~ /$target_pattern/;
}
$matched_fhdr{$DIR->{filename}} = $DIR if (($DIR->{filemode} & 0x2) == 0) and (($DIR->{filemode} & 0x8) == 0);
print $DIR->{title}. "(".$DIR->{filename}.") matched!!\n" if $debug;
}
close FH;
return 0;
}
sub getindex {
my ($fpath, $fname) = @_;
open (FH,"$fpath") or die $!;
my $fhdr;
my $size = $packsize;
my $now = 0;
while ((sysread(FH, $fhdr, $size) == $size)) {
$now++;
my $DIR;
@{$DIR}{@packlist} = _unpack($packstring, $fhdr);
if ($DIR->{filename} eq $fname) {
close FH;
return $now;
}
}
close FH;
return 0;
}
======================================結束==============================
--
◆ 從 localhost 來的水榭訪客
--
自由心靈的天堂§水榭(SungSung.Dragon2.Net)§