[Slashdotjp-dev 503] CVS update: slashjp/tagboxes/Top

Back to archive index

Tatsuki SUGIURA sugi****@users*****
2006年 7月 12日 (水) 20:42:08 JST


Index: slashjp/tagboxes/Top/Makefile.PL
diff -u /dev/null slashjp/tagboxes/Top/Makefile.PL:1.1
--- /dev/null	Wed Jul 12 20:42:08 2006
+++ slashjp/tagboxes/Top/Makefile.PL	Wed Jul 12 20:42:08 2006
@@ -0,0 +1,9 @@
+use ExtUtils::MakeMaker;
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+# $Id: Makefile.PL,v 1.1 2006/07/12 11:42:08 sugi Exp $
+WriteMakefile(
+    'NAME'	=> 'Slash::Tagbox::Top',
+    'VERSION_FROM' => 'Top.pm', # finds $VERSION
+    'PM'	=> { 'Top.pm' =>   '$(INST_LIBDIR)/Top.pm' },
+);
Index: slashjp/tagboxes/Top/TAGBOX
diff -u /dev/null slashjp/tagboxes/Top/TAGBOX:1.1
--- /dev/null	Wed Jul 12 20:42:08 2006
+++ slashjp/tagboxes/Top/TAGBOX	Wed Jul 12 20:42:08 2006
@@ -0,0 +1,4 @@
+# $Id: TAGBOX,v 1.1 2006/07/12 11:42:08 sugi Exp $
+name=Top
+description=Update the top n tags on a globj
+mysql_dump=mysql_dump.sql
Index: slashjp/tagboxes/Top/Top.pm
diff -u /dev/null slashjp/tagboxes/Top/Top.pm:1.1
--- /dev/null	Wed Jul 12 20:42:08 2006
+++ slashjp/tagboxes/Top/Top.pm	Wed Jul 12 20:42:08 2006
@@ -0,0 +1,232 @@
+#!/usr/bin/perl -w
+# This code is a part of Slash, and is released under the GPL.
+# Copyright 1997-2005 by Open Source Technology Group. See README
+# and COPYING for more information, or see http://slashcode.com/.
+# $Id: Top.pm,v 1.1 2006/07/12 11:42:08 sugi Exp $
+
+package Slash::Tagbox::Top;
+
+=head1 NAME
+
+Slash::Tagbox::Top - update the top n tags on a globj
+
+=head1 SYNOPSIS
+
+	my $tagbox_tcu = getObject("Slash::Tagbox::Top");
+	my $feederlog_ar = $tagbox_tcu->feed_newtags($users_ar);
+	$tagbox_tcu->run($affected_globjid);
+
+=cut
+
+use strict;
+
+use Slash;
+use Slash::DB;
+use Slash::Utility::Environment;
+use Slash::Tagbox;
+
+use Data::Dumper;
+
+use vars qw( $VERSION );
+$VERSION = ' $Revision: 1.1 $ ' =~ /\$Revision:\s+([^\s]+)/;
+
+use base 'Slash::DB::Utility';	# first for object init stuff, but really
+				# needs to be second!  figure it out. -- pudge
+use base 'Slash::DB::MySQL';
+
+sub new {
+	my($class, $user) = @_;
+
+	my $plugin = getCurrentStatic('plugin');
+	return undef if !$plugin->{Tags};
+	my($tagbox_name) = $class =~ /(\w+)$/;
+	# (this code is for once Install.pm actually installs tagboxes and getSlashConf loads this constant)
+	# my $tagbox = getCurrentStatic('tagbox');
+	# return undef if !$tagbox->{$tagbox_name};
+
+	# Note that getTagboxes() would call back to this new() function
+	# if the tagbox objects have not yet been created -- but the
+	# no_objects option prevents that.  See getTagboxes() for details.
+	my %self_hash = %{ getObject('Slash::Tagbox')->getTagboxes($tagbox_name, undef, { no_objects => 1 }) };
+	my $self = \%self_hash;
+	return undef if !$self || !keys %$self;
+
+	bless($self, $class);
+	$self->{virtual_user} = $user;
+	$self->sqlConnect();
+
+	return $self;
+}
+
+sub feed_newtags {
+	my($self, $tags_ar) = @_;
+	my $constants = getCurrentStatic();
+if (scalar(@$tags_ar) < 4) {
+print STDERR "Slash::Tagbox::Top->feed_newtags called for tags '" . join(' ', map { $_->{tagid} } @$tags_ar) . "'\n";
+} else {
+print STDERR "Slash::Tagbox::Top->feed_newtags called for " . scalar(@$tags_ar) . " tags " . $tags_ar->[0]{tagid} . " ... " . $tags_ar->[-1]{tagid} . "\n";
+}
+
+	my $ret_ar = [ ];
+	for my $tag_hr (@$tags_ar) {
+		# These two values are the same whether this is "really"
+		# newtags or deactivatedtags.
+		my $ret_hr = {
+			affected_id =>	$tag_hr->{globjid},
+			importance =>	1,
+		};
+		# We identify this little chunk of importance by either
+		# tagid or tdid depending on whether the source data had
+		# the tdid field (which tells us whether feed_newtags was
+		# "really" called via feed_deactivatedtags).
+		if ($tag_hr->{tdid})	{ $ret_hr->{tdid}  = $tag_hr->{tdid}  }
+		else			{ $ret_hr->{tagid} = $tag_hr->{tagid} }
+		push @$ret_ar, $ret_hr;
+	}
+
+	return $ret_ar;
+}
+
+sub feed_deactivatedtags {
+	my($self, $tags_ar) = @_;
+print STDERR "Slash::Tagbox::Top->feed_deactivatedtags called: tags_ar='" . join(' ', map { $_->{tagid} } @$tags_ar) .  "'\n";
+	my $ret_ar = $self->feed_newtags($tags_ar);
+print STDERR "Slash::Tagbox::Top->feed_deactivatedtags returning " . scalar(@$ret_ar) . "\n";
+	return $ret_ar;
+}
+
+sub feed_userchanges {
+	my($self, $users_ar) = @_;
+	my $constants = getCurrentStatic();
+	my $tagsdb = getObject('Slash::Tags');
+print STDERR "Slash::Tagbox::Top->feed_userchanges called: users_ar='" . join(' ', map { $_->{tuid} } @$users_ar) .  "'\n";
+
+	my %uid_change_sum = ( );
+	my %globj_change = ( );
+	for my $hr (@$users_ar) {
+		next unless $hr->{user_key} eq 'tag_clout';
+		$globj_change{$hr->{globjid}}{max_tuid} ||= $hr->{tuid};
+		$globj_change{$hr->{globjid}}{max_tuid} = $hr->{tuid}
+			if $globj_change{$hr->{globjid}}{max_tuid} < $hr->{tuid};
+		$uid_change_sum{$hr->{uid}} ||= 0;
+		$uid_change_sum{$hr->{uid}} += abs(($hr->{value_old} || 1) - $hr->{value_new});
+	}
+	for my $uid (keys %uid_change_sum) {
+		my $tags_ar = $tagsdb->getAllTagsFromUser($uid);
+		for my $tag_hr (@$tags_ar) {
+			$globj_change{$tag_hr->{globj}}{sum} ||= 0;
+			$globj_change{$tag_hr->{globj}}{sum} += $uid_change_sum{$uid};
+		}
+	}
+	my $ret_ar = [ ];
+	for my $globjid (sort { $a <=> $b } keys %globj_change) {
+		push @$ret_ar, {
+			tuid =>		$globj_change{$globjid}{max_tuid},
+			affected_id =>	$globjid,
+			importance =>	$globj_change{$globjid}{sum},
+		};
+	}
+
+print STDERR "Slash::Tagbox::Top->feed_userchanges returning " . scalar(@$ret_ar) . "\n";
+	return $ret_ar;
+}
+
+sub run {
+	my($self, $affected_id) = @_;
+	my $constants = getCurrentStatic();
+	my $tagsdb = getObject('Slash::Tags');
+	my $tags_reader = getObject('Slash::Tags', { db_type => 'reader' });
+	my $tagboxdb = getObject('Slash::Tagbox');
+
+	my($type, $target_id) = $tagsdb->getGlobjTarget($affected_id);
+	return unless $type eq 'stories' || $type eq 'urls';
+
+	# Get the list of tags applied to this object.  If we're doing
+	# URL popularity, that's only the tags within the past few days.
+	# For stories, it's all tags.
+
+	my $options = { };
+	if ($type eq 'urls') {
+		my $days_back = $constants->{bookmark_popular_days} || 3;
+		$options->{days_back} = $days_back;
+	}
+	my $tag_ar = $tagsdb->getTagsByGlobjid($affected_id, $options);
+	$tagsdb->addCloutsToTagArrayref($tag_ar);
+print STDERR "Slash::Tagbox::Top->run called for $affected_id, " . scalar(@$tag_ar) . " tags\n";
+
+	# Now set the data accordingly.  For a story, set the
+	# tags_top field to the space-separated list of the
+	# top 5 scoring tags.
+
+	if ($type eq 'stories') {
+
+		# Using the total_clout calculated in addCloutsToTagArrayref(),
+		# and counting opposite tags against ordinary tags, calculate
+		# %scores, the hash of tagnames and their scores.  Note that
+		# due to the presence of opposite tags, there may be many
+		# entries in %scores with negative values.
+
+		my %scores = ( );
+		for my $tag (@$tag_ar) {
+			$scores{$tag->{tagname}} += $tag->{total_clout};
+		}
+
+		my @opposite_tagnames =
+			map { $tags_reader->getOppositeTagname($_) }
+			grep { $_ !~ /^!/ && $scores{$_} > 0 }
+			keys %scores;
+		for my $opp (@opposite_tagnames) {
+			next unless $scores{$opp};
+			# Both $opp and its opposite exist in %scores.  Subtract
+			# $opp's score from its opposite and vice versa.
+			my $orig = $tags_reader->getOppositeTagname($opp);
+			my $orig_score = $scores{$orig};
+			$scores{$orig} -= $scores{$opp};
+			$scores{$opp} -= $orig_score;
+		}
+
+		my @top = sort {
+			$scores{$b} <=> $scores{$a}
+			||
+			$a cmp $b
+		} keys %scores;
+
+		# Eliminate tagnames below the minimum score required, and
+		# those that didn't make it to the top 5
+		# XXX the "5" is hardcoded currently, should be a var
+		my $minscore = $constants->{"tagbox_top_minscore_stories"};
+		@top = grep { $scores{$_} >= $minscore } @top;
+		$#top = 4 if $#top > 4;
+
+		$self->setStory($target_id, { tags_top => join(' ', @top) });
+print STDERR "Slash::Tagbox::Top->run $affected_id with " . scalar(@$tag_ar) . " tags, setStory $target_id to '@top'\n";
+
+	} elsif ($type eq 'urls') {
+
+		# For a URL, calculate a numeric popularity score based
+		# on (most of) its tags and store that in the popularity
+		# field.
+
+		my %tags_pos = map { $_, 1 } split(/\|/, $constants->{tagbox_top_urls_tags_pos} || "");
+		my %tags_neg = map { $_, 1 } split(/\|/, $constants->{tagbox_top_urls_tags_neg} || "");
+
+		my $pop = 0;
+		for my $tag (@$tag_ar) {
+			my $tagname = $tag->{tagname};
+			my $is_pos = $tags_pos{$tagname};
+			my $is_neg = $tags_neg{$tagname};
+			my $mult = 1;
+			$mult =  1.5 if $is_pos && !$is_neg;
+			$mult = -1.0 if $is_neg && !$is_pos;
+			$mult =  0   if $is_pos &&  $is_neg;
+			$pop += $mult * $tag->{total_clout};
+		}
+
+		$self->setUrl($target_id, { popularity => $pop });
+
+	}
+
+}
+
+1;
+
Index: slashjp/tagboxes/Top/mysql_dump.sql
diff -u /dev/null slashjp/tagboxes/Top/mysql_dump.sql:1.1
--- /dev/null	Wed Jul 12 20:42:08 2006
+++ slashjp/tagboxes/Top/mysql_dump.sql	Wed Jul 12 20:42:08 2006
@@ -0,0 +1,7 @@
+# $Id: mysql_dump.sql,v 1.1 2006/07/12 11:42:08 sugi Exp $
+INSERT INTO tagboxes (tbid, name, affected_type, weight, last_run_completed, last_tagid_logged, last_tdid_logged, last_tuid_logged) VALUES (NULL, 'Top', 'globj', 1, '2000-01-01 00:00:00', 0, 0, 0);
+INSERT INTO tagbox_userkeyregexes VALUES ('Top', '^tag_clout$');
+
+INSERT IGNORE INTO vars (name, value, description) VALUES ('tagbox_top_minscore_stories', '2', 'Minimum score a tag must have to make it into the top tags for a story');
+INSERT IGNORE INTO vars (name, value, description) VALUES ('tagbox_top_minscore_urls', '2', 'Minimum score a tag must have to make it into the top tags for a URL');
+


Slashdotjp-dev メーリングリストの案内
Back to archive index