- The added line is THIS COLOR.
- The deleted line is THIS COLOR.
hatena_graph.tcl
package require http
http::config -useragent {tclbot (http://reddog.s35.xrea.com/wiki/Graphviz%A4%CB%A4%E8%A4%EB%A4%CF%A4%C6%A4%CA%A5%AD%A1%BC%A5%EF%A1%BC%A5%C9%B4%D6%A5%EA%A5%F3%A5%AF%A4%CE%B2%C4%BB%EB%B2%BD.html)}
set sleep 1000
array set arcs {}
array set data {}
set datafile "data.dat"
if {![catch {open $datafile r} fp]} {
array set data [read $fp]
close $fp
}
rename exit tcl_exit
proc exit {{n 0}} {
set fp [open $::datafile w]
foreach key [array names ::data] {
puts $fp "[list $key] [list $::data($key)]"
}
close $fp
tcl_exit $n
}
proc encode {str} {
binary scan [encoding convertto euc-jp $str] c* enc
set r {}
foreach c $enc {
append r [format {%%%x} [expr {$c & 0xff}]]
}
return $r
}
proc get_keywordlist {keyword} {
if {[array names ::data $keyword] ne {}} {
return $::data($keyword)
}
after $::sleep
set url "http://d.hatena.ne.jp/keywordword/[encode $keyword]#keywordword"
set token [::http::geturl $url]
set html [http::data $token]
set html [regexp -inline -- {<a name="keywordword">.*</a>} $html]
set q {<li>.*<a href=.*>(.*)</a></li>}
set r [list]
foreach {a b} [regexp -all -line -inline -- $q $html] {
if {$b ne $keyword} {lappend r $b}
}
set ::data($keyword) [lsort -uniq $r]
return $::data($keyword)
}
proc remove_to {klist} {
global arcs
set r {}
foreach key $klist {
if {[array names arcs $key] eq {}} {
lappend r $key
}
}
return $r
}
proc make_dot {} {
global arcs
puts "digraph \"Hatena Keywords\" {"
puts "graph \[overlap=true, splines=false\]"
puts "node \[fontname=\"msmincho.ttc\", fontsize=8, shape=\"box\"\, width=0, height=0];"
foreach {to from} [array get arcs] {
if {$from eq {}} continue
puts -nonewline "{"
foreach word $from {
puts -nonewline "\"$word\" "
}
puts -nonewline "}"
puts " -> \"$to\""
}
puts "}"
}
proc main {maxgen keylist} {
global arcs
set gen 0
set gen_eve $keylist
set gen_odd {}
for {set gen 0} {$gen < $maxgen} {incr gen} {
if {$gen % 2 == 0} {
set tolist $gen_eve
} else {
set tolist $gen_odd
}
set nextto {}
foreach to $tolist {
puts stderr "gen=$gen, to=$to, ref=$nextto"
set klist [get_keywordlist $to]
set arcs($to) $klist
set nextto [concat $nextto $klist]
}
set nextto [remove_to [lsort -uniq $nextto]]
if {$gen % 2 == 0} {
set gen_odd $nextto
} else {
set gen_eve $nextto
}
}
make_dot
}
main [lindex $argv 0] [lrange $argv 1 end]
exit
使い方
tclsh hatena_graph.tcl num keyword
numはリンクの深さで、keywordは調べる最初のキーワード。~
numは深すぎると全く終わらない。2がせいぜいだし、3にすると~
複雑すぎてdotがレンダリングするのが不可能なレベルに・・・。~
いくつかのキーワードはリンクを辿らないようにしたほうがいいような気がする。~
たとえば、リスト::hogeみたいなまとめただけのリストとか、年代とか。
----
[[CategoryTclTk]]
HTML convert time: 0.002 sec.