OUTPUT BUFFER:
#package require http #package require dom::tcl source ../src/ordali_web.tcl source ../src/ordali_services.tcl package require http package require tdom proc eSummaryREST_old {db Lid} { set url "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi" # eSummary with db=protein only accepts GI numbers # # proceed by 10 000 ids batches set Lres [list] set i 0 set lg [llength $Lid] while {$i < $lg} { set Ln [lrange $Lid $i $i+9999] set query [::http::formatQuery \ db $db \ id [join $Lid ,] \ retmax 10000] set ddb [HttpCopy $url "" $query] #set ddb [eSummaryProcessXML $ddb] incr i 10000 } return $ddb } set Lnm [list 256819652 228471929 188994130] #puts "[eSummaryREST protein $Lnm]" set Lnm [list XM_001241671.1 NC_008609.1] set Lnm NC_008609 set Lnm XM_001241671.1 set Lnm XM_001267187.1 proc handle_start {name attributes args} { puts "start $name" set ::CurrentTag $name };# handle_start proc handle_end {name args} { set ::CurrentTag {} };# handle_end proc ExtraitRNA {Lxml refseq prot} { set Rdoc [::dom::tcl::parse $Lxml] set rna "" ; set found 0 set pid "" set Lkey [$Rdoc selectNode //key] foreach Nk $Lkey { if {[$Nk selectNode string(.)] ne "CDS"} { continue } set nd $Nk ; set ns "" ; set is 0 while {[set ns [$nd nextSibling]] ne $nd} { puts " $is [$nd nodeName]" if {[$nd nodeName] eq "location"} { set loc [$nd selectNode string(.)] } if {[$nd nodeName] eq "quals"} { puts "break $nd" break } set nd $ns incr is } set nqf [$nd firstChild] puts "loc $loc | [$nqf nodeValue]" foreach nd [$nqf childNodes] { puts "[$nd nodeName] [$nd selectNode string(.)]" if {[$nd nodeName] eq "name" && [$nd selectNode string(.)] eq "protein_id"} { set nv [$nd nextSibling] set pid [$nv selectNode string(.)] break } } if {$pid ne "" && [regexp -nocase $prot $pid]} { set found 1 break } } if {$found} { set Nrna [$Rdoc selectNodes //sequence] if {$Nrna ne ""} { set seq [$Nrna selectNode string(.)] set rna [ExtractRNAFromSeqGivenLoc $seq $loc] } else { set rna "" } } $doc delete return $rna } #puts "rna [ExtraitRNA $xml XM_001241671.1 toto]" ## Parse the xml data... #xml parsechannel $fd #xml parse $xml ## Done! Free parser and close file... #close $fd #xml free proc parse {xml target} { puts "start parsing" set ::S {} set ::pt(inKey) 0 set ::pt(target) $target set ::pt(readV) 0 set ::pt(seq) "" set p [expat \ -elementstartcommand el \ -characterdatacommand ch \ -elementendcommand ee ] if [catch {$p parse $xml} res] { puts "Error: $res" } } #---- Callbacks for start, end, character data proc el {name atts} { lappend ::S $name ;# push if {$name eq "GBFeature_key"} {set ::pt(inKey) 1} } proc ee {name} { set ::S [lrange $::S 0 end-1] ;# pop if {$name eq "bar"} { puts $g(grill)=$g(baz) } } proc ch {str} { set type [lindex $::S end] if {! $::pt(inKey)} {return} switch $type { GBFeature_key { if {$str ne "CDS"} { set ::pt(inKey) 0 } } GBFeature_location { set ::pt(currLoc) $str } GBQualifier_name { if {$str eq "protein_id"} { set ::pt(readV) 1 } } GBQualifier_value { if {$::pt(readV)} { if {[regexp -nocase "$::pt(target)" $str]} { set ::pt(id) $str set ::pt(readV) 0 set ::pt(loc) $::pt(currLoc) } } } GBSeq_sequence { set ::pt(seq) $str } } } proc eFetchREST {db Lid what} { set url "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi" switch $db { "protein" { set rettype gp set retmode xml } "nucleotide" { set rettype fasta_cds_na set retmode text } } set query [::http::formatQuery \ db $db \ id [join $Lid ,] \ rettype $rettype \ retmode $retmode] set ddb [HttpCopy $url "" $query] return $ddb } proc eLinkREST {dbfrom dbto Lid {cmd ""}} { set url "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi" set Lres [list] set i 0 set lg [llength $Lid] while {$i < $lg} { set Ln [lrange $Lid $i $i+9999] if {$cmd ne ""} { set Tcmd cmd } else { set Tcmd "" } set query [::http::formatQuery \ dbfrom $dbfrom \ db $dbto \ id [join $Ln ,] \ $Tcmd $cmd] set xml [HttpCopy $url "" $query] puts "$query" puts "$xml" lappend Lres {*}[eLinkProcess $xml ngb] incr i 10000 } puts "Lres $Lres" set ret [list] if {$Lres != {} } { set aT [eSummaryREST $dbto $Lres] array set T $aT foreach {a v} [array get T "*,refseq"] { lappend ret $v } } return $ret } #set Ltx [list 313627 314230 699218] set nm 135102 #puts "[eSummaryREST protein $nm]" #exit set nm [list 257790368 226291546 291176984 150866285] set nm 135102 #puts "[eLinkREST protein protein $nm ncheck]" puts "" puts "[eLinkREST protein protein $nm neighbor_score]" exit #set nm XP_455053.1 set nm NC_001148.4 set nm XP_001241672 puts "[eFetchREST protein $nm cds]" exit set gi 169773541 set acc NC_012860.1 set res [eFetchREST nucleotide $acc] puts "$res" exit puts "Lnuc $Lnuc" exit set Lnm NC_001148.4 set Lnm xp_001241672 set xml [eFetchREST protein $Lnm] puts "$xml" parse $xml NP_015221.1 parray pt