@@ -35,7 +35,7 @@ class WormbaseParser extends Bio2RDFizer {
3535 function __construct ($ argv ) {
3636 parent ::__construct ($ argv , "wormbase " );
3737 parent ::addParameter ('files ' , true , 'all|geneIDs|functional_descriptions|gene_associations|gene_interactions|phenotype_associations ' ,'all ' ,'files to process ' );
38- parent ::addParameter ('release ' , false , null , 'WS243 ' , 'Release version of WormBase ' );
38+ parent ::addParameter ('release ' , false , null , 'current ' , 'Release version of WormBase ' );
3939 parent ::addParameter ('download_url ' , false , null ,'ftp://ftp.wormbase.org/pub/wormbase/ ' );
4040 parent ::initialize ();
4141 }//constructor
@@ -49,20 +49,21 @@ public function run()
4949 $ files = explode (", " ,parent ::getParameterValue ('files ' ));
5050 }
5151 $ release = parent ::getParameterValue ('release ' );
52+ $ releaseb = "WS247 " ;
5253 $ remote_files = array (
53- "geneIDs " => "species/c_elegans/annotation/geneIDs/c_elegans.PRJNA13758. " .parent :: getParameterValue ( ' release ' ) .".geneIDs.txt.gz " ,
54- "functional_descriptions " => "species/c_elegans/annotation/functional_descriptions/c_elegans.PRJNA13758. " .parent :: getParameterValue ( ' release ' ) .".functional_descriptions.txt.gz " ,
55- "gene_interactions " => "species/c_elegans/annotation/gene_interactions/c_elegans.PRJNA13758. " .parent :: getParameterValue ( ' release ' ) .".gene_interactions.txt.gz " ,
56- "gene_associations " => "releases/ " . $ release. " /ONTOLOGY/gene_association. " .parent :: getParameterValue ( ' release ' ) .".wb " ,
57- "phenotype_associations " => "releases/ " . $ release. " /ONTOLOGY/phenotype_association. " .parent :: getParameterValue ( ' release ' ) .".wb "
54+ "geneIDs " => "species/c_elegans/annotation/geneIDs/c_elegans.PRJNA13758. " .$ release .".geneIDs.txt.gz " ,
55+ "functional_descriptions " => "species/c_elegans/annotation/functional_descriptions/c_elegans.PRJNA13758. " .$ release .".functional_descriptions.txt.gz " ,
56+ "gene_interactions " => "species/c_elegans/annotation/gene_interactions/c_elegans.PRJNA13758. " .$ release .".gene_interactions.txt.gz " ,
57+ "gene_associations " => "releases/current-production- release/ONTOLOGY/gene_association. " .$ releaseb .".wb " ,
58+ "phenotype_associations " => "releases/current-production- release/ONTOLOGY/phenotype_association. " .$ releaseb .".wb "
5859 );
5960
6061 $ local_files = array (
6162 "geneIDs " => "wormbase. " .parent ::getParameterValue ('release ' ).".genes.txt.gz " ,
6263 "functional_descriptions " => "wormbase. " .parent ::getParameterValue ('release ' ).".functional_descriptions.txt.gz " ,
6364 "gene_interactions " => "wormbase. " .parent ::getParameterValue ('release ' ).".gene_interactions.txt.gz " ,
6465 "gene_associations " => "wormbase. " .parent ::getParameterValue ('release ' ).".gene_association.wb " ,
65- "phenotype_associations " => "wormbase. " .parent ::getParameterValue ('release ' )."phenotype_associations.wb "
66+ "phenotype_associations " => "wormbase. " .parent ::getParameterValue ('release ' ).". phenotype_associations.wb "
6667 );
6768
6869 $ idir = parent ::getParameterValue ('indir ' );
@@ -84,7 +85,6 @@ public function run()
8485 Utils::DownloadSingle ($ rfile , $ lfile );
8586 echo "done! " .PHP_EOL ;
8687 }
87-
8888 if (strstr ($ lfile , "gz " )){
8989 parent ::setReadFile ($ lfile , TRUE );
9090 } else {
@@ -190,10 +190,11 @@ function functional_descriptions()
190190 {
191191 while ($ l = $ this ->getReadFile ()->read (2000000 )){
192192 if ($ l [0 ] == "# " ) continue ;
193- // gene_id public_name molecular_name concise_description provisional_description detailed_description gene_class_description
193+ if ( strstr ( $ l , " gene_id " )) continue ;
194194
195- $ a = explode ("\t" ,rtrim ($ l ));
196- if (count ($ a ) != 7 ) {trigger_error ("Found one row that only has " .count ($ a )." columns, expecting 7 " );continue ;}
195+ // gene_id public_name molecular_name concise_description provisional_description detailed_description automated_description gene_class_description
196+ $ a = explode ("\t" ,$ l );
197+ if (count ($ a ) != 8 ) {trigger_error ("Found one row that only has " .count ($ a )." columns, expecting 8 " ,E_USER_ERROR );continue ;}
197198
198199 $ id = parent ::getNamespace ().$ a [0 ];
199200 $ label = $ a [1 ].($ a [2 ]?" ( " .$ a [2 ].") " :"" );
@@ -204,7 +205,8 @@ function functional_descriptions()
204205 parent ::triplifyString ($ id , parent ::getVoc ()."concise-description " , $ a [3 ]).
205206 parent ::triplifyString ($ id , parent ::getVoc ()."provisional-description " , $ a [4 ]).
206207 parent ::triplifyString ($ id , parent ::getVoc ()."detailed-description " , $ a [5 ]).
207- parent ::triplifyString ($ id , parent ::getVoc ()."gene-class-description " , $ a [6 ])
208+ parent ::triplifyString ($ id , parent ::getVoc ()."automated-description " , $ a [6 ]).
209+ parent ::triplifyString ($ id , parent ::getVoc ()."gene-class-description " , trim ($ a [7 ]))
208210 );
209211 parent ::writeRDFBufferToWriteFile ();
210212 }
@@ -258,16 +260,17 @@ function gene_associations(){
258260 $ split_paper = explode (": " , $ paper );
259261 if ($ split_paper [0 ] == "PMID " ){
260262 $ paper_id = "pubmed: " .$ split_paper [1 ];
263+ parent ::addRDF (
264+ parent ::triplify ($ association_id , parent ::getVoc ()."x-pubmed " , $ paper_id )
265+ );
261266 } elseif ($ split_paper [0 ] == "WB_REF " ){
262267 $ paper_id = parent ::getNamespace ().$ split_paper [1 ];
263268 $ paper_label = "Wormbase paper " .$ split_paper [1 ];
264269 parent ::addRDF (
265- parent ::describeIndividual ($ paper_id , $ paper_label , parent ::getVoc ()."Publication " )
270+ parent ::describeIndividual ($ paper_id , $ paper_label , parent ::getVoc ()."Publication " ).
271+ parent ::triplify ($ association_id , parent ::getVoc ()."publication " , $ paper_id )
266272 );
267273 }
268- parent ::addRDF (
269- parent ::triplify ($ association_id , parent ::getVoc ()."publication " , $ paper_id )
270- );
271274 }//foreach
272275 parent ::WriteRDFBufferToWriteFile ();
273276 }//while
@@ -308,18 +311,15 @@ function phenotype_associations()
308311
309312 if (strstr ($ data [7 ], "WBVar " )){
310313 foreach ($ variant AS $ v ) {
311- $ v = str_replace ("| " ,"" ,$ v );
312-
313314 if (trim ($ v ) == '' ) continue ;
314315 parent ::addRDF (
315- parent ::describeIndividual (parent :: getNamespace (). $ v , "Variant of " .$ gene , parent ::getVoc ()."Gene-Variant " ).
316+ parent ::describeIndividual ($ v , "Variant of " .$ gene , parent ::getVoc ()."Gene-Variant " ).
316317 parent ::describeClass (parent ::getVoc ()."Gene-Variant " ,"Gene Variant " ).
317- parent ::triplify ($ pa_id , parent ::getVoc ()."associated-gene-variant " , parent :: getNamespace (). $ v )
318+ parent ::triplify ($ pa_id , parent ::getVoc ()."associated-gene-variant " , $ v )
318319 );
319320 }
320321 } elseif (strstr ($ data [7 ], "WBRNAi " )){
321322 foreach ($ variant AS $ v ) {
322- $ v = str_replace ("| " ,"" ,$ v );
323323 $ var_rnai_id = $ v ;
324324 $ var_rnai_label = "RNAi " .$ v ;
325325 $ rnai_exp_id = parent ::getRes ().($ z ++);
@@ -333,7 +333,9 @@ function phenotype_associations()
333333 parent ::triplify ($ pa_id , parent ::getVoc ()."associated-rnai-knockdown-experiment " , $ rnai_exp_id )
334334 );
335335 }
336- }
336+ } else {
337+ // var_dump($variant);
338+ }
337339
338340 if ($ neg ) {
339341 parent ::addRDF (
0 commit comments