Skip to content

Commit 8d24b71

Browse files
Merge branch 'release3' of github.com:micheldumontier/bio2rdf-scripts into release3
2 parents 0877984 + dba6eee commit 8d24b71

File tree

4 files changed

+52 additions, -33 deletions (lines changed)

4 files changed

+52 additions, -33 deletions (lines changed)

clinicaltrials/clinicaltrials.php

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -233,12 +233,11 @@ function parse_dir(){
233233
**/
234234
function process_file($infile) {
235235
$indir = parent::getParameterValue('indir');
236-
$xml = new CXML($indir,basename($infile));
236+
$xml = new CXML($infile);
237237
$this->setCheckPoint('file');
238238
while($xml->Parse("clinical_study") == TRUE) {
239239
$this->setCheckPoint('record');
240240
$this->root = $root = $xml->GetXMLRoot();
241-
242241
$this->nct_id = $nct_id = $this->getString("//id_info/nct_id");
243242
$this->study_id = $study_id = parent::getNamespace()."$nct_id";
244243

@@ -759,7 +758,7 @@ function process_file($infile) {
759758
parent::triplifyString($location_uri,parent::getVoc()."status", $this->getString('//status',$location)).
760759
parent::triplify($study_id,parent::getVoc()."location",$location_uri).
761760
parent::triplify($location_uri, parent::getVoc()."address", $this->makeAddress($address)).
762-
parent::triplify($location_uri, parent::getVoc()."contact", $this->makeContact($contact))
761+
($contact != null?parent::triplify($location_uri, parent::getVoc()."contact", $this->makeContact($contact)):"")
763762
);
764763
if($backups) {
765764
foreach($backups AS $backup) {
@@ -831,11 +830,13 @@ function process_file($infile) {
831830
try{
832831
$links = $root->xpath('//link');
833832
foreach($links AS $i => $link) {
834-
$lid = parent::getRes().md5($this->getString('./url',$link));
833+
$url = $this->getString('./url',$link);
834+
$url = preg_replace("/>.*$/","",$url);
835+
$lid = parent::getRes().md5($url);
835836
parent::addRDF(
836837
parent::describeIndividual($lid, $this->getString('./description',$link), parent::getVoc()."Link").
837838
parent::describeClass(parent::getVoc()."Link","Link").
838-
parent::triplify($lid,parent::getVoc()."url",preg_replace("/>$/","",$this->getString('./url',$link))).
839+
parent::triplify($lid,parent::getVoc()."url",$url).
839840
parent::triplify($study_id,parent::getVoc()."link",$lid)
840841
);
841842
}
@@ -1166,7 +1167,7 @@ function process_file($infile) {
11661167
if(!$et) continue;
11671168
$ev_uri = parent::getVoc().str_replace(" ","-",$ev_label);
11681169

1169-
$categories = array_shift($et->xpath('./category_list'));
1170+
$categories = @array_shift($et->xpath('./category_list'));
11701171
foreach($categories AS $category) {
11711172
$major_title = $this->getString('./title', $category);
11721173
$major_title_uri = parent::getRes().md5($major_title);
@@ -1272,8 +1273,8 @@ public function getDatetimeFromDate($date)
12721273

12731274
public function makeContact($contact)
12741275
{
1275-
if($contact == null) return null;
1276-
$contact_uri = parent::getRes().md5($contact->asXML());
1276+
if($contact == null) return '';
1277+
$contact_uri = parent::getRes().md5($contact->asXML());
12771278
$contact_type_uri = parent::getVoc()."Contact";
12781279
$contact_label = trim($this->getString('//first_name',$contact)." ".$this->getString('//last_name', $contact));
12791280
parent::addRDF(

drugbank/drugbank.php

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -721,11 +721,11 @@ function AddCategory(&$x, $id, $list_name, $item_name, $predicate, $list_item_na
721721
if(isset($item->$item_name) && ($item->$item_name != '')) {
722722
$l = $item->$item_name;
723723
$att = ($l->attributes());
724-
foreach($l AS $item_value) {
724+
foreach($l AS $key => $item_value) {
725725
$kid = parent::getvoc().md5($item_value);
726726
$this->addRDF(
727-
$this->describeIndividual($kid,$item_value,parent::getVoc().ucfirst($item_name)).
728-
$this->describeClass(parent::getVoc().ucfirst($item_name),ucfirst($item_name)).
727+
$this->describeIndividual($kid,"".$item_value,parent::getVoc().ucfirst($item_name)).
728+
$this->describeClass(parent::getVoc().ucfirst($item_name),ucfirst("".$item_name)).
729729
$this->triplify($id,$predicate,$kid)
730730
);
731731
foreach($att AS $ka => $va) {
@@ -734,6 +734,15 @@ function AddCategory(&$x, $id, $list_name, $item_name, $predicate, $list_item_na
734734
);
735735
}
736736
}
737+
$kid = parent::getvoc().md5($l->asXML());
738+
foreach($l->children() AS $k2 => $v2) {
739+
$this->addRDF(
740+
$this->describeIndividual($kid,($k2=="name"?$v2:$predicate),parent::getVoc().ucfirst($k2)).
741+
$this->describeClass(parent::getVoc().ucfirst($k2),ucfirst("".$v2)).
742+
$this->triplifyString($kid, parent::getVoc().$k2, $v2).
743+
$this->triplify($id, $predicate, $kid)
744+
);
745+
}
737746
}
738747
}
739748
}

mgi/mgi.php

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -403,7 +403,7 @@ function MGI_Geno_Disease()
403403
if(!$allele) {echo "ignoring ".$a[0].PHP_EOL;continue;}
404404

405405
$alleles = explode("|",strtolower($a[2]));
406-
406+
$genotype = $a[0];
407407
$diseases = explode(",",$a[7]);
408408
foreach($diseases AS $d) {
409409
$disease = "omim:$d";
@@ -413,6 +413,7 @@ function MGI_Geno_Disease()
413413
parent::addRDF(
414414
parent::describeIndividual($id, $label, $this->getVoc()."Allele-Disease-Association").
415415
parent::describeClass($this->getVoc()."Allele-Disease-Association","MGI Allele-Disease Association").
416+
parent::triplifyString($id,$this->getVoc()."genotype-string",$genotype).
416417
parent::triplify($id,$this->getVoc()."allele",$allele).
417418
parent::triplify($id,$this->getVoc()."disease",$disease)
418419
);
@@ -450,6 +451,7 @@ function MGI_Geno_NotDisease()
450451
continue;
451452
}
452453

454+
$genotype = $a[0];
453455
$alleles = explode("|",strtolower($a[2]));
454456
$diseases = explode(",",$a[7]);
455457
foreach($diseases AS $d) {
@@ -462,6 +464,7 @@ function MGI_Geno_NotDisease()
462464
parent::describeIndividual($id, $label, $this->getVoc()."Allele-Disease-Non-Association").
463465
parent::describeClass($this->getVoc()."Allele-Disease-Non-Association","MGI Allele-Disease Non-Association").
464466
parent::triplify($id,$this->getVoc()."allele",$allele).
467+
parent::triplifyString($id,$this->getVoc()."genotype-string",$genotype).
465468
parent::triplify($id,$this->getVoc()."disease",$disease).
466469
parent::triplifyString($id,$this->getVoc()."is-negated","true")
467470
);

pharmgkb/pharmgkb.php

Lines changed: 27 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -357,6 +357,7 @@ function genes()
357357

358358
$url = false;
359359
$x = $this->MapXrefs($xref, $url, $ns, $id2);
360+
$ns = str_replace(' ','',$ns);
360361
if($url == true) {
361362
parent::addRDF(
362363
parent::QQuadO_URL($id, parent::getVoc()."x-$ns", $x)
@@ -399,15 +400,15 @@ function MapXrefs($xref, &$url = false, &$ns = null, &$id = null)
399400
"ucscgenomebrowser" => "refseq",
400401
"uniprotkb" => "uniprot",
401402
'genecard'=>'genecards',
402-
'ucsc genome browser' => 'refseq',
403-
'refseq rna' => 'refseq',
404-
'refseq protein' => 'refseq',
405-
'refseq dna' => 'refseq',
406-
'comparative toxicogenomics database' => 'ctd',
407-
'humancyc gene' => 'humancyc'
403+
'ucscgenomebrowser' => 'refseq',
404+
'refseqrna' => 'refseq',
405+
'refseqprotein' => 'refseq',
406+
'refseqdna' => 'refseq',
407+
'comparativetoxicogenomicsdatabase' => 'ctd',
408+
'humancycgene' => 'humancyc'
408409
);
409410
$this->getRegistry()->ParseQName($xref,$ns,$id);
410-
$ns = str_replace('"','',$ns);
411+
$ns = str_replace(array('"',' '),'',$ns);
411412
if(isset($xrefs[$ns])) {
412413
$ns = $xrefs[$ns];
413414
}
@@ -503,10 +504,11 @@ function drugs()
503504
if(trim($a[6])) {
504505
// Cross References
505506
// drugBank:DB00789,keggDrug:D01707,pubChemCompound:55466,pubChemSubstance:192903,url:http://en.wikipedia.org/wiki/Gadopentetate_dimeglumine
506-
$b = explode(',',trim($a[6]));
507+
$b = explode(',',trim(str_replace('"','',$a[6])));
507508
foreach($b as $c) {
508509
$this->getRegistry()->parseQName($c,$ns,$id1);
509-
$ns = str_replace(array('keggcompound','keggdrug','drugbank','uniprotkb','clinicaltrials.gov','drugs product database (dpd)','national drug code directory','therapeutic targets database','fda drug label at dailymed'),
510+
$ns = str_replace(array('"',' '),'',$ns);
511+
$ns = str_replace(array('keggcompound','keggdrug','drugbank','uniprotkb','clinicaltrials.gov','drugsproductdatabase(dpd)','nationaldrugcodedirectory','therapeutictargetsdatabase','fdadruglabelatdailymed'),
510512
array('kegg','kegg','drugbank', 'uniprot','clinicaltrials','dpd','ndc','ttd','dailymed'),
511513
strtolower(str_replace('"','',$ns)));
512514
if($ns == "url") {
@@ -562,7 +564,7 @@ function diseases()
562564

563565
while($l = $this->GetReadFile()->Read(10000)) {
564566
$a = explode("\t",$l);
565-
567+
566568
$id = parent::getNamespace().$a[0];
567569
$label = str_replace("'", "\\\'", $a[1]);
568570

@@ -595,19 +597,23 @@ function diseases()
595597
parent::addRDF(
596598
parent::triplify($id, "owl:sameAs", $sameID)
597599
);
598-
if(isset($a[4]) && trim($a[4]) != '') {
599-
$d = preg_match_all('/[,]?([^\:]+):([A-Za-z0-9]+)\(([^\)]+)\)/',$a[4],$m, PREG_SET_ORDER);
600-
foreach($m AS $n) {
601-
if(isset($n[1]) && isset($n[2]) && !strstr($n[1]," ")) {
602-
$n[1] = str_replace("),","",strtolower($n[1]));
603-
$id2 = $n[1].':'.$n[2];
604-
parent::addRDF(
605-
parent::triplify($id, "pharmgkb_vocabulary:x-".$n[1], $id2)
606-
);
607-
if(isset($n[3]) && $n[2] != $n[3]){
600+
if(isset($a[4]) && trim($a[4]) != '') {
601+
$xrefs = explode('","', $a[4]);
602+
foreach($xrefs AS $xref) {
603+
$xref = str_replace('"','',$xref);
604+
$d = preg_match_all('/[,]?([^\:]+):([A-Za-z0-9]+)\(([^\)]+)\)/',$xref,$m, PREG_SET_ORDER);
605+
foreach($m AS $n) {
606+
if(isset($n[1]) && isset($n[2]) && !strstr($n[1]," ")) {
607+
$n[1] = str_replace("),","",strtolower($n[1]));
608+
$id2 = $n[1].':'.$n[2];
608609
parent::addRDF(
609-
parent::triplifyString($id2, "rdfs:label", str_replace(array("\'", "\""),array("\\\'", ""),$n[3]))
610+
parent::triplify($id, "pharmgkb_vocabulary:x-".$n[1], $id2)
610611
);
612+
if(isset($n[3]) && $n[2] != $n[3]){
613+
parent::addRDF(
614+
parent::triplifyString($id2, "rdfs:label", str_replace(array("\'", "\""),array("\\\'", ""),$n[3]))
615+
);
616+
}
611617
}
612618
}
613619
}

0 commit comments

Comments
 (0)