Strabon
changeset 450:6eaca20a160a
runNoaRefinementChain.sh does not use wget any more. Given the URL of a page that lists hotspot URLs, it parses the HTML and gets the URLs of the hotspots. It then fetches them and executes the queries.
author | Babis Nikolaou <charnik@di.uoa.gr> |
---|---|
date | Wed Jul 18 17:18:58 2012 +0300 (2012-07-18) |
parents | 2d32cfc2db18 |
children | bc689789c911 ddd7614d334b |
files | scripts/v2.2/runNoaRefinementChain.sh |
line diff
1.1 --- a/scripts/v2.2/runNoaRefinementChain.sh Wed Jul 18 16:47:35 2012 +0300 1.2 +++ b/scripts/v2.2/runNoaRefinementChain.sh Wed Jul 18 17:18:58 2012 +0300 1.3 @@ -21,6 +21,8 @@ 1.4 name="HMSG2_IR_039_s7_" 1.5 suffix=".hotspots.nt" 1.6 1.7 +HOTSPOTS_URL="http://jose.di.uoa.gr/rdf/hotspots" 1.8 + 1.9 logFile="chain.log" 1.10 #countWTime="/usr/bin/time -p %e" 1.11 #echo > ${logFile} 1.12 @@ -80,7 +82,6 @@ 1.13 discover=`cat ${LOC}/discover.sparql` 1.14 #InsertMunicipalities =`cat ${LOC}/InsertMunicipalities.sparql` # | sed 's/\"/\\\"/g'` 1.15 1.16 - 1.17 # Initialize (stop tomcat, restart postgres, drop/create database, start tomcat) 1.18 chooseTomcat 1.19 echo "stopping tomcat" 1.20 @@ -114,8 +115,6 @@ 1.21 sudo service ${tomcat} start 1.22 fi 1.23 1.24 -exit 1.25 - 1.26 echo "initializing database" 1.27 echo "IM S D R TP" >stderr.txt 1.28 1.29 @@ -127,144 +126,131 @@ 1.30 #echo "Continue?" 1.31 #read a 1.32 1.33 +#for y in 2008; do 1.34 +for y in 2007 2008 2010 2011 ;do 1.35 + # get hotpost URLS 1.36 + for hot in $(curl -s ${HOTSPOTS_URL}/${y}/ | grep -o '>HMSG2.*\.nt' | colrm 1 1); do 1.37 + file="${HOTSPOTS_URL}/${y}/${hot}" 1.38 1.39 + # get time information for acquisition 1.40 + year=${y} 1.41 + month=$(expr substr ${hot} 19 2) 1.42 + day=$(expr substr ${hot} 21 2) 1.43 + time2=$(expr substr ${hot} 24 2) 1.44 + time2="${time2}:$(expr substr ${hot} 26 2)" 1.45 1.46 -for y in 7 8 10 11 ;do 1.47 - for mon in `seq 4 10`; do 1.48 - for d in `seq 1 30`; do 1.49 - for h in `seq 0 23 `; do 1.50 - for m in `seq 0 15 45`; do 1.51 - time=`printf "%02d%02d\n" $h $m` 1.52 - time2=`printf "%02d:%02d\n" $h $m` 1.53 - day=`printf "%02d" $d` 1.54 - month=`printf "%02d" $mon` 1.55 - year=`printf "%02d" $y` 1.56 - file=${dataDir}${year}/${name}${year}${month}${day}_${time}$suffix 1.57 - # file=${dataUrl}${name}_${time}$suffix 1.58 + # store file 1.59 + echo -n "storing " $file; echo; echo; 1.60 + # ${countTime} ./strabon -db endpoint store $file 1.61 1.62 - 
check=${dataDir}${year}/${name}${year}${month}${day}_${time}$suffix 1.63 - wget -q --spider $check 1.64 - 1.65 - if [[ $? -ne 0 ]]; 1.66 - then echo "FILE" $check "NOT EXISTS" ; continue 1.67 - fi 1.68 + tmr1=$(timer) 1.69 + ../endpoint store ${ENDPOINT} N-Triples -u ${file} 1.70 + tmr2=$(timer) 1.71 + printf '%s ' $((tmr2-tmr1)) >> stderr.txt 1.72 1.73 + # sudo -u postgres psql -d endpoint -c 'VACUUM ANALYZE;'; 1.74 1.75 - # store file 1.76 - echo -n "storing " $file; echo; echo; 1.77 - # echo "Hotspot : " $h:$m >> stderr.txt 1.78 - # ${countTime} ./strabon -db endpoint store $file 1.79 + echo;echo;echo;echo "File ${file} stored!" >> ${logFile} 1.80 1.81 - tmr1=$(timer) 1.82 - ../endpoint store ${ENDPOINT} N-Triples -u ${file} 1.83 - tmr2=$(timer) 1.84 - printf '%s ' $((tmr2-tmr1)) >>stderr.txt 1.85 + # insertMunicipalities 1.86 + echo -n "inserting Municipalities " ;echo; echo; echo; 1.87 + # query=`echo "${insertMunicipalities}" ` 1.88 + # ${countTime} ./strabon -db endpoint update "${query}" 1.89 1.90 - # sudo -u postgres psql -d endpoint -c 'VACUUM ANALYZE;'; 1.91 + tmr1=$(timer) 1.92 1.93 - echo;echo;echo;echo "File ${file} stored!" >> ${logFile} 1.94 + query=`echo "${insertMunicipalities}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \ 1.95 + sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \ 1.96 + sed "s/SENSOR/MSG2/g"` 1.97 1.98 - # insertMunicipalities 1.99 - echo -n "inserting Municipalities " ;echo; echo; echo; 1.100 - # query=`echo "${insertMunicipalities}" ` 1.101 - # ${countTime} ./strabon -db endpoint update "${query}" 1.102 + ../endpoint update ${ENDPOINT} "${query}" 1.103 + 1.104 + tmr2=$(timer) 1.105 1.106 - tmr1=$(timer) 1.107 + echo;echo;echo;echo "File ${file} inserted Municipalities!" 
1.108 + 1.109 + # deleteSeaHotspots 1.110 + echo -n "Going to deleteSeaHotspots 20${year}-${month}-${day}T${time2}:00 " ;echo; echo; echo; 1.111 + query=`echo "${deleteSeaHotspots}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \ 1.112 + sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \ 1.113 + sed "s/SENSOR/MSG2/g"` 1.114 + # ${countTime} ./strabon -db endpoint update "${query}" 1.115 1.116 - query=`echo "${insertMunicipalities}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \ 1.117 - sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \ 1.118 - sed "s/SENSOR/MSG2/g"` 1.119 + tmr1=$(timer) 1.120 + ../endpoint update ${ENDPOINT} "${query}" 1.121 1.122 - ../endpoint update ${ENDPOINT} "${query}" 1.123 - 1.124 - tmr2=$(timer) 1.125 + tmr2=$(timer) 1.126 + printf '%s ' $((tmr2-tmr1)) >>stderr.txt 1.127 + echo;echo;echo;echo "File ${file} deleteSeaHotspots done!" 1.128 1.129 - echo;echo;echo;echo "File ${file} inserted Municipalities!" 1.130 - 1.131 - # deleteSeaHotspots 1.132 - echo -n "Going to deleteSeaHotspots 20${year}-${month}-${day}T${time2}:00 " ;echo; echo; echo; 1.133 - query=`echo "${deleteSeaHotspots}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \ 1.134 - sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \ 1.135 - sed "s/SENSOR/MSG2/g"` 1.136 - # ${countTime} ./strabon -db endpoint update "${query}" 1.137 + # echo "Continue?" 
1.138 + # read a 1.139 + # invalidForFires 1.140 + echo -n "invalidForFires 20${year}-${month}-${day}T${time2}:00 " ; echo; echo ; echo; 1.141 + query=`echo "${invalidForFires}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \ 1.142 + sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \ 1.143 + sed "s/SENSOR/MSG2/g" |\ 1.144 + sed "s/SAT/METEOSAT9/g"` 1.145 + # ${countTime} ./strabon -db endpoint update "${query}" 1.146 + tmr1=$(timer) 1.147 + ../endpoint update ${ENDPOINT} "${query}" 1.148 + tmr2=$(timer) 1.149 + printf '%s ' $((tmr2-tmr1)) >>stderr.txt 1.150 + echo "File ${file} invalidForFires done!" 1.151 + 1.152 + # refinePartialSeaHotspots 1.153 + echo -n "refinePartialSeaHotspots 20${year}-${month}-${day}T${time2}:00 " ; echo; echo ; echo; 1.154 + query=`echo "${refinePartialSeaHotspots}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \ 1.155 + sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \ 1.156 + sed "s/SENSOR/MSG2/g" |\ 1.157 + sed "s/SAT/METEOSAT9/g"` 1.158 + # ${countTime} ./strabon -db endpoint update "${query}" 1.159 + tmr1=$(timer) 1.160 + ../endpoint update ${ENDPOINT} "${query}" 1.161 + tmr2=$(timer) 1.162 + printf '%s ' $((tmr2-tmr1)) >>stderr.txt 1.163 1.164 - tmr1=$(timer) 1.165 - ../endpoint update ${ENDPOINT} "${query}" 1.166 + echo "File ${file} refinePartialSeaHotspots done!" 1.167 + # echo "Continue?" 1.168 + # read a 1.169 1.170 - tmr2=$(timer) 1.171 - printf '%s ' $((tmr2-tmr1)) >>stderr.txt 1.172 - echo;echo;echo;echo "File ${file} deleteSeaHotspots done!" 
1.173 + # refineTimePersistence 1.174 + echo -n "Going to refineTimePersistence 20${year}-${month}-${day}T${time2}:00 ";echo;echo;echo; 1.175 + min_acquisition_time=`date --date="20${year}-${month}-${day} ${time2}:00 EEST -30 minutes" +%Y-%m-%dT%H:%m:00` 1.176 + query=`echo "${refineTimePersistence}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \ 1.177 + sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \ 1.178 + sed "s/SENSOR/MSG2/g" | \ 1.179 + sed "s/ACQUISITIONS_IN_HALF_AN_HOUR/3.0/g" | \ 1.180 + sed "s/MIN_ACQUISITION_TIME/${min_acquisition_time}/g" |\ 1.181 + sed "s/SAT/METEOSAT9/g"` 1.182 1.183 - # echo "Continue?" 1.184 - # read a 1.185 - # invalidForFires 1.186 - echo -n "invalidForFires 20${year}-${month}-${day}T${time2}:00 " ; echo; echo ; echo; 1.187 - query=`echo "${invalidForFires}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \ 1.188 - sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \ 1.189 - sed "s/SENSOR/MSG2/g" |\ 1.190 - sed "s/SAT/METEOSAT9/g"` 1.191 - # ${countTime} ./strabon -db endpoint update "${query}" 1.192 - tmr1=$(timer) 1.193 - ../endpoint update ${ENDPOINT} "${query}" 1.194 - tmr2=$(timer) 1.195 - printf '%s ' $((tmr2-tmr1)) >>stderr.txt 1.196 - echo "File ${file} invalidForFires done!" 1.197 - 1.198 - # refinePartialSeaHotspots 1.199 - echo -n "refinePartialSeaHotspots 20${year}-${month}-${day}T${time2}:00 " ; echo; echo ; echo; 1.200 - query=`echo "${refinePartialSeaHotspots}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \ 1.201 - sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \ 1.202 - sed "s/SENSOR/MSG2/g" |\ 1.203 - sed "s/SAT/METEOSAT9/g"` 1.204 - # ${countTime} ./strabon -db endpoint update "${query}" 1.205 - tmr1=$(timer) 1.206 - ../endpoint update ${ENDPOINT} "${query}" 1.207 - tmr2=$(timer) 1.208 - printf '%s ' $((tmr2-tmr1)) >>stderr.txt 1.209 + #sudo -u postgres psql -d ${DB} -c 'VACUUM ANALYZE;'; 1.210 1.211 - echo "File ${file} refinePartialSeaHotspots done!" 
1.212 - # echo "Continue?" 1.213 - # read a 1.214 + tmr1=$(timer) 1.215 + ../endpoint update ${ENDPOINT} "${query}" 1.216 + tmr2=$(timer) 1.217 + printf '%s \n' $((tmr2-tmr1)) >>stderr.txt 1.218 + echo;echo;echo;echo "File ${file} timePersistence done!" 1.219 + # echo "Continue?" 1.220 + # read a 1.221 1.222 - # refineTimePersistence 1.223 - echo -n "Going to refineTimePersistence 20${year}-${month}-${day}T${time2}:00 ";echo;echo;echo; 1.224 - min_acquisition_time=`date --date="20${year}-${month}-${day} ${time2}:00 EEST -30 minutes" +%Y-%m-%dT%H:%m:00` 1.225 - query=`echo "${refineTimePersistence}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \ 1.226 - sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \ 1.227 - sed "s/SENSOR/MSG2/g" | \ 1.228 - sed "s/ACQUISITIONS_IN_HALF_AN_HOUR/3.0/g" | \ 1.229 - sed "s/MIN_ACQUISITION_TIME/${min_acquisition_time}/g" |\ 1.230 - sed "s/SAT/METEOSAT9/g"` 1.231 1.232 - #sudo -u postgres psql -d ${DB} -c 'VACUUM ANALYZE;'; 1.233 + # discover 1.234 + echo -n "Going to discover 20${year}-${month}-${day}T${time2}:00 ";echo;echo;echo; 1.235 + min_acquisition_time=`date --date="20${year}-${month}-${day} 00:00 EEST" +%Y-%m-%dT%H:%m:00` 1.236 + max_acquisition_time=`date --date="20${year}-${month}-${day} 23:59 EEST" +%Y-%m-%dT%H:%m:00` 1.237 + query=`echo "${discover}" | \ 1.238 + sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \ 1.239 + sed "s/SENSOR/MSG2/g" | \ 1.240 + sed "s/MIN_ACQUISITION_TIME/${min_acquisition_time}/g" |\ 1.241 + sed "s/MAX_ACQUISITION_TIME/${max_acquisition_time}/g"` 1.242 + 1.243 + tmr1=$(timer) 1.244 + ../endpoint query ${ENDPOINT} "${query}" 1.245 + tmr2=$(timer) 1.246 + printf '%s \n' $((tmr2-tmr1)) >>discover.txt 1.247 + echo;echo;echo;echo "Discovered hotspots done!" 1.248 1.249 - tmr1=$(timer) 1.250 - ../endpoint update ${ENDPOINT} "${query}" 1.251 - tmr2=$(timer) 1.252 - printf '%s \n' $((tmr2-tmr1)) >>stderr.txt 1.253 - echo;echo;echo;echo "File ${file} timePersistence done!" 
1.254 - # echo "Continue?" 1.255 - # read a 1.256 - done #minutes 1.257 - done #hours 1.258 + done 1.259 +done 1.260 1.261 - # discover 1.262 - echo -n "Going to discover 20${year}-${month}-${day}T${time2}:00 ";echo;echo;echo; 1.263 - min_acquisition_time=`date --date="20${year}-${month}-${day} 00:00 EEST" +%Y-%m-%dT%H:%m:00` 1.264 - max_acquisition_time=`date --date="20${year}-${month}-${day} 23:59 EEST" +%Y-%m-%dT%H:%m:00` 1.265 - query=`echo "${discover}" | \ 1.266 - sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \ 1.267 - sed "s/SENSOR/MSG2/g" | \ 1.268 - sed "s/MIN_ACQUISITION_TIME/${min_acquisition_time}/g" |\ 1.269 - sed "s/MAX_ACQUISITION_TIME/${max_acquisition_time}/g"` 1.270 - 1.271 - tmr1=$(timer) 1.272 - ../endpoint query ${ENDPOINT} "${query}" 1.273 - tmr2=$(timer) 1.274 - printf '%s \n' $((tmr2-tmr1)) >>discover.txt 1.275 - echo;echo;echo;echo "Discovered hotspots done!" 1.276 - 1.277 - done #days 1.278 - done #months 1.279 -done #years 1.280 -