Strabon

changeset 450:6eaca20a160a

runNoaRefinementChain.sh does not use wget any more. Given the URL of a page containing hotspot URLs, it parses the HTML and gets the URLs of the hotspots. It then fetches them and executes the queries.
author Babis Nikolaou <charnik@di.uoa.gr>
date Wed Jul 18 17:18:58 2012 +0300 (2012-07-18)
parents 2d32cfc2db18
children bc689789c911 ddd7614d334b
files scripts/v2.2/runNoaRefinementChain.sh
line diff
     1.1 --- a/scripts/v2.2/runNoaRefinementChain.sh	Wed Jul 18 16:47:35 2012 +0300
     1.2 +++ b/scripts/v2.2/runNoaRefinementChain.sh	Wed Jul 18 17:18:58 2012 +0300
     1.3 @@ -21,6 +21,8 @@
     1.4  name="HMSG2_IR_039_s7_"
     1.5  suffix=".hotspots.nt"
     1.6  
     1.7 +HOTSPOTS_URL="http://jose.di.uoa.gr/rdf/hotspots"
     1.8 +
     1.9  logFile="chain.log"
    1.10  #countWTime="/usr/bin/time -p   %e"
    1.11  #echo > ${logFile}
    1.12 @@ -80,7 +82,6 @@
    1.13  discover=`cat ${LOC}/discover.sparql`
    1.14  #InsertMunicipalities =`cat ${LOC}/InsertMunicipalities.sparql` # | sed 's/\"/\\\"/g'`
    1.15  
    1.16 -
    1.17  # Initialize (stop tomcat, restart postgres, drop/create database, start tomcat)
    1.18  chooseTomcat
    1.19  echo "stopping tomcat"
    1.20 @@ -114,8 +115,6 @@
    1.21  	sudo service ${tomcat} start
    1.22  fi
    1.23  
    1.24 -exit
    1.25 -
    1.26  echo "initializing database"
    1.27  echo "IM S D R TP" >stderr.txt
    1.28  
    1.29 @@ -127,144 +126,131 @@
    1.30  #echo "Continue?"
    1.31  #read a
    1.32  
    1.33 +#for y in 2008; do
    1.34 +for y in 2007 2008 2010 2011 ;do
    1.35 +	# get hotpost URLS
    1.36 +	for hot in $(curl -s ${HOTSPOTS_URL}/${y}/ | grep -o '>HMSG2.*\.nt' | colrm 1 1); do
    1.37 +		file="${HOTSPOTS_URL}/${y}/${hot}"
    1.38  
    1.39 +		# get time information for acquisition
    1.40 +		year=${y}
    1.41 +		month=$(expr substr ${hot} 19 2)
    1.42 +		day=$(expr substr ${hot} 21 2)
    1.43 +		time2=$(expr substr ${hot} 24 2)
    1.44 +		time2="${time2}:$(expr substr ${hot} 26 2)"
    1.45  
    1.46 -for y in 7 8 10 11 ;do
    1.47 -	for mon in `seq 4 10`; do
    1.48 -		for d in `seq 1 30`; do
    1.49 -			for h in `seq 0 23 `; do
    1.50 -				for m in `seq 0 15 45`; do
    1.51 -					time=`printf "%02d%02d\n" $h $m`
    1.52 -					time2=`printf "%02d:%02d\n" $h $m`
    1.53 -					day=`printf "%02d" $d`
    1.54 -					month=`printf "%02d" $mon`
    1.55 -					year=`printf "%02d" $y`
    1.56 -					file=${dataDir}${year}/${name}${year}${month}${day}_${time}$suffix
    1.57 -					# file=${dataUrl}${name}_${time}$suffix
    1.58 +		# store file
    1.59 +		echo -n "storing " $file; echo; echo; 
    1.60 +		# ${countTime} ./strabon -db endpoint store $file
    1.61  
    1.62 -					check=${dataDir}${year}/${name}${year}${month}${day}_${time}$suffix
    1.63 -					wget -q --spider $check
    1.64 -				
    1.65 -					if [[   $? -ne 0 ]];
    1.66 -					then echo "FILE" $check "NOT EXISTS" ; continue
    1.67 -					fi
    1.68 +		tmr1=$(timer)
    1.69 +		../endpoint store ${ENDPOINT} N-Triples -u ${file}
    1.70 +		tmr2=$(timer)
    1.71 +		printf '%s ' $((tmr2-tmr1)) >> stderr.txt
    1.72  
    1.73 +		# sudo -u postgres psql -d endpoint -c 'VACUUM ANALYZE;';
    1.74  
    1.75 -				   	# store file
    1.76 -					echo -n "storing " $file; echo; echo; 
    1.77 -					# echo "Hotspot : " $h:$m >> stderr.txt
    1.78 -					# ${countTime} ./strabon -db endpoint store $file
    1.79 +		echo;echo;echo;echo "File ${file} stored!" >> ${logFile}
    1.80  
    1.81 -					tmr1=$(timer)
    1.82 -					../endpoint store ${ENDPOINT} N-Triples -u ${file}
    1.83 -					tmr2=$(timer)
    1.84 -					printf '%s ' $((tmr2-tmr1)) >>stderr.txt
    1.85 +		# insertMunicipalities
    1.86 +		echo -n "inserting Municipalities " ;echo; echo; echo;
    1.87 +		# query=`echo "${insertMunicipalities}" `
    1.88 +		# ${countTime} ./strabon -db endpoint update "${query}"
    1.89  
    1.90 -					# sudo -u postgres psql -d endpoint -c 'VACUUM ANALYZE;';
    1.91 +		tmr1=$(timer)
    1.92  
    1.93 -					echo;echo;echo;echo "File ${file} stored!" >> ${logFile}
    1.94 +		query=`echo "${insertMunicipalities}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \
    1.95 +		sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \
    1.96 +		sed "s/SENSOR/MSG2/g"`
    1.97  
    1.98 -					# insertMunicipalities
    1.99 -					echo -n "inserting Municipalities " ;echo; echo; echo;
   1.100 -					# query=`echo "${insertMunicipalities}" `
   1.101 -					# ${countTime} ./strabon -db endpoint update "${query}"
   1.102 +		../endpoint update ${ENDPOINT} "${query}"
   1.103 +		
   1.104 +		tmr2=$(timer)
   1.105  
   1.106 -					tmr1=$(timer)
   1.107 +		echo;echo;echo;echo "File ${file} inserted Municipalities!"
   1.108 +		
   1.109 +		# deleteSeaHotspots
   1.110 +		echo -n "Going to deleteSeaHotspots 20${year}-${month}-${day}T${time2}:00 " ;echo; echo; echo;
   1.111 +		query=`echo "${deleteSeaHotspots}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \
   1.112 +		sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \
   1.113 +		sed "s/SENSOR/MSG2/g"`
   1.114 +		# ${countTime} ./strabon -db endpoint update "${query}"
   1.115  
   1.116 -					query=`echo "${insertMunicipalities}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \
   1.117 -					sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \
   1.118 -					sed "s/SENSOR/MSG2/g"`
   1.119 +		tmr1=$(timer)
   1.120 +		../endpoint update ${ENDPOINT} "${query}"
   1.121  
   1.122 -					../endpoint update ${ENDPOINT} "${query}"
   1.123 -					
   1.124 -					tmr2=$(timer)
   1.125 +		tmr2=$(timer)
   1.126 +		printf '%s ' $((tmr2-tmr1)) >>stderr.txt
   1.127 +		echo;echo;echo;echo "File ${file} deleteSeaHotspots done!"
   1.128  
   1.129 -					echo;echo;echo;echo "File ${file} inserted Municipalities!"
   1.130 -					
   1.131 -					# deleteSeaHotspots
   1.132 -					echo -n "Going to deleteSeaHotspots 20${year}-${month}-${day}T${time2}:00 " ;echo; echo; echo;
   1.133 -					query=`echo "${deleteSeaHotspots}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \
   1.134 -					sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \
   1.135 -					sed "s/SENSOR/MSG2/g"`
   1.136 -					# ${countTime} ./strabon -db endpoint update "${query}"
   1.137 +		# echo "Continue?"
   1.138 +		# read a
   1.139 +			# invalidForFires
   1.140 +		echo -n "invalidForFires 20${year}-${month}-${day}T${time2}:00 "  ; echo; echo ; echo;
   1.141 +		query=`echo "${invalidForFires}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \
   1.142 +		sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \
   1.143 +		sed "s/SENSOR/MSG2/g" |\
   1.144 +		sed "s/SAT/METEOSAT9/g"`
   1.145 +		# ${countTime} ./strabon -db endpoint update "${query}"
   1.146 +		tmr1=$(timer)
   1.147 +		../endpoint update ${ENDPOINT} "${query}"
   1.148 +		tmr2=$(timer)
   1.149 +		printf '%s ' $((tmr2-tmr1)) >>stderr.txt
   1.150 +		echo "File ${file} invalidForFires done!"
   1.151 + 
   1.152 +		# refinePartialSeaHotspots
   1.153 +		echo -n "refinePartialSeaHotspots 20${year}-${month}-${day}T${time2}:00 "  ; echo; echo ; echo;
   1.154 +		query=`echo "${refinePartialSeaHotspots}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \
   1.155 +		sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \
   1.156 +		sed "s/SENSOR/MSG2/g" |\
   1.157 +		sed "s/SAT/METEOSAT9/g"`
   1.158 +		# ${countTime} ./strabon -db endpoint update "${query}"
   1.159 +		tmr1=$(timer)
   1.160 +		../endpoint update ${ENDPOINT} "${query}"
   1.161 +		tmr2=$(timer)
   1.162 +		printf '%s ' $((tmr2-tmr1)) >>stderr.txt
   1.163  
   1.164 -					tmr1=$(timer)
   1.165 -					../endpoint update ${ENDPOINT} "${query}"
   1.166 +		echo "File ${file} refinePartialSeaHotspots done!"
   1.167 +		# echo "Continue?"
   1.168 +		# read a
   1.169  
   1.170 -					tmr2=$(timer)
   1.171 -					printf '%s ' $((tmr2-tmr1)) >>stderr.txt
   1.172 -					echo;echo;echo;echo "File ${file} deleteSeaHotspots done!"
   1.173 +		# refineTimePersistence
   1.174 +		echo -n "Going to refineTimePersistence 20${year}-${month}-${day}T${time2}:00 ";echo;echo;echo; 
   1.175 +		min_acquisition_time=`date --date="20${year}-${month}-${day} ${time2}:00 EEST -30 minutes" +%Y-%m-%dT%H:%m:00`
   1.176 +		query=`echo "${refineTimePersistence}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \
   1.177 +		sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \
   1.178 +		sed "s/SENSOR/MSG2/g" | \
   1.179 +		sed "s/ACQUISITIONS_IN_HALF_AN_HOUR/3.0/g" | \
   1.180 +		sed "s/MIN_ACQUISITION_TIME/${min_acquisition_time}/g" |\
   1.181 +		sed "s/SAT/METEOSAT9/g"`
   1.182  
   1.183 -					# echo "Continue?"
   1.184 -					# read a
   1.185 -						# invalidForFires
   1.186 -					echo -n "invalidForFires 20${year}-${month}-${day}T${time2}:00 "  ; echo; echo ; echo;
   1.187 -					query=`echo "${invalidForFires}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \
   1.188 -					sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \
   1.189 -					sed "s/SENSOR/MSG2/g" |\
   1.190 -					sed "s/SAT/METEOSAT9/g"`
   1.191 -					# ${countTime} ./strabon -db endpoint update "${query}"
   1.192 -					tmr1=$(timer)
   1.193 -					../endpoint update ${ENDPOINT} "${query}"
   1.194 -					tmr2=$(timer)
   1.195 -					printf '%s ' $((tmr2-tmr1)) >>stderr.txt
   1.196 -					echo "File ${file} invalidForFires done!"
   1.197 -			 
   1.198 -					# refinePartialSeaHotspots
   1.199 -					echo -n "refinePartialSeaHotspots 20${year}-${month}-${day}T${time2}:00 "  ; echo; echo ; echo;
   1.200 -					query=`echo "${refinePartialSeaHotspots}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \
   1.201 -					sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \
   1.202 -					sed "s/SENSOR/MSG2/g" |\
   1.203 -					sed "s/SAT/METEOSAT9/g"`
   1.204 -					# ${countTime} ./strabon -db endpoint update "${query}"
   1.205 -					tmr1=$(timer)
   1.206 -					../endpoint update ${ENDPOINT} "${query}"
   1.207 -					tmr2=$(timer)
   1.208 -					printf '%s ' $((tmr2-tmr1)) >>stderr.txt
   1.209 +		#sudo -u postgres psql -d ${DB} -c 'VACUUM ANALYZE;';
   1.210  
   1.211 -					echo "File ${file} refinePartialSeaHotspots done!"
   1.212 -					# echo "Continue?"
   1.213 -					# read a
   1.214 +		tmr1=$(timer)
   1.215 +		../endpoint update ${ENDPOINT} "${query}"
   1.216 +		 tmr2=$(timer)
   1.217 +		printf '%s \n' $((tmr2-tmr1)) >>stderr.txt
   1.218 +		echo;echo;echo;echo "File ${file} timePersistence done!"
   1.219 +		# echo "Continue?"
   1.220 +		# read a
   1.221  
   1.222 -					# refineTimePersistence
   1.223 -					echo -n "Going to refineTimePersistence 20${year}-${month}-${day}T${time2}:00 ";echo;echo;echo; 
   1.224 -					min_acquisition_time=`date --date="20${year}-${month}-${day} ${time2}:00 EEST -30 minutes" +%Y-%m-%dT%H:%m:00`
   1.225 -					query=`echo "${refineTimePersistence}" | sed "s/TIMESTAMP/20${year}-${month}-${day}T${time2}:00/g" | \
   1.226 -					sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \
   1.227 -					sed "s/SENSOR/MSG2/g" | \
   1.228 -					sed "s/ACQUISITIONS_IN_HALF_AN_HOUR/3.0/g" | \
   1.229 -					sed "s/MIN_ACQUISITION_TIME/${min_acquisition_time}/g" |\
   1.230 -					sed "s/SAT/METEOSAT9/g"`
   1.231  
   1.232 -					#sudo -u postgres psql -d ${DB} -c 'VACUUM ANALYZE;';
   1.233 +		# discover
   1.234 +		echo -n "Going to discover 20${year}-${month}-${day}T${time2}:00 ";echo;echo;echo; 
   1.235 +		min_acquisition_time=`date --date="20${year}-${month}-${day} 00:00 EEST" +%Y-%m-%dT%H:%m:00`
   1.236 +		max_acquisition_time=`date --date="20${year}-${month}-${day} 23:59 EEST" +%Y-%m-%dT%H:%m:00`
   1.237 +		query=`echo "${discover}" | \
   1.238 +			sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \
   1.239 +			sed "s/SENSOR/MSG2/g" | \
   1.240 +			sed "s/MIN_ACQUISITION_TIME/${min_acquisition_time}/g" |\
   1.241 +			sed "s/MAX_ACQUISITION_TIME/${max_acquisition_time}/g"`
   1.242 +			
   1.243 +		tmr1=$(timer)
   1.244 +		../endpoint query ${ENDPOINT} "${query}"
   1.245 +		tmr2=$(timer)
   1.246 +		printf '%s \n' $((tmr2-tmr1)) >>discover.txt
   1.247 +		echo;echo;echo;echo "Discovered hotspots done!"
   1.248  
   1.249 -					tmr1=$(timer)
   1.250 -					../endpoint update ${ENDPOINT} "${query}"
   1.251 -					 tmr2=$(timer)
   1.252 -					printf '%s \n' $((tmr2-tmr1)) >>stderr.txt
   1.253 -					echo;echo;echo;echo "File ${file} timePersistence done!"
   1.254 -					# echo "Continue?"
   1.255 -					# read a
   1.256 -				done #minutes
   1.257 -			done #hours
   1.258 +	done
   1.259 +done
   1.260  
   1.261 -			# discover
   1.262 -			echo -n "Going to discover 20${year}-${month}-${day}T${time2}:00 ";echo;echo;echo; 
   1.263 -			min_acquisition_time=`date --date="20${year}-${month}-${day} 00:00 EEST" +%Y-%m-%dT%H:%m:00`
   1.264 -			max_acquisition_time=`date --date="20${year}-${month}-${day} 23:59 EEST" +%Y-%m-%dT%H:%m:00`
   1.265 -			query=`echo "${discover}" | \
   1.266 -				sed "s/PROCESSING_CHAIN/DynamicThresholds/g" | \
   1.267 -				sed "s/SENSOR/MSG2/g" | \
   1.268 -				sed "s/MIN_ACQUISITION_TIME/${min_acquisition_time}/g" |\
   1.269 -				sed "s/MAX_ACQUISITION_TIME/${max_acquisition_time}/g"`
   1.270 -				
   1.271 -			tmr1=$(timer)
   1.272 -			../endpoint query ${ENDPOINT} "${query}"
   1.273 -			tmr2=$(timer)
   1.274 -			printf '%s \n' $((tmr2-tmr1)) >>discover.txt
   1.275 -			echo;echo;echo;echo "Discovered hotspots done!"
   1.276 -	    
   1.277 -		done #days
   1.278 -	done #months
   1.279 -done #years
   1.280 -