##################################################################### # SCRIPT: SS_ExtractTable.txt # # This script extracts a table from an html input. It returns a portion of the web page # that contains the table, starting with <table and ending with </table> . # # The html input is passed as input argument/FVA $input to this script. # # A web page may have several tables. To extract the correct table, these tables are numbered # starting at 1. For example, the 12th table starts at the 12th instance of <table .
$s1 # script "C:/Scripts/SS_ExtractTable.txt" input($s1) number(12) # # The above will produce text output on screen. If you want to save the output to a str variable # for doing further processing with it, redirect the output as below - # # var str s2 # script "C:/Scripts/SS_ExtractTable.txt" input($s1) number(12) > $s2 # # The script can be edited to meet your requirements more precisely. # # If you don't have biterScripting, you can download it from biterscripting.com . # Install all sample scripts using the following command # # script "http://www.biterscripting.com/Download/SS_AllSamples.txt" # ##################################################################### var str input # html input var int number # Table number to extract # Is the table number specified ? if ($number <= 0) set $number = 1 endif # Are there that many tables in the input ? if ( { sen -c "^ null # For example, if $number is 12, the stex command is expecting argument in the form of # ]^ . Going forward, we will collect output in a # separate variable $output. var str output stex -c -r "^^]" $input >> $output # Note that, with the -r option, we are passing a regular expression to the above stex command. # The character > has a special meaning in regular expression. So, we must escape it with # a backslash. # Since the tables may be nested, the immediately
is not always the end of number'th table. # This will happen if there are tables inside the table being extracted. We want to remove these # inside tables. To do this, we count the number of instance of pair. var int count set $count = { sen -c "^ 1 ) do # Remove the inner most table. The last pair is the inner most table. stex -c "[^ null # Get the portion up to next
. stex -c -r "^^]" $input >> $output # Get the count again. set $count = { sen -c "^