#####################################################################
# SCRIPT: SS_ExtractTable.txt
#
# This script extracts a table from an html input. It returns a portion of the web page
# that contains the table, starting with the opening <table tag and ending with the
# closing </table> tag.
#
# The html input is passed as input argument/FVA $input to this script.
#
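# A web page may have several tables. To extract the correct table, these tables are numbered
# starting at 1. For example, the 12th table starts at the 12th instance of <table . The table
# number is passed as input argument/FVA $number to this script.
#
# For example, with the html already loaded into a str variable $s1 -
#
# script "C:/Scripts/SS_ExtractTable.txt" input($s1) number(12)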
#
# The above will produce text output on screen. If you want to save the output to a str variable
# for doing further processing with it, redirect the output as below -
#
# var str s2
# script "C:/Scripts/SS_ExtractTable.txt" input($s1) number(12) > $s2
#
# The script can be edited to meet your requirements more precisely.
#
# If you don't have biterScripting, you can download it from biterscripting.com .
# Install all sample scripts using the following command
#
# script "http://www.biterscripting.com/Download/SS_AllSamples.txt"
#
#####################################################################
# Input arguments (FVAs) supplied by the caller.
var str input     # the html text to search
var int number    # 1-based index of the table to extract

# Fall back to the first table when no usable table number was passed.
if ($number < 1)
    set $number = 1
endif
# NOTE(review): the <table and </table> literals in this section had been stripped out of the
# file (markup removal), which left truncated statements and an unterminated loop. They are
# reconstructed here from the surrounding comments - confirm against a pristine copy.

# Are there that many tables in the input ? If not, there is nothing to extract.
# NOTE(review): the original action for this case is not recoverable - exit 1 is assumed.
if ( { sen -c "^<table^" $input } < $number )
    exit 1
endif

# Strip off everything before the number'th instance of <table . The stripped text is not
# needed, so it is sent to null.
stex -c ("]^<table^"+makestr(int($number))) $input > null
# For example, if $number is 12, the stex command is expecting argument in the form of
# ]^<table^12 .

# Get the portion up to and including the next </table>. Going forward, we will collect
# output in a separate variable $output.
var str output
stex -c -r "^</table\>^]" $input >> $output
# Note that, with the -r option, we are passing a regular expression to the above stex command.
# The character > has a special meaning in regular expression. So, we must escape it with
# a backslash.

# Since the tables may be nested, the immediately following </table> is not always the end of
# number'th table. This will happen if there are tables inside the table being extracted. We
# want to remove these inside tables. To do this, we count the number of instances of <table
# collected so far. More than one means an inner table is still present in $output.
var int count
set $count = { sen -c "^<table^" $output }
while ( $count > 1 )
do
    # Remove the inner most table. The last <table ... </table> pair is the inner most table,
    # so drop everything from the last <table to the end of $output.
    stex -c "[^<table^l" $output > null

    # Get the portion up to next </table>.
    stex -c -r "^</table\>^]" $input >> $output

    # Get the count again.
    set $count = { sen -c "^<table^" $output }
done

# Show the extracted table. The caller can redirect this output to a str variable.
echo $output